0

Working in Django Rest Framework (DRF), django-filter, and PostgreSQL, and having an issue with one of our endpoints.

Assume the following:

# models.py
class Company(models.Model):
    name = models.CharField(max_length=50)


class Venue(models.Model):
    company = models.ForeignKey(to="Company", on_delete=models.CASCADE)
    name = models.CharField(max_length=50)

# create some data

company1 = Company.objects.create(name="Proper Ltd")
company2 = Company.objects.create(name="MyCompany Ltd")

Venue.objects.create(name="Venue #1", company=company1)
Venue.objects.create(name="Venue #2", company=company1)
Venue.objects.create(name="Property #1", company=company2)
Venue.objects.create(name="Property #2", company=company2)

# viewset
class CompanyViewSet(viewsets.ReadOnlyModelViewSet):
    serializer_class = CompanyVenueSearchSerializer
    queryset = (
        Venue.objects.all()
        .select_related("company")
        .order_by("company__name")
    )
    permission_classes = (ReadOnly,)
    http_method_names = ["get"]
    filter_backends = (filters.DjangoFilterBackend,)
    filterset_class = CompanyVenueListFilter
    pagination_class = None

# filterset
class CompanyVenueListFilter(filters.FilterSet):
    text = filters.CharFilter(method="name_search")

    def name_search(self, qs, name, value):
        return qs.filter(
            Q(name__icontains=value)
            | Q(company__name__icontains=value)
        )

    class Meta:
        model = Venue
        fields = [
            "name",
            "company__name",
        ]

# serializer
class CompanyVenueSearchSerializer(serializers.ModelSerializer):
    company_id = serializers.IntegerField(source="company.pk")
    company_name = serializers.CharField(source="company.name")
    venue_id = serializers.IntegerField(source="pk")
    venue_name = serializers.CharField(source="name")

    class Meta:
        model = Venue
        fields = (
            "company_id",
            "company_name",
            "venue_id",
            "venue_name",
        )

We now want to allow the user to filter the results by sending a query in the request, e.g. curl -X GET https://example.com/api/company/?text=pr.

The serializer result will look something like:

[
   {
      "company_id":1,
      "company_name":"Proper Ltd",
      "venue_id":1,
      "venue_name":"Venue #1"
   },
   {  // update ORM to exclude this dict
      "company_id":1,
      "company_name":"Proper Ltd",
      "venue_id":2,
      "venue_name":"Venue #1"
   },
   {
      "company_id":2,
      "company_name":"MyCompany Ltd",
      "venue_id":3,
      "venue_name":"Property #1"
   },
   {
      "company_id":2,
      "company_name":"MyCompany Ltd",
      "venue_id":4,
      "venue_name":"Property #1"
   }
]

Expected result:

Want to rewrite the ORM query so that if the filter ("pr") matches the venue__name, return all venues. But if the filter matches the company__name, only return it once, thus in the example above the second dict in the list would be excluded/removed.

Is this possible?

kunambi
  • 756
  • 1
  • 10
  • 25

2 Answers2

1

What you can do is to filter Company that matches name filtering and annotate them with the first related Venue and then combine it's results with the second requirement to return venue with name=value

from django.db.models import OuterRef, Q, Subquery

value = "pr"
first_venue = Venue.objects.filter(company__in=OuterRef("id")).order_by("id")
company_qs = Company.objects.filter(name__icontains=value).annotate(
    first_venue_id=Subquery(first_venue.values("id")[:1])
)
venue_qs = Venue.objects.filter(
    Q(name__icontains=value)
    | Q(id__in=company_qs.values_list("first_venue_id", flat=True))
)

The query executed when accessing values of venue_qs looks like

SELECT
    "venues_venue"."id",
    "venues_venue"."company_id",
    "venues_venue"."name"
FROM
    "venues_venue"
WHERE
    (
        UPPER("venues_venue"."name"::TEXT) LIKE UPPER(% pr %)
        OR "venues_venue"."id" IN (
            SELECT
                (
                    SELECT
                        U0."id"
                    FROM
                        "venues_venue" U0
                    WHERE
                        U0."company_id" IN (V0."id")
                    ORDER BY
                        U0."id" ASC
                    LIMIT
                        1
                ) AS "first_venue_id"
            FROM
                "venues_company" V0
            WHERE
                UPPER(V0."name"::TEXT) LIKE UPPER(% pr %)
        )
    )

This is how the filter should look like

class CompanyVenueListFilter(filters.FilterSet):
    text = filters.CharFilter(method="name_search")

    def name_search(self, qs, name, value):
        first_venue = Venue.objects.filter(company__in=OuterRef("id")).order_by("id")
        company_qs = Company.objects.filter(name__icontains=value).annotate(
            first_venue_id=Subquery(first_venue.values("id")[:1])
        )
        return qs.filter(
            Q(name__icontains=value)
            | Q(id__in=company_qs.values_list("first_venue_id", flat=True))
        )

    class Meta:
        model = Venue
        fields = [
            "name",
            "company__name",
        ]

Update for Django 3.2.16

Seems like the query above will not work for such version because it generated a query without parentheses in WHERE clause around V0."id", chunk of query looks like

WHERE
    U0."company_id" IN V0."id"

and it makes PostgreSQL complain with error

ERROR: syntax error at or near "V0"
LINE 17: U0."company_id" IN V0."id" 

For Django==3.2.16 the filtering method in CompanyVenueListFilter could look like following:

    def name_search(self, qs, name, value):
        company_qs = Company.objects.filter(name__icontains=value)
        venues_qs = (
            Venue.objects.filter(company__in=company_qs)
            .order_by("company_id", "id")
            .distinct("company_id")
        )
        return qs.filter(Q(name__icontains=value) | Q(id__in=venues_qs.values_list("id")))

The answer is based on other stackoverflow answer and django docs

wiaterb
  • 496
  • 3
  • 8
  • On first attempt, we received `AttributeError: 'QuerySet' object has no attribute 'objects'`. After removing `.objects` this error shows up: `syntax error at or near "V0" LINE 1: ... FROM "clients_venue" U0 WHERE U0."company_id" IN V0."id" OR...`. This is the generated SQL: https://pastebin.com/bDDCb9kU – kunambi Jan 08 '23 at 13:33
  • @kunambi The first error was mistake in my code. I cannot make any sense from the second error line you shared. What do you get if you run this query straight in postgres without using django models? – wiaterb Jan 08 '23 at 14:23
  • The query which you posted works perfectly well, but it isn't the same which is generated by Django ORM. When I run your query, I can see `venue.id`, `venue.company_id`, and `venue.name`. However, I believe that the error from Django comes from when trying to access `company.name`? – kunambi Jan 08 '23 at 14:29
  • So, could you please share the query that is generated by django orm in your case? I have taken a look at the pastebin and the second query there looks extremely alike the query I shared in the answer above. What are those queries from pastebin? – wiaterb Jan 08 '23 at 14:37
  • I have updated the pastebin with the resulting query generated by Django ORM, I hope the line breaks and comments will help: https://pastebin.com/bDDCb9kU – kunambi Jan 08 '23 at 14:45
  • The query you shared works perfectly fine in postgres in my instance. What error does postgres return in your case when you remove those comments from the pastebin and execute via pgcli or pgadmin? What postgres version do you use? – wiaterb Jan 08 '23 at 15:19
  • Let us [continue this discussion in chat](https://chat.stackoverflow.com/rooms/250943/discussion-between-kunambi-and-wiaterb). – kunambi Jan 08 '23 at 15:30
0

We have a temporary solution, which we're a bit wary about but it seems to do its job. Won't tag this answer as accepted as we're still hoping that someone has a more pythonic/djangoistic solution to the problem.

# viewset
class CompanyViewSet(viewsets.ReadOnlyModelViewSet):
    serializer_class = CompanyVenueSearchSerializer
    queryset = (
        Venue.objects.all()
        .select_related("company")
        .order_by("company__name")
    )
    permission_classes = (ReadOnly,)
    http_method_names = ["get"]
    filter_backends = (filters.DjangoFilterBackend,)
    filterset_class = CompanyVenueListFilter
    pagination_class = None

    def list(self, request, *args, **kwargs):
        queryset = self.filter_queryset(self.get_queryset())
        serializer = self.get_serializer(queryset, many=True)
        text = request.GET.get("text").lower()
        first_idx = 0
        to_remove = []

        for data in serializer.data:
            if text in data.get("name").lower() and text not in data.get("venue_name").lower():
                if data.get("id") != first_idx:
                    """We don't want to remove the first hit of a company whose name matches"""
                    first_idx = data.get("id")
                    continue
                to_remove.append((data.get("id"), data.get("venue_id")))

        return Response(
            [
                data
                for data in serializer.data
                if (data.get("id"), data.get("venue_id")) not in to_remove
            ],
            status=status.HTTP_200_OK,
        )
kunambi
  • 756
  • 1
  • 10
  • 25