1

Models:

class GradePolicy(models.Model):
    name = models.CharField(max_length=30)
    minScore = models.DecimalField(default=0, max_digits=4, decimal_places=1, db_column="minscore")
    maxScore = models.DecimalField(max_digits=4, decimal_places=1, default=100, db_column="maxscore")
    policyChoices = (
        ('Numerical', 'Numerical'),
        ('Textual', 'Textual')
    )
    type = models.CharField(max_length=30, default='Textual', choices=policyChoices)


class GradeLevel(models.Model):
    name = models.CharField(max_length=30)
    score = models.DecimalField(decimal_places=2, max_digits=5)
    abbreviation = models.CharField(max_length=4)
    policy = models.ForeignKey(GradePolicy, null=True, blank=True,
                           on_delete=models.DO_NOTHING)  

Views:

class GradePolicyViewSet(viewsets.ModelViewSet):
    """
    Retrieve grade policies
    """
    queryset = GradePolicy.objects.prefetch_related('gradelevel_set').order_by('pk')
    serializer_class = GradePolicySerializer

class GradeLevelViewSet(viewsets.ModelViewSet):
    queryset = GradeLevel.objects.all().order_by('pk')
    serializer_class = GradeLevelSerializer

Serializers:

class GradeLevelSerializer(serializers.HyperlinkedModelSerializer):
    url = serializers.HyperlinkedIdentityField(view_name="gbook:gradelevels-detail")
    policy = serializers.PrimaryKeyRelatedField(read_only=True)

    class Meta:
        model = GradeLevel
        fields = ['pk', 'url', 'name', 'score', 'abbreviation', 'policy']


class GradePolicySerializer(serializers.HyperlinkedModelSerializer):
    url = serializers.HyperlinkedIdentityField(view_name="gbook:gradepolicies-detail")
    levels = serializers.SerializerMethodField()

    def get_levels(self, obj):
        policy: GradePolicy = obj
        levels = policy.gradelevel_set.prefetch_related('policy').order_by("-score").all()
        return GradeLevelSerializer(levels, many=True, context={'request': None}).data

    class Meta:
        model = GradePolicy
        fields = ['pk', 'url', 'name', 'minScore', 'maxScore', 'type', 'levels']

I'm getting reports from Sentry that calling /gradepolicies, which is the list endpoint for GradePolicy, is causing N+1 queries. Looking at the queries, that does seem to be the case:

(0.004) SELECT "gbook_gradepolicy"."id", "gbook_gradepolicy"."name", "gbook_gradepolicy"."minscore", "gbook_gradepolicy"."maxscore", "gbook_gradepolicy"."type" FROM "gbook_gradepolicy" ORDER BY "gbook_gradepolicy"."id" ASC; args=()

(0.005) SELECT "gbook_gradelevel"."id", "gbook_gradelevel"."name", "gbook_gradelevel"."score", "gbook_gradelevel"."abbreviation", "gbook_gradelevel"."policy_id" FROM "gbook_gradelevel" WHERE "gbook_gradelevel"."policy_id" = 2 ORDER BY "gbook_gradelevel"."score" DESC; args=(2,)

(0.001) SELECT "gbook_gradelevel"."id", "gbook_gradelevel"."name", "gbook_gradelevel"."score", "gbook_gradelevel"."abbreviation", "gbook_gradelevel"."policy_id" FROM "gbook_gradelevel" WHERE "gbook_gradelevel"."policy_id" = 3 ORDER BY "gbook_gradelevel"."score" DESC; args=(3,)

(0.001) SELECT "gbook_gradelevel"."id", "gbook_gradelevel"."name", "gbook_gradelevel"."score", "gbook_gradelevel"."abbreviation", "gbook_gradelevel"."policy_id" FROM "gbook_gradelevel" WHERE "gbook_gradelevel"."policy_id" = 4 ORDER BY "gbook_gradelevel"."score" DESC; args=(4,)

(0.001) SELECT "gbook_gradelevel"."id", "gbook_gradelevel"."name", "gbook_gradelevel"."score", "gbook_gradelevel"."abbreviation", "gbook_gradelevel"."policy_id" FROM "gbook_gradelevel" WHERE "gbook_gradelevel"."policy_id" = 5 ORDER BY "gbook_gradelevel"."score" DESC; args=(5,)

(0.001) SELECT "gbook_gradelevel"."id", "gbook_gradelevel"."name", "gbook_gradelevel"."score", "gbook_gradelevel"."abbreviation", "gbook_gradelevel"."policy_id" FROM "gbook_gradelevel" WHERE "gbook_gradelevel"."policy_id" = 6 ORDER BY "gbook_gradelevel"."score" DESC; args=(6,)

(0.001) SELECT "gbook_gradelevel"."id", "gbook_gradelevel"."name", "gbook_gradelevel"."score", "gbook_gradelevel"."abbreviation", "gbook_gradelevel"."policy_id" FROM "gbook_gradelevel" WHERE "gbook_gradelevel"."policy_id" = 7 ORDER BY "gbook_gradelevel"."score" DESC; args=(7,)

(0.001) SELECT "gbook_gradelevel"."id", "gbook_gradelevel"."name", "gbook_gradelevel"."score", "gbook_gradelevel"."abbreviation", "gbook_gradelevel"."policy_id" FROM "gbook_gradelevel" WHERE "gbook_gradelevel"."policy_id" = 8 ORDER BY "gbook_gradelevel"."score" DESC; args=(8,)

To prevent this, I'm prefetching as shown in the code above. The result of that is:

(0.002) SELECT "gbook_gradepolicy"."id", "gbook_gradepolicy"."name", "gbook_gradepolicy"."minscore", "gbook_gradepolicy"."maxscore", "gbook_gradepolicy"."type" FROM "gbook_gradepolicy" ORDER BY "gbook_gradepolicy"."id" ASC; args=()

(0.004) SELECT "gbook_gradelevel"."id", "gbook_gradelevel"."name", "gbook_gradelevel"."score", "gbook_gradelevel"."abbreviation", "gbook_gradelevel"."policy_id" FROM "gbook_gradelevel" WHERE "gbook_gradelevel"."policy_id" IN (2, 3, 4, 5, 6, 7, 8); args=(2, 3, 4, 5, 6, 7, 8)

(0.001) SELECT "gbook_gradelevel"."id", "gbook_gradelevel"."name", "gbook_gradelevel"."score", "gbook_gradelevel"."abbreviation", "gbook_gradelevel"."policy_id" FROM "gbook_gradelevel" WHERE "gbook_gradelevel"."policy_id" = 2 ORDER BY "gbook_gradelevel"."score" DESC; args=(2,)

(0.001) SELECT "gbook_gradelevel"."id", "gbook_gradelevel"."name", "gbook_gradelevel"."score", "gbook_gradelevel"."abbreviation", "gbook_gradelevel"."policy_id" FROM "gbook_gradelevel" WHERE "gbook_gradelevel"."policy_id" = 3 ORDER BY "gbook_gradelevel"."score" DESC; args=(3,)

(0.001) SELECT "gbook_gradelevel"."id", "gbook_gradelevel"."name", "gbook_gradelevel"."score", "gbook_gradelevel"."abbreviation", "gbook_gradelevel"."policy_id" FROM "gbook_gradelevel" WHERE "gbook_gradelevel"."policy_id" = 4 ORDER BY "gbook_gradelevel"."score" DESC; args=(4,)

(0.000) SELECT "gbook_gradelevel"."id", "gbook_gradelevel"."name", "gbook_gradelevel"."score", "gbook_gradelevel"."abbreviation", "gbook_gradelevel"."policy_id" FROM "gbook_gradelevel" WHERE "gbook_gradelevel"."policy_id" = 5 ORDER BY "gbook_gradelevel"."score" DESC; args=(5,)

(0.001) SELECT "gbook_gradelevel"."id", "gbook_gradelevel"."name", "gbook_gradelevel"."score", "gbook_gradelevel"."abbreviation", "gbook_gradelevel"."policy_id" FROM "gbook_gradelevel" WHERE "gbook_gradelevel"."policy_id" = 6 ORDER BY "gbook_gradelevel"."score" DESC; args=(6,)

(0.000) SELECT "gbook_gradelevel"."id", "gbook_gradelevel"."name", "gbook_gradelevel"."score", "gbook_gradelevel"."abbreviation", "gbook_gradelevel"."policy_id" FROM "gbook_gradelevel" WHERE "gbook_gradelevel"."policy_id" = 7 ORDER BY "gbook_gradelevel"."score" DESC; args=(7,)

(0.000) SELECT "gbook_gradelevel"."id", "gbook_gradelevel"."name", "gbook_gradelevel"."score", "gbook_gradelevel"."abbreviation", "gbook_gradelevel"."policy_id" FROM "gbook_gradelevel" WHERE "gbook_gradelevel"."policy_id" = 8 ORDER BY "gbook_gradelevel"."score" DESC; args=(8,)

I see in there what looks like the result of the prefetch ((0.004) SELECT "gbook_gradelevel"."id", "gbook_gradelevel"."name", "gbook_gradelevel"."score", "gbook_gradelevel"."abbreviation", "gbook_gradelevel"."policy_id" FROM "gbook_gradelevel" WHERE "gbook_gradelevel"."policy_id" IN (2, 3, 4, 5, 6, 7, 8); args=(2, 3, 4, 5, 6, 7, 8), but it still performs all of the extra queries below.

How can I get rid of the N+1 query issue with this lookup?

DeltaG
  • 760
  • 2
  • 9
  • 28
  • Why are you using `prefetch_related` when you can use `select_related` ? – Ahtisham Aug 05 '23 at 15:44
  • In which spot? There are two – DeltaG Aug 05 '23 at 15:46
  • 1
    I believe the issue is in `get_levels` method. You don't have to call the queryset for each instance of `GradePolicy` – Ahtisham Aug 05 '23 at 15:51
  • I think that you're correct about the cause of the extra queries, but I wasn't able to get the levels attached to the serializer any other way. Is there another path? – DeltaG Aug 05 '23 at 15:53
  • Have you replaced `levels = serializers.SerializerMethodField()` to `levels = GradeLevelSerializer(many=True)` and remove `get_levels` method – Ahtisham Aug 05 '23 at 16:12
  • If I make that replacement, I get ```Got AttributeError when attempting to get a value for field `levels` on serializer `GradePolicySerializer`.The serializer field might be named incorrectly and not match any attribute or key on the `GradePolicy` instance. Original exception text was: 'GradePolicy' object has no attribute 'levels'.``` The fact that there's no `levels` field on the policy object was the reason I added the getter method – DeltaG Aug 05 '23 at 16:15
  • This works, though! ```gradelevel_set = GradeLevelSerializer(many=True) class Meta: model = GradePolicy fields = ['pk', 'url', 'name', 'minScore', 'maxScore', 'type', 'gradelevel_set']``` – DeltaG Aug 05 '23 at 16:17
  • Oh sorry my bad `prefetch_related` is fine as you are doing reverse foreign key relationship in viewset. – Ahtisham Aug 05 '23 at 16:26

1 Answers1

2

It is the get_levels method that is slowing down your API. You don't need it just replace it with your GradeLevelSerializer like this:

class GradePolicySerializer(serializers.HyperlinkedModelSerializer):
    url = serializers.HyperlinkedIdentityField(view_name="gbook:gradepolicies-detail")
    levels = GradeLevelSerializer(many=True, read_only=True)

    class Meta:
        model = GradePolicy
        fields = ['pk', 'url', 'name', 'minScore', 'maxScore', 'type', 'levels']

The prefetch_related is fine in viewset as you are doing reverse relationship.

You can specify the ordering on score as well in your viewset using Prefetch

By default DRF doesn't optimize the database hits for more take a look here

You can also update related name of your reverse relationship like this:

class GradeLevel(models.Model):
  ...
  policy = models.ForeignKey(GradePolicy, null=True, blank=True, on_delete=models.DO_NOTHING, related_name='levels')
Ahtisham
  • 9,170
  • 4
  • 43
  • 57
  • This almost works. You have to add `source` parameter: `levels = GradeLevelSerializer(source='gradelevel_set', many=True)`. With that change, I'll accept this answer - thanks for the help! – DeltaG Aug 05 '23 at 17:47
  • Did you test how many queries are happening now ? And how much performance improvement you have achieved ? – Ahtisham Aug 05 '23 at 18:28
  • The extra queries are definitely gone now. The performace hasn't changed all that much. I have a few much larger queries in production that I'm going to apply this to that will hopefully show some performance gains. – DeltaG Aug 05 '23 at 19:18
  • Yes! In production, same query from this morning to today: 39.2s, 34.8s -> 2.23s, 2.71s – DeltaG Aug 05 '23 at 20:36
  • You mean now because of removing `get_levels` change it is taking 2 seconds instead of 39 seconds right ? – Ahtisham Aug 06 '23 at 02:51
  • It's actually a different query, the prefetch on two levels of nesting did it – DeltaG Aug 06 '23 at 03:24