DjangoのQuerySetのannotateとfilterの順番について


今日ドキュメント読んで、なるほどと思ったのでメモ

以下のようなモデルとデータがあるとして、Publisherごとにratingが3.0以上のBookの数をカウントしたい場合

class Publisher(models.Model):
    name = models.CharField(max_length=300)


class Book(models.Model):
    isbn = models.CharField(max_length=9)
    name = models.CharField(max_length=300)
    price = models.DecimalField(max_digits=10, decimal_places=2)
    rating = models.FloatField()
    publisher = models.ForeignKey(Publisher)
[{"pk": 1, "model": "books.publisher", "fields": {"name": "test1"}},
 {"pk": 2, "model": "books.publisher", "fields": {"name": "test2"}},
 {"pk": 1, "model": "books.book", "fields": {"rating": 5.0, "price": "2000", "isbn": "12345", "name": "book1", "publisher": 1}},
 {"pk": 2, "model": "books.book", "fields": {"rating": 2.0, "price": "3500", "isbn": "12346", "name": "book2", "publisher": 1}},
 {"pk": 3, "model": "books.book", "fields": {"rating": 4.0, "price": "2400", "isbn": "12347", "name": "book3", "publisher": 2}},
 {"pk": 4, "model": "books.book", "fields": {"rating": 5.0, "price": "3800", "isbn": "12348", "name": "book4", "publisher": 2}}
]


以下のようにannotateを先に呼ぶと期待した結果にならない

q = Publisher.objects.annotate(num_books=Count('book')).filter(book__rating__gt=3.0)
[r.num_books for r in q]
# => [2, 4]

ドキュメントには、「annotateがクエリに対して適用されたとき、annotationの処理は、その時点でのクエリの状態に対して実行される」とあるので、annotateにfilterを効かせるには以下のように先にfilterを呼び出す必要がある

q2 = Publisher.objects.filter(book__rating__gt=3.0).annotate(num_books=Count('book'))
[r.num_books for r in q2]
# => [1,2]


発行されるSQLを確認してみると以下のように異なる

-- annotateが先
-- Publisher.objects.annotate(num_books=Count('book')).filter(book__rating__gt=3.0) 

SELECT "books_publisher"."id", "books_publisher"."name", COUNT("books_book"."id") AS "num_books"
FROM "books_publisher"
LEFT OUTER JOIN "books_book" ON ("books_publisher"."id" = "books_book"."publisher_id")
INNER JOIN "books_book" T3 ON ("books_publisher"."id" = T3."publisher_id")
WHERE T3."rating" > 3.0
GROUP BY "books_publisher"."id", "books_publisher"."name";

--  filterが先
--  Publisher.objects.filter(book__rating__gt=3.0).annotate(num_books=Count('book'))

SELECT "books_publisher"."id", "books_publisher"."name", COUNT("books_book"."id") AS "num_books"
FROM "books_publisher"
LEFT OUTER JOIN "books_book" ON ("books_publisher"."id" = "books_book"."publisher_id")
WHERE "books_book"."rating" > 3.0
GROUP BY "books_publisher"."id", "books_publisher"."name";

全然違うので要注意


参考)

http://docs.nullpobug.com/django-doc-ja/trunk/topics/db/aggregation.html#order-of-annotate-and-filter-clauses