Jump to edit to see more real-life code example, that doesn't work after changing the query order
Here are my models:
class ModelA(models.Model):
field_1a = models.CharField(max_length=32)
field_2a = models.CharField(max_length=32)
class ModelB(models.Model):
field_1b = models.CharField(max_length=32)
field_2b = models.CharField(max_length=32)
Now, create 2 instances each:
ModelA.objects.create(field_1a="1a1", field_2a="1a2")
ModelA.objects.create(field_1a="2a1", field_2a="2a2")
ModelB.objects.create(field_1b="1b1", field_2b="1b2")
ModelB.objects.create(field_1b="2b1", field_2b="2b2")
If I'll query for only one model with annotations, I get something like that:
>>> ModelA.objects.all().annotate(field1=F("field_1a"), field2=F("field_2a")).values("field1", "field2")
[{"field1": "1a1", "field2": "1a2"}, {"field1": "2a1", "field2": "2a2"}]
This is correct behavior. The problem starts, when I want to get union of those two models:
# model A first, with annotate
query = ModelA.objects.all().annotate(field1=F("field_1a"), field2=F("field_2a"))
# now union with model B, also annotated
query = query.union(ModelB.objects.all().annotate(field1=F("field_1b"), field2=F("field_2b")))
# get only field1 and field2
query = query.values("field1", "field2")
# the results are skewed:
assert list(query) == [
{"field1": 1, "field2": "1a1"},
{"field1": 1, "field2": "1b1"},
{"field1": 2, "field2": "2a1"},
{"field1": 2, "field2": "2b1"},
]
The assert passes correctly, which means that the results are wrong. It seems like the values()
didn't match the variable name, it just iterated over the object as on a tuple. The value of field1
is actually the object's ID, and field2
is field1
.
This is pretty easy to fix in such simple models, but my real models are quite complex, and they have a different number of fields. How do I union them correctly?
EDIT
Below you can find an extended example that fails regardless of the order of union()
and values()
- the models are slightly bigger now, and it seems that the different fields count somehow confuses Django:
# models
class ModelA(models.Model):
field_1a = models.CharField(max_length=32)
field_1aa = models.CharField(max_length=32, null=True)
field_1aaa = models.CharField(max_length=32, null=True)
field_2a = models.CharField(max_length=32)
extra_a = models.CharField(max_length=32)
class ModelB(models.Model):
extra = models.CharField(max_length=32)
field_1b = models.CharField(max_length=32)
field_2b = models.CharField(max_length=32)
# test
ModelA.objects.create(field_1a="1a1", field_2a="1a2", extra_a="1extra")
ModelA.objects.create(field_1a="2a1", field_2a="2a2", extra_a="2extra")
ModelB.objects.create(field_1b="1b1", field_2b="1b2", extra="3extra")
ModelB.objects.create(field_1b="2b1", field_2b="2b2", extra="4extra")
values = ("field1", "field2", "extra")
query = (
ModelA.objects.all()
.annotate(
field1=F("field_1a"), field2=F("field_2a"), extra=F("extra_a")
)
.values(*values)
)
query = query.union(
ModelB.objects.all()
.annotate(field1=F("field_1b"), field2=F("field_2b"))
.values(*values)
)
# outcome
assert list(query) == [
{"field1": "1a1", "field2": "1a2", "extra": "1extra"},
{"field1": "2a1", "field2": "2a2", "extra": "2extra"},
{"field1": "3extra", "field2": "1b1", "extra": "1b2"},
{"field1": "4extra", "field2": "2b1", "extra": "2b2"},
]