I'm running a query on elasticsearch using function_score where the value of a nested field is used for the calculation (price in this case). Which of the following is a better way to index the data?
{
"name": "John",
"age": 50,
"country": "US",
"subscription": {
"Plan1": {
"price": 100,
"date": "June 5th"
},
"Plan2": {
"price": 50,
"date": "June 6th"
}
}
}
OR
{
"name": "John",
"age": 50,
"country": "US",
"subscription": [
{
"name": "Plan1",
"price": 100,
"date": "June 5th"
},
{
"name": "Plan2"
"price": 50,
"date": "June 6th"
}
]
}
The query would be filtering on the "plan name" and "price", and "price" will be used for score calculation. The number of plans maybe upwards of 20.
Edit 1: Sample query for approach 2
{
"query": {
"function_score": {
"query": {
"bool": {
"filter": [
{
"range": {
"createdatutc": {
"gte": "2022-11-01T00:00:00.000Z",
"lt": "2023-05-06T00:00:00.000Z",
"format": "strict_date_optional_time"
}
}
},
{
"terms": {
"country": [
"US"
]
}
},
{
"term": {
"subscription.name": {
"value": "Plan1"
}
}
}
]
}
},
"functions": [
{
"filter": {
"query_string": {
"default_field": "name",
"query": "\"john\""
}
},
"script_score": {
"script": {
"lang": "painless",
"source": "for (item in params._source.subscription) {if (item.name == 'Plan1') {return item.price}}"
}
}
}
],
"score_mode": "sum",
"boost_mode": "replace"
}
}
}