We're creating some packages, but that process is currently rather slow, because of the sheer amount of data being sent between microservices. Therefore, I have pruned the information being sent between those microservices and instead want to enrich the documents with the necessary information directly from within ElasticSearch. This gives documents of the following shape:
{
"_index" : "packages-2022.02.28",
"_type" : "_doc",
"_id" : "SG_DH-8019-ao-74783-20220315-12",
"_score" : 1.0,
"_source" : {
"id" : "SG_DH-8019-ao-74783-20220315-12",
"updatedOn" : "2022-02-28T14:45:57.7511562+01:00",
"code" : "SG",
"createdDate" : "2022-02-28T15:17:48.2571391+01:00",
"content" : {
"contentId" : "74783",
"units" : [
{
"id" : "HB_DBL.ST_RO_NFP",
"globalId" : "74783_HB_DBL.ST_RO_NFP",
"globalIntId" : -592692223,
"forPackaging" : false
},
{
"id" : "HB_DBL.ST_BB_NFP",
"globalId" : "74783_HB_DBL.ST_BB_NFP",
"globalIntId" : 446952442,
"forPackaging" : false
},
{
"id" : "HB_DBL.ST_AI_NFP",
"globalId" : "74783_HB_DBL.ST_AI_NFP",
"globalIntId" : -1174348304,
"forPackaging" : false
},
{
"id" : "HB_DBL.SU_RO_NFP",
"globalId" : "74783_HB_DBL.SU_RO_NFP",
"globalIntId" : -2111509049,
"forPackaging" : false
},
{
"id" : "HB_DBL.SU_BB_NFP",
"globalId" : "74783_HB_DBL.SU_BB_NFP",
"globalIntId" : 307969427,
"forPackaging" : false
},
{
"id" : "HB_DBL.SU_AI_NFP",
"globalId" : "74783_HB_DBL.SU_AI_NFP",
"globalIntId" : 1418623211,
"forPackaging" : false
},
{
"id" : "HB_DBL.PO-1_RO_NFP",
"globalId" : "74783_HB_DBL.PO-1_RO_NFP",
"globalIntId" : 1328251159,
"forPackaging" : false
},
{
"id" : "HB_DBL.PO-1_BB_NFP",
"globalId" : "74783_HB_DBL.PO-1_BB_NFP",
"globalIntId" : -1228155826,
"forPackaging" : false
},
{
"id" : "HB_DBL.PO-1_AI_NFP",
"globalId" : "74783_HB_DBL.PO-1_AI_NFP",
"globalIntId" : 749215308,
"forPackaging" : false
},
{
"id" : "HB_DBL.OF_RO_NFP",
"globalId" : "74783_HB_DBL.OF_RO_NFP",
"globalIntId" : 1981865239,
"forPackaging" : false
},
{
"id" : "HB_DBL.OF_BB_NFP",
"globalId" : "74783_HB_DBL.OF_BB_NFP",
"globalIntId" : 545563435,
"forPackaging" : false
},
{
"id" : "HB_DBL.OF_AI_NFP",
"globalId" : "74783_HB_DBL.OF_AI_NFP",
"globalIntId" : -481310774,
"forPackaging" : false
}
],
"duration" : {
"value" : 12,
"durationType" : "Day"
}
},
"generatedInfo" : {
"productGroupName" : null,
"subProductGroupName" : "Foo",
"version" : 0
}
}
}
]
with information from an enrich policy's index of the shape (when queried):
{
"_index" : ".enrich-package-enrich-1646044129711",
"_type" : "_doc",
"_id" : "zt_gP38BZeMUiw0-LxLa",
"_score" : 1.0,
"_source" : {
"contentId" : "365114",
"name" : "PackageName",
"board" : [
"B1",
"B2"
],
"units" : [
{
"price" : [
{
"margin" : 0,
"combination" : 10000,
"value" : 189030,
"currency" : "EUR"
}
],
"id" : "W2M_AX2_SC_NFP",
"globalId" : "365114_W2M_AX2_SC_NFP",
"globalIntId" : -988330164,
"name" : "UnitName",
"prop1": "Foo",
"prop2": "Bar"
}
]
}
}
]
I could originally get this working. However, when enriching, I only want to keep the units with the same global ID as those in the document to save. To this end, I have also tried enriching each unit using a ForEach processor that wraps a simple Enrich processor referencing the enrich policy, matching on `globalId`,
and have even attempted matching on its hash code, `globalIntId`
(although even in the latter case I would often get the error that it 'is not an integer', even though it clearly is one). This separate enrich-policy index has a shape similar to the following:
{
"_index" : ".enrich-package-unit-enrich-1646044158417",
"_type" : "_doc",
"_id" : "dN_gP38BZeMUiw0-t2Io",
"_score" : 1.0,
"_source" : {
"units" : [
{
"price" : [
{
"margin" : 0,
"combination" : 10000,
"value" : 189030,
"currency" : "EUR"
}
],
"globalId" : "365114_W2M_AX2_SC_NFP",
"globalIntId" : -988330164,
"name" : "UnitName",
"prop1": "Foo",
"prop2": "Bar",
"id" : "W2M_AX2_SC_NFP"
}
]
}
}
]
I have also tried to use a Painless script, but so far my experience hasn't been exactly painless (pun intended). Every time I tried to access any data (using the various approaches I came across), I got nothing but compilation errors. Also, given that I'm working on making this process faster, I'm a bit worried about performance here if I were to get it to work. I've read that Painless is fast, yet I've also heard it's actually fairly slow (I think compared to using processors, not necessarily other scripts).
Now, I'm at a loss about how to get this to work. I would prefer to do this without scripting if possible. However, if it is only possible using scripting, that's okay as long as the performance is acceptable. I'm using Elastic 7.12.
Update 1:
I'm creating the enrich policy from C# using Nest like so:
// Build the request that defines a "match" enrich policy named enrichPolicyName;
// the match field and enrich fields are set in MyPackageBedEnrichPolicy's constructor.
var enrichPolicyRequest = new PutEnrichPolicyRequest(enrichPolicyName)
{
Match = new MyPackageBedEnrichPolicy(index)
};
// Send the policy definition to the cluster.
var putEnrichPolicyResponse = await elasticClient.Enrich.PutPolicyAsync(enrichPolicyRequest);
// Execute the policy that was just registered.
// NOTE(review): neither response is inspected in the lines shown here — confirm
// that failures are handled further down.
var executeEnrichPolicyResponse = await elasticClient.Enrich.ExecutePolicyAsync(enrichPolicyName);
...
/// <summary>
/// Enrich policy definition for package documents. The constructor configures it
/// to match on <c>contentId</c> and to carry the <c>name</c>, <c>board</c> and
/// <c>units</c> fields.
/// </summary>
public class MyPackageBedEnrichPolicy : IEnrichPolicy
{
    /// <summary>Index (or indices) the policy is built from; set from the constructor argument.</summary>
    public Indices Indices { get; set; }

    /// <summary>Field the policy matches on; the constructor sets it to <c>contentId</c>.</summary>
    public Field MatchField { get; set; }

    /// <summary>Fields carried by the policy; the constructor sets <c>name</c>, <c>board</c> and <c>units</c>.</summary>
    public Fields EnrichFields { get; set; }

    /// <summary>Query of the policy; not assigned by the constructor.</summary>
    public string Query { get; set; }

    /// <summary>
    /// Creates the policy definition for the given source index.
    /// </summary>
    /// <param name="index">Name of the index assigned to <see cref="Indices"/>.</param>
    public MyPackageBedEnrichPolicy(string index)
    {
        string[] enrichFieldNames = { "name", "board", "units" };

        EnrichFields = enrichFieldNames;
        MatchField = "contentId";
        Indices = index;
    }
}
The enrich policy for the units is defined very similarly, but with:
public class MyPackageUnitEnrichPolicy : IEnrichPolicy
{
public MyPackageUnitEnrichPolicy(string index)
{
Indices = index;
MatchField = "units.globalId";
EnrichFields = new[] { "units" };
}
...
For now, I have created the ingest processors in Kibana for easier prototyping, though I will have to take care of that using Nest later as well. I have defined them basically as follows:
This is the definition of the ingest pipeline in JSON:
[
{
"enrich": {
"field": "content.contentId",
"policy_name": "enrichPolicyName",
"target_field": "enrichTest"
}
},
{
"foreach": {
"field": "content.units.globalId",
"processor": {
"enrich": {
"field": "content.units.globalId",
"policy_name": "unitEnrichPolicyName",
"target_field": "enrichTest.units",
"tag": "enrich-units-on-globalId-processor"
}
}
}
}
]