0

I store a user in Elasticsearch as:

{
"name": "Alice",
"following": ["Bob", "Charlie", etc...]
}

When I read it back, I'd like to get:

{
"name": "Alice",
"following": ["Bob", "Charlie", etc...],
"followers": X
}

where X should be the number of other users that follow Alice.

Donald
  • 1

1 Answer

0

Use this Python script to compute each user's follower count and store it in a `followers` field:

from elasticsearch import client, helpers

# Client handle shared by every function below; no hosts are passed, so the
# client library's default connection settings apply.
es = client.Elasticsearch()

# Index that holds the user documents.
INDEX = 'service'
# Document type handed to the scan and bulk helpers.
TYPE = 'user'
# Field that lists the names a user follows.
PARAM = 'following'

def _query(entries):
    params = [dict(match={PARAM: entry}) for entry in entries]
    return dict(size=0, query={'bool': {'should': params}})

def _result(query):
    """Run ``query`` against INDEX and return the total number of hits.

    Args:
        query: search request body, e.g. the dict built by ``_query``.

    Returns:
        The hit total as a plain integer.
    """
    response = es.search(index=INDEX, body=query)
    total = response['hits']['total']
    # Elasticsearch 7+ reports the total as {'value': N, 'relation': ...};
    # earlier versions return a bare integer. Unwrap so callers can always
    # compare against (and store) a number.
    # NOTE(review): on 7+ the value may be a lower bound unless the request
    # enables track_total_hits -- confirm for large follower counts.
    if isinstance(total, dict):
        return total['value']
    return total

def _action(entry, result):
    return {
        '_op_type': 'update',
        '_id': entry,
        'script': {
            'inline': 'ctx._source.followers = params.count',
            'params': {
                'count': result
            }
        }
    }

def _update():
    """Yield a bulk update action for each user whose follower count is stale.

    Scans every TYPE document in INDEX. For each one, counts the documents
    whose PARAM field matches the user's ``_id`` and compares that with the
    stored ``followers`` value (a missing field counts as 0). An update
    action is yielded only when the two differ.

    Once the scan is exhausted, prints how many documents were visited and
    how many updates were emitted.

    Yields:
        dict: an action built by ``_action`` for the bulk helper.
    """
    updates = entries = 0

    for doc in helpers.scan(es, index=INDEX, doc_type=TYPE):
        # NOTE(review): the lookup is keyed on _id -- presumably document ids
        # equal the names stored in other users' `following` lists; confirm.
        user = doc['_id']
        stored = doc['_source'].get('followers', 0)

        found = _result(_query([user]))
        if found != stored:
            updates += 1
            yield _action(user, found)

        entries += 1

    # A single pre-formatted argument prints the same text under both
    # Python 2 and Python 3 (the bare print statement is a SyntaxError on 3).
    print('entries: %d' % entries)
    print('updates: %d' % updates)

def run():
    """Send the actions produced by ``_update`` to Elasticsearch in bulk."""
    actions = _update()
    helpers.bulk(es, actions, index=INDEX, doc_type=TYPE)

# Run the follower-count refresh only when executed as a script, not on import.
if __name__ == '__main__':
    run()
Donald
  • 1