0

I have this JSON:

{
    "statussalida": "",
    "registros": [
        {
            "marca": 24,
            "codigo": 6,
            "precio": 71.9,
            "precionormal": 71.9,
            "descripcion": "FERNET IMPERIO",
            "presentacion": "950 CC.",
            "pesable": 0,
            "pesableporunidad": 0,
            "nombremarca": "COMERCIAL ARGENTINA SRL",
            "marcareal": 0,
            "cantidad": 950,
            "coeficiente": 1000,
            "preciopor": 75.68,
            "unidaddmedida": "cc",
            "cantidadmayorista": 0,
            "preciomayorista": 71.9,
            "etiquetamedida": "Precio x 1000 cc",
            "foto": "/api/v1/imagenes/eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpYXQiOjE2NDAyNzE1MDAsInBhZHJlIjoyODU3MzgyLCJpbnN0YW5jaWEiOjB9.0IBNNbEZXE4dKmyHxQ_oP-8HXMfEm80fI4kBCJSFaZY",
            "usafoto": 0
        },
        {
            "marca": 60,
            "codigo": 888,
            "precio": 274.99,
            "precionormal": 274.99,
            "descripcion": "ACEITE COCINERO DE OLIVA PET.",
            "presentacion": "1 LT",
            "pesable": 0,
            "pesableporunidad": 0,
            "nombremarca": "MOLINOS RIO DE LA PLATA S.A.",
            "marcareal": 0,
            "cantidad": 1,
            "coeficiente": 1,
            "preciopor": 274.99,
            "unidaddmedida": "Litros",
            "cantidadmayorista": 0,
            "preciomayorista": 274.99,
            "etiquetamedida": "Precio x 1 Litro",
            "foto": "/api/v1/imagenes/eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpYXQiOjE2NDAyNzE1MDAsInBhZHJlIjoyODU3MzgyLCJpbnN0YW5jaWEiOjB9.0IBNNbEZXE4dKmyHxQ_oP-8HXMfEm80fI4kBCJSFaZY",
            "usafoto": 0
        },
        {
            "marca": 60,
            "codigo": 2368,
            "precio": 82.99,
            "precionormal": 82.99,
            "descripcion": "ACEITE COCINERO",
            "presentacion": "1,5 LT.",
            "pesable": 0,
            "pesableporunidad": 0,
            "nombremarca": "MOLINOS RIO DE LA PLATA S.A.",
            "marcareal": 0,
            "cantidad": 1500,
            "coeficiente": 1000,
            "preciopor": 55.33,
            "unidaddmedida": "cc",
            "cantidadmayorista": 0,
            "preciomayorista": 82.99,
            "etiquetamedida": "Precio x 1000 cc",
            "foto": "/api/v1/imagenes/eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpYXQiOjE2NDAyNzE1MDAsInBhZHJlIjoyODU3MzgyLCJpbnN0YW5jaWEiOjB9.0IBNNbEZXE4dKmyHxQ_oP-8HXMfEm80fI4kBCJSFaZY",
            "usafoto": 0
        },
        {
            "marca": 60,
            "codigo": 2502,
            "precio": 73.49,
            "precionormal": 73.49,
            "descripcion": "*ACEITE COCINERO MEZCLA",
            "presentacion": "1500 CC.",
            "pesable": 0,
            "pesableporunidad": 0,
            "nombremarca": "MOLINOS RIO DE LA PLATA S.A.",
            "marcareal": 0,
            "cantidad": 1500,
            "coeficiente": 1000,
            "preciopor": 48.99,
            "unidaddmedida": "cc",
            "cantidadmayorista": 0,
            "preciomayorista": 73.49,
            "etiquetamedida": "Precio x 1000 cc",
            "foto": "/api/v1/imagenes/eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpYXQiOjE2NDAyNzE1MDAsInBhZHJlIjoyODU3MzgyLCJpbnN0YW5jaWEiOjMyNzM3fQ.HCyWPsopIAY03WZYRJ7tySUpCOE8Pq_1Ja-IFFLc0RY",
            "usafoto": 0
        },
        {
            "marca": 60,
            "codigo": 6318,
            "precio": 27.99,
            "precionormal": 27.99,
            "descripcion": "*HARINA FAVORITA 000",
            "presentacion": "1KG.",
            "pesable": 0,
            "pesableporunidad": 0,
            "nombremarca": "MOLINOS RIO DE LA PLATA S.A.",
            "marcareal": 0,
            "cantidad": 1,
            "coeficiente": 1,
            "preciopor": 27.99,
            "unidaddmedida": "Kg",
            "cantidadmayorista": 0,
            "preciomayorista": 27.99,
            "etiquetamedida": "Precio x 1 Kg",
            "foto": "/api/v1/imagenes/eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpYXQiOjE2NDAyNzE1MDAsInBhZHJlIjoyODU3MzgyLCJpbnN0YW5jaWEiOjB9.0IBNNbEZXE4dKmyHxQ_oP-8HXMfEm80fI4kBCJSFaZY",
            "usafoto": 0
        },
        {
            "marca": 60,
            "codigo": 20636,
            "precio": 167.9,
            "precionormal": 167.9,
            "descripcion": "ACEITE COCINERO",
            "presentacion": "3000 CC",
            "pesable": 0,
            "pesableporunidad": 0,
            "nombremarca": "MOLINOS RIO DE LA PLATA S.A.",
            "marcareal": 0,
            "cantidad": 3000,
            "coeficiente": 1000,
            "preciopor": 55.97,
            "unidaddmedida": "cc",
            "cantidadmayorista": 0,
            "preciomayorista": 167.9,
            "etiquetamedida": "Precio x 1000 cc",
            "foto": "/api/v1/imagenes/eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpYXQiOjE2NDAyNzE1MDAsInBhZHJlIjoyODU3MzgyLCJpbnN0YW5jaWEiOjM3MjkwfQ.ie1dLHrL3EKFYIflt0JYzI_UQzHYsmoUc_CtA2cWYd4",
            "usafoto": 0
        },
        {
            "marca": 60,
            "codigo": 21821,
            "precio": 136.99,
            "precionormal": 136.99,
            "descripcion": "*ACEITE COCINERO OLIVA EX VIRGEN",
            "presentacion": "500 CC",
            "pesable": 0,
            "pesableporunidad": 0,
            "nombremarca": "MOLINOS RIO DE LA PLATA S.A.",
            "marcareal": 0,
            "cantidad": 500,
            "coeficiente": 1000,
            "preciopor": 273.98,
            "unidaddmedida": "cc",
            "cantidadmayorista": 0,
            "preciomayorista": 136.99,
            "etiquetamedida": "Precio x 1000 cc",
            "foto": "/api/v1/imagenes/eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpYXQiOjE2NDAyNzE1MDAsInBhZHJlIjoyODU3MzgyLCJpbnN0YW5jaWEiOjM5Njc5fQ.nAMCSgGSWfcEM9D_pSFjTHQccBHrx90oSFuEl410WiM",
            "usafoto": 0
        },
        {
            "marca": 60,
            "codigo": 22840,
            "precio": 38.9,
            "precionormal": 38.9,
            "descripcion": "*ACEITE IDEAL MEZCLA",
            "presentacion": "1500 CC",
            "pesable": 0,
            "pesableporunidad": 0,
            "nombremarca": "MOLINOS RIO DE LA PLATA S.A.",
            "marcareal": 0,
            "cantidad": 1500,
            "coeficiente": 1000,
            "preciopor": 25.93,
            "unidaddmedida": "cc",
            "cantidadmayorista": 0,
            "preciomayorista": 38.9,
            "etiquetamedida": "Precio x 1000 cc",
            "foto": "/api/v1/imagenes/eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpYXQiOjE2NDAyNzE1MDAsInBhZHJlIjoyODU3MzgyLCJpbnN0YW5jaWEiOjI2MDEyfQ.q44kAmHfbI3y_7KyuxUot-ALnYXWAxZy8QmjMg7w5gY",
            "usafoto": 0
        }
    ]
}

But this structure is repeated for 32,000 articles.

Does anyone know of a function that, given an input such as 'Ace', returns all the articles (in JSON format) whose description contains those letters, the way a SQL search would?

I tried jmespath, jsonpath-ng, ijson and some more, but I can't do what I need.

The reason I ask is that I consume that JSON from an API, and I have to return the matching JSON to an e-commerce site whenever someone searches for an article, so it should be as fast as possible.

# Fetch the full article list from the API.
# NOTE(review): the name `json` would shadow the standard-library `json`
# module if that module is imported in the same scope.
json = requests.get('http://xxx.xxx.x.xx/api/v1/articulosweb')
# NOTE(review): this expression is an equality comparison against a top-level
# `descripcion` key, whereas in the sample `descripcion` lives inside each
# entry of `registros`; it is not a substring filter. `json.content` is
# presumably the raw response body rather than parsed JSON - confirm against
# the requests and jmespath documentation.
a = jmespath.search("descripcion == 'Fer'", json.content)

and here I would like it to return all the articles that contain 'fer' in their description.

martineau
  • 119,623
  • 25
  • 170
  • 301

1 Answers1

1

This looks like a job for jq (json-query), which has a standalone package https://stedolan.github.io/jq/ and Python bindings https://pypi.org/project/jq/

It is also not hard to do this with pure-python. The following code gives examples of both:

import jq
import json


# Grab the text of the json.
# Decode explicitly as UTF-8 instead of relying on the platform's default
# encoding, which is not UTF-8 everywhere; otherwise any non-ASCII
# characters in the file could be mis-decoded.
with open('test.json', 'r', encoding='utf-8') as file:
    json_text = file.read()

# Create a JQ query: iterate over every entry of "registros" and keep the
# ones whose lower-cased "descripcion" matches the regular expression.
query = jq.compile('.registros[] | select(.descripcion | ascii_downcase | test(".*fer.*"))')

# Execute the JQ Query (does not require a json.loads call)
found = query.input(text=json_text).all()


# Or, use json.loads and just use basic python
data = json.loads(json_text)
found2 = []
for item in data['registros']:
    # Case-insensitive substring match on the description.
    if 'fer' in item['descripcion'].lower():
        found2.append(item)


# Both approaches must select exactly the same articles.
assert found == found2

EDIT:

It might be the case that pure Python is just faster, even though jq is a compiled binary. I multiplied the size of the data by 10000 and ran a benchmark using timerit. The pure-Python solution seemed to be much faster, and when changing the loop into a list comprehension, it goes even faster.

import copy

import timerit

# Build a data set 10000 times larger by repeating the original records,
# then serialize it so every variant below starts from the same JSON text.
data_big = copy.deepcopy(data)
data_big['registros'] = [item for _ in range(10000) for item in data['registros']]
json_text_big = json.dumps(data_big)


# 3 timed loops per variant; verbose=2 prints the timing report.
ti = timerit.Timerit(3, bestof=1, verbose=2)

# Each pure-Python variant parses the JSON text inside the timed region,
# because the jq variant is also handed the raw text.
for timer in ti.reset('pure-python'):
    with timer:
        found3 = []
        data_big = json.loads(json_text_big)
        for item in data_big['registros']:
            if 'fer' in item['descripcion'].lower():
                found3.append(item)

for timer in ti.reset('pure-python-list-comprehension'):
    with timer:
        data_big = json.loads(json_text_big)
        found5 = [item for item in data_big['registros'] if 'fer' in item['descripcion'].lower()]


for timer in ti.reset('with jq'):
    with timer:
        found4 = query.input(text=json_text_big).all()

# All three variants must select exactly the same articles; the
# list-comprehension result is checked too so its timing is not reported
# for an unverified answer.
assert found4 == found3
assert found5 == found3

Results:

Timed pure-python for: 3 loops, best of 1
    time per loop: best=273.871 ms, mean=327.134 ± 47.9 ms
Timed pure-python-list-comprehension for: 3 loops, best of 1
    time per loop: best=10.701 ms, mean=18.303 ± 10.0 ms
Timed with jq for: 3 loops, best of 1
    time per loop: best=5.298 s, mean=5.600 ± 0.2 s
Erotemic
  • 4,806
  • 4
  • 39
  • 80
  • And then there are list comprehensions [ item for item in data['registros'] if 'fer' in item['descripcion'].lower()] or, to make it more readable, refactor the condition to a function [ item for item in data['registros'] if has_fer_description(item)] Easy to read. Easy to understand. – Thomas Junk Dec 23 '21 at 20:59
  • 1
    Yes, the list comprehension is also faster. `found5 = [item for item in data_big['registros'] if 'fer' in item['descripcion'].lower()]` clocks in at 10.701ms. 20x better than the for loop, and 500x faster than the jq method (I wonder if JQ query can be optimized?). Adding that to the answer. – Erotemic Dec 23 '21 at 21:03