0

I am facing a memory leak and cannot work out where exactly the problem lies. I am simply calling collection.find(query) multiple times in a for loop. I am using Python 3.6.5 and Motor 2.5.1 to query a collection. The following code snippet should reproduce this behaviour.

from asyncio import AbstractEventLoop
from typing import Sequence

from memory_profiler import profile
from motor.core import AgnosticClient, AgnosticCollection
from motor.motor_asyncio import AsyncIOMotorClient, AsyncIOMotorCollection

 class TestMemoryLeak:
     """Harness that repeatedly runs ``find`` queries to observe memory growth.

     Holds a Motor client and one of its collections; both profiled methods
     are decorated with ``memory_profiler.profile`` so per-line memory usage
     is reported on every call.
     """

     def __init__(
         self,
         client: AgnosticClient,
         collection: AgnosticCollection
     ):
         self.client = client
         self.collection = collection

     @profile
     def get_docs_default(self, loop: AbstractEventLoop, query: str):
         """Run one ``find`` for *query* and print how many documents came back.

         Args:
             loop: Event loop used to drive the cursor coroutine to completion.
             query: Filter handed straight to ``collection.find``.
                 NOTE(review): annotated as ``str`` to match the caller in this
                 script; confirm the driver accepts it, since ``find`` filters
                 are normally mappings.

         Returns:
             None. A non-empty result prints its length; any exception raised
             while querying is printed and suppressed so the batch continues.
         """
         try:
             cursor = self.collection.find(query)
             # length=None asks for every matching document in one list, so
             # the whole result set is resident in memory at this point.
             results = loop.run_until_complete(cursor.to_list(length=None))
         except Exception as e:
             # Best-effort by design: report the failure and move on.
             print(f"Error finding docs for query {query} : {e}")
             return

         if results:
             print(len(results))

     @profile
     def batch_enrich(
         self,
         queries: Sequence[str],
     ) -> None:
         """Execute ``get_docs_default`` once per entry in *queries*.

         Args:
             queries: Filters to run, in order, one after another.
         """
         # One loop, obtained from the client, is shared by every query.
         loop = self.client.get_io_loop()

         for query in queries:
             self.get_docs_default(loop, query)

if __name__ == "__main__":
    import os

    # Emit the process id first (presumably so the process can be watched
    # by an external memory monitor while the queries run).
    print(os.getpid())

    client = AsyncIOMotorClient("mongodb:...")
    database = client.get_database("db")
    collection = database.get_collection("collection")
    memory_leak = TestMemoryLeak(client, collection)

    # NOTE(review): the filter is a placeholder kept as a string literal;
    # a real run presumably needs an actual filter document here - confirm.
    query = "{'somefield': {'$gte': value1, '$lte': value2}}"

    # to simulate multiple queries
    repeated_queries = [query] * 100
    memory_leak.batch_enrich(repeated_queries)

The memory keeps piling up on each pass of the for loop. Memory vs time

I also tracked the memory using memory_profiler, and these are the logs obtained.

----------------------------------------------
1143
Filename: scripts/memory-investigation/test_memory_enrichment_get_docs.py

Line #    Mem usage    Increment  Occurrences   Line Contents
=============================================================
    36    135.5 MiB    135.5 MiB           1       @profile
    37                                             def get_docs_default(self, batch, loop):
    38    135.5 MiB      0.0 MiB           1           query = self.criteria.by_time_range(batch).query
    39    135.5 MiB      0.0 MiB           1           try:
    40    135.5 MiB      0.0 MiB           1               cursor = self.collection.find(query)
    41    379.9 MiB    244.4 MiB           1               results = loop.run_until_complete(cursor.to_list(length=None))
    42    379.9 MiB      0.0 MiB           1               if len(results) > 0:
    43    379.9 MiB      0.0 MiB           1                   print(len(results))
    44                                                 except Exception as e:
    45                                                     print(f"Error in finding Document entry with range {batch.json()} : {e}")
    46                                                     pass


----------------------------------------------
----------------------------------------------
1102
Filename: scripts/memory-investigation/test_memory_enrichment_get_docs.py

Line #    Mem usage    Increment  Occurrences   Line Contents
=============================================================
    36    184.2 MiB    184.2 MiB           1       @profile
    37                                             def get_docs_default(self, batch, loop):
    38    184.2 MiB      0.0 MiB           1           query = self.criteria.by_time_range(batch).query
    39    184.2 MiB      0.0 MiB           1           try:
    40    184.2 MiB      0.0 MiB           1               cursor = self.collection.find(query)
    41    425.4 MiB    241.3 MiB           1               results = loop.run_until_complete(cursor.to_list(length=None))
    42    425.4 MiB      0.0 MiB           1               if len(results) > 0:
    43    425.4 MiB      0.0 MiB           1                   print(len(results))
    44                                                 except Exception as e:
    45                                                     print(f"Error in finding Document entry with range {batch.json()} : {e}")
    46                                                     pass


----------------------------------------------
----------------------------------------------
856
Filename: scripts/memory-investigation/test_memory_enrichment_get_docs.py

Line #    Mem usage    Increment  Occurrences   Line Contents
=============================================================
    36    225.4 MiB    225.4 MiB           1       @profile
    37                                             def get_docs_default(self, batch, loop):
    38    225.4 MiB      0.0 MiB           1           query = self.criteria.by_time_range(batch).query
    39    225.4 MiB      0.0 MiB           1           try:
    40    225.4 MiB      0.0 MiB           1               cursor = self.collection.find(query)
    41    384.1 MiB    158.7 MiB           1               results = loop.run_until_complete(cursor.to_list(length=None))
    42    384.1 MiB      0.0 MiB           1               if len(results) > 0:
    43    384.1 MiB      0.0 MiB           1                   print(len(results))
    44                                                 except Exception as e:
    45                                                     print(f"Error in finding Document entry with range {batch.json()} : {e}")
    46                                                     pass


----------------------------------------------
----------------------------------------------
618
Filename: scripts/memory-investigation/test_memory_enrichment_get_docs.py

Line #    Mem usage    Increment  Occurrences   Line Contents
=============================================================
    36    233.6 MiB    233.6 MiB           1       @profile
    37                                             def get_docs_default(self, batch, loop):
    38    233.6 MiB      0.0 MiB           1           query = self.criteria.by_time_range(batch).query
    39    233.6 MiB      0.0 MiB           1           try:
    40    233.6 MiB      0.0 MiB           1               cursor = self.collection.find(query)
    41    346.5 MiB    112.9 MiB           1               results = loop.run_until_complete(cursor.to_list(length=None))
    42    346.5 MiB      0.0 MiB           1               if len(results) > 0:
    43    346.5 MiB      0.0 MiB           1                   print(len(results))
    44                                                 except Exception as e:
    45                                                     print(f"Error in finding Document entry with range {batch.json()} : {e}")
    46                                                     pass


----------------------------------------------
Filename: scripts/memory-investigation/test_memory_enrichment_get_docs.py

Line #    Mem usage    Increment  Occurrences   Line Contents
=============================================================
    73    135.5 MiB    135.5 MiB           1       @profile
    74                                             def test_mimic_batch_enrich(
    75                                                 self,
    76                                                 batch_time_range: Sequence[Range],
    77                                                 mode: str
    78                                             ) -> None:
    79    135.5 MiB      0.0 MiB           1           fn = self.get_func_by_mode(mode)
    80    135.5 MiB      0.0 MiB           1           print(f"{mode}: {fn}")
    81    135.5 MiB      0.0 MiB           1           loop = self.client.get_io_loop()
    82                                         
    83    234.9 MiB      0.0 MiB           5           for idx, batch in enumerate(batch_time_range):
    84    233.6 MiB      0.0 MiB           4               print('----------------------------------------------')
    87    234.9 MiB     99.4 MiB           4               fn(batch, loop)
    90    234.9 MiB      0.0 MiB           4               print('----------------------------------------------')
    

Notice that memory keeps increasing on every iteration of the for loop. I have no clue why this is happening. The cursors should close automatically when I call the to_list() method, so the cursors should not be the cause. Any insights would be appreciated!

Siddhant Tandon
  • 651
  • 4
  • 15

0 Answers0