Why does this LCS (Longest Common Subsequence) Python implementation with memoization perform badly?

Question

I am learning Dynamic Programming and came across the LCS (Longest Common Subsequence) algorithm.

I have implemented several versions of it in Python, to see how implementations differ from each other and how they perform.

Here is the code:

import time
import sys

# The recursive implementations below step down one index per nested call, so
# the 3000-element test inputs need a much deeper call stack than the
# interpreter's default recursion limit allows.
sys.setrecursionlimit(50000)


def current_milli_time():
    """Return the current wall-clock time in milliseconds.

    The value is ``time.time()`` (seconds, as a float) scaled by 1000.
    """
    seconds_now = time.time()
    return seconds_now * 1000


def memoize_decorator(fn):
    """Return a wrapper around ``fn`` that caches results per argument tuple.

    The cache lives for as long as the returned wrapper does and is never
    evicted. Only positional arguments are supported, and they must be
    hashable because the argument tuple is used as the dictionary key.

    Args:
        fn: The function to memoize.

    Returns:
        A function with the same metadata as ``fn`` (name, docstring, ...)
        that calls ``fn`` at most once per distinct argument tuple.
    """
    import functools  # local import: keeps this helper self-contained

    cache = {}
    # Unique marker so that cached results equal to None are still cache hits.
    missing = object()

    @functools.wraps(fn)
    def inner_fn(*args):
        # One dictionary lookup per hit instead of `in` followed by `[]`.
        ret = cache.get(args, missing)
        if ret is missing:
            ret = fn(*args)
            cache[args] = ret
        return ret

    return inner_fn


class bcolors:
    """Terminal escape sequences used to colour and style printed text."""

    # Colour codes.
    HEADER = "\033[95m"
    OKBLUE = "\033[94m"
    OKGREEN = "\033[92m"
    WARNING = "\033[93m"
    FAIL = "\033[91m"

    # Text attributes.
    BOLD = "\033[1m"
    UNDERLINE = "\033[4m"

    # Emitted after styled text to end the styling started by the codes above.
    ENDC = "\033[0m"


class TestORedAssertValue:
    """Expected-value holder that is satisfied by any one of several values.

    Instances are passed as the expected value of a test case whose correct
    answer is not unique; the case passes when the actual result equals at
    least one of the stored alternatives.
    """

    def __init__(self, *ored_assert_values):
        # Stored as the tuple of alternatives exactly as it was passed in.
        self.__values = ored_assert_values

    def assert_value(self, value):
        """Return True if ``value`` equals any stored alternative, else False."""
        return any(candidate == value for candidate in self.__values)

    def values(self):
        """Return the stored alternatives as a new list."""
        return [*self.__values]


def test_assert(label, value, assertValue, *args):
    """Print a coloured pass/fail line for a single test case.

    Args:
        label: Text identifying the test case in the output.
        value: The result actually produced.
        assertValue: The expected result, or a ``TestORedAssertValue`` when
            several results are acceptable.
        *args: The inputs of the test case; printed only on failure.
    """
    if isinstance(assertValue, TestORedAssertValue):
        passed = assertValue.assert_value(value)
        expected_text = " OR ".join(str(option) for option in assertValue.values())
    else:
        passed = value == assertValue
        expected_text = assertValue

    if passed:
        print('#', label, ' - ', bcolors.OKGREEN, 'ok', bcolors.ENDC, sep='')
        return

    inputs_text = ", ".join(str(arg) for arg in args)
    print(
        '#', label, ' - args: ', inputs_text, ', expected: ', expected_text,
        ', got: ', value, ' - ', bcolors.FAIL, 'fail', bcolors.ENDC,
        sep='',
    )


def measure_time_decorator(fn):
    """Return a wrapper that prints how long each call to ``fn`` takes.

    The elapsed time is printed as ``time: <milliseconds> ms - `` without a
    trailing newline, so the caller's next ``print`` continues the same line.
    Nothing is printed if ``fn`` raises.

    Args:
        fn: The function to time.

    Returns:
        A function with the same metadata as ``fn`` that forwards all
        positional and keyword arguments and returns ``fn``'s result.
    """
    import functools  # local import: keeps this helper self-contained

    @functools.wraps(fn)
    def inner(*args, **kwargs):
        start = current_milli_time()
        ret = fn(*args, **kwargs)
        end = current_milli_time()
        print('time: ', end - start, ' ms', sep='', end=' - ')
        return ret

    return inner


def test(fn):
    """Run ``fn`` through the fixed LCS test cases, timing and checking each.

    ``fn`` is called as ``fn(A, B)`` with two lists and must return a list.
    For every case the elapsed time and an ok/fail verdict are printed on one
    line.

    Args:
        fn: The longest-common-subsequence implementation under test.
    """
    print()
    print("Testing:", fn.__name__)

    timed = measure_time_decorator(fn)

    letters = ['A', 'B', 'C', 'E', 'F', 'G', 'H', 'I', 'L']
    ascending = list(range(3000))
    ascending_again = list(range(3000))

    # One (A, B, expected) triple per test case, in execution order.
    cases = [
        (
            ['A', 'C', 'B', 'D', 'E', 'G', 'C', 'E', 'D', 'B', 'G'],
            ['B', 'E', 'G', 'C', 'F', 'E', 'U', 'B', 'K'],
            ['B', 'E', 'G', 'C', 'E', 'B'],
        ),
        (['A', 'B'], [], []),
        ([], [], []),
        ([1, 2], [1, 2], [1, 2]),
        ([1, 2], [1, 2], [1, 2]),
        (letters, list(letters), list(letters)),
        # Identical long inputs: the whole list is the answer.
        (ascending, ascending, ascending),
        # Reversed long inputs: only one element can be shared in order, and
        # which one is returned depends on the implementation's tie-breaking.
        (
            ascending_again,
            list(reversed(ascending_again)),
            TestORedAssertValue([0], [2999]),
        ),
    ]

    for number, (A, B, expected) in enumerate(cases, start=1):
        outcome = timed(A, B)
        test_assert('testcase {}'.format(number), outcome, expected, A, B)


def longest_common_subsequence(A, B):
    """Return one longest common subsequence of ``A`` and ``B`` as a list.

    Bottom-up dynamic programming without recursion. Only the *lengths* of
    the sub-solutions are stored in the table; the subsequence itself is
    rebuilt afterwards by walking back through the table. This keeps the
    work at O(len(A) * len(B)) instead of copying a growing list into every
    cell.

    When several longest subsequences exist, ties between dropping the last
    element of ``A`` and dropping the last element of ``B`` are resolved in
    favour of dropping the element of ``A``.

    Args:
        A: First sequence (anything supporting ``len`` and integer indexing).
        B: Second sequence.

    Returns:
        A new list holding the elements of one longest common subsequence;
        empty if either input is empty.
    """
    N = len(A)
    M = len(B)

    # lengths[i][j] is the LCS length of the prefixes A[:i] and B[:j].
    lengths = [[0] * (M + 1) for _ in range(N + 1)]
    for i in range(1, N + 1):
        item = A[i - 1]
        row = lengths[i]
        row_above = lengths[i - 1]
        for j in range(1, M + 1):
            if item == B[j - 1]:
                row[j] = row_above[j - 1] + 1
            else:
                left = row[j - 1]
                up = row_above[j]
                row[j] = left if left > up else up

    # Walk back from the bottom-right corner, repeating the decisions made
    # while filling the table; matched elements are collected back to front.
    result = []
    i, j = N, M
    while i > 0 and j > 0:
        if A[i - 1] == B[j - 1]:
            result.append(A[i - 1])
            i -= 1
            j -= 1
        elif lengths[i][j - 1] > lengths[i - 1][j]:
            j -= 1
        else:
            i -= 1
    result.reverse()
    return result


def longest_common_subsequence_recursive_memoized(A, B):
    """Return one longest common subsequence of ``A`` and ``B`` as a list.

    Top-down recursion over index pairs ``(i, j)``; repeated sub-problems are
    served from the cache added by ``memoize_decorator``. Each computed
    result is also written into ``res_matrix``, and the final answer is read
    from its last cell.

    Args:
        A: First sequence.
        B: Second sequence.

    Returns:
        A list with one longest common subsequence; empty if either input is
        empty.
    """
    N, M = len(A), len(B)
    if N <= 0 or M <= 0:
        return []
    res_matrix = [[[]] * M for _ in range(N)]

    @memoize_decorator
    def recursion(i, j):
        # Running off the front of either sequence leaves nothing in common.
        if i < 0 or j < 0:
            return []
        if A[i] == B[j]:
            best = recursion(i - 1, j - 1) + [A[i]]
        else:
            without_a = recursion(i - 1, j)
            without_b = recursion(i, j - 1)
            # On equal lengths the result that dropped B[j] is kept.
            best = without_a if len(without_a) > len(without_b) else without_b
        res_matrix[i][j] = best
        return best

    recursion(N - 1, M - 1)
    return res_matrix[-1][-1]


def longest_common_subsequence_recursive_memoized_mit(A, B):
    """Return one longest common subsequence of ``A`` and ``B`` as a list.

    Top-down recursion over index pairs ``(i, j)`` with memoization done
    inline: ``res_matrix[i][j]`` is ``None`` until the sub-problem for the
    prefixes ending at ``A[i]`` and ``B[j]`` has been solved, and holds the
    resulting list afterwards.

    Each nested call lowers ``i`` or ``j`` by one, so the recursion can get
    roughly ``len(A) + len(B)`` frames deep; long inputs need a raised
    recursion limit.

    Args:
        A: First sequence.
        B: Second sequence.

    Returns:
        A list with one longest common subsequence; empty if either input is
        empty.
    """
    N = len(A)
    M = len(B)
    if N <= 0 or M <= 0:
        return []
    # None marks "not computed yet"; an empty list is a valid computed result.
    res_matrix = [[None] * M for _ in range(N)]

    def lcs(i, j):
        # Running off the front of either sequence leaves nothing in common.
        if i < 0 or j < 0:
            return []
        # Identity check against the None sentinel (not `== None`).
        if res_matrix[i][j] is None:
            if A[i] == B[j]:
                res_matrix[i][j] = lcs(i - 1, j - 1) + [A[i]]
            else:
                prev1 = lcs(i - 1, j)
                prev2 = lcs(i, j - 1)
                # On equal lengths the result that dropped B[j] is kept.
                res_matrix[i][j] = prev1 if len(prev1) > len(prev2) else prev2
        return res_matrix[i][j]

    return lcs(N - 1, M - 1)


if __name__ == "__main__":
    # Run the same test suite against every implementation in turn.
    implementations = (
        longest_common_subsequence_recursive_memoized,
        longest_common_subsequence_recursive_memoized_mit,
        longest_common_subsequence,
    )
    for implementation in implementations:
        test(implementation)
    print()

The important functions are:

longest_common_subsequence_recursive_memoized: Uses recursion with memoization through a memoize_decorator;
longest_common_subsequence_recursive_memoized_mit: Uses recursion with memoization achieved by directly checking res_matrix (inspired by this MIT lecture -> https://youtu.be/V5hZoJ6uK-s?t=3228);
longest_common_subsequence: A dynamic programming implementation using an n * m matrix without recursion;

If you run the code above (e.g. python longest_common_subsequence.py), you will see an output similar to the following:

$ python longest_common_subsequence.py

Testing: longest_common_subsequence_recursive_memoized
time: 0.22802734375 ms - #testcase 1 - ok
time: 0.0048828125 ms - #testcase 2 - ok
time: 0.003173828125 ms - #testcase 3 - ok
time: 0.02099609375 ms - #testcase 4 - ok
time: 0.01806640625 ms - #testcase 5 - ok
time: 0.046875 ms - #testcase 6 - ok
time: 328.40087890625 ms - #testcase 7 - ok
time: 105788.96801757812 ms - #testcase 8 - ok

Testing: longest_common_subsequence_recursive_memoized_mit
time: 0.22607421875 ms - #testcase 1 - ok
time: 0.0048828125 ms - #testcase 2 - ok
time: 0.003173828125 ms - #testcase 3 - ok
time: 0.031005859375 ms - #testcase 4 - ok
time: 0.01416015625 ms - #testcase 5 - ok
time: 0.041015625 ms - #testcase 6 - ok
time: 255.93994140625 ms - #testcase 7 - ok
time: 26466.174072265625 ms - #testcase 8 - ok

Testing: longest_common_subsequence
time: 0.159912109375 ms - #testcase 1 - ok
time: 0.011962890625 ms - #testcase 2 - ok
time: 0.009033203125 ms - #testcase 3 - ok
time: 0.015869140625 ms - #testcase 4 - ok
time: 0.015869140625 ms - #testcase 5 - ok
time: 0.1279296875 ms - #testcase 6 - ok
time: 10227.974853515625 ms - #testcase 7 - ok
time: 9605.087158203125 ms - #testcase 8 - ok

The interesting part is testcase 8. You can see that for this testcase longest_common_subsequence_recursive_memoized performs poorly (105788.96801757812 ms ~= 105.8 seconds), while for the other two functions it takes no more than 30 seconds (longest_common_subsequence being the best one by taking around 10 seconds to complete).

My question is: why does longest_common_subsequence_recursive_memoized perform so badly for testcase 8 while the implementation is quite similar to longest_common_subsequence_recursive_memoized_mit?

It still uses memoization; however, instead of directly accessing res_matrix and returning values from there, it uses a decorator that wraps the recursive function, caches the result of each computation, and returns it right away when a previously computed result is requested again.

Thank you for your attention.

EDIT: After several trials, I found out that the performance problem seems to be related to the @memoize_decorator function.

If I rewrite the longest_common_subsequence_recursive_memoized function adding the equivalent test used in longest_common_subsequence_recursive_memoized_mit (MIT version):

def longest_common_subsequence_recursive_memoized(A, B):
    """Return one longest common subsequence of ``A`` and ``B`` as a list.

    Experiment variant: the recursive helper checks ``res_matrix`` for an
    already computed result (``None`` means "not computed yet") and is
    additionally wrapped in ``memoize_decorator``, so results are cached
    twice.

    Args:
        A: First sequence.
        B: Second sequence.

    Returns:
        A list with one longest common subsequence; empty if either input is
        empty.
    """
    N = len(A)
    M = len(B)
    if N <= 0 or M <= 0:
        return []
    res_matrix = [[None] * M for _ in range(N)]

    @memoize_decorator
    def lcs(i, j):
        # Running off the front of either sequence leaves nothing in common.
        if i < 0 or j < 0:
            return []
        # Identity check against the None sentinel (not `== None`).
        if res_matrix[i][j] is None:
            if A[i] == B[j]:
                res_matrix[i][j] = lcs(i - 1, j - 1) + [A[i]]
            else:
                prev1 = lcs(i - 1, j)
                prev2 = lcs(i, j - 1)
                # On equal lengths the result that dropped B[j] is kept.
                res_matrix[i][j] = prev1 if len(prev1) > len(prev2) else prev2
        return res_matrix[i][j]

    lcs(N - 1, M - 1)
    return res_matrix[-1][-1]

Even with this modification, the function is still slow for testcase 8:

...
time: 95080.388671875 ms - #testcase 8 - ok
...

And if I comment the @memoize_decorator line:

def longest_common_subsequence_recursive_memoized(A, B):
    """Return one longest common subsequence of ``A`` and ``B`` as a list.

    Experiment variant without the decorator: memoization is done only
    through ``res_matrix``, where ``None`` marks a sub-problem that has not
    been solved yet.

    Args:
        A: First sequence.
        B: Second sequence.

    Returns:
        A list with one longest common subsequence; empty if either input is
        empty.
    """
    N = len(A)
    M = len(B)
    if N <= 0 or M <= 0:
        return []
    res_matrix = [[None] * M for _ in range(N)]

    # @memoize_decorator <--- Comment
    def lcs(i, j):
        # Running off the front of either sequence leaves nothing in common.
        if i < 0 or j < 0:
            return []
        # Identity check against the None sentinel (not `== None`).
        if res_matrix[i][j] is None:
            if A[i] == B[j]:
                res_matrix[i][j] = lcs(i - 1, j - 1) + [A[i]]
            else:
                prev1 = lcs(i - 1, j)
                prev2 = lcs(i, j - 1)
                # On equal lengths the result that dropped B[j] is kept.
                res_matrix[i][j] = prev1 if len(prev1) > len(prev2) else prev2
        return res_matrix[i][j]

    lcs(N - 1, M - 1)
    return res_matrix[-1][-1]

Then the function is far faster for testcase 8 (~24 secs instead of ~95 secs):

...
time: 24229.078857421875 ms - #testcase 8 - ok
...

So I guess the performance drawback comes from memoize_decorator, since removing it yields a performance gain. This is strange, as memoization should speed up repetitive computations.

But in the case of this modified longest_common_subsequence_recursive_memoized function, it doesn't really matter whether its inner function is memoized with memoize_decorator, because at this point longest_common_subsequence_recursive_memoized already performs memoization on its own through res_matrix and the if res_matrix[i][j] == None: test.

So my final diagnosis is that the ~95 secs are related to the code in inner_fn:

    ...
        if args in cache:
            return cache[args]
        ret = fn(*args)
        cache[args] = ret
        return ret
    ...

On the other hand, if I leave longest_common_subsequence_recursive_memoized as it was before but without @memoize_decorator (renaming it to longest_common_subsequence_recursive):

def longest_common_subsequence_recursive(A, B):
    """Return one longest common subsequence of ``A`` and ``B`` as a list.

    Plain top-down recursion with no memoization: ``res_matrix`` is only
    written to, never consulted, so overlapping sub-problems are solved
    again every time they come up.

    Args:
        A: First sequence.
        B: Second sequence.

    Returns:
        A list with one longest common subsequence; empty if either input is
        empty.
    """
    N, M = len(A), len(B)
    if N <= 0 or M <= 0:
        return []
    res_matrix = [[[]] * M for _ in range(N)]

    # @memoize_decorator <--- Without memoization
    def recursion(i, j):
        # Running off the front of either sequence leaves nothing in common.
        if i < 0 or j < 0:
            return []
        if A[i] == B[j]:
            best = recursion(i - 1, j - 1) + [A[i]]
        else:
            without_a = recursion(i - 1, j)
            without_b = recursion(i, j - 1)
            # On equal lengths the result that dropped B[j] is kept.
            best = without_a if len(without_a) > len(without_b) else without_b
        res_matrix[i][j] = best
        return best

    recursion(N - 1, M - 1)
    return res_matrix[-1][-1]

Then the execution time is even worse, and it takes far more than 95 seconds to execute.

Memoization implemented with a decorator in this case seems to be something in between the naive recursive unoptimized solution (longest_common_subsequence_recursive) and the better optimized solutions (the MIT inline memoization version and the dynamic programming one).

Would be happy to hear what you think about this.

Thanks.

score 0 · Answer 1 · answered Dec 15 '19 at 22:37

0

My question is: why does longest_common_subsequence_recursive_memoized perform so badly for testcase 8 while the implementation is quite similar to longest_common_subsequence_recursive_memoized_mit?

The primary performance difference I find between the two is that the MIT version has:

res_matrix = [[None] * M for i in range(N)]
...
if res_matrix[i][j] == None:

while yours uses:

res_matrix = [[[]] * M for i in range(N)]

and no equivalent test. If we modify yours to incorporate the same initialization and test as MIT, and decorate with Python's own lru_cache() from functools, so we can interrogate the cache, we get:

CacheInfo(hits=380620, misses=17610383, maxsize=128, currsize=128)
time: 19195.7060546875 ms - #testcase 8 - ok

Increasing cache size from the default 128 improves performance:

CacheInfo(hits=8988004, misses=9002999, maxsize=4096, currsize=4096)
time: 16645.57080078125 ms - #testcase 8 - ok

But only to a point and then makes no further difference.

answered Dec 15 '19 at 22:37

cdlane

40,441
5
32
81

Thank you for your reply. `while yours uses: ... and no equivalent test.` The question that comes to my mind then is why doesn't the `if args in cache:` in `@memoize_decorator(recursion)` of `longest_common_subsequence_recursive_memoized` work almost equivalently to the `if res_matrix[i][j] == None:` condition in `longest_common_subsequence_recursive_memoized_mit`? Even though the memoized `recursion` function saves its return value in the cache before returning, it seems that the cache doesn't boost the execution... This is the main difference I still don't understand – tonix Dec 15 '19 at 22:57
Could you provide an example showing the differences between the two approaches? – tonix Dec 16 '19 at 14:53
I tried to play with the code again and it seems that the overhead comes from the `@memoize_decorator` decorator function. Even if I make `longest_common_subsequence_recursive_memoized` the same as the MIT version, if I wrap the inner `recursion()` function with `@memoize_decorator`, it takes up to 90 seconds to complete for testcase 8... – tonix Dec 22 '19 at 15:52
If I comment the `@memoize_decorator` line, then `longest_common_subsequence_recursive_memoized` (modified to be identical to the MIT function with the equivalent `if res_matrix[i][j] == None:` test) runs within 25 seconds... Could it be because of the `cache` data structure used in `memoize_decorator`? – tonix Dec 22 '19 at 16:13

Why does this LCS (Longest Common Subsequence) Python implementation with memoization perform badly?

1 Answers1