How can I resolve a 'PermissionError' when using sequential pattern mining with SPMF on Windows?

Question

When I try the code in the link below, I get the following error on line 39. The name of the temp file changes every time. How can I resolve this error?

https://github.com/dmeoli/OnlineRetail/blob/master/DM_Group18_TASK4/sequential_pattern_mining.ipynb

def spmf_encode_with_timestamp(dataset):
    items = sorted(set(item.replace(' ', '_') for sequence in dataset 
                       for event in sequence 
                       for item in (event[1] if isinstance(event, tuple) else event)))
    labels_dict = dict(zip(items, range(len(items))))
    spmf_str = '@CONVERTED_FROM_TEXT' + '\n'
    for item, idx in labels_dict.items():
        spmf_str += '@ITEM=' + str(idx) + '=' + item + '\n'
    for sequence in dataset:
        for t, event in sequence if isinstance(sequence[0], tuple) else enumerate(sequence):
            spmf_str += '<' + str(t) + '>' + ' '
            for item in event:
                spmf_str += str(labels_dict[item.replace(' ', '_')]) + ' '
            spmf_str += '-1' + ' '
        spmf_str += '-2' + '\n'
    return spmf_str

dataset = [
    [['a'], ['a', 'b', 'c'], ['a', 'c'], ['c']],
    [['a'], ['c'], ['b', 'c']], 
    [['a', 'b'], ['d'], ['c'], ['b'], ['c']], 
    [['a'], ['c'], ['b'], ['c']]
]
def is_subsequence(main_sequence, subsequence):
    def is_subsequence_recursive(subsequence_clone, start=0):
        if not subsequence_clone:
            return True
        first_elem = set(subsequence_clone.pop(0))
        for i in range(start, len(main_sequence)):
            if set(main_sequence[i]).issuperset(first_elem):
                return is_subsequence_recursive(subsequence_clone, i + 1)
        return False
    return is_subsequence_recursive(subsequence.copy())
sequence = [['a'], ['b', 'c'], ['d'], ['a', 'e']]
is_subsequence(sequence, [['a'], ['b', 'c'], ['e']])
is_subsequence(sequence, [['a'], ['b', 'd']])
def sequence_length(sequence):
    return sum(len(i) for i in sequence)
sequence_length([['a'], ['b', 'c'], ['a'], ['b', 'c', 'd']])
def supports(sequence, cand_seq, max_span=np.inf, min_gap=0, max_gap=np.inf):
    for idx, event in enumerate(sequence):
        i = 0
        if set(event[1] if isinstance(event, tuple) else event).issuperset(cand_seq[i]):
            min_t = event[0] if isinstance(event, tuple) else idx
            i += 1
            if i == len(cand_seq):
                return True
            prev_t = event[0] if isinstance(event, tuple) else idx
            for t, itemset in (sequence[idx + 1:] if isinstance(sequence[idx], tuple) 
                               else enumerate(sequence[idx + 1:], start=idx + 1)):
                if not t - prev_t > min_gap:
                    continue
                if not t - prev_t <= max_gap:
                    break
                if t - min_t > max_span:
                    break
                if set(itemset).issuperset(cand_seq[i]):
                    i += 1
                if i == len(cand_seq):
                    return True
    return False
sequence = [[1, 3], [3, 4], [4], [5], [6, 7], [8]]
print(supports(sequence, [[3], [4]], max_span=3))
print(supports(sequence, [[3], [6]], max_span=3))
print(supports(sequence, [[1, 3], [6]], max_span=3))
print(supports(sequence, [[3], [6]], min_gap=1))
print(supports(sequence, [[3], [6]], max_gap=3))
print(supports(sequence, [[6], [8]], min_gap=1))
print(supports(sequence, [[6], [8]], max_gap=3))
print(supports(sequence, [[1, 3], [6]], min_gap=1))
print(supports(sequence, [[1, 3], [6]], max_gap=3))
print(supports(sequence, [[1], [3], [8]], min_gap=1))
print(supports(sequence, [[1], [3], [8]], max_gap=3))
supports([[2, 4], [3, 5, 6], [4, 7], [4, 5], [8]], [[6], [5]], max_span=4, min_gap=0, max_gap=2)
supports([[1], [2], [3], [4], [5]], [[1], [4]], max_span=4, min_gap=0, max_gap=2)
supports([[1], [2, 3], [3, 4], [4, 5]], [[2], [3], [5]], max_span=4, min_gap=0, max_gap=2)
supports([[1, 2], [3], [2, 3], [3, 4], [2, 4], (6, [4, 5])], [[1, 2], [5]], max_span=4, min_gap=0, max_gap=2)
def count_support(dataset, cand_seq, max_span=np.inf, min_gap=0, max_gap=np.inf):
    if max_span == np.inf and min_gap == 0 and max_gap == np.inf:
        return sum(1 for seq in dataset if is_subsequence([event[1] for event in seq] if isinstance(seq[0], tuple) else seq, cand_seq))
    else:
        return sum(1 for seq in dataset if supports(seq, cand_seq, max_span, min_gap, max_gap))
count_support(dataset, [['b']])
count_support(dataset, [['a'], ['b', 'c']])
def gen_cands_for_pair(cand1, cand2):
    cand1_clone = copy.deepcopy(cand1)
    cand2_clone = copy.deepcopy(cand2)
    if len(cand1[0]) == 1:
        cand1_clone.pop(0)
    else:
        cand1_clone[0] = cand1_clone[0][1:]
    if len(cand2[-1]) == 1:
        cand2_clone.pop(-1)
    else:
        cand2_clone[-1] = cand2_clone[-1][:-1]
    if not cand1_clone == cand2_clone:
        return []
    else:
        new_cand = copy.deepcopy(cand1)
        if len(cand2[-1]) == 1:
            new_cand.append(cand2[-1])
        else:
            new_cand[-1].extend([cand2[-1][-1]])
        return new_cand
candA = [['a'], ['b', 'c'], ['d']]
candB = [['b', 'c'], ['d', 'e']]
gen_cands_for_pair(candA, candB)
candA = [['a'], ['b', 'c'], ['d']]
candC = [['b', 'c'], ['d'], ['e']]
gen_cands_for_pair(candA, candC)
candA = [['a'], ['b', 'c'], ['d']]
candD = [['a'], ['b', 'c'], ['e']]
gen_cands_for_pair(candA, candD)
def gen_cands(last_lvl_cands):
    k = sequence_length(last_lvl_cands[0]) + 1
    if k == 2:
        flat_short_cands = [item for sublist2 in last_lvl_cands for sublist1 in sublist2 for item in sublist1]
        result = [[[a, b]] for a in flat_short_cands for b in flat_short_cands if b > a]
        result.extend([[[a], [b]] for a in flat_short_cands for b in flat_short_cands])
        return result
    else:
        cands = []
        for i in range(0, len(last_lvl_cands)):
            for j in range(0, len(last_lvl_cands)):
                new_cand = gen_cands_for_pair(last_lvl_cands[i], last_lvl_cands[j])
                if not new_cand == []:
                    cands.append(new_cand)
        cands.sort()
        return cands
last_lvl_freq_patterns = [
    [['a', 'b']], 
    [['b', 'c']], 
    [['a'], ['b']], 
    [['a'], ['c']], 
    [['b'], ['c']], 
    [['c'], ['b']], 
    [['c'], ['c']]
]
new_cands = gen_cands(last_lvl_freq_patterns)
new_cands
def gen_direct_subsequences(sequence):
    result = []
    for i, itemset in enumerate(sequence):
        if len(itemset) == 1:
            seq_clone = copy.deepcopy(sequence)
            seq_clone.pop(i)
            result.append(seq_clone)
        else:
            for j in range(len(itemset)):
                seq_clone = copy.deepcopy(sequence)
                seq_clone[i].pop(j)
                result.append(seq_clone)
    return result
def gen_contiguous_direct_subsequences(sequence):
    result = []
    for i, itemset in enumerate(sequence):
        if i == 0 or i == len(sequence) - 1:
            if len(itemset) == 1:
                seq_clone = copy.deepcopy(sequence)
                seq_clone.pop(i)
                result.append(seq_clone)
            else:
                for j in range(len(itemset)):
                    seq_clone = copy.deepcopy(sequence)
                    seq_clone[i].pop(j)
                    result.append(seq_clone)
        else:
            if len(itemset) > 1:
                for j in range(len(itemset)):
                    seq_clone = copy.deepcopy(sequence)
                    seq_clone[i].pop(j)
                    result.append(seq_clone)
    return result
def prune_cands(last_lvl_cands, cands_gen, max_gap=np.inf):
    return [cand for cand in cands_gen if all(x in last_lvl_cands for x in (gen_contiguous_direct_subsequences(cand) if max_gap != np.inf 
                                                                            else gen_direct_subsequences(cand)))]
cands_pruned = prune_cands(last_lvl_freq_patterns, new_cands)
cands_pruned
min_sup = 0.5
cands_counts = [(s, count_support(dataset, s)) for s in cands_pruned]
result_lvl = [(i, count) for i, count in cands_counts if count >= min_sup * len(dataset)]
result_lvl
def gsp(dataset, min_sup, max_span=np.inf, min_gap=0, max_gap=np.inf, verbose=False):
    overall = []
    min_sup *= len(dataset)
    items = sorted(set([item for sequence in dataset
                        for event in sequence
                        for item in (event[1] if isinstance(event, tuple) else event)]))
    single_item_sequences = [[[item]] for item in items]
    single_item_counts = [(s, count_support(dataset, s)) for s in single_item_sequences]
    single_item_counts = [(i, count) for i, count in single_item_counts if count >= min_sup]
    overall.append(single_item_counts)
    if verbose > 0:
        print('Result, lvl 1: ' + str(overall[0]))
    k = 1
    while overall[k - 1]:
        last_lvl_cands = [x[0] for x in overall[k - 1]]
        cands_gen = gen_cands(last_lvl_cands)
        cands_pruned = prune_cands(last_lvl_cands, cands_gen, max_gap)
        cands_counts = [(s, count_support(dataset, s, max_span, min_gap, max_gap)) for s in cands_pruned]
        result_lvl = [(i, count) for i, count in cands_counts if count >= min_sup]
        if verbose > 0:
            print('Result, lvl ' + str(k + 1) + ': ' + str(result_lvl))
            if verbose > 1:
                print('Candidates generated, lvl ' + str(k + 1) + ': ' + str(cands_gen))
                print('Candidates pruned, lvl ' + str(k + 1) + ': ' + str(cands_pruned))
        overall.append(result_lvl)
        k += 1
    overall = overall[:-1]
    overall = [item for sublist in overall for item in sublist]
    overall.sort(key=lambda tup: (tup[1], neg(sequence_length(tup[0]))), reverse=True)
    return overall
gsp(dataset, min_sup=0.5, verbose=2)
spmf_dataset = spmf_encode_with_timestamp(dataset)
print(spmf_dataset)
spmf = Spmf('GSP', input_direct=spmf_dataset, arguments=[0.5])
spmf.run()
freq_patterns = spmf.to_pandas_dataframe()
freq_patterns = [([event.split() for event in sequence], sup) 
                 for sequence, sup in zip(freq_patterns.pattern, freq_patterns.sup)]
freq_patterns.sort(key=lambda tup: (tup[1], neg(sequence_length(tup[0]))), reverse=True)
freq_patterns

After this line, I get the following error:

Traceback (most recent call last):

File ~\anaconda3\lib\site-packages\spyder_kernels\py3compat.py:356 in compat_exec exec(code, globals, locals)

File c:\users\lenovo.spyder-py3\untitled1.py:228 spmf = Spmf('GSP', input_direct=spmf_dataset, arguments=[0.5])

File ~\anaconda3\lib\site-packages\spmf_init_.py:46 in init self.input_ = self.handle_input(

File ~\anaconda3\lib\site-packages\spmf_init_.py:62 in handle_input return self.write_temp_input_file(input_direct,

File ~\anaconda3\lib\site-packages\spmf_init_.py:87 in write_temp_input_file os.rename(name, name + file_ending)

PermissionError: [WinError 32] This process cannot access the file because the file is being used by another process: 'C:\Users\LENOVO\AppData\Local\Temp\tmp_7b5ygrv' -> 'C:\Users\LENOVO\AppData\Local\Temp\tmp_7b5ygrv.txt'

I made sure no other application was open and restarted the program. But the problem is not solved.

@MichaelRovinsky Hi, I added the code. If you can help it would be great! Thank you :) — Doğa, May 27 '23 at 15:22

How can I resolve a 'PermissionError' when using sequential pattern mining with SPMF on Windows?

0 Answers0