When I try the code in the link below, I get the following error on line 39. The name of the temp file changes every time. How can I resolve this error?
https://github.com/dmeoli/OnlineRetail/blob/master/DM_Group18_TASK4/sequential_pattern_mining.ipynb
def spmf_encode_with_timestamp(dataset):
items = sorted(set(item.replace(' ', '_') for sequence in dataset
for event in sequence
for item in (event[1] if isinstance(event, tuple) else event)))
labels_dict = dict(zip(items, range(len(items))))
spmf_str = '@CONVERTED_FROM_TEXT' + '\n'
for item, idx in labels_dict.items():
spmf_str += '@ITEM=' + str(idx) + '=' + item + '\n'
for sequence in dataset:
for t, event in sequence if isinstance(sequence[0], tuple) else enumerate(sequence):
spmf_str += '<' + str(t) + '>' + ' '
for item in event:
spmf_str += str(labels_dict[item.replace(' ', '_')]) + ' '
spmf_str += '-1' + ' '
spmf_str += '-2' + '\n'
return spmf_str
dataset = [
[['a'], ['a', 'b', 'c'], ['a', 'c'], ['c']],
[['a'], ['c'], ['b', 'c']],
[['a', 'b'], ['d'], ['c'], ['b'], ['c']],
[['a'], ['c'], ['b'], ['c']]
]
def is_subsequence(main_sequence, subsequence):
def is_subsequence_recursive(subsequence_clone, start=0):
if not subsequence_clone:
return True
first_elem = set(subsequence_clone.pop(0))
for i in range(start, len(main_sequence)):
if set(main_sequence[i]).issuperset(first_elem):
return is_subsequence_recursive(subsequence_clone, i + 1)
return False
return is_subsequence_recursive(subsequence.copy())
sequence = [['a'], ['b', 'c'], ['d'], ['a', 'e']]
is_subsequence(sequence, [['a'], ['b', 'c'], ['e']])
is_subsequence(sequence, [['a'], ['b', 'd']])
def sequence_length(sequence):
return sum(len(i) for i in sequence)
sequence_length([['a'], ['b', 'c'], ['a'], ['b', 'c', 'd']])
def supports(sequence, cand_seq, max_span=np.inf, min_gap=0, max_gap=np.inf):
for idx, event in enumerate(sequence):
i = 0
if set(event[1] if isinstance(event, tuple) else event).issuperset(cand_seq[i]):
min_t = event[0] if isinstance(event, tuple) else idx
i += 1
if i == len(cand_seq):
return True
prev_t = event[0] if isinstance(event, tuple) else idx
for t, itemset in (sequence[idx + 1:] if isinstance(sequence[idx], tuple)
else enumerate(sequence[idx + 1:], start=idx + 1)):
if not t - prev_t > min_gap:
continue
if not t - prev_t <= max_gap:
break
if t - min_t > max_span:
break
if set(itemset).issuperset(cand_seq[i]):
i += 1
if i == len(cand_seq):
return True
return False
sequence = [[1, 3], [3, 4], [4], [5], [6, 7], [8]]
print(supports(sequence, [[3], [4]], max_span=3))
print(supports(sequence, [[3], [6]], max_span=3))
print(supports(sequence, [[1, 3], [6]], max_span=3))
print(supports(sequence, [[3], [6]], min_gap=1))
print(supports(sequence, [[3], [6]], max_gap=3))
print(supports(sequence, [[6], [8]], min_gap=1))
print(supports(sequence, [[6], [8]], max_gap=3))
print(supports(sequence, [[1, 3], [6]], min_gap=1))
print(supports(sequence, [[1, 3], [6]], max_gap=3))
print(supports(sequence, [[1], [3], [8]], min_gap=1))
print(supports(sequence, [[1], [3], [8]], max_gap=3))
supports([[2, 4], [3, 5, 6], [4, 7], [4, 5], [8]], [[6], [5]], max_span=4, min_gap=0, max_gap=2)
supports([[1], [2], [3], [4], [5]], [[1], [4]], max_span=4, min_gap=0, max_gap=2)
supports([[1], [2, 3], [3, 4], [4, 5]], [[2], [3], [5]], max_span=4, min_gap=0, max_gap=2)
supports([[1, 2], [3], [2, 3], [3, 4], [2, 4], (6, [4, 5])], [[1, 2], [5]], max_span=4, min_gap=0, max_gap=2)
def count_support(dataset, cand_seq, max_span=np.inf, min_gap=0, max_gap=np.inf):
if max_span == np.inf and min_gap == 0 and max_gap == np.inf:
return sum(1 for seq in dataset if is_subsequence([event[1] for event in seq] if isinstance(seq[0], tuple) else seq, cand_seq))
else:
return sum(1 for seq in dataset if supports(seq, cand_seq, max_span, min_gap, max_gap))
count_support(dataset, [['b']])
count_support(dataset, [['a'], ['b', 'c']])
def gen_cands_for_pair(cand1, cand2):
cand1_clone = copy.deepcopy(cand1)
cand2_clone = copy.deepcopy(cand2)
if len(cand1[0]) == 1:
cand1_clone.pop(0)
else:
cand1_clone[0] = cand1_clone[0][1:]
if len(cand2[-1]) == 1:
cand2_clone.pop(-1)
else:
cand2_clone[-1] = cand2_clone[-1][:-1]
if not cand1_clone == cand2_clone:
return []
else:
new_cand = copy.deepcopy(cand1)
if len(cand2[-1]) == 1:
new_cand.append(cand2[-1])
else:
new_cand[-1].extend([cand2[-1][-1]])
return new_cand
candA = [['a'], ['b', 'c'], ['d']]
candB = [['b', 'c'], ['d', 'e']]
gen_cands_for_pair(candA, candB)
candA = [['a'], ['b', 'c'], ['d']]
candC = [['b', 'c'], ['d'], ['e']]
gen_cands_for_pair(candA, candC)
candA = [['a'], ['b', 'c'], ['d']]
candD = [['a'], ['b', 'c'], ['e']]
gen_cands_for_pair(candA, candD)
def gen_cands(last_lvl_cands):
k = sequence_length(last_lvl_cands[0]) + 1
if k == 2:
flat_short_cands = [item for sublist2 in last_lvl_cands for sublist1 in sublist2 for item in sublist1]
result = [[[a, b]] for a in flat_short_cands for b in flat_short_cands if b > a]
result.extend([[[a], [b]] for a in flat_short_cands for b in flat_short_cands])
return result
else:
cands = []
for i in range(0, len(last_lvl_cands)):
for j in range(0, len(last_lvl_cands)):
new_cand = gen_cands_for_pair(last_lvl_cands[i], last_lvl_cands[j])
if not new_cand == []:
cands.append(new_cand)
cands.sort()
return cands
last_lvl_freq_patterns = [
[['a', 'b']],
[['b', 'c']],
[['a'], ['b']],
[['a'], ['c']],
[['b'], ['c']],
[['c'], ['b']],
[['c'], ['c']]
]
new_cands = gen_cands(last_lvl_freq_patterns)
new_cands
def gen_direct_subsequences(sequence):
result = []
for i, itemset in enumerate(sequence):
if len(itemset) == 1:
seq_clone = copy.deepcopy(sequence)
seq_clone.pop(i)
result.append(seq_clone)
else:
for j in range(len(itemset)):
seq_clone = copy.deepcopy(sequence)
seq_clone[i].pop(j)
result.append(seq_clone)
return result
def gen_contiguous_direct_subsequences(sequence):
result = []
for i, itemset in enumerate(sequence):
if i == 0 or i == len(sequence) - 1:
if len(itemset) == 1:
seq_clone = copy.deepcopy(sequence)
seq_clone.pop(i)
result.append(seq_clone)
else:
for j in range(len(itemset)):
seq_clone = copy.deepcopy(sequence)
seq_clone[i].pop(j)
result.append(seq_clone)
else:
if len(itemset) > 1:
for j in range(len(itemset)):
seq_clone = copy.deepcopy(sequence)
seq_clone[i].pop(j)
result.append(seq_clone)
return result
def prune_cands(last_lvl_cands, cands_gen, max_gap=np.inf):
return [cand for cand in cands_gen if all(x in last_lvl_cands for x in (gen_contiguous_direct_subsequences(cand) if max_gap != np.inf
else gen_direct_subsequences(cand)))]
cands_pruned = prune_cands(last_lvl_freq_patterns, new_cands)
cands_pruned
min_sup = 0.5
cands_counts = [(s, count_support(dataset, s)) for s in cands_pruned]
result_lvl = [(i, count) for i, count in cands_counts if count >= min_sup * len(dataset)]
result_lvl
def gsp(dataset, min_sup, max_span=np.inf, min_gap=0, max_gap=np.inf, verbose=False):
overall = []
min_sup *= len(dataset)
items = sorted(set([item for sequence in dataset
for event in sequence
for item in (event[1] if isinstance(event, tuple) else event)]))
single_item_sequences = [[[item]] for item in items]
single_item_counts = [(s, count_support(dataset, s)) for s in single_item_sequences]
single_item_counts = [(i, count) for i, count in single_item_counts if count >= min_sup]
overall.append(single_item_counts)
if verbose > 0:
print('Result, lvl 1: ' + str(overall[0]))
k = 1
while overall[k - 1]:
last_lvl_cands = [x[0] for x in overall[k - 1]]
cands_gen = gen_cands(last_lvl_cands)
cands_pruned = prune_cands(last_lvl_cands, cands_gen, max_gap)
cands_counts = [(s, count_support(dataset, s, max_span, min_gap, max_gap)) for s in cands_pruned]
result_lvl = [(i, count) for i, count in cands_counts if count >= min_sup]
if verbose > 0:
print('Result, lvl ' + str(k + 1) + ': ' + str(result_lvl))
if verbose > 1:
print('Candidates generated, lvl ' + str(k + 1) + ': ' + str(cands_gen))
print('Candidates pruned, lvl ' + str(k + 1) + ': ' + str(cands_pruned))
overall.append(result_lvl)
k += 1
overall = overall[:-1]
overall = [item for sublist in overall for item in sublist]
overall.sort(key=lambda tup: (tup[1], neg(sequence_length(tup[0]))), reverse=True)
return overall
gsp(dataset, min_sup=0.5, verbose=2)
spmf_dataset = spmf_encode_with_timestamp(dataset)
print(spmf_dataset)
spmf = Spmf('GSP', input_direct=spmf_dataset, arguments=[0.5])
spmf.run()
freq_patterns = spmf.to_pandas_dataframe()
freq_patterns = [([event.split() for event in sequence], sup)
for sequence, sup in zip(freq_patterns.pattern, freq_patterns.sup)]
freq_patterns.sort(key=lambda tup: (tup[1], neg(sequence_length(tup[0]))), reverse=True)
freq_patterns
After this line, I get the following error:
Traceback (most recent call last):
File ~\anaconda3\lib\site-packages\spyder_kernels\py3compat.py:356 in compat_exec exec(code, globals, locals)
File c:\users\lenovo.spyder-py3\untitled1.py:228 spmf = Spmf('GSP', input_direct=spmf_dataset, arguments=[0.5])
File ~\anaconda3\lib\site-packages\spmf_init_.py:46 in init self.input_ = self.handle_input(
File ~\anaconda3\lib\site-packages\spmf_init_.py:62 in handle_input return self.write_temp_input_file(input_direct,
File ~\anaconda3\lib\site-packages\spmf_init_.py:87 in write_temp_input_file os.rename(name, name + file_ending)
PermissionError: [WinError 32] This process cannot access the file because the file is being used by another process: 'C:\Users\LENOVO\AppData\Local\Temp\tmp_7b5ygrv' -> 'C:\Users\LENOVO\AppData\Local\Temp\tmp_7b5ygrv.txt'
I made sure no other application was open and restarted the program. But the problem is not solved.