2

I have the following list A of tuples, and I would like to split it into a list of lists, as shown in B. The rule is: tuples that share the same first and fourth elements should be packed together into one inner list of the result B.

# Sample input: each tuple has five fields. Tuples are to be grouped by their
# first (index 0) and fourth (index 3) elements.
A = [(1, 'C-30219', 'C-30060', 'C-6235d935d39c258876476e35a7acfd69-1-1', 2),
     (1, 'C-30060', 'C-30022', 'C-6235d935d39c258876476e35a7acfd69-1-1', 3),
     (1, 'C-30022', 'C-30205', 'C-6235d935d39c258876476e35a7acfd69-1-1', 4),
     (3, 'C-30248', 'C-30260', 'C-ac19d0edcf4d4ebe071e8d43be1901e2-1-1', 4),
     (3, 'C-30260', 'C-30108', 'C-ac19d0edcf4d4ebe071e8d43be1901e2-1-1', 5),
     (3, 'C-30108', 'C-30240', 'C-ac19d0edcf4d4ebe071e8d43be1901e2-1-1', 6),
     (5, 'C-30269', 'C-30285', 'C-d0d36bb9f2a7e248638cff9a04065977-1-1', 9),
     (5, 'C-30285', 'C-30109', 'C-d0d36bb9f2a7e248638cff9a04065977-1-1', 10),
     (5, 'C-30109', 'C-30211', 'C-d0d36bb9f2a7e248638cff9a04065977-1-1', 11),
     (5, 'C-30211', 'C-30289', 'C-d0d36bb9f2a7e248638cff9a04065977-1-1', 12),
     (5, 'C-30072', 'C-30375', 'C-710c460e8dfc2b3a523e077b6c6bdb40-1-1', 15),
     (5, 'C-30375', 'C-30095', 'C-710c460e8dfc2b3a523e077b6c6bdb40-1-1', 16)]

Out:

# Desired result: the tuples of A split into inner lists, one per group of
# tuples that share the same first (index 0) and fourth (index 3) elements.
B = [[(1, 'C-30219', 'C-30060', 'C-6235d935d39c258876476e35a7acfd69-1-1', 2),
      (1, 'C-30060', 'C-30022', 'C-6235d935d39c258876476e35a7acfd69-1-1', 3),
      (1, 'C-30022', 'C-30205', 'C-6235d935d39c258876476e35a7acfd69-1-1', 4)],
     [(3, 'C-30248', 'C-30260', 'C-ac19d0edcf4d4ebe071e8d43be1901e2-1-1', 4),
      (3, 'C-30260', 'C-30108', 'C-ac19d0edcf4d4ebe071e8d43be1901e2-1-1', 5),
      (3, 'C-30108', 'C-30240', 'C-ac19d0edcf4d4ebe071e8d43be1901e2-1-1', 6)],
     [(5, 'C-30269', 'C-30285', 'C-d0d36bb9f2a7e248638cff9a04065977-1-1', 9),
      (5, 'C-30285', 'C-30109', 'C-d0d36bb9f2a7e248638cff9a04065977-1-1', 10),
      (5, 'C-30109', 'C-30211', 'C-d0d36bb9f2a7e248638cff9a04065977-1-1', 11),
      (5, 'C-30211', 'C-30289', 'C-d0d36bb9f2a7e248638cff9a04065977-1-1', 12)],
     [(5, 'C-30072', 'C-30375', 'C-710c460e8dfc2b3a523e077b6c6bdb40-1-1', 15),
      (5, 'C-30375', 'C-30095', 'C-710c460e8dfc2b3a523e077b6c6bdb40-1-1', 16)]]

Here is my attempt. It produces the desired output, but in a roundabout way. I am looking for a more efficient and more Pythonic way to achieve this.

# Group the tuples of A by their (first, fourth) elements.
# dict.fromkeys de-duplicates while keeping first-seen order, so the groups
# come out in the order their keys first appear in A; iterating a set of the
# keys would give an arbitrary order.
inter = list(dict.fromkeys((row[0], row[3]) for row in A))
# Each group is an insertion-ordered dict used as an ordered set, so a tuple
# reached from both of its neighbours is stored only once.
B = {o_t: {} for o_t in inter}
for prev, cur in zip(A, A[1:]):
    key = (cur[0], cur[3])
    # Only tuples whose key matches an adjacent tuple are collected; a key
    # that never repeats on neighbouring tuples keeps an empty group.
    if key == (prev[0], prev[3]):
        B[key].update(dict.fromkeys((prev, cur)))
# Order every group by its last element (stable, so ties keep input order).
B = {key: sorted(members, key=lambda x: x[-1]) for key, members in B.items()}
list(B.values())
tcokyasar
  • 582
  • 1
  • 9
  • 26

2 Answers2

1

This is a perfect task for groupby from itertools:

from itertools import groupby
A = [(1, 'C-30219', 'C-30060', 'C-6235d935d39c258876476e35a7acfd69-1-1', 2),
     (1, 'C-30060', 'C-30022', 'C-6235d935d39c258876476e35a7acfd69-1-1', 3),
     (1, 'C-30022', 'C-30205', 'C-6235d935d39c258876476e35a7acfd69-1-1', 4),
     (3, 'C-30248', 'C-30260', 'C-ac19d0edcf4d4ebe071e8d43be1901e2-1-1', 4),
     (3, 'C-30260', 'C-30108', 'C-ac19d0edcf4d4ebe071e8d43be1901e2-1-1', 5),
     (3, 'C-30108', 'C-30240', 'C-ac19d0edcf4d4ebe071e8d43be1901e2-1-1', 6),
     (5, 'C-30269', 'C-30285', 'C-d0d36bb9f2a7e248638cff9a04065977-1-1', 9),
     (5, 'C-30285', 'C-30109', 'C-d0d36bb9f2a7e248638cff9a04065977-1-1', 10),
     (5, 'C-30109', 'C-30211', 'C-d0d36bb9f2a7e248638cff9a04065977-1-1', 11),
     (5, 'C-30211', 'C-30289', 'C-d0d36bb9f2a7e248638cff9a04065977-1-1', 12),
     (5, 'C-30072', 'C-30375', 'C-710c460e8dfc2b3a523e077b6c6bdb40-1-1', 15),
     (5, 'C-30375', 'C-30095', 'C-710c460e8dfc2b3a523e077b6c6bdb40-1-1', 16)]


def _first_and_fourth(row):
    """Return the grouping key of *row*: its first and fourth elements."""
    return row[0], row[3]


# groupby starts a new group every time the key changes, so only consecutive
# tuples with equal keys end up in the same inner list.
B = [list(run) for _key, run in groupby(A, key=_first_and_fourth)]

print(B)

output

[[(1, 'C-30219', 'C-30060', 'C-6235d935d39c258876476e35a7acfd69-1-1', 2),
  (1, 'C-30060', 'C-30022', 'C-6235d935d39c258876476e35a7acfd69-1-1', 3),
  (1, 'C-30022', 'C-30205', 'C-6235d935d39c258876476e35a7acfd69-1-1', 4)],
 [(3, 'C-30248', 'C-30260', 'C-ac19d0edcf4d4ebe071e8d43be1901e2-1-1', 4),
  (3, 'C-30260', 'C-30108', 'C-ac19d0edcf4d4ebe071e8d43be1901e2-1-1', 5),
  (3, 'C-30108', 'C-30240', 'C-ac19d0edcf4d4ebe071e8d43be1901e2-1-1', 6)],
 [(5, 'C-30269', 'C-30285', 'C-d0d36bb9f2a7e248638cff9a04065977-1-1', 9),
  (5, 'C-30285', 'C-30109', 'C-d0d36bb9f2a7e248638cff9a04065977-1-1', 10),
  (5, 'C-30109', 'C-30211', 'C-d0d36bb9f2a7e248638cff9a04065977-1-1', 11),
  (5, 'C-30211', 'C-30289', 'C-d0d36bb9f2a7e248638cff9a04065977-1-1', 12)],
 [(5, 'C-30072', 'C-30375', 'C-710c460e8dfc2b3a523e077b6c6bdb40-1-1', 15),
  (5, 'C-30375', 'C-30095', 'C-710c460e8dfc2b3a523e077b6c6bdb40-1-1', 16)]]
​

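NOTE: I am assuming that A is already sorted by its first and fourth elements. groupby only groups consecutive items with equal keys: it turns the list [1,1,1,2,2,1,3,3] into the groups [1,1,1], [2,2], [1], [3,3], so it won't collect all the 1's into a single group. If A is not sorted, sort it by the same key before calling groupby.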

Epsi95
  • 8,832
  • 1
  • 16
  • 34
  • 1
    With my original dataset, my code takes 7.816915 seconds, yours 0.672617 seconds, @balderman 's 1.996961 seconds. As Pythonicity was the metric, I accepted yours. – tcokyasar Feb 21 '21 at 09:51
1

Below is an approach based on defaultdict; it does not require A to be sorted:

from collections import defaultdict

A = [(1, 'C-30219', 'C-30060', 'C-6235d935d39c258876476e35a7acfd69-1-1', 2),
     (1, 'C-30060', 'C-30022', 'C-6235d935d39c258876476e35a7acfd69-1-1', 3),
     (1, 'C-30022', 'C-30205', 'C-6235d935d39c258876476e35a7acfd69-1-1', 4),
     (3, 'C-30248', 'C-30260', 'C-ac19d0edcf4d4ebe071e8d43be1901e2-1-1', 4),
     (3, 'C-30260', 'C-30108', 'C-ac19d0edcf4d4ebe071e8d43be1901e2-1-1', 5),
     (3, 'C-30108', 'C-30240', 'C-ac19d0edcf4d4ebe071e8d43be1901e2-1-1', 6),
     (5, 'C-30269', 'C-30285', 'C-d0d36bb9f2a7e248638cff9a04065977-1-1', 9),
     (5, 'C-30285', 'C-30109', 'C-d0d36bb9f2a7e248638cff9a04065977-1-1', 10),
     (5, 'C-30109', 'C-30211', 'C-d0d36bb9f2a7e248638cff9a04065977-1-1', 11),
     (5, 'C-30211', 'C-30289', 'C-d0d36bb9f2a7e248638cff9a04065977-1-1', 12),
     (5, 'C-30072', 'C-30375', 'C-710c460e8dfc2b3a523e077b6c6bdb40-1-1', 15),
     (5, 'C-30375', 'C-30095', 'C-710c460e8dfc2b3a523e077b6c6bdb40-1-1', 16)]

# Bucket every tuple under its (first, fourth) key; a missing key starts out
# as an empty list, and the dict keeps keys in first-seen order.
data = defaultdict(list)
for row in A:
    first, fourth = row[0], row[3]
    data[first, fourth].append(row)

# One inner list per key, then print each group on its own line.
B = list(data.values())
for group in B:
    print(group)

output

[(1, 'C-30219', 'C-30060', 'C-6235d935d39c258876476e35a7acfd69-1-1', 2), (1, 'C-30060', 'C-30022', 'C-6235d935d39c258876476e35a7acfd69-1-1', 3), (1, 'C-30022', 'C-30205', 'C-6235d935d39c258876476e35a7acfd69-1-1', 4)]
[(3, 'C-30248', 'C-30260', 'C-ac19d0edcf4d4ebe071e8d43be1901e2-1-1', 4), (3, 'C-30260', 'C-30108', 'C-ac19d0edcf4d4ebe071e8d43be1901e2-1-1', 5), (3, 'C-30108', 'C-30240', 'C-ac19d0edcf4d4ebe071e8d43be1901e2-1-1', 6)]
[(5, 'C-30269', 'C-30285', 'C-d0d36bb9f2a7e248638cff9a04065977-1-1', 9), (5, 'C-30285', 'C-30109', 'C-d0d36bb9f2a7e248638cff9a04065977-1-1', 10), (5, 'C-30109', 'C-30211', 'C-d0d36bb9f2a7e248638cff9a04065977-1-1', 11), (5, 'C-30211', 'C-30289', 'C-d0d36bb9f2a7e248638cff9a04065977-1-1', 12)]
[(5, 'C-30072', 'C-30375', 'C-710c460e8dfc2b3a523e077b6c6bdb40-1-1', 15), (5, 'C-30375', 'C-30095', 'C-710c460e8dfc2b3a523e077b6c6bdb40-1-1', 16)]
balderman
  • 22,927
  • 7
  • 34
  • 52