Here is my solution (only two or three lines of codes) to check for any number of columns to see whether they are one to one match (duplicated matches are allowed, see the example bellow).
cols = ['A', 'B'] # or any number of columns ['A', 'B', 'C']
res = df.groupby(cols).count()
uniqueness = [res.index.get_level_values(i).is_unique
for i in range(res.index.nlevels)]
all(uniqueness)
Let's make it a function and add some docs:
def is_one_to_one(df, cols):
"""Check whether any number of columns are one-to-one match.
df: a pandas.DataFrame
cols: must be a list of columns names
Duplicated matches are allowed:
a - 1
b - 2
b - 2
c - 3
(This two cols will return True)
"""
if len(cols) == 1:
return True
# You can define you own rules for 1 column check, Or forbid it
# MAIN THINGs: for 2 or more columns check!
res = df.groupby(cols).count()
# The count number info is actually bootless.
# What maters here is the grouped *MultiIndex*
# and its uniqueness in each level
uniqueness = [res.index.get_level_values(i).is_unique
for i in range(res.index.nlevels)]
return all(uniqueness)
By using this function, you can do the one-to-one match check:
df = pd.DataFrame({'A': [0, 1, 2, 0],
'B': ["'a'", "'b'", "'c'", "'a'"],
'C': ["'apple'", "'banana'", "'apple'", "'apple'"],})
is_one_to_one(df, ['A', 'B'])
is_one_to_one(df, ['A', 'C'])
is_one_to_one(df, ['A', 'B', 'C'])
# Outputs:
# True
# False
# False