import pandas as pd
def getDuplicateColumns(df):
    """Return the labels of columns whose contents repeat an earlier column.

    Columns are compared pairwise by position with ``Series.equals``. When
    two columns match, the label of the later one (the one further right)
    is reported; the first occurrence is never reported.

    Args:
        df: The pandas DataFrame to inspect.

    Returns:
        A list of column labels, each appearing once, ordered by the
        position of the column in ``df``. Empty if nothing is duplicated.
    """
    num_columns = df.shape[1]
    # Positions (not labels) of columns already identified as duplicates.
    duplicate_positions = set()

    for x in range(num_columns):
        if x in duplicate_positions:
            # This column is a copy of an earlier one, so any later column
            # equal to it was already flagged while scanning that earlier
            # column; comparing again would only repeat work.
            continue
        col = df.iloc[:, x]
        for y in range(x + 1, num_columns):
            if y in duplicate_positions:
                continue
            if col.equals(df.iloc[:, y]):
                duplicate_positions.add(y)

    # Emit labels in column order so the result is deterministic, and
    # collapse repeated labels so each name is still reported only once.
    labels = df.columns.values
    return list(dict.fromkeys(labels[pos] for pos in sorted(duplicate_positions)))
# Driver code
if __name__ == "__main__":
    # Demo data: one tuple per student. The second and fourth fields hold
    # the same value in every row, so the last column duplicates 'Age'.
    header = ['Name', 'Age', 'Domicile', 'Marks']
    records = [
        ('Ankit', 34, 'Uttar pradesh', 34),
        ('Riti', 30, 'Delhi', 30),
        ('Aadi', 16, 'Delhi', 16),
        ('Riti', 30, 'Delhi', 30),
        ('Riti', 30, 'Delhi', 30),
        ('Riti', 30, 'Mumbai', 30),
        ('Ankita', 40, 'Bihar', 40),
        ('Sachin', 30, 'Delhi', 30),
    ]

    # Build the frame from the tuples using the header as column labels.
    df = pd.DataFrame(records, columns=header)

    # Report every column that repeats an earlier one, one per line.
    for column in getDuplicateColumns(df):
        print('Column Name : ', column)