We have a jenkins build where we noticed a few hundred class files are being packaged at two different locations. We would like to remove it from one of the location to avoid duplication.
Below is the code I have written but its not deleting duplicates.
import os
import zipfile
def delete_duplicate_files(jar_zip_path, directory_path):
deleted_files = []
with zipfile.ZipFile(jar_zip_path, 'r') as zip_file:
# Extract the JAR file from the ZIP archive
for item in zip_file.infolist():
if item.filename == jar_path:
jar_file_path = os.path.join(os.getcwd(), item.filename)
zip_file.extract(item, os.getcwd())
break
else:
return deleted_files
# Extract the WAR file from the EAR file
ear_file_path = os.path.join(os.path.dirname(jar_file_path), "HyperionPlanning.ear")
with zipfile.ZipFile(ear_file_path, 'r') as ear_file:
war_file_path = os.path.join(os.getcwd(), "HyperionPlanning.war")
ear_file.extract("HyperionPlanning.war", os.getcwd())
print(f"Deleting duplicates in directory: {directory_path}")
# Compare and delete duplicate files in the directory and its subdirectories
for root, dirs, files in os.walk(directory_path):
for file in files:
file_path = os.path.join(root, file)
if file_exists_in_jar(file, jar_file_path) or file_exists_in_war(file, war_file_path):
os.remove(file_path)
deleted_files.append(file_path)
# Clean up the extracted files
os.remove(jar_file_path)
os.remove(war_file_path)
os.remove(ear_file_path)
return deleted_files
def file_exists_in_jar(file_name, jar_file_path):
with zipfile.ZipFile(jar_file_path, 'r') as jar_file:
for item in jar_file.infolist():
if item.filename.endswith('.class') and os.path.basename(item.filename) == file_name:
return True
return False
def file_exists_in_war(file_name, war_file_path):
with zipfile.ZipFile(war_file_path, 'r') as war_file:
for item in war_file.infolist():
if item.filename.endswith('.class') and os.path.basename(item.filename) == file_name:
return True
return False
jar_zip_path = r"C:\Users\esbld_hp\Documents\project\30293915.23.07.6147.zip"
directory_path = r"C:\Users\esbld_hp\Documents\project\30293915.23.07.6147.zip\30293915\files\products\Planning\AppServer\InstallableApps\Common\HyperionPlanning.ear\HyperionPlanning.war\WEB-INF\classes"
jar_path = r"C:\Users\esbld_hp\Documents\project\30293915.23.07.6147.zip\30293915\files\products\Planning\lib\HspJS.jar"
deleted_files = delete_duplicate_files(jar_zip_path, directory_path)
print("Deleted files:")
for file in deleted_files:
print(file)
Code is not giving errors but at the same time its not deleting the duplicates.