0

We have a jenkins build where we noticed a few hundred class files are being packaged at two different locations. We would like to remove it from one of the location to avoid duplication.

Below is the code I have written but its not deleting duplicates.

import os
import zipfile

def delete_duplicate_files(jar_zip_path, directory_path):
    deleted_files = []

    with zipfile.ZipFile(jar_zip_path, 'r') as zip_file:
        # Extract the JAR file from the ZIP archive
        for item in zip_file.infolist():
            if item.filename == jar_path:
                jar_file_path = os.path.join(os.getcwd(), item.filename)
                zip_file.extract(item, os.getcwd())
                break
        else:
            return deleted_files

    # Extract the WAR file from the EAR file
    ear_file_path = os.path.join(os.path.dirname(jar_file_path), "HyperionPlanning.ear")
    with zipfile.ZipFile(ear_file_path, 'r') as ear_file:
        war_file_path = os.path.join(os.getcwd(), "HyperionPlanning.war")
        ear_file.extract("HyperionPlanning.war", os.getcwd())

    print(f"Deleting duplicates in directory: {directory_path}")

    # Compare and delete duplicate files in the directory and its subdirectories
    for root, dirs, files in os.walk(directory_path):
        for file in files:
            file_path = os.path.join(root, file)

            if file_exists_in_jar(file, jar_file_path) or file_exists_in_war(file, war_file_path):
                os.remove(file_path)
                deleted_files.append(file_path)

    # Clean up the extracted files
    os.remove(jar_file_path)
    os.remove(war_file_path)
    os.remove(ear_file_path)

    return deleted_files

def file_exists_in_jar(file_name, jar_file_path):
    with zipfile.ZipFile(jar_file_path, 'r') as jar_file:
        for item in jar_file.infolist():
            if item.filename.endswith('.class') and os.path.basename(item.filename) == file_name:
                return True
    return False

def file_exists_in_war(file_name, war_file_path):
    with zipfile.ZipFile(war_file_path, 'r') as war_file:
        for item in war_file.infolist():
            if item.filename.endswith('.class') and os.path.basename(item.filename) == file_name:
                return True
    return False

jar_zip_path = r"C:\Users\esbld_hp\Documents\project\30293915.23.07.6147.zip"
directory_path = r"C:\Users\esbld_hp\Documents\project\30293915.23.07.6147.zip\30293915\files\products\Planning\AppServer\InstallableApps\Common\HyperionPlanning.ear\HyperionPlanning.war\WEB-INF\classes"
jar_path = r"C:\Users\esbld_hp\Documents\project\30293915.23.07.6147.zip\30293915\files\products\Planning\lib\HspJS.jar"

deleted_files = delete_duplicate_files(jar_zip_path, directory_path)

print("Deleted files:")
for file in deleted_files:
    print(file)

Code is not giving errors but at the same time its not deleting the duplicates.

0 Answers0