[Python]: filecmp - File and Directory Comparisons supports recursive traversal via `dircmp.subdirs`, so there is no need for `os.walk` (or any other similar function).
code.py:
import sys
import filecmp
import os
# The two directory trees to compare; main() passes both to filecmp.dircmp.
main_folder_v1 = "dir_v1"
main_folder_v2 = "dir_v2"
# Default path prefix used by the traversal functions for the top level.
# It is empty, so top-level file names are reported without a leading directory.
ROOT_DIR_MARKER = ""
def traverse_dircmp(dircmp_obj, dir_name=ROOT_DIR_MARKER):
    """Lazily yield the relative path of every differing file in a comparison.

    Args:
        dircmp_obj: A ``filecmp.dircmp`` instance (or one of the values of
            its ``subdirs`` mapping) for the directory pair being examined.
        dir_name: Path prefix, relative to the comparison root, joined in
            front of every file name reported at this level.

    Yields:
        One path per entry of ``diff_files``: first the entries of the
        current level, then those of each subdirectory, depth-first.
    """
    # Files that differ directly inside the current directory pair.
    for file_name in dircmp_obj.diff_files:
        yield os.path.join(dir_name, file_name)
    # Recurse into every entry of ``subdirs``, extending the prefix as we go.
    for child_name, child_cmp in dircmp_obj.subdirs.items():
        child_prefix = os.path.join(dir_name, child_name)
        yield from traverse_dircmp(child_cmp, dir_name=child_prefix)
        # Equivalent of the `yield from` line above for Python < 3.3:
        #for item in traverse_dircmp(child_cmp, dir_name=child_prefix):
        #    yield item
def traverse_dircmp_list(dircmp_obj, dir_name=ROOT_DIR_MARKER):
    """Return a list with the relative path of every differing file.

    Eager counterpart of the generator-based traversal: the whole result is
    built in memory before it is returned.

    Args:
        dircmp_obj: A ``filecmp.dircmp`` instance (or one of the values of
            its ``subdirs`` mapping) for the directory pair being examined.
        dir_name: Path prefix, relative to the comparison root, joined in
            front of every file name reported at this level.

    Returns:
        A list of paths: the ``diff_files`` entries of the current level
        followed by those of each subdirectory, depth-first.
    """
    collected = []
    # Files that differ directly inside the current directory pair.
    for file_name in dircmp_obj.diff_files:
        collected.append(os.path.join(dir_name, file_name))
    # Append the results of every subdirectory after the current level's own.
    for child_name, child_cmp in dircmp_obj.subdirs.items():
        child_prefix = os.path.join(dir_name, child_name)
        collected += traverse_dircmp_list(child_cmp, dir_name=child_prefix)
    return collected
def main():
    """Compare the two configured folders and print the differing files twice.

    The same ``filecmp.dircmp`` object is walked first with the generator
    traversal and then with the list-building traversal, and each result is
    printed on its own labelled line.
    """
    line_template = "{:s}: {:}"
    dcmp = filecmp.dircmp(main_folder_v1, main_folder_v2)

    # Generator flavour: nothing is traversed until list() consumes it.
    lazy_diffs = traverse_dircmp(dcmp)
    print(line_template.format("Different files (gen)", list(lazy_diffs)))

    # List flavour: the full result is built before printing.
    eager_diffs = traverse_dircmp_list(dcmp)
    print(line_template.format("Different files (list)", eager_diffs))
if __name__ == "__main__":
    # Print the interpreter version and platform first, so the output
    # records which Python produced it.
    banner = "Python {:s} on {:s}\n".format(sys.version, sys.platform)
    print(banner)
    main()
Output (for a dir structure similar to yours):
(py35x64_test) e:\Work\Dev\StackOverflow\q050157870>"e:\Work\Dev\VEnvs\py35x64_test\Scripts\python.exe" code.py
Python 3.5.4 (v3.5.4:3f56838, Aug 8 2017, 02:17:05) [MSC v.1900 64 bit (AMD64)] on win32
Different files (gen): ['foo.json', 'subdir00\\bar.json', 'subdir00\\subdir001\\x.json']
Different files (list): ['foo.json', 'subdir00\\bar.json', 'subdir00\\subdir001\\x.json']
@EDIT0:
- Modified the `traverse_dircmp` function to return the list of files instead of printing them, as requested in one of the comments.
@EDIT1:
- Added generator functionality (as a personal exercise), which is the newer (and preferred) style and doesn't consume memory in the case of huge directories. Note that it requires Python 3.3 or higher; on older versions, the `yield from` statement can be replaced by the 2 commented-out lines (`for` and `yield`) below it.