-1

I want to check if all of the files (B01:B12) are present in a certain folder. If that is the case it should return True. I know the end of the filenames, but the beginning can vary.

Currently, I have the following code. It works, but I feel that it could be done a lot more efficiently. Does anyone have an idea of how to improve this?

def Check3(filename, root):
    """Return True if every required ``*_Bxx.jp2`` file exists under ``root/filename``.

    The directory ``os.path.join(root, filename)`` is searched recursively.
    Only the end of each file name is compared, so the leading part of the
    names may vary freely.

    Args:
        filename: Name of the directory, relative to ``root``, to search.
        root: Directory that contains ``filename``.

    Returns:
        True when at least one file was found for each required suffix,
        False otherwise (including when the directory does not exist or
        contains no files).
    """
    required_suffixes = (
        '_B01.jp2', '_B02.jp2', '_B03.jp2', '_B04.jp2', '_B05.jp2',
        '_B06.jp2', '_B07.jp2', '_B08.jp2', '_B8A.jp2', '_B09.jp2',
        '_B10.jp2', '_B11.jp2', '_B12.jp2',
    )
    path = os.path.join(root, filename)

    # No os.chdir() here: os.walk() does not need it, changing the working
    # directory is a process-wide side effect, and with a relative ``root`` it
    # made the walk below look for ``path`` inside ``path``.
    still_missing = set(required_suffixes)
    for _dirpath, _dirnames, names in os.walk(path):
        still_missing -= {
            suffix for suffix in still_missing
            if any(name.endswith(suffix) for name in names)
        }
        if not still_missing:
            # Everything has been seen; no need to walk the rest of the tree.
            return True

    # Suffixes that were never seen simply stay in the set, so a missing file
    # yields False rather than an UnboundLocalError on an unassigned flag.
    return False
martineau
  • 119,623
  • 25
  • 170
  • 301
Niek
  • 51
  • 8

3 Answers

3

You can use pathlib to collect all the files, extract the last 8 characters of each file name, build the set of expected suffixes, and finally compare the two.

from pathlib import Path

# Collect the last 8 characters of the name of every .jp2 file found
# (recursively) below the directory.
all_last8 = {jp2.name[-8:] for jp2 in Path(r'your directory').rglob('*.jp2')}

# Suffixes that must all be present. Hard-coding them like this is more
# verbose but just as efficient at run time; extend the set as needed.
expected = {'_B01.jp2', '_B02.jp2', '_B03.jp2', }  # ...
# If the suffixes all follow the same numeric pattern they can be generated
# instead (note that this does not produce a non-numeric one such as _B8A.jp2):
# expected = {f'_B{i:02d}.jp2' for i in range(1, 13)}

# Every expected suffix has to appear among the suffixes that were found.
valid = expected.issubset(all_last8)
print(valid)

The code first collects the suffixes of all the file names and only then compares them; there may be more efficient approaches that compare while globbing.

Lei Yang
  • 3,970
  • 6
  • 38
  • 59
1

You could use the glob library; it lists the files under the folders you want to check that match a given pattern.

from glob import glob

def Check3(root):
    """Return True if every required ``*_Bxx.jp2`` file exists one level below ``root``.

    Only files matching ``<root>/<any directory>/*.jp2`` are considered, i.e.
    files located in the immediate sub-directories of ``root``.

    Args:
        root: Directory whose immediate sub-directories are searched.

    Returns:
        True when, for each required suffix, at least one of the files found
        ends with it; False otherwise.
    """
    # list the .jp2 files located exactly one directory level below root
    files = glob('{}/*/*.jp2'.format(root))

    # suffixes that must each be matched by at least one file
    extensions_check_list = [
        '_B01.jp2', '_B02.jp2', '_B03.jp2', '_B04.jp2', '_B05.jp2',
        '_B06.jp2', '_B07.jp2', '_B08.jp2', '_B8A.jp2', '_B09.jp2',
        '_B10.jp2', '_B11.jp2', '_B12.jp2',
    ]

    # glob() returns whole paths, so they have to be compared with endswith();
    # a plain membership test against the suffix list can never succeed.
    # Checking each suffix separately (rather than counting matches) also keeps
    # duplicate files for one suffix from masking a missing one.
    return all(
        any(file.endswith(extension) for file in files)
        for extension in extensions_check_list
    )
Douglas Ferreira
  • 707
  • 2
  • 5
  • 22
0
import wizzi_utils as wu  # pip install wizzi_utils


def check_if_sequential(dir_path: str, files_suffix: list) -> bool:
    """Report whether each suffix in ``files_suffix`` is matched by some file.

    The files are obtained from ``wu.find_files_in_folder`` and printed as a
    numbered list. The suffixes are then checked in order; the check stops at
    the first suffix that no file name ends with.

    Args:
        dir_path: Folder passed on to ``wu.find_files_in_folder``.
        files_suffix: Suffixes that each have to be matched by a file.

    Returns:
        True if every suffix was matched, False as soon as one is not.
    """
    # NOTE(review): presumably an empty file_suffix makes the helper return
    # every file in the folder - confirm against the wizzi_utils docs.
    found_files = wu.find_files_in_folder(dir_path=dir_path, file_suffix='')

    print('files_in_dir:')
    for number, file_path in enumerate(found_files, start=1):
        print('\t{}: {}'.format(number, file_path))

    for wanted in files_suffix:
        if not any(file_path.endswith(wanted) for file_path in found_files):
            # Only the first unmatched suffix is reported.
            print('suffix {} not found'.format(wanted))
            print('not all files found')
            return False

    print('all files with suffix given found in folder')
    return True


def main() -> None:
    """Run ``check_if_sequential`` on ``./my_files`` with the 13 ``_Bxx.jp2`` suffixes."""
    suffix_codes = (
        '01', '02', '03', '04', '05', '06', '07',
        '08', '8A', '09', '10', '11', '12',
    )
    # The boolean result is not needed here; the function prints its findings.
    check_if_sequential(
        dir_path='./my_files',
        files_suffix=['_B{}.jp2'.format(code) for code in suffix_codes],
    )


# Run the demo only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()

If files with all of the suffixes are in the folder (plus one extra file that we don't need), the output will be:

files_in_dir:
    1: D:/workspace/2021wizzi_utils/temp/my_files/bla_B01.jp2
    2: D:/workspace/2021wizzi_utils/temp/my_files/bla_B02.jp2
    3: D:/workspace/2021wizzi_utils/temp/my_files/bla_B03.jp2
    4: D:/workspace/2021wizzi_utils/temp/my_files/bla_B04.jp2
    5: D:/workspace/2021wizzi_utils/temp/my_files/bla_B06.jp2
    6: D:/workspace/2021wizzi_utils/temp/my_files/bla_B07.jp2
    7: D:/workspace/2021wizzi_utils/temp/my_files/bla_B08.jp2
    8: D:/workspace/2021wizzi_utils/temp/my_files/bla_B09.jp2
    9: D:/workspace/2021wizzi_utils/temp/my_files/bla_B10.jp2
    10: D:/workspace/2021wizzi_utils/temp/my_files/bla_B11.jp2
    11: D:/workspace/2021wizzi_utils/temp/my_files/bla_B12.jp2
    12: D:/workspace/2021wizzi_utils/temp/my_files/bla_B8A.jp2
    13: D:/workspace/2021wizzi_utils/temp/my_files/random_file.txt
    14: D:/workspace/2021wizzi_utils/temp/my_files/x_B05.jp2
all files with suffix given found in folder

Now delete one file and rerun. I deleted bla_B06.jp2, so the output will be:

files_in_dir:
    1: D:/workspace/2021wizzi_utils/temp/my_files/bla_B01.jp2
    2: D:/workspace/2021wizzi_utils/temp/my_files/bla_B02.jp2
    3: D:/workspace/2021wizzi_utils/temp/my_files/bla_B03.jp2
    4: D:/workspace/2021wizzi_utils/temp/my_files/bla_B04.jp2
    5: D:/workspace/2021wizzi_utils/temp/my_files/bla_B07.jp2
    6: D:/workspace/2021wizzi_utils/temp/my_files/bla_B08.jp2
    7: D:/workspace/2021wizzi_utils/temp/my_files/bla_B09.jp2
    8: D:/workspace/2021wizzi_utils/temp/my_files/bla_B10.jp2
    9: D:/workspace/2021wizzi_utils/temp/my_files/bla_B11.jp2
    10: D:/workspace/2021wizzi_utils/temp/my_files/bla_B12.jp2
    11: D:/workspace/2021wizzi_utils/temp/my_files/bla_B8A.jp2
    12: D:/workspace/2021wizzi_utils/temp/my_files/random_file.txt
    13: D:/workspace/2021wizzi_utils/temp/my_files/x_B05.jp2
suffix _B06.jp2 not found
not all files found
gilad eini
  • 360
  • 2
  • 6