In this case, it is probably easier to list all directories matching your hierarchy pattern with glob.glob().
You can then use os.path.getctime() to get a timestamp for each directory,
and sort and filter by that timestamp:
from glob import glob
import os.path
import time
def find_sessions_to_delete(cutoff, *, pattern='/root/job*/session*', keep=2):
    """Return the session paths that are old enough to be deleted.

    Every path matching ``pattern`` is ranked by its ctime, as reported by
    ``os.path.getctime()`` (on Unix this is the time of the last metadata
    change, on Windows the creation time).  The ``keep`` newest paths are
    always retained; of the remainder, those whose ctime is at or before
    ``cutoff`` are returned.

    Args:
        cutoff: Timestamp in seconds since the epoch, e.g. derived from
            ``time.time()``.
        pattern: Glob pattern selecting the session directories.
        keep: Number of newest sessions that are never returned.

    Returns:
        A list of paths, ordered from newest to oldest.

    Raises:
        ValueError: If ``keep`` is negative.
        OSError: If a matched path disappears before its ctime is read.
    """
    if keep < 0:
        # a negative slice start would silently keep the wrong entries
        raise ValueError('keep must be non-negative')
    # (timestamp, path) tuples sort by timestamp first, path as tie-breaker
    session_dirs = sorted(
        ((os.path.getctime(p), p) for p in glob(pattern)),
        reverse=True,  # newest first
    )
    # skip the newest `keep` entries, they are kept regardless of their age
    return [p for t, p in session_dirs[keep:] if t <= cutoff]
# Sessions whose ctime is more than one week old are candidates for deletion.
cutoff = time.time() - 7 * 24 * 60 * 60
sessions_to_delete = find_sessions_to_delete(cutoff)
I included a sample cutoff date at 7 days ago, calculated from time.time(),
which returns a floating-point value expressing the number of seconds passed since the 1st of January 1970 (the UNIX epoch).
If you needed to do this per job directory, do the same work per such directory and merge the resulting lists:
def find_sessions_to_delete(cutoff, *, job_pattern='/root/job*',
                            session_pattern='session*', keep=2):
    """Return, per job directory, the session paths old enough to delete.

    Each directory matching ``job_pattern`` is processed on its own: its
    entries matching ``session_pattern`` are ranked by ctime, as reported
    by ``os.path.getctime()`` (on Unix the time of the last metadata
    change, on Windows the creation time).  The ``keep`` newest entries of
    that job are always retained; of the remainder, those whose ctime is at
    or before ``cutoff`` are collected.

    Args:
        cutoff: Timestamp in seconds since the epoch, e.g. derived from
            ``time.time()``.
        job_pattern: Glob pattern selecting the job directories.
        session_pattern: Glob pattern, relative to a job directory,
            selecting its session directories.
        keep: Number of newest sessions per job that are never returned.

    Returns:
        A single list with the paths collected from all job directories;
        within each job the paths are ordered from newest to oldest.

    Raises:
        ValueError: If ``keep`` is negative.
        OSError: If a matched path disappears before its ctime is read.
    """
    if keep < 0:
        # a negative slice start would silently keep the wrong entries
        raise ValueError('keep must be non-negative')
    to_delete = []
    # process each jobdir separately so `keep` applies per job
    for jobdir in glob(job_pattern):
        # (timestamp, path) tuples sort by timestamp first, then by path
        session_dirs = sorted(
            ((os.path.getctime(p), p)
             for p in glob(os.path.join(jobdir, session_pattern))),
            reverse=True,  # newest first
        )
        # skip this job's newest `keep` entries, then filter by the cutoff
        to_delete.extend(p for t, p in session_dirs[keep:] if t <= cutoff)
    return to_delete