I have a rule that calculates new variables based on a set of existing variables; each variable is stored in its own file. I have another rule that calculates the averages of all the different variables in the whole database. My problem is that Snakemake tries to find my derived variables in the original database, where they of course do not exist.
Is there a way to constrain the averaging rule so that it calculates the average for all variables except those in a list of derived variables?
Pseudocode of what the rule looks like:
# Averaging rule: one job per (experiment, variable, model, freq) combination.
# The input files are resolved at DAG-build time by get_control_path(), using
# the `variable` wildcard matched from the requested output path.
rule calc_average:
    input:
        pi_clim_var = lambda w: get_control_path(w, w.variable),
    output:
        outpath = outdir+'{experiment}/{variable}/{variable}_{experiment}_{model}_{freq}.nc'
    log:
        "logs/calc_average/{variable}_{model}_{experiment}_{freq}.log"
    wildcard_constraints:
        # NOTE(review): placeholder for "any variable except calculated1 and
        # calculated2". Wildcard constraints are regular expressions, and '!'
        # has no negation meaning in Python's `re`, so as written this would
        # only match the literal text. Exclusion is normally expressed with a
        # negative lookahead, e.g. '(?!calculated1|calculated2).+' (which also
        # rejects names that merely start with those prefixes) -- confirm
        # against the Snakemake documentation before relying on it.
        variable= '!calculated1!calculated2' # "orrvar1|orrvar2"....
    notebook:
        "../notebooks/calc_clim.py.ipynb"
I can of course make a list of all the variables that I would like to have in the database and then do:
wildcard_constraints:
variable="|".join(list_of_vars)
But I was wondering if it is possible to do it the other way round, e.g.:
wildcard_constraints:
variable="!".join(negate_list_of_vars) # don't match these wildcards
EDIT:
The function get_control_path(w, w.variable)
constructs the path to the input file based on a lookup table that uses the wildcards as keys.
def get_control_path(w, variable, grid_label=None):
    """
    Get the paths of the piClim-control files for a variable.

    The lookup is first attempted with the RFMIP activity; if that raises
    a KeyError it is repeated with the AerChemMIP activity.

    Parameters:
    -----------
    w : snake.wildcards
        the snakemake wildcards, forwarded unchanged to get_paths
    variable : str
        name of the variable to look up
    grid_label : str, optional
        grid label to use; when None, config['default_grid_label'] is used

    Returns:
    --------
    paths :
        whatever get_paths returns for the control experiment

    Raises:
    -------
    KeyError
        if the AerChemMIP lookup fails as well
    """
    if grid_label is None:
        grid_label = config['default_grid_label']
    try:
        return get_paths(w, variable, 'piClim-control', grid_label,
                         activity='RFMIP', control=True)
    except KeyError:
        # The RFMIP lookup failed; retry with AerChemMIP. A KeyError from
        # this second attempt propagates to the caller.
        return get_paths(w, variable, 'piClim-control', grid_label,
                         activity='AerChemMIP', control=True)
def get_paths(w, variable, experiment, grid_label=None, activity=None, control=False):
    """
    Get CMIP6 model paths in database based on the lookup tables.

    Parameters:
    -----------
    w : snake.wildcards
        a named tuple that contains the snakemake wildcards; only
        w.model is read here
    variable : str
        variable name, used to pick the table id and to build the path
    experiment : str
        experiment name, used in the lookup tables and in the path
    grid_label : str, optional
        grid label directory; when None, the first 'gl' entry of the
        file-ending lookup table is used
    activity : str, optional
        activity name; when empty or None it is read from
        LOOK_EXP[experiment]
    control : bool
        when True the 'control' entries of
        config['model_specific_variant'] and config['version'] are used,
        otherwise the experiment entries

    Returns:
    --------
    paths : list
        the file paths produced by expand, one per file ending, or a
        single-element list holding the second path when the first one
        does not exist on disk

    Raises:
    -------
    KeyError
        if no file ending is defined for the combination of activity,
        model, experiment, variant and table id, or if one of the other
        lookup tables has no entry for the requested key
    """
    model = w.model
    if model in ["NorESM2-LM", "NorESM2-MM"]:
        root_path = f'{ROOT_PATH_NORESM}/{CMIP_VER}'
        look_fnames = LOOK_FNAMES_NORESM
    else:
        root_path = f'{ROOT_PATH}/{CMIP_VER}'
        look_fnames = LOOK_FNAMES

    if not activity:
        activity = LOOK_EXP[experiment]

    variant_key = 'control' if control else 'experiment'
    variant = config['model_specific_variant'][variant_key].get(model, config['variant_default'])
    table_id = TABLE_IDS.get(variable, DEFAULT_TABLE_ID)
    institution = LOOK_INSTITU[model]

    try:
        file_endings = look_fnames[activity][model][experiment][variant][table_id]['fn']
    except (LookupError, TypeError) as err:
        # Only lookup failures are translated; KeyboardInterrupt/SystemExit
        # must not be turned into a KeyError.
        raise KeyError(f"File ending is not defined for this combination of {activity}, {model}, {experiment}, {variant} and {table_id} " +
                       "please update config/lookup_file_endings.yaml accordingly") from err

    if grid_label is None:
        grid_label = look_fnames[activity][model][experiment][variant][table_id]['gl'][0]

    # Directory that holds one sub-directory per grid label.
    var_dir = f'{root_path}/{activity}/{institution}/{model}/{experiment}/{variant}/{table_id}/{variable}'

    if not os.path.exists(f'{var_dir}/{grid_label}'):
        # Try the known grid labels in order and stop at the first that
        # exists. If none exists, grid_label is left at the last candidate.
        for candidate in ('gr', 'gn', 'gl', 'grz', 'gr1'):
            grid_label = candidate
            if os.path.exists(f'{var_dir}/{grid_label}'):
                break

    version_key = 'version_control' if control else 'version_exp'
    version = config['version'][version_key].get(model, 'latest')

    fname = f'{variable}_{table_id}_{model}_{experiment}_{variant}_{grid_label}'
    grid_dir = f'{var_dir}/{grid_label}'
    # The doubled braces leave a literal {file_endings} placeholder in the
    # pattern for expand to fill in.
    paths = expand(f'{grid_dir}/{version}/{fname}_{{file_endings}}',
                   file_endings=file_endings)

    # Sometimes the versions are just messed up... try one more time with latest
    if not os.path.exists(paths[0]):
        paths = expand(f'{grid_dir}/latest/{fname}_{{file_endings}}',
                       file_endings=file_endings)

    # Sometimes the file endings are different depending on variable
    if not os.path.exists(paths[0]) and len(paths) >= 2:
        paths = [paths[1]]
    return paths