You didn't post a link to your `image1.tif` file, so the sample code below uses `pysheds/data/dem.tif` from https://github.com/mdbartos/pysheds. The basic idea is to split the input parameters (`xs` and `ys` in your case) into subsets, then give each CPU a different subset to work on.
`main()` computes the solution twice, once sequentially and once in parallel, then compares the two results. The parallel solution is somewhat inefficient because each CPU reads the image file itself, so there is room for improvement (i.e., read the image file once outside the parallel portion, then pass the resulting `grid` object to each task).
import numpy as np
from pysheds.grid import Grid
from dask.distributed import Client
from dask import delayed, compute
# Sample x/y inputs; var() pairs them up with zip() and passes each pair to
# grid.catchment(x=..., y=...).
# NOTE(review): ys holds 11 values while xs holds 10, so zip() never uses the
# last y (125) -- confirm whether that extra value is intentional.
xs = tuple(range(10, 101, 10))
ys = tuple(range(25, 126, 10))
def var(image_file, x_in, y_in):
    """Return the mean of the 'catch' view for each (x, y) pair.

    The raster in *image_file* is loaded once under the data name 'map'.
    For every pair taken from ``zip(x_in, y_in)`` the catchment is
    recomputed into 'catch' and the mean of its view is collected.

    Args:
        image_file: Path of the raster handed to ``Grid.from_raster``.
        x_in: Iterable of x values passed to ``grid.catchment``.
        y_in: Iterable of y values, paired with *x_in* by ``zip`` (any
            unpaired trailing values are ignored).

    Returns:
        A list with one mean per (x, y) pair, in input order.
    """
    grid = Grid.from_raster(image_file, data_name='map')

    def _catchment_mean(px, py):
        # Each call overwrites the 'catch' dataset on the shared grid.
        grid.catchment(data='map', x=px, y=py, out_name='catch')
        catch_view = grid.view('catch', nodata=np.nan)
        # NOTE(review): .mean() propagates NaN; if the view contains the
        # nodata fill, np.nanmean may be what is wanted -- confirm.
        return np.array(catch_view).mean()

    return [_catchment_mean(px, py) for px, py in zip(x_in, y_in)]
def var_parallel(n_cpu, image_file, x_in, y_in):
    """Compute the same result as ``var`` by fanning out over *n_cpu* tasks.

    The inputs are split round-robin: task ``cpu`` receives elements
    ``cpu, cpu + n_cpu, cpu + 2 * n_cpu, ...`` of *x_in* and *y_in*.
    Each task runs ``var`` as a dask ``delayed`` call, and the partial
    results are interleaved back into the original order.

    Args:
        n_cpu: Number of tasks to split the inputs into.
        image_file: Path of the raster, forwarded to ``var``.
        x_in: Sequence of x values (must support ``len`` and slicing).
        y_in: Sequence of y values (must support ``len`` and slicing).

    Returns:
        A list of means in the same order as the input pairs. Its length
        is ``min(len(x_in), len(y_in))``, matching how ``var`` pairs its
        inputs with ``zip``.
    """
    # Work on the function's own arguments rather than the module-level
    # xs/ys, and trim both to the number of pairs zip() would produce so
    # every chunk's result fits its slot in the interleaved output exactly.
    n_points = min(len(x_in), len(y_in))
    x_in = x_in[:n_points]
    y_in = y_in[:n_points]

    # eg, n_cpu = 3, cpu = 0: x chunk = (x[0], x[3], x[6], ...)
    tasks = [
        delayed(var)(image_file, x_in[cpu::n_cpu], y_in[cpu::n_cpu])
        for cpu in range(n_cpu)
    ]
    # compute() returns a 1-tuple here because it was given a single list.
    (chunk_avgs,) = compute(tasks)

    # reassemble solution in the right order
    par_avg = [None] * n_points
    for cpu, chunk in enumerate(chunk_avgs):
        par_avg[cpu::n_cpu] = chunk
    print('AVG (parallel) =', par_avg)
    return par_avg
def main():
    """Run the sequential and parallel solutions and print their difference.

    Computes the means for the module-level ``xs``/``ys`` once with ``var``
    and once with ``var_parallel`` on a local dask client with three
    workers, then prints both results and the largest absolute difference
    between them.
    """
    image_file = 'pysheds/data/dem.tif'

    # sequential solution:
    seq_avg = var(image_file, xs, ys)
    print('AVG (sequential)=', seq_avg)

    # parallel solution:
    n_cpu = 3
    dask_client = Client(n_workers=n_cpu)
    try:
        par_avg = var_parallel(n_cpu, image_file, xs, ys)
    finally:
        # Always stop the workers, even if the parallel run raises.
        dask_client.shutdown()

    print('max error=',
          max(abs(seq - par) for seq, par in zip(seq_avg, par_avg)))
# Run the comparison only when this file is executed as a script.
if __name__ == '__main__':
    main()