I have a year wise (1980-2020) precipitation data set in netCDF format. I am importing them in xarray to have 40 years of merged precipitation values:
import netCDF4
import numpy
import xarray as xr
import pandas as pd
prcp=xr.open_mfdataset('/home/hrsa/Sayantan/HAR_V2/prcp/HARv2_d10km_d_2d_prcp_*.nc',combine = 'nested', concat_dim="time")
prcp
which renders:
xarray.Dataset
Dimensions:
time: 14976west_east: 381south_north: 252
Coordinates:
time
(time)
datetime64[ns]
1980-01-01 ... 2020-12-31
west_east
(west_east)
float32
-1.675e+06 -1.665e+06 ... 2.125e+06
south_north
(south_north)
float32
-7.45e+05 -7.35e+05 ... 1.765e+06
lon
(south_north, west_east)
float32
dask.array<chunksize=(252, 381), meta=np.ndarray>
lat
(south_north, west_east)
float32
dask.array<chunksize=(252, 381), meta=np.ndarray>
Data variables:
prcp
(time, south_north, west_east)
float32
dask.array<chunksize=(366, 252, 381), meta=np.ndarray>
Attributes: (33)
This a large dataset, hence I am required to subset it according to an SRTM image whose extents (in EPSG:4326) is defined as
# Extents of the SRTM DEM covering Panchi_B and the SASE AWS/Base Camp
min_lon = 77.0
min_lat = 32.0
max_lon = 78.0
max_lat = 33.0
In order to subset according to above coordinates I have tried the following:
prcp = prcp.sel(lat = slice(min_lat,max_lat), lon = slice(min_lon,max_lon))
the Error output:
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
File ~/.pyenv/versions/3.9.7/envs/v3.9.7/lib/python3.9/site-packages/xarray/core/indexing.py:73, in group_indexers_by_index(data_obj, indexers, method, tolerance)
72 try:
---> 73 index = xindexes[key]
74 coord = data_obj.coords[key]
KeyError: 'lat'
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
Input In [25], in <cell line: 1>()
----> 1 prcp = prcp.sel(lat = slice(min_lat,max_lat), lon = slice(min_lon,max_lon))
File ~/.pyenv/versions/3.9.7/envs/v3.9.7/lib/python3.9/site-packages/xarray/core/dataset.py:2501, in Dataset.sel(self, indexers, method, tolerance, drop, **indexers_kwargs)
2440 """Returns a new dataset with each array indexed by tick labels
2441 along the specified dimension(s).
2442
(...)
2498 DataArray.sel
2499 """
2500 indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "sel")
-> 2501 pos_indexers, new_indexes = remap_label_indexers(
2502 self, indexers=indexers, method=method, tolerance=tolerance
2503 )
2504 # TODO: benbovy - flexible indexes: also use variables returned by Index.query
2505 # (temporary dirty fix).
2506 new_indexes = {k: v[0] for k, v in new_indexes.items()}
File ~/.pyenv/versions/3.9.7/envs/v3.9.7/lib/python3.9/site-packages/xarray/core/coordinates.py:421, in remap_label_indexers(obj, indexers, method, tolerance, **indexers_kwargs)
414 indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "remap_label_indexers")
416 v_indexers = {
417 k: v.variable.data if isinstance(v, DataArray) else v
418 for k, v in indexers.items()
419 }
--> 421 pos_indexers, new_indexes = indexing.remap_label_indexers(
422 obj, v_indexers, method=method, tolerance=tolerance
423 )
424 # attach indexer's coordinate to pos_indexers
425 for k, v in indexers.items():
File ~/.pyenv/versions/3.9.7/envs/v3.9.7/lib/python3.9/site-packages/xarray/core/indexing.py:110, in remap_label_indexers(data_obj, indexers, method, tolerance)
107 pos_indexers = {}
108 new_indexes = {}
--> 110 indexes, grouped_indexers = group_indexers_by_index(
111 data_obj, indexers, method, tolerance
112 )
114 forward_pos_indexers = grouped_indexers.pop(None, None)
115 if forward_pos_indexers is not None:
File ~/.pyenv/versions/3.9.7/envs/v3.9.7/lib/python3.9/site-packages/xarray/core/indexing.py:84, in group_indexers_by_index(data_obj, indexers, method, tolerance)
82 except KeyError:
83 if key in data_obj.coords:
---> 84 raise KeyError(f"no index found for coordinate {key}")
85 elif key not in data_obj.dims:
86 raise KeyError(f"{key} is not a valid dimension or coordinate")
KeyError: 'no index found for coordinate lat'
How can I resolve this issue? Any help will be appreciated, Thank you.
############# Edit (for @Robert Wilson) ##################
In order to find out the ranges, I did the following:
lon = prcp.lon.to_dataframe()
lon
lat = prcp.lat.to_dataframe()
lat