I have been working on this dataset for a while now trying to plot some coordinates on a simple map of the US to create a density map. I easily created the map in Tableau, but I want to be able to make a similar map in Python. The following picture is the end-goal made in Tableau:
Below is what I have coded so far. I believe I have gotten around needing to mask this dataset as I have stripped out everything but the contiguous US, but I would like to know how to create a mask in the future to save time and have more options.
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from datetime import timedelta, date
import matplotlib
from pandas import Series, DataFrame
import geopandas as gpd
from geopandas import GeoDataFrame
from shapely.geometry import Point, mapping
from rasterio import mask as msk
import fiona
# import data
df = pd.read_csv('./Data/complete.csv', on_bad_lines='skip') # csv had some bad data, had to skip
df = df.drop('Unnamed: 11', axis=1)
# remove Canadian provinces (and nulls) using list of states (including DC)
# remove AK and HI since only plotting contiguous US
list_of_states = ['AL', 'AR', 'AZ', 'CA', 'CO', 'CT', 'DC', 'DE', 'FL', 'GA',
'IA', 'ID', 'IL', 'IN', 'KS', 'KY', 'LA', 'MA', 'MD', 'ME',
'MI', 'MN', 'MO', 'MS', 'MT', 'NC', 'ND', 'NE', 'NH', 'NJ', 'NM',
'NV', 'NY', 'OH', 'OK', 'OR', 'PA', 'RI', 'SC', 'SD', 'TN', 'TX',
'UT', 'VA', 'VT', 'WA', 'WI', 'WV', 'WY']
df['state'] = df['state'].str.upper() # convert column to uppercase
df = df[df['state'].isin(list_of_states)]
df = df[df['country'].isin(['us'])]
# clean the data
# remove values that contain '/' and 'q'
df = df.drop(df[df.latitude.str.contains(r'[/q]')].index)
# convert lat/long to float
df['latitude'] = df.latitude.astype('float')
df['longitude'] = df.longitude.astype('float')
A sample of my data:
# import shapefile
us_map = gpd.read_file(r'./Data/USA_States_(Generalized)/USA_States_Generalized.shp')
#remove AK and HI
us_map = us_map[~us_map['STATE_NAME'].isin(['Alaska', 'Hawaii'])]
# create geodataframe of lats and longs
geometry = [Point(xy) for xy in zip(df['longitude'], df['latitude'])]
points = GeoDataFrame(df, geometry=geometry)
This is where I am unsure of the best path as each path is giving me trouble. I have tried using Fiona and rasterio to create a mask (see below), but the I cannot figure out how to fix my AttributeError.
with fiona.open('./Data/USA_States_(Generalized)/USA_States_Generalized.shp', 'r') as shapefile:
for feature in shapefile:
shapes = [feature['geometry']]
us_array = msk.mask(points.geometry, us_map, shapes, crop=True)
The error:
AttributeError Traceback (most recent call last)
/var/folders/ll/77znv_6d7jng9_kjkpdllhbh0000gn/T/ipykernel_2471/3613015337.py in <module>
4 shapes = [feature['geometry']]
5
----> 6 us_array = msk.mask(points.geometry, us_map, shapes, crop=True)
~/opt/anaconda3/envs/PopulationDensity/lib/python3.7/site-packages/rasterio/mask.py in mask(dataset, shapes, all_touched, invert, nodata, filled, crop, pad, pad_width, indexes)
171
172 if nodata is None:
--> 173 if dataset.nodata is not None:
174 nodata = dataset.nodata
175 else:
~/opt/anaconda3/envs/PopulationDensity/lib/python3.7/site-packages/pandas/core/generic.py in __getattr__(self, name)
5485 ):
5486 return self[name]
-> 5487 return object.__getattribute__(self, name)
5488
5489 def __setattr__(self, name: str, value) -> None:
AttributeError: 'GeoSeries' object has no attribute 'nodata'
The rasterio documentation states the first argument must be a raster, so I then tried to convert my dataframe to a raster using the following:
# need to convert coords to raster so fiona will work?
# last line not working
import xarray as xr
import rioxarray
da = df.set_index('ID').to_xarray()
# promote the data variables model lat/long to 2d coordinates
da = da.set_coords(['latitude', 'longitude'])
da.rio.to_raster('./Data/UFO_Sightings_Coords_Shapefile.shp')
with an error following:
MissingSpatialDimensionError Traceback (most recent call last)
/var/folders/ll/77znv_6d7jng9_kjkpdllhbh0000gn/T/ipykernel_2471/1071145538.py in <module>
9 # promote the data variables model lat/long to 2d coordinates
10 da = da.set_coords(['latitude', 'longitude'])
---> 11 da.rio.to_raster('./Data/UFO_Sightings_Coords_Shapefile.shp')
~/opt/anaconda3/envs/PopulationDensity/lib/python3.7/site-packages/rioxarray/raster_dataset.py in to_raster(self, raster_path, driver, dtype, tags, windowed, recalc_transform, lock, compute, **profile_kwargs)
477 # write it to a raster
478 return data_array.rio.set_spatial_dims(
--> 479 x_dim=self.x_dim,
480 y_dim=self.y_dim,
481 inplace=True,
~/opt/anaconda3/envs/PopulationDensity/lib/python3.7/site-packages/rioxarray/rioxarray.py in x_dim(self)
790 return self._x_dim
791 raise MissingSpatialDimensionError(
--> 792 "x dimension not found. 'rio.set_spatial_dims()' or "
793 "using 'rename()' to change the dimension name to 'x' can address this."
794 f"{_get_data_var_message(self._obj)}"
MissingSpatialDimensionError: x dimension not found. 'rio.set_spatial_dims()' or using 'rename()' to change the dimension name to 'x' can address this.
I'm not sure where to go from here.