I have a pandas dataframe that contains latitude and longitude and am trying to create a function to take the coordinates and determine what state each location is in. I am using geopy to obtain location information from the coordinates.
Sample data:
If I wanted just one location, the code would be as follows:
geolocator = Nominatim(user_agent='geoapiExercises')
latitude = '38.0525'
longitude = '-85.9103'
location = geolocator.reverse(latitude + ',' + longitude)
address = location.raw['address']
state = address.get('state', '')
However, I have around 10,000 coordinates that I need to find the state for. I have tried my hand at iterating over the dataframe using a for loop, but I'll admit I'm not the best at it. My current function returns a ValueError.
import pandas as pd
import numpy as np
from geopy.geocoders import Nominatim
df = pd.read_csv('filepath.csv')
us_df = df.loc[df['country_name'] == 'United States of America']
def state(df):
for row in df:
# get lat & long
latitude = str(df['latitude'])
longitude = str(df['longitude'])
geolocator = Nominatim(user_agent='geoapiExercises')
location = geolocator.reverse(latitude + ',' + longitude)
address = location.raw['address']
state = address.get('state', '')
df['state'] = state
state(us_df)
ValueError Traceback (most recent call last)
File ~/opt/anaconda3/envs/Python3.10/lib/python3.10/site-packages/geopy/geocoders/nominatim.py:350, in Nominatim.reverse(self, query, exactly_one, timeout, language, addressdetails, zoom, namedetails)
349 try:
--> 350 lat, lon = self._coerce_point_to_string(query).split(',')
351 except ValueError:
File ~/opt/anaconda3/envs/Python3.10/lib/python3.10/site-packages/geopy/geocoders/base.py:300, in Geocoder._coerce_point_to_string(self, point, output_format)
299 if not isinstance(point, Point):
--> 300 point = Point(point)
302 # Altitude is silently dropped.
303 #
304 # Geocoding services (almost?) always consider only lat and lon
(...)
307 # though, because PoIs are assumed to span the whole
308 # altitude axis (i.e. not just the 0km plane).
File ~/opt/anaconda3/envs/Python3.10/lib/python3.10/site-packages/geopy/point.py:166, in Point.__new__(cls, latitude, longitude, altitude)
165 elif isinstance(arg, str):
--> 166 return cls.from_string(arg)
167 else:
File ~/opt/anaconda3/envs/Python3.10/lib/python3.10/site-packages/geopy/point.py:457, in Point.from_string(cls, string)
456 else:
--> 457 raise ValueError(
458 "Failed to create Point instance from string: unknown format."
459 )
ValueError: Failed to create Point instance from string: unknown format.
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
Cell In[15], line 17
14 state = address.get('state', '')
15 df['state'] = state
---> 17 city_state(us_df)
Cell In[15], line 12, in city_state(df)
9 longitude = str(df['longitude'])
11 geolocator = Nominatim(user_agent='geoapiExercises')
---> 12 location = geolocator.reverse(latitude + ',' + longitude)
13 address = location.raw['address']
14 state = address.get('state', '')
File ~/opt/anaconda3/envs/Python3.10/lib/python3.10/site-packages/geopy/geocoders/nominatim.py:352, in Nominatim.reverse(self, query, exactly_one, timeout, language, addressdetails, zoom, namedetails)
350 lat, lon = self._coerce_point_to_string(query).split(',')
351 except ValueError:
--> 352 raise ValueError("Must be a coordinate pair or Point")
353 params = {
354 'lat': lat,
355 'lon': lon,
356 'format': 'json',
357 }
358 if language:
ValueError: Must be a coordinate pair or Point
It says the value must be a coordinate, so I must not be iterating correctly. Any help is appreciated.
I am using Python 3.10
Edit: When using the iterrows() solution below, a TimeoutError is returned:
TimeoutError Traceback (most recent call last)
File ~/opt/anaconda3/envs/Python3.10/lib/python3.10/urllib/request.py:1348, in AbstractHTTPHandler.do_open(self, http_class, req, **http_conn_args)
1347 try:
-> 1348 h.request(req.get_method(), req.selector, req.data, headers,
1349 encode_chunked=req.has_header('Transfer-encoding'))
1350 except OSError as err: # timeout error
File ~/opt/anaconda3/envs/Python3.10/lib/python3.10/http/client.py:1282, in HTTPConnection.request(self, method, url, body, headers, encode_chunked)
1281 """Send a complete request to the server."""
-> 1282 self._send_request(method, url, body, headers, encode_chunked)
File ~/opt/anaconda3/envs/Python3.10/lib/python3.10/http/client.py:1328, in HTTPConnection._send_request(self, method, url, body, headers, encode_chunked)
1327 body = _encode(body, 'body')
-> 1328 self.endheaders(body, encode_chunked=encode_chunked)
File ~/opt/anaconda3/envs/Python3.10/lib/python3.10/http/client.py:1277, in HTTPConnection.endheaders(self, message_body, encode_chunked)
1276 raise CannotSendHeader()
-> 1277 self._send_output(message_body, encode_chunked=encode_chunked)
File ~/opt/anaconda3/envs/Python3.10/lib/python3.10/http/client.py:1037, in HTTPConnection._send_output(self, message_body, encode_chunked)
1036 del self._buffer[:]
-> 1037 self.send(msg)
1039 if message_body is not None:
1040
1041 # create a consistent interface to message_body
File ~/opt/anaconda3/envs/Python3.10/lib/python3.10/http/client.py:975, in HTTPConnection.send(self, data)
974 if self.auto_open:
--> 975 self.connect()
976 else:
File ~/opt/anaconda3/envs/Python3.10/lib/python3.10/http/client.py:1454, in HTTPSConnection.connect(self)
1452 server_hostname = self.host
-> 1454 self.sock = self._context.wrap_socket(self.sock,
1455 server_hostname=server_hostname)
File ~/opt/anaconda3/envs/Python3.10/lib/python3.10/ssl.py:513, in SSLContext.wrap_socket(self, sock, server_side, do_handshake_on_connect, suppress_ragged_eofs, server_hostname, session)
507 def wrap_socket(self, sock, server_side=False,
508 do_handshake_on_connect=True,
509 suppress_ragged_eofs=True,
510 server_hostname=None, session=None):
511 # SSLSocket class handles server_hostname encoding before it calls
512 # ctx._wrap_socket()
--> 513 return self.sslsocket_class._create(
514 sock=sock,
515 server_side=server_side,
516 do_handshake_on_connect=do_handshake_on_connect,
517 suppress_ragged_eofs=suppress_ragged_eofs,
518 server_hostname=server_hostname,
519 context=self,
520 session=session
521 )
File ~/opt/anaconda3/envs/Python3.10/lib/python3.10/ssl.py:1071, in SSLSocket._create(cls, sock, server_side, do_handshake_on_connect, suppress_ragged_eofs, server_hostname, context, session)
1070 raise ValueError("do_handshake_on_connect should not be specified for non-blocking sockets")
-> 1071 self.do_handshake()
1072 except (OSError, ValueError):
File ~/opt/anaconda3/envs/Python3.10/lib/python3.10/ssl.py:1342, in SSLSocket.do_handshake(self, block)
1341 self.settimeout(None)
-> 1342 self._sslobj.do_handshake()
1343 finally:
TimeoutError: _ssl.c:980: The handshake operation timed out
During handling of the above exception, another exception occurred:
URLError Traceback (most recent call last)
File ~/opt/anaconda3/envs/Python3.10/lib/python3.10/site-packages/geopy/adapters.py:297, in URLLibAdapter.get_text(self, url, timeout, headers)
296 try:
--> 297 page = self.urlopen(req, timeout=timeout)
298 except Exception as error:
File ~/opt/anaconda3/envs/Python3.10/lib/python3.10/urllib/request.py:519, in OpenerDirector.open(self, fullurl, data, timeout)
518 sys.audit('urllib.Request', req.full_url, req.data, req.headers, req.get_method())
--> 519 response = self._open(req, data)
521 # post-process response
File ~/opt/anaconda3/envs/Python3.10/lib/python3.10/urllib/request.py:536, in OpenerDirector._open(self, req, data)
535 protocol = req.type
--> 536 result = self._call_chain(self.handle_open, protocol, protocol +
537 '_open', req)
538 if result:
File ~/opt/anaconda3/envs/Python3.10/lib/python3.10/urllib/request.py:496, in OpenerDirector._call_chain(self, chain, kind, meth_name, *args)
495 func = getattr(handler, meth_name)
--> 496 result = func(*args)
497 if result is not None:
File ~/opt/anaconda3/envs/Python3.10/lib/python3.10/urllib/request.py:1391, in HTTPSHandler.https_open(self, req)
1390 def https_open(self, req):
-> 1391 return self.do_open(http.client.HTTPSConnection, req,
1392 context=self._context, check_hostname=self._check_hostname)
File ~/opt/anaconda3/envs/Python3.10/lib/python3.10/urllib/request.py:1351, in AbstractHTTPHandler.do_open(self, http_class, req, **http_conn_args)
1350 except OSError as err: # timeout error
-> 1351 raise URLError(err)
1352 r = h.getresponse()
URLError: <urlopen error _ssl.c:980: The handshake operation timed out>
During handling of the above exception, another exception occurred:
GeocoderTimedOut Traceback (most recent call last)
Cell In[16], line 17
14 state = address.get('state', '')
15 df.at[index, 'state'] = state
---> 17 city_state(us_df)
Cell In[16], line 12, in city_state(df)
9 longitude = str(row['longitude'])
11 geolocator = Nominatim(user_agent='geoapiExercises')
---> 12 location = geolocator.reverse(latitude + ',' + longitude)
13 address = location.raw['address']
14 state = address.get('state', '')
File ~/opt/anaconda3/envs/Python3.10/lib/python3.10/site-packages/geopy/geocoders/nominatim.py:372, in Nominatim.reverse(self, query, exactly_one, timeout, language, addressdetails, zoom, namedetails)
370 logger.debug("%s.reverse: %s", self.__class__.__name__, url)
371 callback = partial(self._parse_json, exactly_one=exactly_one)
--> 372 return self._call_geocoder(url, callback, timeout=timeout)
File ~/opt/anaconda3/envs/Python3.10/lib/python3.10/site-packages/geopy/geocoders/base.py:368, in Geocoder._call_geocoder(self, url, callback, timeout, is_json, headers)
366 try:
367 if is_json:
--> 368 result = self.adapter.get_json(url, timeout=timeout, headers=req_headers)
369 else:
370 result = self.adapter.get_text(url, timeout=timeout, headers=req_headers)
File ~/opt/anaconda3/envs/Python3.10/lib/python3.10/site-packages/geopy/adapters.py:286, in URLLibAdapter.get_json(self, url, timeout, headers)
285 def get_json(self, url, *, timeout, headers):
--> 286 text = self.get_text(url, timeout=timeout, headers=headers)
287 try:
288 return json.loads(text)
File ~/opt/anaconda3/envs/Python3.10/lib/python3.10/site-packages/geopy/adapters.py:315, in URLLibAdapter.get_text(self, url, timeout, headers)
313 elif isinstance(error, URLError):
314 if "timed out" in message:
--> 315 raise GeocoderTimedOut("Service timed out")
316 elif "unreachable" in message:
317 raise GeocoderUnavailable("Service not available")
GeocoderTimedOut: Service timed out