I have a data frame of ~17,000 `lat`/`lon` values that I wish to use in order to obtain and populate a new column with the equivalent state.
So far, I have tried several solutions (far too many to list here, but more than ten) that were suggested in other Stack Overflow answers but none have worked for me.
The closest I have come to finding a solution is to use the `ggmap` package, but the problem is that I am warned that I have exceeded the limit, despite only sending a single `lat`/`lon` value to it.
I have individual `lat` and `lon` values and have even combined them into `lat,lon` format too, and despite this, none of the aforementioned solutions work for me.
What I want to do is determine the state from a given `lat/lon`/`coord` value and save the state in a new column (`df$state`).
I initially matched all city values in order to obtain the matching state, but because several states contain cities with the same name, the matching process stopped at the first successful match; as a result, I found myself with over 2,800 cities assigned to AK despite them being literally several thousand miles away.
Any suggestions would be great.
Here are the first 100 rows of the `coords`, `lat` and `lon` columns of my data:
structure(list(origin_coords = c("31.9618,-83.0588", "44.8782,-69.4718",
"37.3894,-121.8868", "36.0485,-93.5044", "37.652,-120.7292",
"33.7942,-84.2018", "32.0749,-81.0883", "31.0286,-97.6115", "40.7559,-111.8967",
"39.8359,-91.7538", "35.922,-80.537", "39.8036,-75.0058", "43.072,-83.8424",
"33.5207,-86.8025", "26.1216,-80.1288", "31.9618,-83.0588", "31.9618,-83.0588",
"61.6303,-149.8181", "33.8687,-84.3351", "42.2196,-88.2426",
"31.7943,-85.5581", "28.3067,-80.6862", "39.1157,-94.6271", "33.831,-85.7752",
"39.2655,-76.4935", "32.9824,-87.7919", "61.6303,-149.8181",
"31.086,-85.7192", "31.9618,-83.0588", "39.9048,-75.2946", "34.1132,-117.3771",
"41.905,-71.1026", "42.3921,-97.4751", "31.2627,-86.7711", "42.5864,-71.4401",
"33.7935,-93.807", "39.0097,-123.6523", "61.6303,-149.8181",
"37.7235,-85.9769", "38.0624,-87.2452", "37.7166,-121.9226",
"42.9993,-88.2196", "40.6316,-74.0927", "43.0892,-77.436", "39.8359,-91.7538",
"38.5487,-89.5413", "35.833,-90.6965", "41.363,-89.0008", "37.7953,-95.9368",
"33.4581,-83.0802", "33.7546,-93.6735", "32.7491,-96.4598", "41.8858,-87.6181",
"40.7328,-74.0755", "31.2627,-86.7711", "31.9618,-83.0588", "61.6303,-149.8181",
"38.4642,-85.7775", "40.6344,-92.9219", "37.8366,-89.1424", "42.5648,-83.0701",
"39.5394,-76.3564", "33.8687,-84.3351", "41.4564,-90.7235", "42.0122,-87.8417",
"38.8339,-104.8214", "36.4442,-92.5832", "39.838,-104.9988",
"41.8378,-87.7602", "28.3051,-81.4242", "41.6052,-71.9808", "40.7808,-80.0592",
"40.5364,-89.1885", "31.9618,-83.0588", "40.8915,-74.0119", "43.2078,-91.2976",
"34.4574,-83.476", "36.4105,-92.1951", "40.0177,-75.2594", "36.0557,-96.0602",
"44.694,-85.6763", "61.6303,-149.8181", "40.7446,-73.9345", "29.1989,-82.0874",
"26.6048,-80.2149", "34.6909,-118.1491", "39.0289,-95.2086",
"35.4074,-93.1355", "36.2523,-92.6907", "45.2097,-123.2043",
"37.7953,-95.9368", "61.6303,-149.8181", "39.1157,-94.6271",
"33.5793,-86.6375", "40.3757,-86.3201", "40.6344,-92.9219", "39.8359,-91.7538",
"42.3921,-97.4751", "41.2564,-73.2111", "44.2767,-121.1896"),
origin_lat = c(31.9618, 44.8782, 37.3894, 36.0485, 37.652,
33.7942, 32.0749, 31.0286, 40.7559, 39.8359, 35.922, 39.8036,
43.072, 33.5207, 26.1216, 31.9618, 31.9618, 61.6303, 33.8687,
42.2196, 31.7943, 28.3067, 39.1157, 33.831, 39.2655, 32.9824,
61.6303, 31.086, 31.9618, 39.9048, 34.1132, 41.905, 42.3921,
31.2627, 42.5864, 33.7935, 39.0097, 61.6303, 37.7235, 38.0624,
37.7166, 42.9993, 40.6316, 43.0892, 39.8359, 38.5487, 35.833,
41.363, 37.7953, 33.4581, 33.7546, 32.7491, 41.8858, 40.7328,
31.2627, 31.9618, 61.6303, 38.4642, 40.6344, 37.8366, 42.5648,
39.5394, 33.8687, 41.4564, 42.0122, 38.8339, 36.4442, 39.838,
41.8378, 28.3051, 41.6052, 40.7808, 40.5364, 31.9618, 40.8915,
43.2078, 34.4574, 36.4105, 40.0177, 36.0557, 44.694, 61.6303,
40.7446, 29.1989, 26.6048, 34.6909, 39.0289, 35.4074, 36.2523,
45.2097, 37.7953, 61.6303, 39.1157, 33.5793, 40.3757, 40.6344,
39.8359, 42.3921, 41.2564, 44.2767), origin_lon = c(-83.0588,
-69.4718, -121.8868, -93.5044, -120.7292, -84.2018, -81.0883,
-97.6115, -111.8967, -91.7538, -80.537, -75.0058, -83.8424,
-86.8025, -80.1288, -83.0588, -83.0588, -149.8181, -84.3351,
-88.2426, -85.5581, -80.6862, -94.6271, -85.7752, -76.4935,
-87.7919, -149.8181, -85.7192, -83.0588, -75.2946, -117.3771,
-71.1026, -97.4751, -86.7711, -71.4401, -93.807, -123.6523,
-149.8181, -85.9769, -87.2452, -121.9226, -88.2196, -74.0927,
-77.436, -91.7538, -89.5413, -90.6965, -89.0008, -95.9368,
-83.0802, -93.6735, -96.4598, -87.6181, -74.0755, -86.7711,
-83.0588, -149.8181, -85.7775, -92.9219, -89.1424, -83.0701,
-76.3564, -84.3351, -90.7235, -87.8417, -104.8214, -92.5832,
-104.9988, -87.7602, -81.4242, -71.9808, -80.0592, -89.1885,
-83.0588, -74.0119, -91.2976, -83.476, -92.1951, -75.2594,
-96.0602, -85.6763, -149.8181, -73.9345, -82.0874, -80.2149,
-118.1491, -95.2086, -93.1355, -92.6907, -123.2043, -95.9368,
-149.8181, -94.6271, -86.6375, -86.3201, -92.9219, -91.7538,
-97.4751, -73.2111, -121.1896)), row.names = c(NA, 100L), class = "data.frame")