I have a DataFrame that contains the names of stations and their coordinates. For every station, I want to find the nearest other station based on those coordinates.
What I have so far are two functions:
import math
def dist2(lat1, long1, lat2, long2):
    """
    Calculate the great circle distance between two points
    on the earth (specified in decimal degrees).

    Uses the haversine formula on a sphere of radius 6371 km.

    Args:
        lat1: Latitude of the first point, in decimal degrees.
        long1: Longitude of the first point, in decimal degrees.
        lat2: Latitude of the second point, in decimal degrees.
        long2: Longitude of the second point, in decimal degrees.

    Returns:
        The distance between the two points in kilometers.
    """
    # convert decimal degrees to radians
    lat1, long1, lat2, long2 = map(math.radians, (lat1, long1, lat2, long2))
    # haversine formula
    dlon = long2 - long1
    dlat = lat2 - lat1
    a = (
        math.sin(dlat / 2) ** 2
        + math.cos(lat1) * math.cos(lat2) * math.sin(dlon / 2) ** 2
    )
    # Floating-point rounding can push `a` marginally above 1 for (near-)
    # antipodal points, which would make asin raise; clamp it.
    c = 2 * math.asin(math.sqrt(min(1.0, a)))
    # Radius of earth in kilometers is 6371
    km = 6371 * c
    return km
def find_nearest2(lat, lng):
    """
    Return the name of the station nearest to the given coordinates.

    The distance from (lat, lng) to every row of the module-level
    ``df_onlystations_clean`` is computed with ``dist2`` from the row's
    'lat' and 'lng' columns; the 'name' of the closest row is returned.
    Rows at distance 0 are ignored so that a station queried with its own
    coordinates is not reported as its own nearest neighbour.

    Args:
        lat: Latitude of the query point, in decimal degrees.
        lng: Longitude of the query point, in decimal degrees.

    Returns:
        The 'name' value of the nearest row, or None when the table is
        empty or holds no row at a non-zero distance.
    """
    # NOTE(review): relies on the pandas API (.empty, .apply(axis=1), .loc,
    # Series.idxmin) — confirm df_onlystations_clean is a pandas DataFrame.
    stations = df_onlystations_clean
    if stations.empty:
        return None
    distances = stations.apply(
        lambda row: dist2(lat, lng, row['lat'], row['lng']),
        axis=1,
    )
    # Drop the query station itself (distance 0); NaN distances fail the
    # comparison and are dropped as well.
    candidates = distances[distances > 0]
    if candidates.empty:
        return None
    return stations.loc[candidates.idxmin(), 'name']
# Look up the nearest station's name for every row of the station table.
# NOTE(review): .apply(..., axis=1) is the pandas DataFrame API — confirm that
# df_onlystations_clean is a pandas DataFrame and not another library's type.
df_onlystations_clean.apply(
    lambda station: find_nearest2(station['lat'], station['lng']),
    axis=1,
)
I always get the same error: `'DataFrame' object has no attribute 'apply'`. How can I loop through the DataFrame? What am I doing wrong?