You said there were around 50 stations. That is a very small number. Brute force is quadratic for this problem: with 50 stations, there are only 50 × 49 / 2 = 1225 pairs of stations. There is no need for PySpark, nor for any clever algorithm.
from random import random
from math import dist, inf
from operator import itemgetter
# Names of the stations used in this demonstration.
stations = [
    'Lizard station', 'Pencil station', 'Answer station', 'Plastic station', 'Apple station',
    'Raincoat station', 'Traffic station', 'Quill station', 'Painting station', 'Fountain station',
    'Easter station', 'Market station', 'France station', 'Lamp station', 'Branch station',
    'Rainbow station', 'Television station', 'Army station', 'Elephant station', 'Zoo station',
    'Oyster station', 'Helmet station', 'Spoon station', 'Denmark station', 'Lighter station',
    'Balloon station', 'Russia station', 'Tomato station', 'Glass station', 'Iron station',
    'Pillow station', 'Refrigerator station', 'Truck station', 'Parrot station', 'Church station',
    'Energy station', 'Vase station', 'House station', 'Kitchen station', 'Garage station',
    'Hospital station', 'Belgium station', 'Vulture station', 'Orange station', 'Disease station',
    'Rain station', 'Jelly station', 'Animal station', 'Crayon station', 'Rocket station',
]

# One (name, lat, lng) record per station; both coordinates are random floats in [0, 10).
data = [(station, 10*random(), 10*random()) for station in stations]

# Brute-force pairwise distance matrix: distances[i][j] is the Euclidean
# distance (math.dist) between station i and station j.
# NOTE(review): Euclidean distance treats (lat, lng) as planar coordinates;
# real geographic coordinates would presumably need a great-circle distance.
distances = [
    [dist((lats, lngs), (latt, lngt)) for (_, latt, lngt) in data]
    for (_, lats, lngs) in data
]
for i, row in enumerate(distances):  # forbid a station from being "closest to itself"
    row[i] = inf

# For each station, the index (then the name) of the station at minimal distance.
# min() returns the first minimum, so ties resolve to the lowest index.
closest_stations_idx = [min(enumerate(row), key=itemgetter(1))[0] for row in distances]
closest_stations = [stations[i] for i in closest_stations_idx]

# Print the result as a table of '#'-prefixed lines.
print('# {:20s} {:4s} {:4s} {}'.format('Station', 'lat', 'lng', 'Closest station'))
for (s, lat, lng), t in zip(data, closest_stations):
    print('# {:20s} {:.2f} {:.2f} {}'.format(s, lat, lng, t))
# Station lat lng Closest station
# Lizard station 2.10 2.86 Garage station
# Pencil station 3.32 1.87 Garage station
# Answer station 2.37 6.55 Glass station
# Plastic station 4.57 0.62 Easter station
# Apple station 1.71 9.74 Elephant station
# Raincoat station 7.89 2.11 Kitchen station
# Traffic station 6.78 0.16 Orange station
# Quill station 1.59 9.92 Elephant station
# Painting station 6.67 4.78 Jelly station
# Fountain station 3.35 4.15 Energy station
# Easter station 4.30 0.97 Plastic station
# Market station 3.76 9.98 Lamp station
# France station 0.27 7.82 Denmark station
# Lamp station 5.42 9.97 House station
# Branch station 4.23 2.03 Belgium station
# Rainbow station 0.61 3.66 Lizard station
# Television station 7.20 9.21 House station
# Army station 5.24 2.48 Rocket station
# Elephant station 1.65 9.82 Apple station
# Zoo station 9.82 5.22 Parrot station
# Oyster station 2.91 1.08 Pencil station
# Helmet station 6.96 3.56 Balloon station
# Spoon station 1.82 9.01 Disease station
# Denmark station 1.06 9.17 Spoon station
# Lighter station 9.84 9.00 Rain station
# Balloon station 6.46 3.24 Helmet station
# Russia station 1.72 8.60 Disease station
# Tomato station 7.65 1.11 Vase station
# Glass station 2.52 7.86 Disease station
# Iron station 8.50 8.07 Rain station
# Pillow station 7.77 9.66 Television station
# Refrigerator station 4.54 4.24 Fountain station
# Truck station 5.33 8.19 Vulture station
# Parrot station 8.45 6.20 Zoo station
# Church station 6.05 0.12 Orange station
# Energy station 3.32 4.55 Fountain station
# Vase station 7.81 1.89 Raincoat station
# House station 6.98 9.79 Television station
# Kitchen station 7.79 2.21 Raincoat station
# Garage station 3.15 2.19 Pencil station
# Hospital station 8.94 1.99 Raincoat station
# Belgium station 4.25 1.80 Branch station
# Vulture station 4.04 8.01 Truck station
# Orange station 6.58 0.25 Traffic station
# Disease station 1.99 8.77 Spoon station
# Rain station 9.05 9.10 Lighter station
# Jelly station 6.63 5.89 Painting station
# Animal station 5.34 1.20 Plastic station
# Crayon station 1.99 1.55 Oyster station
# Rocket station 5.93 2.61 Army station
Displaying the resulting directed graph:
import networkx as nx
import matplotlib.pyplot as plt
# Directed graph with one edge per station, pointing at its closest station.
# Node labels are the station names with the trailing ' station' removed.
short_name = {name: name.removesuffix(' station') for name in stations}
G = nx.DiGraph()
for source, target in zip(stations, closest_stations):
    G.add_edge(short_name[source], short_name[target])

# Place every node at the (lat, lng) coordinates of its station.
layout = {name.removesuffix(' station'): (lat, lng) for name, lat, lng in data}
nx.draw_networkx(G, pos=layout, with_labels=True)
plt.show()
