Trying to bin some geolocated data using scipy stats.binned_statisc_2d
but it seems there cannot exist any negative values in the data. Is there a way to do this accurately and fast?
import numpy as np
ilats = np.linspace(90,-90, 4000)
ilons = np.linspace(-180, 179.955, 8000)
values = np.array([17,-14, -7,-8,-11,-8,-7,-8,-10,-5,-3,
-12,-5, -6,21,30, 2, 4,-8, 6, 4, 7,
3,-6,-13, 21, 4, 5,11,-6, 8,-5,-6,
9,8,-8, -2,-16,-5,-5,-9,-4,-6,33,
-8,-5,-14,-8,-11,21,24,-7,-13,12,-6,
5,7,8,-3,-3,-4, 4, 9,-3, 9,-11,
-8,6,4, 8,-6,-6,-4,-3, 4, 5,11,
-3,-6,-4,-8,-4,12,-9,-8,15,-10,-5,
-4,12,5,-4, 4, 7,-13, 5,-4,-4,-5,
-8,-10,-9,-7.])
lats = np.array([ 6.7427, 42.7027, 42.6963, 10.3688, 37.5713, 37.5798,
-12.1563, 42.7127, 41.7457, 37.8122, 37.66 , 41.7456,
41.7457, 38.4462, 8.5418, -12.7309, -10.9395, -10.9464,
38.0641, -10.9507, -12.7316, -10.9313, -12.7235, 37.6469,
38.1234, 20.3964, -12.0847, -12.0844, 10.3794, 38.1302,
10.3627, 38.1582, 38.1463, 22.6466, 20.4246, 38.1401,
-36.6505, 38.2352, 37.8795, 40.2281, 37.8125, 42.323 ,
37.8775, 9.3717, 38.732 , 38.7202, 38.2688, 38.9148,
38.9414, -4.8618, -4.8525, 39.0108, 38.8187, -6.5067,
38.009 , -6.5174, -6.5101, -6.51 , 37.7243, 37.7512,
37.7215, -6.4902, -6.5113, 37.5409, 1.9481, 37.6398,
-6.5073, 37.8037, -11.133 , 9.0896, 38.177 , 9.089 ,
37.8708, 38.3848, -3.553 , 9.4345, -3.5343, -3.5769,
37.6847, 37.6045, 37.8857, 38.32 , 8.1673, 37.8822,
37.9113, 8.6278, 37.5652, 37.8236, 37.8593, 8.6219,
-3.5614, 37.924 , 37.7845, 37.8436, 37.8666, 37.6804,
37.639 , 40.7691, 40.7744, 37.8029, 42.9793, 8.207 ,
39.302 ])
lons = np.array([ 60.8964, -96.1017, -96.1049, 71.595 , -97.0008, -97.0126,
57.4887, -96.109 , -95.1058, -97.1088, -96.6413, -95.1054,
-95.1062, -95.2395, 58.3938, -73.7145, -70.626 , -70.5864,
-95.5678, -70.5914, -73.7525, -70.6048, -73.753 , -96.7662,
-95.504 , 100.3965, -70.7921, -70.7905, 71.5499, -95.4816,
71.5457, -95.326 , -95.3355, 96.8339, 100.2684, -95.8697,
39.1031, -95.4456, -96.3814, -94.5726, -96.3782, -95.4554,
-96.3797, -66.7449, -95.1513, -95.1465, -95.0972, -95.2498,
-95.2054, 84.2004, 84.21 , -94.5695, -94.9174, 114.0945,
-95.942 , 114.0592, 114.0956, 114.0873, -96.4689, -96.4599,
-96.463 , 114.0741, 114.0975, -96.582 , 117.2901, -96.572 ,
114.0561, -96.5539, -74.9417, 71.3391, -95.4253, 71.2452,
-96.5511, -95.065 , 107.5832, 71.3906, 107.6005, 107.4975,
-96.9722, -96.9307, -96.2627, -95.1745, 72.5249, -96.2632,
-96.3324, 57.9562, -96.9309, -96.5123, -96.589 , 57.9627,
107.6405, -96.2711, -96.5737, -96.2344, -96.2099, -96.5062,
-96.5248, -94.8421, -94.8522, -96.5873, -97.1523, 72.4707,
-95.0489])
ret = stats.binned_statistic_2d(lons, lats, values, 'count', bins=[ilons, ilats])
Trying to examine the ungridded data, values
by gridding them on a coarse grid (ilats, ilons) and plotting the counts first case and mean later on. But the above results produce:
--------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Input In [52], in <cell line: 1>()
----> 1 ret = stats.binned_statistic_2d(lons, lats, ka, 'count', bins=[ilons, ilats])
File ~/anaconda3/envs/synrad/lib/python3.8/site-packages/scipy-1.9.3-py3.8-linux-x86_64.egg/scipy/stats/_binned_statistic.py:352, in binned_statistic_2d(x, y, values, statistic, bins, range, expand_binnumbers)
349 xedges = yedges = np.asarray(bins, float)
350 bins = [xedges, yedges]
--> 352 medians, edges, binnumbers = binned_statistic_dd(
353 [x, y], values, statistic, bins, range,
354 expand_binnumbers=expand_binnumbers)
356 return BinnedStatistic2dResult(medians, edges[0], edges[1], binnumbers)
File ~/anaconda3/envs/synrad/lib/python3.8/site-packages/scipy-1.9.3-py3.8-linux-x86_64.egg/scipy/stats/_binned_statistic.py:571, in binned_statistic_dd(sample, values, statistic, bins, range, expand_binnumbers, binned_statistic_result)
569 if binned_statistic_result is None:
570 nbin, edges, dedges = _bin_edges(sample, bins, range)
--> 571 binnumbers = _bin_numbers(sample, nbin, edges, dedges)
572 else:
573 edges = binned_statistic_result.bin_edges
File ~/anaconda3/envs/synrad/lib/python3.8/site-packages/scipy-1.9.3-py3.8-linux-x86_64.egg/scipy/stats/_binned_statistic.py:752, in _bin_numbers(sample, nbin, edges, dedges)
750 if dedges_min == 0:
751 raise ValueError('The smallest edge difference is numerically 0.')
-->** 752 decimal = int(-np.log10(dedges_min)) + 6**
753 # Find which points are on the rightmost edge.
754 on_edge = np.where((sample[:, i] >= edges[i][-1]) &
755 (np.around(sample[:, i], decimal) ==
756 np.around(edges[i][-1], decimal)))[0]
ValueError: cannot convert float NaN to integer
It looks like there is a log operation and I don't see a way around it.