0

I have a very large data set (over 60K rows); the subset for one particular data type is around 32K rows. Sample data produced with dput is given below (the first 200 rows of Water). My question is: how can I use the Jaccard distance from the proxy package to remove rows in the data set that are, say, 95% similar to one another and keep the remainder? Is there a way to use the output of the Jaccard distance function in the proxy package to do this?

structure(list(Corrected.Diameter.Pixels = c(12.19019, 68.27398, 47.76143, 73.22346, 12.19019, 68.27398, 47.76143, 73.22346, 22.75866, 29.18328, 8.36828, 24.04665, 46.39632, 17.54956, 8.48163, 34.8317, 63.37687, 21.19493, 30.39247, 50.77077, 11.97492, 12.06298, 12.25475, 47.38221, 32.43994, 28.94985, 24.2578, 6.54314, 8.15978, 5.86137, 10.79281, 45.88894, 45.61855, 62.30688, 26.97145, 22.74422, 14.71437, 10.85698, 57.38691, 49.8456, 10.16822, 6.44511, 17.13741, 82.2915, 30.46339, 26.88251, 13.60706, 26.89802, 9.94159, 14.4176, 69.87079, 20.82549, 12.82834, 56.12008, 42.99599, 21.41007, 28.61327, 31.16904, 45.71646, 5.99735, 8.20189, 8.59096, 33.17984, 39.97919, 16.47927, 62.84838, 46.17857, 8.34924, 21.1896, 18.83077, 26.87241, 17.48091, 29.38473, 24.51047, 10.99448, 45.74561, 18.83606, 48.27098, 34.10088, 27.26335, 47.261, 19.94131, 11.83929, 45.76572, 31.26533, 37.23263, 66.99194, 37.03562, 37.23263, 66.99194, 37.03562, 48.47576, 26.91966, 9.49569, 3.85533, 17.35284, 20.63553, 35.38634, 30.92475, 39.1207, 40.94417, 7.05849, 9.33047, 48.40907, 26.53372, 23.47957, 4.95074, 26.20909, 43.91702, 44.50182, 12.40077, 27.13945, 54.92627, 69.57408, 22.55181, 19.8885, 36.79568, 14.14961, 21.97367, 13.65241, 32.21456, 26.86064, 79.45784, 32.50686, 17.62748, 33.22018, 18.34666, 40.1397, 42.19999, 17.53833, 24.99026, 21.5522, 26.52919, 20.83556, 10.15603, 32.3793, 29.88744, 9.41774, 18.42085, 54.77612, 20.76089, 5.18625, 26.73453, 7.16143, 67.42748, 19.83195, 29.55424, 25.57784, 34.89938, 47.09778, 50.03208, 52.917, 34.84761, 11.71751, 43.0097, 10.7626, 19.80249, 5.27751, 29.94862, 6.53097, 35.8462, 44.63104, 16.06905, 15.96641, 49.53817, 22.80723, 26.74212, 22.07261, 6.76985, 28.21919, 19.92225, 31.41192, 38.10348, 66.85515, 23.34814, 31.64893, 9.29965, 9.29965, 28.3982, 10.8524, 42.95194, 35.25589, 32.39711, 8.73128, 6.23991, 40.09681, 24.12972, 35.72366, 13.16424, 10.16089, 14.49837, 8.83096, 28.32626, 25.79169, 40.59262, 5.29257, 66.95081, 23.22116, 36.50169, 12.97023), 
Contour.Slopes.Focus = c(4.3136, 25.36804, 10.92002, 2.12329, 4.3136, 25.36804, 10.92002, 2.12329, 4.30158, 6.35542, 2.11506, 3.71257, 2.89324, 4.64909, 2.70284, 2.12156, 4.55881, 9.37401, 14.45324, 33.61269, 8.59098, 4.57816, 3.48318, 3.75199, 11.25647, 16.81135, 10.61942, 2.23302, 2.87025, 2.9964, 4.05464, 2.76692, 10.31498, 7.45807, 7.63816, 10.68314, 3.13737, 3.08689, 18.50534, 15.5981, 3.95702, 2.37803, 5.98943, 23.67886, 7.09568, 22.48242, 3.29837, 2.68711, 2.16861, 6.44475, 7.15496, 2.34112, 2.06893, 3.84312, 2.08664, 4.02224, 3.72601, 4.11803, 26.3045, 2.87624, 3.98579, 3.04367, 3.39529, 5.05169, 3.61365, 19.98456, 14.06127, 2.22868, 4.26763, 10.38651, 19.54448, 3.43225, 2.41181, 3.16934, 3.82808, 23.9729, 7.45576, 2.07306, 21.02317, 5.61264, 2.05496, 2.5897, 7.24035, 2.0526, 2.10069, 2.79483, 2.55247, 2.3568, 2.79483, 2.55247, 2.3568, 15.50758, 17.59354, 4.46851, 2.5979, 2.13469, 8.90064, 11.08931, 6.44809, 4.2927, 3.47038, 5.40188, 2.67097, 3.16027, 2.3512, 19.05212, 2.0965, 15.44096, 3.41642, 3.37477, 2.9116, 4.33822, 4.91543, 5.14467, 4.45434, 9.14479, 15.53937, 4.55359, 3.43464, 4.41869, 4.44294, 6.91779, 17.98987, 3.99183, 4.33946, 8.10813, 6.29913, 15.06431, 11.24421, 2.96062, 7.0508, 2.05813, 5.67925, 5.20416, 3.78266, 3.33255, 3.41214, 4.15454, 4.92729, 2.06845, 3.48458, 2.37768, 12.21983, 3.67568, 2.92242, 4.18589, 10.19353, 5.37037, 3.74818, 2.99934, 23.26076, 4.12993, 2.06789, 2.88138, 2.62313, 5.04652, 6.90301, 2.13891, 10.06989, 2.60516, 11.75681, 4.68333, 2.66465, 3.42183, 20.50746, 10.14104, 16.13318, 11.83242, 2.9538, 6.09976, 2.1777, 10.97297, 3.66962, 2.85015, 7.96284, 6.78556, 3.17541, 3.17541, 3.39588, 2.18294, 2.48335, 2.29208, 3.46203, 2.05496, 4.23799, 18.35541, 3.36715, 2.48819, 5.91993, 2.83554, 3.1364, 2.14744, 7.4286, 8.63987, 2.72886, 2.09831, 3.33151, 2.33972, 2.36506, 2.34587), Center.Slopes.Focus = c(3.93569, 17.48096, 5.24481, 2.04047, 3.93569, 17.48096, 5.24481, 2.04047, 5.47357, 6.4319, 1.79427, 7.45559, 2.5578, 5.22557, 
3.51968, 1.93345, 4.34895, 8.61821, 16.66679, 27.49446, 4.6633, 3.97783, 3.08418, 3.92054, 7.95604, 11.23728, 10.3449, 2.66223, 1.91881, 2.45007, 4.57762, 11.9696, 11.12726, 16.20871, 6.46894, 9.12789, 2.72591, 2.04958, 8.06506, 16.67047, 2.54557, 2.77529, 8.36701, 16.81984, 9.87396, 14.59361, 2.62569, 2.09149, 2.31127, 6.00849, 4.72961, 2.20902, 1.91998, 4.06132, 4.57347, 2.41457, 8.59905, 7.57387, 20.21926, 2.76708, 5.78741, 2.62601, 2.14974, 5.08722, 4.79306, 19.08451, 12.99397, 1.56353, 2.62428, 8.48093, 12.61335, 2.1338, 2.06289, 2.62905, 2.74988, 18.2123, 6.72277, 8.16906, 15.65421, 8.433, 1.79583, 2.34758, 6.86255, 2.15922, 2.03373, 6.4009, 2.4897, 1.83251, 6.4009, 2.4897, 1.83251, 14.08499, 12.91235, 4.07487, 2.21547, 2.07308, 8.29071, 12.91634, 3.82692, 8.43121, 2.97828, 2.06107, 2.32977, 9.12194, 3.18728, 9.84288, 1.65122, 10.59612, 2.73903, 2.4703, 3.2595, 7.86804, 4.82543, 13.92432, 2.90592, 10.49815, 16.18525, 4.74386, 2.05197, 4.74073, 8.98564, 7.67054, 18.01745, 9.98196, 4.22172, 5.13628, 6.88238, 19.92256, 7.89053, 2.61123, 7.40615, 2.05687, 3.51666, 2.89755, 4.73519, 2.38595, 2.68162, 2.87286, 6.4738, 1.96502, 1.97548, 3.03498, 9.24552, 5.75448, 3.67827, 2.51072, 6.77992, 3.50957, 2.89675, 2.46563, 9.0731, 15.51868, 2.08016, 2.3717, 2.50213, 6.43711, 8.18983, 2.0291, 10.35156, 2.10799, 15.97664, 12.49868, 2.313, 2.79687, 16.33956, 8.74962, 10.94272, 11.17668, 2.41257, 9.07319, 2.03425, 11.82396, 6.73517, 3.15668, 6.56735, 5.03541, 4.49669, 4.49669, 2.72608, 1.69062, 2.10132, 2.19671, 7.70856, 2.2093, 2.21091, 9.58419, 2.56105, 2.24081, 8.01145, 1.86848, 5.16671, 1.84117, 10.75741, 6.8423, 8.78951, 1.41239, 3.76303, 2.01126, 6.49525, 2.7017), Hollowness = c(0.52616, 0.37595, 0.68236, 1.05521, 0.52616, 0.37595, 0.68236, 1.05521, 0.59558, 0.4549, 1.02565, 0.3604, 0.99252, 0.32438, 0.35027, 1.13396, 0.83684, 0.50703, 0.47906, 0.45009, 1.64756, 0.4937, 0.62383, 0.99405, 0.90267, 0.33659, 0.71159, 1.07434, 1.12326, 1.22678, 0.5452, 0.49734, 0.38534, 
0.43372, 0.94245, 0.59777, 1.01532, 1.10199, 0.3537, 0.45786, 1.05529, 1.12316, 0.89381, 0.4182, 0.428, 0.4253, 1.16391, 1.09938, 1.06376, 0.72487, 0.72082, 1.13886, 1.0306, 0.95027, 0.61007, 1.04794, 0.53296, 0.7093, 0.38053, 1.12633, 1.176, 1.18966, 0.9997, 0.99609, 0.419, 0.3866, 0.31941, 1.20569, 1.0829, 0.38096, 0.37841, 1.04863, 1.10137, 1.09964, 1.10824, 0.36615, 0.54242, 0.49812, 0.44068, 0.39192, 1.13573, 1.01345, 0.7259, 1.13595, 1.0808, 0.52761, 1.01203, 1.14371, 0.52761, 1.01203, 1.14371, 0.49288, 0.3546, 0.67057, 1.18182, 1.06765, 0.4982, 0.39265, 1.02071, 0.335, 1.00649, 1.32267, 0.78909, 0.41254, 0.65074, 1.29995, 1.02355, 0.53661, 1.07034, 1.03299, 0.53751, 0.32311, 0.88956, 0.50373, 1.00173, 0.38705, 0.39043, 0.40523, 1.06139, 1.19208, 0.42918, 0.95767, 0.39285, 0.4552, 1.09705, 0.94378, 1.01756, 0.45299, 0.5367, 0.68791, 0.35151, 1.02035, 1.02949, 1.04821, 0.99493, 1.14203, 1.05309, 0.67721, 0.26618, 1.17289, 1.0328, 1.1094, 0.3134, 0.88318, 0.9364, 1.0529, 0.95072, 1.02907, 0.99449, 1.00704, 0.46449, 0.46578, 1.13423, 1.05147, 1.0165, 1.12441, 0.74086, 0.83732, 0.40251, 1.18107, 0.48469, 0.51293, 1.01394, 1.05956, 0.34575, 0.63879, 0.36294, 0.37995, 1.15976, 0.4365, 1.09591, 0.35233, 0.53878, 0.92687, 0.22597, 1.03128, 0.43234, 0.43234, 1.0036, 1.1369, 1.15127, 1.16154, 0.51647, 1.15784, 1.22872, 0.55414, 1.03428, 1.10664, 0.72898, 1.09186, 0.31093, 1.08589, 0.35295, 0.98539, 0.42157, 0.90694, 0.8678, 1.04131, 0.44712, 0.92105), Ellipse.Best.Fit = c(0.12686, 0.06242, 0.0399, 0.08292, 0.12686, 0.06242, 0.0399, 0.08292, 0.0869, 0.10098, 0.22388, 0.09176, 0.06762, 0.10612, 0.13379, 0.09515, 0.0345, 0.09968, 0.1, 0.06043, 0.21739, 0.11979, 0.13131, 0.04318, 0.05479, 0.09419, 0.0825, 0.16048, 0.11341, 0.15954, 0.11861, 0.10554, 0.06632, 0.04445, 0.0692, 0.10756, 0.09534, 0.09852, 0.04601, 0.10938, 0.11533, 0.16893, 0.09193, 0.05332, 0.09533, 0.12059, 0.08821, 0.08091, 0.12238, 0.1066, 0.03199, 0.12503, 0.12608, 0.04762, 0.07634, 0.07655, 0.1012, 
0.13079, 0.07817, 0.19241, 0.14994, 0.09514, 0.0654, 0.05102, 0.0964, 0.04918, 0.07226, 0.23066, 0.08644, 0.09246, 0.09367, 0.08282, 0.10171, 0.10327, 0.15858, 0.08353, 0.1072, 0.14693, 0.09354, 0.09023, 0.08682, 0.13214, 0.12566, 0.08757, 0.11074, 0.07518, 0.06005, 0.08598, 0.07518, 0.06005, 0.08598, 0.09446, 0.10164, 0.11482, 0.32865, 0.16509, 0.13208, 0.10245, 0.06301, 0.09023, 0.0626, 0.10438, 0.12711, 0.05577, 0.08548, 0.167, 0.33919, 0.06765, 0.09811, 0.0835, 0.13114, 0.11114, 0.03856, 0.06691, 0.13623, 0.11268, 0.07032, 0.12544, 0.08775, 0.10234, 0.11976, 0.06556, 0.05994, 0.10356, 0.10274, 0.05077, 0.08209, 0.07633, 0.11856, 0.10702, 0.08867, 0.13358, 0.07553, 0.07519, 0.14972, 0.09807, 0.08352, 0.17224, 0.09787, 0.08938, 0.07829, 0.12535, 0.09875, 0.17592, 0.04693, 0.08057, 0.06502, 0.07306, 0.06113, 0.08202, 0.07735, 0.05906, 0.09861, 0.1093, 0.06698, 0.2418, 0.09116, 0.47288, 0.10737, 0.1312, 0.11815, 0.10209, 0.09471, 0.085, 0.06287, 0.10586, 0.08374, 0.10256, 0.17284, 0.10033, 0.13071, 0.13577, 0.07558, 0.04658, 0.1316, 0.06445, 0.11592, 0.11592, 0.06354, 0.14166, 0.07837, 0.09737, 0.10549, 0.15289, 0.1791, 0.09515, 0.07661, 0.0822, 0.06911, 0.09861, 0.08877, 0.11622, 0.10625, 0.08489, 0.09665, 0.47287, 0.03571, 0.10801, 0.09309, 0.12792), Ellipse.Minor.Major = c(0.97484, 0.87149, 0.97655, 0.94141, 0.97484, 0.87149, 0.97655, 0.94141, 0.93112, 0.92623, 0.68616, 0.94106, 0.96672, 0.9292, 0.98265, 0.9126, 0.94138, 0.89859, 0.86031, 0.90321, 0.41054, 0.74691, 0.79333, 0.91219, 0.92769, 0.9378, 0.80746, 0.6985, 0.87451, 0.65704, 0.88385, 0.78848, 0.90607, 0.95438, 0.96349, 0.79419, 0.95932, 0.975, 0.94095, 0.78623, 0.79446, 0.67024, 0.8857, 0.86376, 0.92037, 0.87893, 0.89415, 0.95029, 0.85487, 0.78298, 0.97361, 0.94482, 0.92723, 0.93701, 0.96975, 0.83343, 0.97602, 0.86225, 0.89309, 0.72466, 0.88954, 0.92641, 0.9625, 0.93398, 0.90222, 0.93317, 0.91668, 0.70301, 0.89221, 0.97242, 0.86125, 0.88034, 0.97066, 0.98221, 0.64364, 0.88755, 0.94707, 0.75962, 0.86182, 
0.94778, 0.99107, 0.9868, 0.90752, 0.95454, 0.94929, 0.9311, 0.93094, 0.98346, 0.9311, 0.93094, 0.98346, 0.81644, 0.91833, 0.85131, 0.5621, 0.91423, 0.97931, 0.86834, 0.95444, 0.87997, 0.94614, 0.78038, 0.87974, 0.98127, 0.9534, 0.58513, 0.69292, 0.88587, 0.8617, 0.94961, 0.82541, 0.96436, 0.94692, 0.8396, 0.82141, 0.93629, 0.93202, 0.952, 0.9566, 0.93307, 0.87949, 0.93758, 0.87382, 0.84368, 0.95025, 0.9825, 0.95884, 0.91336, 0.87575, 0.913, 0.95621, 0.94325, 0.95315, 0.92453, 0.81239, 0.97447, 0.93389, 0.67998, 0.93045, 0.93555, 0.87384, 0.56281, 0.97646, 0.94133, 0.98437, 0.93116, 0.96405, 0.92715, 0.93464, 0.96043, 0.9413, 0.93246, 0.952, 0.96144, 0.97202, 0.76449, 0.84536, 0.44224, 0.95637, 0.88116, 0.78823, 0.79391, 0.9312, 0.92753, 0.95135, 0.7575, 0.88473, 0.981, 0.6237, 0.96012, 0.89053, 0.88827, 0.93739, 0.93825, 0.92609, 0.94267, 0.90009, 0.90009, 0.9272, 0.92499, 0.98394, 0.9474, 0.91858, 0.9143, 0.88942, 0.89033, 0.95715, 0.94818, 0.91697, 0.93259, 0.95047, 0.9039, 0.88334, 0.92442, 0.80908, 0.57049, 0.9211, 0.95429, 0.9716, 0.84649), Ellipse.Angle = c(-0.26549, 47.87729, 32.81444, 0.41248, -0.26549, 47.87729, 32.81444, 0.41248, -84.62616, 33.2692, 30.52107, 59.02478, 10.3997, 15.29359, 79.84372, 17.00218, 1.69621, 11.81746, -79.6879, 87.38062, 5.98631, 6.19804, 61.87537, 35.6207, 73.81332, 79.533, -3.37468, -3.46536, 4.92232, 4.54409, -88.30457, -81.54878, 37.10699, 77.15878, 9.88208, 9.27422, 28.16016, 2.34975, 16.60769, 57.14313, 3.7687, 0.27241, 0.7656, -73.19455, 68.05791, -86.29077, 6.0115, 41.2951, 7.73961, 3.2164, 25.34626, 2.0231, 70.4911, 26.23402, 62.33054, 6.76939, 25.93145, 56.36418, -87.88627, -21.23725, -85.37364, 32.00761, 35.25288, -79.7581, -24.24713, 71.69191, 36.5758, -26.63295, 8.81189, -5.42305, 65.88388, 4.44144, -84.979, -65.68634, -1.76115, -62.38568, -82.90343, -82.59503, 68.85648, 82.84439, 50.71829, -32.7039, 27.42184, 36.22755, 38.5052, 54.30583, 11.04899, -31.53599, 54.30583, 11.04899, -31.53599, 79.05298, -88.21964, 
-11.13059, 62.49229, 67.14563, 13.70192, -89.29182, 20.07127, -88.92424, 15.24553, 16.53206, -25.23101, -69.6471, 32.34082, 22.7326, -36.32436, 5.91715, 1.00293, 26.09496, -0.6578, 89.6088, 19.4799, 87.81131, -32.30591, 61.31126, -70.51817, -3.67815, 41.6709, 83.77933, 63.97905, 84.87115, -65.22361, 79.98466, 51.07666, 55.46004, 32.32834, -81.63506, 78.16069, -32.71989, -9.99143, 43.63046, 69.51038, 65.42319, -0.99499, 46.87006, 67.95813, -0.76465, 9.91245, 2.72037, 10.02576, -3.10905, -60.15891, 68.70296, -11.16678, 2.68246, 15.47439, 11.88357, -13.77675, 10.44487, -59.76076, 62.30257, 43.41193, 8.58439, -31.92735, -19.00837, -7.67491, -77.03242, -63.67503, 5.00948, -87.86459, 70.2644, 42.20956, -88.13396, 34.0519, -6.83882, 81.47803, -24.68556, -8.03435, 25.76215, 71.77255, -75.49636, 51.86375, -0.6648, -0.41131, -1.30077, -5.19302, -5.19302, 46.07491, 34.30194, 25.71502, 34.11366, 38.96255, 58.57315, -18.01156, -52.6637, -82.42259, 29.84653, -6.24498, 54.00777, 8.53305, 5.90916, -67.80801, 10.97036, 75.45062, -53.53974, 11.68803, -72.71574, 67.94856, 49.56824), Contour.Circularity = c(0.57984, 0.31259, 0.57614, 0.32992, 0.57984, 0.31259, 0.57614, 0.32992, 0.49579, 0.38203, 0.16996, 0.35673, 0.44124, 0.44364, 0.55642, 0.32538, 0.49707, 0.50238, 0.35861, 0.36499, 0.39239, 0.69829, 0.4429, 0.47363, 0.58054, 0.51238, 0.59219, 0.54507, 0.68784, 0.60861, 0.57437, 0.17506, 0.38936, 0.3442, 0.60931, 0.54726, 0.58091, 0.56619, 0.47722, 0.18886, 0.63755, 0.54623, 0.65334, 0.27551, 0.33937, 0.39101, 0.54654, 0.34939, 0.38502, 0.62972, 0.55753, 0.35239, 0.36218, 0.52252, 0.23757, 0.58587, 0.2709, 0.21829, 0.43872, 0.47969, 0.59459, 0.6124, 0.51182, 0.54793, 0.41998, 0.35186, 0.38025, 0.27553, 0.48553, 0.56306, 0.51686, 0.54781, 0.27138, 0.49565, 0.46858, 0.36787, 0.51173, 0.12335, 0.3182, 0.30911, 0.27854, 0.37153, 0.65988, 0.28953, 0.31494, 0.27995, 0.39859, 0.3481, 0.27995, 0.39859, 0.3481, 0.33752, 0.47926, 0.65219, 0.28976, 0.25785, 0.39166, 0.3605, 0.52565, 0.23279, 
0.46146, 0.7376, 0.39896, 0.31347, 0.29913, 0.36793, 0.11098, 0.72137, 0.38094, 0.52785, 0.36745, 0.33825, 0.535, 0.1863, 0.44796, 0.43577, 0.4647, 0.4649, 0.55783, 0.70578, 0.19916, 0.5446, 0.23488, 0.26414, 0.4931, 0.65159, 0.54989, 0.37499, 0.34683, 0.29308, 0.41818, 0.34788, 0.59432, 0.58566, 0.53021, 0.45434, 0.43537, 0.4838, 0.58022, 0.27608, 0.47766, 0.64015, 0.4575, 0.56754, 0.46998, 0.49236, 0.66591, 0.60207, 0.53154, 0.46698, 0.37326, 0.29032, 0.35785, 0.50671, 0.41213, 0.3326, 0.65913, 0.02252, 0.40109, 0.60521, 0.23903, 0.22271, 0.41207, 0.56428, 0.48571, 0.49939, 0.5455, 0.54608, 0.43553, 0.34283, 0.35736, 0.29468, 0.41175, 0.4598, 0.44601, 0.52998, 0.55968, 0.55968, 0.52693, 0.40363, 0.37611, 0.29309, 0.26134, 0.4002, 0.56722, 0.33748, 0.54575, 0.40915, 0.83766, 0.56029, 0.52272, 0.49637, 0.31738, 0.48753, 0.19278, 0.01605, 0.41144, 0.37996, 0.17427, 0.33571), Convex.Hull.Circularity = c(0.95452, 0.97756, 0.98551, 0.96918, 0.95452, 0.97756, 0.98551, 0.96918, 0.96737, 0.96509, 0.87561, 0.97622, 0.97008, 0.97073, 0.93563, 0.97727, 0.989, 0.97048, 0.96045, 0.97949, 0.69613, 0.9439, 0.94336, 0.98016, 0.98301, 0.96922, 0.95774, 0.89566, 0.95053, 0.86748, 0.95847, 0.93995, 0.97184, 0.98013, 0.97682, 0.95865, 0.962, 0.95634, 0.98045, 0.9326, 0.94343, 0.90115, 0.96056, 0.97901, 0.95421, 0.95998, 0.97379, 0.97555, 0.97097, 0.95375, 0.9898, 0.95915, 0.95802, 0.97736, 0.97341, 0.96096, 0.96343, 0.93928, 0.96931, 0.9087, 0.92696, 0.96462, 0.97702, 0.98306, 0.96978, 0.97862, 0.97239, 0.88828, 0.95851, 0.96804, 0.96829, 0.96696, 0.9547, 0.95874, 0.88022, 0.96627, 0.96874, 0.93356, 0.96086, 0.97163, 0.96986, 0.94628, 0.93651, 0.96472, 0.95908, 0.98232, 0.97587, 0.96754, 0.98232, 0.97587, 0.96754, 0.96678, 0.95683, 0.94982, 0.79142, 0.93517, 0.96691, 0.95162, 0.98091, 0.96642, 0.9783, 0.8499, 0.96012, 0.9823, 0.96049, 0.83793, 0.86966, 0.97624, 0.95089, 0.97351, 0.94081, 0.95098, 0.98555, 0.96934, 0.93905, 0.96482, 0.97905, 0.96239, 0.96955, 0.9676, 0.95003, 0.97403, 
0.97745, 0.93772, 0.94393, 0.98481, 0.97512, 0.97424, 0.95336, 0.95927, 0.96911, 0.95691, 0.98043, 0.97289, 0.92612, 0.96436, 0.97502, 0.88734, 0.95745, 0.9639, 0.96252, 0.84067, 0.9671, 0.9475, 0.98454, 0.97523, 0.96989, 0.97203, 0.97307, 0.97211, 0.96897, 0.97704, 0.96131, 0.95667, 0.97914, 0.88039, 0.94506, 0.66942, 0.95603, 0.93732, 0.93096, 0.9365, 0.96493, 0.97557, 0.97841, 0.93981, 0.95348, 0.96814, 0.85398, 0.97675, 0.92406, 0.94602, 0.97492, 0.98237, 0.95324, 0.97392, 0.93816, 0.93816, 0.97622, 0.95486, 0.96851, 0.96179, 0.95823, 0.94437, 0.87086, 0.95986, 0.97093, 0.97256, 0.93827, 0.95562, 0.9785, 0.96941, 0.96207, 0.968, 0.95264, 0.82615, 0.98268, 0.95164, 0.96836, 0.94015), Box.H.W.Ratio = c(0.90476, 0.98701, 0.96552, 0.9322, 0.90476, 0.98701, 0.96552, 0.9322, 1.05714, 1, 1.04545, 1.025, 0.94737, 1, 1.05263, 0.95385, 0.93182, 0.96429, 1.14706, 1.07407, 0.43478, 0.8, 1.19048, 0.94444, 1.05263, 1, 0.8125, 0.66667, 0.84211, 0.71429, 1.11765, 1.23529, 0.94737, 1.06494, 0.97222, 0.93103, 0.9, 0.95455, 0.97015, 1.15789, 0.78947, 0.70588, 0.92, 1.13636, 1.075, 1.13793, 0.88889, 0.96, 0.875, 0.85714, 0.97727, 0.93333, 1.03448, 0.93976, 1.02667, 0.81579, 0.92, 1.08163, 1.16667, 0.8, 1.25, 1.05882, 0.96296, 1.05455, 0.93333, 1.07246, 1, 0.875, 0.91429, 1, 1.06897, 0.875, 1.03571, 1, 0.66667, 1.125, 1.08333, 1.27273, 1.13158, 1.07692, 1, 1.05, 1, 0.96341, 1, 1.04918, 0.90566, 0.9697, 1.04918, 0.90566, 0.9697, 1.14815, 1.10345, 0.8125, 1.33333, 1.05263, 1, 1.16667, 0.95349, 1.14035, 0.98438, 0.63636, 0.91304, 1.01351, 0.96154, 0.67647, 0.94118, 0.90323, 0.83784, 0.94286, 0.88889, 1.11905, 0.96053, 1.17442, 0.87179, 1.04, 1, 0.95652, 0.94737, 1.04762, 1.125, 1.02778, 1.07865, 1.27083, 0.93548, 1, 0.92593, 1.06667, 1.12, 1, 0.94444, 1, 1, 1.03333, 0.84211, 0.96296, 1.04167, 0.72222, 0.93103, 0.95745, 0.84211, 0.5625, 1.03125, 1.08333, 0.9697, 0.9375, 1, 0.94737, 0.89474, 0.93421, 1.07273, 1.04054, 0.98462, 0.95833, 0.95833, 0.8, 0.82759, 2.05882, 1.08333, 0.93333, 
1.3, 1.18333, 1, 1.03571, 0.98214, 0.83871, 1.14286, 0.96296, 0.63158, 1, 1.15, 1.15789, 1, 0.94118, 1, 0.95349, 0.85, 0.85, 0.97872, 1, 0.94521, 0.96923, 0.94444, 1.05, 0.8, 1.11111, 1.02439, 0.95238, 0.78947, 1.09524, 0.96429, 0.90476, 1.16667, 0.97059, 1.24194, 1.03846, 0.92857, 1.02174, 1.06349, 1.06897), Angled.Box.H.W.Ratio = c(0.96296, 0.97485, 0.9637, 0.98024, 0.96296, 0.97485, 0.9637, 0.98024, 1, 0.94681, 0.66931, 0.976, 0.97464, 1, 1, 0.93385, 0.94236, 0.92258, 0.85938, 0.92657, 0.40909, 0.78947, 0.85514, 0.93038, 0.97059, 1, 0.80645, 0.64706, 0.83465, 0.69231, 0.88889, 0.81766, 0.93077, 0.94896, 0.97714, 0.90349, 1, 0.98462, 0.9633, 0.77094, 0.77778, 0.6875, 0.96639, 0.85212, 0.93525, 0.97619, 0.97143, 0.9668, 0.86957, 0.85093, 0.96455, 0.99111, 0.92683, 0.95257, 0.94604, 0.81081, 0.9292, 0.92308, 0.8835, 0.78571, 0.78571, 0.94118, 0.97857, 0.93651, 0.92683, 0.93478, 1, 0.67925, 0.89253, 0.98544, 0.99359, 0.87097, 0.99716, 0.97156, 0.65217, 0.88679, 0.92571, 0.769, 0.87778, 0.98557, 0.97735, 0.98137, 1, 0.93333, 0.98745, 0.93822, 0.98897, 0.98813, 0.93822, 0.98897, 0.98813, 0.81962, 0.90323, 0.8, 0.51546, 0.94872, 1, 0.93126, 0.93466, 0.9, 0.94923, 0.6, 0.9557, 0.95426, 0.9802, 0.58893, 0.77273, 0.90556, 0.93333, 0.98649, 0.88462, 0.98626, 0.95346, 0.84809, 0.85, 0.96429, 0.94753, 0.95652, 0.95385, 0.95238, 0.88679, 0.9625, 0.85923, 0.82171, 1, 0.97661, 0.93782, 0.93617, 0.9322, 0.96296, 0.96657, 0.93199, 1, 0.96667, 0.83333, 0.98267, 0.96078, 0.70588, 0.95738, 0.96063, 0.83784, 0.56667, 0.96875, 0.91667, 0.97107, 0.93548, 0.93878, 0.96127, 0.94103, 0.96614, 0.99497, 0.96053, 0.97183, 0.95652, 0.9941, 0.77586, 0.82143, 0.44099, 0.92105, 0.92857, 0.76687, 0.79445, 1, 0.96429, 0.97778, 0.82895, 0.87097, 0.96154, 0.61111, 1, 0.86667, 0.91219, 0.94172, 0.96988, 1, 0.92636, 0.84058, 0.84058, 0.97826, 1, 0.94444, 0.92662, 0.9434, 0.91257, 0.77778, 0.90418, 0.95604, 0.92099, 0.77778, 0.93401, 0.98305, 0.9, 0.85965, 0.91209, 0.78993, 0.55373, 0.99671, 0.97403, 
0.93939, 0.95763)), row.names = c(NA, 200L), class = "data.frame")

similarity<- proxy::dist(Water, by_rows = TRUE, method = "Jaccard")

I would like to calculate the similarity between each pair of rows in the data set and then remove the rows that are too similar to each other. However, because the data set is very large, the methods I have seen on Stack Overflow won't work, as R runs out of memory. I am hoping to create a function that uses the similarity calculated with the Jaccard distance to remove those rows of the data set that are within a given similarity of one another. Any ideas on how to do that would be much appreciated!

0 Answers0