8

I have the following data.


x_plus <- c(1.3660254, 1.1123724, 1.0000000, 0.9330127, 0.8872983,
            0.8535534, 0.8273268, 0.8061862, 0.7886751, 0.7738613,
            0.6936492, 0.6581139, 0.6369306, 0.6224745, 0.6118034,
            0.5968246, 0.5866025, 0.5707107, 0.5612372, 0.5500000,
            0.5433013, 0.5387298, 0.5353553, 0.5306186, 0.5273861, 
            0.5193649, 0.5158114, 0.5122474, 0.5103510, 0.5086603,
            0.5050000, 0.5027386, 0.5008660)


x_minus <- c(-0.3660254, -0.1123724,  0.0000000,  0.0669873,  0.1127017,
             0.1464466,  0.1726732,  0.1938138,  0.2113249, 0.2261387,  
             0.3063508,  0.3418861,  0.3630694,  0.3775255,  0.3881966,
             0.4031754,  0.4133975,  0.4292893, 0.4387628,  0.4500000,
             0.4566987,  0.4612702,  0.4646447,  0.4693814,  0.4726139,
             0.4806351,  0.4841886, 0.4877526,  0.4896490,  0.4913397,
             0.4950000,  0.4972614,  0.4991340)


y <- c(1.50, 3.00, 4.50, 6.00, 7.50, 9.00, 1.05e+01, 1.20e+01, 1.35e+01,
       1.50e+01, 3.00e+01, 4.50e+01, 6.00e+01, 7.50e+01, 9.00e+01, 1.20e+02,
       1.50e+02, 2.25e+02, 3.00e+02, 4.50e+02, 6.00e+02, 7.50e+02, 9.00e+02,
       1.20e+03, 1.50e+03, 3.00e+03, 4.50e+03, 7.50e+03, 1.05e+04, 1.50e+04,
       4.50e+04, 1.50e+05, 1.50e+06)


df <- data.frame(cbind(x_plus, x_minus, y))

x_points <- c(.5, .6, .43, .1, 1, .52, .6)
y_points <- c(50, 100, 5000, 300, 500, 700, 10)


which I use to produce the following plot.

library(ggplot2)

ggplot()+
  geom_point(aes(x = x_points, y = y_points))+
  geom_path(data = df, aes(x = x_plus, y = y))+
  geom_path(aes(x = x_minus, y = y))+
  scale_y_log10()+
  coord_cartesian(ylim = c(10, 1e4))


Plot

How would one go about mathematically determining how many points fall between the two geom_path() lines? For my actual application there may be thousands of points on this plot. Any advice is greatly appreciated!

M--
  • 25,431
  • 8
  • 61
  • 93
ZT_Geo
  • 403
  • 1
  • 11

2 Answers2

8

You can use approxfun() to create a function which interpolates the points on the curve and tells you the y coordinate of the curve for any x. Then you just compare these values with the y coordinates of your points:

# rearrange the data into just two columns of x and y
df <- data.frame(x=c(df$x_minus, df$x_plus), y=df$y)
df <- df[order(df$x), ]

# function for linear interpolation of points
interp_fun <- approxfun(df)
# which points are below the curve
points_below <- interp_fun(x_points) > y_points

ggplot() +
  geom_point(aes(x=x_points, y=y_points, color=points_below)) +
  geom_path(data=df, aes(x=x, y=y)) +
  scale_y_log10() +
  coord_cartesian(ylim=c(10, 1e4))

points below the curve

The number of points below the curve can be obtained with sum(points_below).

Robert Hacken
  • 3,878
  • 1
  • 13
  • 15
1

One approach is to construct a convex polygon (using concaveman::concaveman()) from your points and test whether the test points lie inside the polygon (using sp::point.in.polygon()). Then you can just use table() to count the number of points inside etc.

library(tidyverse) # for wrangling and visualizing data
library(concaveman) # for identifying the concave hull on some points
library(sp) # for testing membership in polygon

df <- data.frame(x = c(x_plus, x_minus), y = rep(y, 2)) %>% 
  as.matrix() %>% 
  concaveman() %>% 
  as.data.frame() %>% 
  rename(x = 1, y = 2)

points <- data.frame(x = c(.5, .6, .43, .1, 1, .52, .6), 
                     y = c(50, 100, 5000, 300, 500, 700, 10))

# annotate points by position relative to polygon
pts <- points %>%
  mutate(
    inside = point.in.polygon(
      point.x = x,
      point.y = y,
      pol.x = df$x,
      pol.y = df$y
    ),
    point_status = case_when(
      inside == 0 ~ "inside",
      inside == 1 ~ "outside",
      inside == 2 ~ "edge",
      inside == 3 ~ "vertex"
    )
  )

# count of points of each type
table(pts$point_status)
#> 
#>  inside outside 
#>       3       4

df %>% 
  ggplot(aes(x, y)) +
  geom_path() +
  geom_point(data = pts, aes(color = point_status)) +
  scale_y_log10()+
  coord_cartesian(ylim = c(10, 1e4))

Created on 2022-10-26 with reprex v2.0.2

Data:

x_plus <- c(1.3660254, 1.1123724, 1.0000000, 0.9330127, 0.8872983,
            0.8535534, 0.8273268, 0.8061862, 0.7886751, 0.7738613,
            0.6936492, 0.6581139, 0.6369306, 0.6224745, 0.6118034,
            0.5968246, 0.5866025, 0.5707107, 0.5612372, 0.5500000,
            0.5433013, 0.5387298, 0.5353553, 0.5306186, 0.5273861, 
            0.5193649, 0.5158114, 0.5122474, 0.5103510, 0.5086603,
            0.5050000, 0.5027386, 0.5008660)

x_minus <- c(-0.3660254, -0.1123724,  0.0000000,  0.0669873,  0.1127017,
             0.1464466,  0.1726732,  0.1938138,  0.2113249, 0.2261387,  
             0.3063508,  0.3418861,  0.3630694,  0.3775255,  0.3881966,
             0.4031754,  0.4133975,  0.4292893, 0.4387628,  0.4500000,
             0.4566987,  0.4612702,  0.4646447,  0.4693814,  0.4726139,
             0.4806351,  0.4841886, 0.4877526,  0.4896490,  0.4913397,
             0.4950000,  0.4972614,  0.4991340)

y <- c(1.50, 3.00, 4.50, 6.00, 7.50, 9.00, 1.05e+01, 1.20e+01, 1.35e+01,
       1.50e+01, 3.00e+01, 4.50e+01, 6.00e+01, 7.50e+01, 9.00e+01, 1.20e+02,
       1.50e+02, 2.25e+02, 3.00e+02, 4.50e+02, 6.00e+02, 7.50e+02, 9.00e+02,
       1.20e+03, 1.50e+03, 3.00e+03, 4.50e+03, 7.50e+03, 1.05e+04, 1.50e+04,
       4.50e+04, 1.50e+05, 1.50e+06)
Dan Adams
  • 4,971
  • 9
  • 28