
I am trying to write a simple routine for obtaining the measure of certain regions in the unit square (two dimensions). I know I could use one of the standard numerical integration functions, but I wanted to try to optimize my code, since the class of functions I am interested in is very small.

This is my code:

#GRID DEFINITION
#Build a list of 101 x 101 grid points on [0,1]^2; each element is a
#vector c(x, y). The NaN is a placeholder for the x-coordinate, which
#the nested loop below fills in.
listgrid <- vector(mode="list", length=10201)
listgrid2 <- vector(mode="list", length=101)
for(i in 0:100)
{   
    listgrid2[[i+1]] <- c(NaN,i/100)        
}
listgrid <- rep(listgrid2,101)
for(j in 0:100)
{
    for(k in 0:100)
    {
        listgrid[[j*101 + k + 1]][1] <- j/100
    }
}


#Indicator Integration Routine
indint <- function(FUN){
    #Evaluate the indicator at every grid point, then average over the
    #10201 points, with a rough +50.5 bias correction
    valuegrid <- sapply(listgrid, FUN)
    result <- ifelse(sum(valuegrid) == 0, 0, (sum(valuegrid) + 50.5)/10201)
    return(result)
}

It just defines a grid, evaluates the function at every grid point (each value is either 0 or 1), and adds the results up. The 50.5 is a bias correction (very rough, I know).

The problem is that this is neither fast nor accurate. I guess that is probably because of the sapply.

The regions I want to know the area of are nothing too complicated; they have smooth boundaries.

How would you approach this problem?

Thank you.

Moritzplatz
  • Can you elaborate on your functions? Will they be parametric functions that define closed curves that are completely contained within the unit circle or will they be more general functions like `sin` or `x^2`? – mlegge Jan 26 '15 at 21:56
  • @mkemp6 They are going to be indicator functions on some regions. They are all of the form "Boole(three (non-linear) inequalities)". The inequalities are going to depend on some external parameters. These integrals are then part of an objective function which I want to minimize, hence the need for fast integration! – Moritzplatz Jan 26 '15 at 22:28
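
For concreteness, here is a sketch of an indicator in that class; the three inequalities and the parameters a and b are invented for illustration, not taken from the question. indint(indicator_example) above would then estimate the region's area.

#Hypothetical indicator of the kind described in the comments:
#Boole of three non-linear inequalities, depending on external
#parameters a and b (both invented for this example)
indicator_example <- function(X, a = 0.5, b = 2) {
  x <- X[1]; y <- X[2]
  as.numeric(y <= sin(a*pi*x) & y >= x^b & x + y <= 1.2)
}
indicator_example(c(0.3, 0.2))  #returns 1: (0.3, 0.2) satisfies all three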

2 Answers


From the little testing I've done, you may well be best off using the adaptIntegrate function in the cubature package, as that is written in C.
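
For reference, a minimal call looks like this (the integrand must take a single vector argument; this mirrors the accuracy test further down):

library(cubature)
#Minimal adaptIntegrate call over the unit square; the integrand
#receives one vector argument c(x, y)
f <- function(X) X[1]^2 + X[2]^2
adaptIntegrate(f, lowerLimit = c(0, 0), upperLimit = c(1, 1))$integral  #about 2/3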

Otherwise, if you can change your function from taking a single vector input to taking separate inputs for x and y, you may be best off using outer.

Here is some test code including your original:

#GRID DEFINITION    
listgrid <- vector(mode="list", length=10201)
listgrid2 <- vector(mode="list", length=101)
for(i in 0:100)
{   
  listgrid2[[i+1]] <- c(NaN,i/100)        
}
listgrid = rep(listgrid2,101)
for(j in 0:100)
{
  for(k in 0:100)
  {
    listgrid[[j*101 + k + 1]][1] <- j/100
  }
}

#Indicator Integration Routine
indint <- function(FUN){

  valuegrid <- sapply(listgrid, FUN)
  result <- ifelse(sum(valuegrid)== 0,0, (sum(valuegrid)+50.5)/10201)
  return(result)

}

#Matrix Grid
Row <- rep(seq(0, 1, .01), 101)
Col <- rep(seq(0, 1, .01), each = 101)
Grid <- cbind(Row, Col)

#Integrator using apply
QuickInt <- function(FUN) {
  #FUN must take a single two-element vector input c(x, y)
  return(sum(apply(Grid, 1, FUN)) / dim(Grid)[1])
}

#Integrator using mapply
QuickInt2 <- function(FUN) {
  #FUN must take separate arguments named x and y
  return(sum(mapply(FUN, x = Row, y = Col)) / length (Row))
}

#Index for each axis for outer
Index <- seq(0, 1, .01)

#Integrator using outer
QuickInt3 <- function(FUN) {
  #FUN must take separate (vectorized) x and y arguments
  return(sum(outer(Index, Index, FUN) / (length(Index) * length(Index))))
}
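
One caveat that the benchmarks below do not exercise: outer hands FUN the full vectors of coordinates in a single call, so FUN must be vectorized. An indicator written with scalar control flow breaks; base R's Vectorize is a convenient (if not fast) workaround. A sketch:

#outer() calls FUN once with whole vectors, so a scalar-only
#indicator has to be wrapped, e.g. with base R's Vectorize
ind_scalar <- function(x, y) if (x^2 + y^2 <= 1) 1 else 0  #`if` is not vectorized
QuickInt3(Vectorize(ind_scalar))  #about pi/4; QuickInt3(ind_scalar) would error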

Here are some test functions:

#Two test functions, both in vector input (_V) and independent input (_I) forms
test_f1_I <- function(x, y) x^2 + y^2
test_f1_V <- function(X) X[1]^2 + X[2]^2
test_f2_I <- function(x, y) sin(x) * cos(y)
test_f2_V <- function(X) sin(X[1]) * cos(X[2])
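
Since the question is specifically about indicator functions, here is a third, hypothetical test pair in the same two forms (the region is the quarter disc, so the exact answer is pi/4):

#Indicator test function: quarter disc inside the unit square
test_f3_I <- function(x, y) as.numeric(x^2 + y^2 <= 1)
test_f3_V <- function(X) as.numeric(X[1]^2 + X[2]^2 <= 1)
#QuickInt3(test_f3_I) should come out near pi/4 = 0.7853982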

Testing accuracy:

#Accuracy Test
library(cubature)
TrueValues <- c(adaptIntegrate(test_f1_V, c(0, 0), c(1, 1))$integral,
                adaptIntegrate(test_f2_V, c(0, 0), c(1, 1))$integral)
InditV <- c(indint(test_f1_V), indint(test_f2_V))
Q1V <- c(QuickInt(test_f1_V), QuickInt(test_f2_V))
Q2V <- c(QuickInt2(test_f1_I), QuickInt2(test_f2_I))
Q3V <- c(QuickInt3(test_f1_I), QuickInt3(test_f2_I))
Rs <- rbind(TrueValues, InditV, Q1V, Q2V, Q3V)
Rs
> Rs
                [,1]      [,2]
TrueValues 0.6666667 0.3868223
InditV     0.6749505 0.3911174
Q1V        0.6700000 0.3861669
Q2V        0.6700000 0.3861669
Q3V        0.6700000 0.3861669

Speed test (R 3.1.2, Intel i7-2600K overclocked to 4.6 GHz, 16 GB RAM, Windows 7 64-bit, R compiled with OpenBLAS 2.13).

#Speed Test
library(microbenchmark)
Spd1 <- microbenchmark(adaptIntegrate(test_f1_V, c(0,0), c(1,1)), 
                       indint(test_f1_V),
                       QuickInt(test_f1_V), 
                       QuickInt2(test_f1_I),
                       QuickInt3(test_f1_I), 
                       times = 100L, 
                       unit = "relative",
                       control = list(order = 'block'))
Spd2 <- microbenchmark(adaptIntegrate(test_f2_V, c(0,0), c(1,1)), 
                       indint(test_f2_V),
                       QuickInt(test_f2_V), 
                       QuickInt2(test_f2_I), 
                       QuickInt3(test_f2_I), 
                       times = 100L, 
                       unit = "relative",
                       control = list(order = 'block'))

> Spd1
Unit: relative
                                        expr         min          lq        mean      median          uq        max neval
 adaptIntegrate(test_f1_V, c(0, 0), c(1, 1))    1.000000    1.000000    1.000000    1.000000    1.000000    1.00000   100
                           indint(test_f1_V)  491.861071  543.166516  544.559059  568.961534  500.777301  934.62521   100
                         QuickInt(test_f1_V) 1777.972641 2102.021092 2188.762796 2279.148477 2141.449811 1781.36640   100
                        QuickInt2(test_f1_I)  681.548730  818.746406  887.345323  875.833969  923.155066  972.86832   100
                        QuickInt3(test_f1_I)    4.689201    4.525525    4.795768    4.401223    3.706085   31.28801   100
> Spd2
Unit: relative
                                        expr       min        lq      mean    median        uq        max neval
 adaptIntegrate(test_f2_V, c(0, 0), c(1, 1))   1.00000   1.00000   1.00000   1.00000   1.00000    1.00000   100
                           indint(test_f2_V) 192.49010 216.29215 232.99091 226.67006 239.32413  464.76166   100
                         QuickInt(test_f2_V) 628.38419 770.34591 872.15128 870.58423 937.67521 1018.59902   100
                        QuickInt2(test_f2_I) 267.68011 306.74721 345.69155 332.67551 352.19696  726.86772   100
                        QuickInt3(test_f2_I)  12.32649  12.05382  12.15228  11.89353  11.75468   26.54075   100
Avraham

Here is a vectorized way to estimate the area using simple Monte Carlo techniques (sampling the unit circle, as discussed in the comments). You can play with n and replicate.n to get the accuracy/speed trade-off you desire.

MC_estimator <- function(expr, n = 1E5, replicate.n = 50){
  f <- function(){
    # generate points uniformly over the square [-1, 1] x [-1, 1]
    x <- runif(n, min = -1)
    y <- runif(n, min = -1)

    # keep only the points inside the unit circle
    sel <- sqrt(x^2 + y^2) <= 1
    x <- x[sel]
    y <- y[sel]
    selN <- length(x)

    # the proportion of circle points inside the desired region, times the
    # circle's area (pi), gives the area estimate
    eval(parse(text = paste("pi*sum(", expr, ")/selN")))
  }
  # replicate the procedure and average to stabilize the estimate
  mean(replicate(replicate.n, f()))
}

expr <- "y <= sin(x) & y >= x^2 & y <= 0.5 - x"


> system.time(MC_estimator(expr))
   user  system elapsed 
   1.20    0.01    1.23 
> true <- 0.0370152
> estimated <- MC_estimator(expr)
> cat(round(100*(estimated - true)/true, 2), "% difference")
0.34 % difference
mlegge
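
If you would rather avoid eval(parse(...)), the same sampling scheme works with a predicate function instead of a string; this variant is a sketch of that idea, otherwise equivalent to the answer above:

#Same Monte Carlo scheme, but taking a vectorized predicate
#function instead of a string expression
MC_estimator_fn <- function(pred, n = 1E5, replicate.n = 50){
  f <- function(){
    x <- runif(n, min = -1)           #sample the square [-1, 1] x [-1, 1]
    y <- runif(n, min = -1)
    sel <- x^2 + y^2 <= 1             #keep points inside the unit circle
    x <- x[sel]; y <- y[sel]
    pi * sum(pred(x, y)) / length(x)  #proportion in region times circle area
  }
  mean(replicate(replicate.n, f()))
}
MC_estimator_fn(function(x, y) y <= sin(x) & y >= x^2 & y <= 0.5 - x)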