3

I am having trouble trying to run llply in parallel...

getOC function I am trying to run (it is a modified version of quantmod::getOptionChain):

`getOC` <-
  function(Symbols, Exp=NULL, src="yahoo", ...) {
    Call <- paste("getOptionChain",src,sep=".")
    if(missing(Exp)) {
      do.call(Call, list(Symbols=Symbols, ...))
    } else {
      do.call(Call, list(Symbols=Symbols, Exp=Exp, ...))
    }
  }

getOptionChain.yahoo <- function(Symbols, Exp, ...)
{
  if(!requireNamespace("XML", quietly=TRUE))
    stop("package:",dQuote("XML"),"cannot be loaded.")

  thParse <- function(x) {
    if (length(XML::xmlChildren(x)) > 1) {
      XML::xmlValue(x[["div"]][["div"]])
    } else {
      XML::xmlValue(x)
    }
  }
  NewToOld <- function(x, nm) {
    if(is.null(x))
      return(x)
    # clean up colnames, in case there's weirdness in the HTML
    x <- setNames(x, make.names(nm))
    # set cleaned up colnames to current output colnames
    d <- with(x, data.frame(Strike=strike, Last=last, Chg=change,
                            Bid=bid, Ask=ask, Vol=volume, OI=openinterest,
                            row.names=`contractname`, stringsAsFactors=FALSE))
    # remove commas from the numeric data
    d[] <- lapply(d, gsub, pattern=",", replacement="", fixed=TRUE)
    d[] <- lapply(d, type.convert, as.is=TRUE)
    d
  }
  cleanNames <- function(x) {
    tolower(gsub("[[:space:]]", "", x))
  }

  # Don't check the expiry date if we're looping over dates we just scraped
  checkExp <- !hasArg(".expiry.known") || !match.call(expand.dots=TRUE)$.expiry.known
  # Construct URL
  urlExp <- paste0("http://finance.yahoo.com/q/op?s=", Symbols[1])
  # Add expiry date to URL
  if(!checkExp)
    urlExp <- paste0(urlExp, "&date=", Exp)

  # Fetch data; ensure object is free'd on function exit
  tbl <- XML::htmlParse(urlExp, isURL=TRUE)
  on.exit(XML::free(tbl))

  # xpaths to the data we're interested in
  xpaths <- list()
  xpaths$tables <- "//table[contains(@class, 'quote-table')]"
  xpaths$table.names <- paste0(xpaths$tables, "/caption/text()")
  xpaths$headers <- paste0(xpaths$tables, "/thead/tr[not(contains(@class, 'filterRangeRow'))]")
  xpaths$expiries <- "//div[contains(@class, 'options_menu')]/form/select//option"

  # Extract table names and headers
  table.names <- XML::xpathSApply(tbl, xpaths$table.names, XML::xmlValue)
  table.names <- cleanNames(table.names)
  table.headers <- XML::xpathApply(tbl, xpaths$headers, fun=function(x) sapply(x['th'], thParse))
  table.headers <- lapply(table.headers, cleanNames)

  # Only return nearest expiry (default served by Yahoo Finance), unless the user specified Exp
  if(!missing(Exp) && checkExp) {
    all.expiries <- XML::xpathSApply(tbl, xpaths$expiries, XML::xmlGetAttr, name="value")
    all.expiries.posix <- .POSIXct(as.numeric(all.expiries), tz="UTC")

    if(is.null(Exp)) {
      # Return all expiries if Exp = NULL
      out <- lapply(all.expiries, getOptionChain.yahoo, Symbols=Symbols, .expiry.known=TRUE)
      # Expiry format was "%b %Y", but that's not unique with weeklies. Change
      # format to "%b.%d.%Y" ("%Y-%m-%d wouldn't be good, since names should
      # start with a letter or dot--naming things is hard).
      return(setNames(out, format(all.expiries.posix, "%b.%d.%Y")))
    } else {
      # Ensure data exist for user-provided expiry date(s)
      if(inherits(Exp, "Date"))
        valid.expiries <- as.Date(all.expiries.posix) %in% Exp
      else if(inherits(Exp, "POSIXt"))
        valid.expiries <- all.expiries.posix %in% Exp
      else if(is.character(Exp)) {
        expiry.range <- range(unlist(lapply(Exp, .parseISO8601, tz="UTC")))
        valid.expiries <- all.expiries.posix >= expiry.range[1] &
          all.expiries.posix <= expiry.range[2]
      }
      if(all(!valid.expiries))
        stop("Provided expiry date(s) not found. Available dates are: ",
             paste(as.Date(all.expiries.posix), collapse=", "))

      expiry.subset <- all.expiries[valid.expiries]
      if(length(expiry.subset) == 1)
        return(getOptionChain.yahoo(Symbols, expiry.subset, .expiry.known=TRUE))
      else {
        out <- lapply(expiry.subset, getOptionChain.yahoo, Symbols=Symbols, .expiry.known=TRUE)
        # See comment above regarding the output names
        return(setNames(out, format(all.expiries.posix[valid.expiries], "%b.%d.%Y")))
      }
    }
  }

  dftables <- XML::xmlApply(XML::getNodeSet(tbl, xpaths$tables), XML::readHTMLTable, stringsAsFactors=FALSE)
  names(dftables) <- table.names

  #dftables <- mapply(setNames, dftables, table.headers, SIMPLIFY=FALSE)
  #dftables <- lapply(dftables, NewToOld)
  dftables <- mapply(NewToOld, x=dftables, nm=table.headers, SIMPLIFY=FALSE)
  dftables
}

This is the actual code I run that fails to return data.

library("quatmod");library("doParallel");library("XML");library("plyr")


LIST <- c("^GSPC","PCLN","AMZN","BIDU")

cl <- makePSOCKcluster(2)
registerDoParallel(cl)

# RUN llply
system.time(
  WTF <- llply(.data=as.list(LIST), .fun=function(x) {
    tmp <- try(getOC(x, Exp=NULL))
    if (!inherits(tmp, 'try-error')) tmp
  },.parallel = TRUE, .paropts=c(.packages=c('quantmod'))
  ))


# I only want "WTF" with actual data
WTF <- WTF[lapply(WTF,length)>0]

The error I get is:

<anonymous>: ... may be used in an incorrect context: ‘.fun(piece, ...)’

I tried looking it up here on SO but I could not get a solution that worked...

Here is my sessionInfo:

R version 3.2.2 (2015-08-14)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 8 x64 (build 9200)

locale:
[1] LC_COLLATE=English_United States.1252  LC_CTYPE=English_United States.1252    LC_MONETARY=English_United States.1252
[4] LC_NUMERIC=C                           LC_TIME=English_United States.1252    

attached base packages:
[1] parallel  stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
 [1] doSNOW_1.0.14     snow_0.4-1        plyr_1.8.3        XML_3.98-1.3      doParallel_1.0.10 iterators_1.0.8  
 [7] foreach_1.4.3     quantmod_0.4-5    TTR_0.23-0        xts_0.9-7         zoo_1.7-12       

loaded via a namespace (and not attached):
[1] Rcpp_0.12.1      lattice_0.20-33  codetools_0.2-14 grid_3.2.2       tools_3.2.2      compiler_3.2.2
Rime
  • 912
  • 2
  • 11
  • 39
  • I think you cannot use anonymous `...` in `list`, as you do in your `getOC` function. –  Nov 11 '15 at 09:05
  • @Pascal it is weird, I run the exact same code on Mac OSX and it works fine... I can't make it to work on Windows, it only works on Windows without parallel – Rime Nov 11 '15 at 09:11
  • Then it seems I am mistaken. –  Nov 11 '15 at 09:12

1 Answers1

2

Windows doesn't make it easy to do parallel stuff. However in the case of the doParallel package, it actually makes it easy for an R user to do parallel stuff.

TO BE REMOVED:

cl <- makePSOCKcluster(2)
registerDoParallel(cl)

TO BE ADDED WHERE ABOVE WAS REMOVED:

registerDoParallel(cores=2)

Looking at your code a bit more I think another issue is about what you're expecting the value of "Exp" to be. In some languages, NULL's are "people" too so long as they're assigned to a variable. So a hunch I have is that you're ending up on the wrong path in your various if() blocks by checking missing(Exp).

To illustrate:

myfn <- function(x, y=NULL) {
    if(missing(y)){
        out <- 'bob'
    }else{
        out <- 'sally'
    }
    return(out)
}

myfn(1) #returns [1] "bob"
myfn(1, NULL) #returns [1] "sally"

You should probably be checking for is.null(Exp) instead of using missing(Exp).

myfn2 <- function(x, y=NULL) {
    if(is.null(y)){
        out <- 'tom'
    }else{
        out <- 'jane'
    }
    return(out)
}

myfn2(1) #returns [1] "tom"
myfn2(1, NULL) #returns [1] "tom"
myfn2(1, NA) #returns [1] "jane"
doicomehereoften1
  • 537
  • 1
  • 4
  • 12
  • Thanks for responding. The code above is exactly how i have it on MAC but does not work for me on Windows. – Rime Nov 20 '15 at 07:03
  • Yeah, that's what I'm saying. You've got the wrong code to run it on Windows. Don't run makePSOCKcluster(). Just run registerDoParallel() and specify the number of cores there. – doicomehereoften1 Nov 20 '15 at 16:35
  • I've just made an edit to my answer that will hopefully make it more clear about you need to do. On Windows, you need to skip the step where you manually set up the cluster. Just let registerDoParallel() handle it. By the way, on Windows, if you run registerDoParallel() without specifying the number of cores it will default to using three cores. – doicomehereoften1 Nov 20 '15 at 16:48
  • I understood what you meant the first time, but I still received the error & no data was stored in the list... Did it work for you? I also tried removing the `...` from the function `getOC` and when I ran in it, it did not generate errors but it generated an empty list again... – Rime Nov 21 '15 at 01:14
  • I made an update to my answer addressing what I think might be another issue. I think you might be misinterpreting what missing(Exp) is doing when you check its value, resulting in your if() blocks going off in the wrong direction. To check this in your own code you can use debug() on your getOC() function to step through the logic. – doicomehereoften1 Nov 23 '15 at 17:08
  • Don't forget to `stopImplicitCluster()` – Oleg Melnikov Jun 23 '16 at 17:01
  • Not necessarily. The documentation says "The function stopImplicitCluster can be used in vignettes and other places where it is important to explicitly close the implicitly created cluster." Here it may not be important since the cores will be released when the session closes anyway. – doicomehereoften1 Jun 23 '16 at 20:46