0

I am looking for the ability to start R processes Asynchronously from within R. Something like the below function

startFunctionInAsynchMode<-function(workingDir,filesToSource, functionName, ...){
 #workingdir - the dir that should be set as wd
 #filesToSource - vector of fileNames to be sourced
 #functionName - the actual function to be run asynchrously
 #...  - other parameters to be passed to the function
 #Return Value - should be the System Process Id Started
}

Would anyone have quick ideas? I checked packages like parallel etc. but doesn't seem to fit. Thanks in advance

guna
  • 1,148
  • 1
  • 10
  • 18
  • possible duplicate of [Asynchronous network IO using r: Any existing packages](http://stackoverflow.com/questions/1396632/asynchronous-network-io-using-r-any-existing-packages) –  Aug 05 '15 at 10:32
  • Maybe the approach outlined in [this question](http://stackoverflow.com/questions/30428610/compute-in-a-new-thread-and-refer-to-results-later-in-r/30430265#30430265) ? – Martin Morgan Aug 05 '15 at 14:58

1 Answers1

0

Here is an implementation using R CMD. Basic version tested. And with some open items.


startFunctionInAsynchMode<-function(workingDir,filesToSource, functionName, ...){
    wd<-getwd()
    setwd(workingDir)
    fs<-makeFiles()
    scriptFile<-fs$ScriptFile
    cat(file=scriptFile,paste0("source(\"",filesToSource,"\")", collapse = "\n"))
    cat(file=scriptFile,"\n",append = T)
    functionCall<-getFunctionCall(functionName,as.list(match.call()), startIndex=5)
    cat(file=scriptFile,functionCall,append = T)
    commandsToRun <- paste0("(R CMD BATCH ", scriptFile, " ",fs$LogFile , " --slave ) &")
    print(commandsToRun)
    system(commandsToRun)
    Sys.sleep(5)
    pids<-getPids(scriptFile, "--restore")
    cat(file=fs$KillScript,paste0("kill -9 ",pids$PID[1]))
    setwd(wd)
    return(as.character(pids$PID[1]))
}

makeFiles<-function(){
    res<-list()
    dir.create("./temp/tempRgen", recursive=T,showWarnings = F)
    tf<-tempfile("rGen-","./temp/tempRgen", fileext = "")
    res$ScriptFile<-paste0(tf,".R")
    res$LogFile<-paste0(tf,".log")
    res$KillScript<-paste0(tf,"-kill.sh")
    file.create(res$KillScript,showWarnings = F)
    file.create(res$ScriptFile,showWarnings = F)
    res
}

#Open Items to be handled
#1. Named Arguments
#2. Non String Arguments
getFunctionCall<-function(functionName,argList,startIndex){
    res<-paste0(functionName,"(")
    if(!is.null(argList)){
        if(length(argList)>=startIndex){
            first=T
            for(i in startIndex:length(argList)){
                if(first){
                    first=F
                } else {
                    res<-paste0(res,",")
                }
                res<-paste0(res,"\"",argList[[i]],"\"")
            }
        }
    }
    res<-paste0(res,")")
}

getPids <- function(grepFor, refineWith){
    numCols <- length(unlist(str_split(system("ps aux", intern=T)[1], "\\s+")))
    psOutput <- system(paste0("ps auxww | grep ", grepFor), intern=T)
    psOutput <- psOutput[str_detect(psOutput, refineWith)]
    pidDf <- ldply(psOutput, parseEachPsLine)
    # Remove the process that actually grep-ed for my search string
    pidDf <- pidDf[!str_detect(pidDf$COMMAND, "grep"),]
    return(pidDf)
}

parseEachPsLine <- function(line){
    tabular <- read.table(textConnection(line), header=F, sep=" ")
    tabular <- tabular[!is.na(tabular)]
    psTitles <- c("USER", "PID", "CPU", "MEM", "VSZ", "RSS", "TTY", "STAT", "START", "TIME", "COMMAND")
    psColNames <- setNames(seq(1, length(psTitles)), psTitles)

    COMMAND <- paste0(tabular[(psColNames["COMMAND"]):length(tabular)], collapse=" ")
    return(data.frame("PID"=tabular[psColNames["PID"]], "STARTED"=tabular[psColNames["START"]], "COMMAND"=COMMAND, "STATUS"=tabular[psColNames["STAT"]]))
}
guna
  • 1,148
  • 1
  • 10
  • 18