Similar problem has been described in our issue tracker here:
https://github.com/eddelbuettel/digest/issues/33
The current version of digest
can read a file to compute the hash.
Therefore, at least on Linux, we can use a named pipe which will be read by the digest package (in one thread) and from the other side data will be written by another thread.
The following code snippet shows how we can compute a MD5 hash from 10 number by feeding the digester first with 1:5 and then 6:10.
library(parallel)
library(digest)
x <- as.character(1:10) # input
fname <- "mystream.fifo" # choose name for your named pipe
close(fifo(fname, "w")) # creates your pipe if does not exist
producer <- mcparallel({
mystream <- file(fname, "w")
writeLines(x[1:5], mystream)
writeLines(x[6:10], mystream)
close(mystream) # sends signal to the consumer (digester)
})
digester <- mcparallel({
digest(fname, file = TRUE, algo = "md5") # just reads the stream till signalled
})
# runs both processes in parallel
mccollect(list(producer, digester))
unlink(fname) # named pipe removed
UPDATE: Henrik Bengtsson provided a modified example based on futures:
library("future")
plan(multiprocess)
x <- as.character(1:10) # input
fname <- "mystream.fifo" # choose name for your named pipe
close(fifo(fname, open="wb")) # creates your pipe if does not exists
producer %<-% {
mystream <- file(fname, open="wb")
writeBin(x[1:5], endian="little", con=mystream)
writeBin(x[6:10], endian="little", con=mystream)
close(mystream) # sends signal to the consumer (digester)
}
# just reads the stream till signalled
md5 <- digest::digest(fname, file = TRUE, algo = "md5")
print(md5)
## [1] "25867862802a623c16928216e2501a39"
# Note: Identical on Linux and Windows