Working on some observations on a previous question (haskell-data-hashset-from-unordered-container-performance-for-large-sets) I stumbled upon a strange memory leak
module Main where
import System.Environment (getArgs)
import Control.Monad.Trans.Resource (runResourceT)
import Data.Attoparsec.ByteString (sepBy, Parser)
import Data.Attoparsec.ByteString.Char8 (decimal, char)
import Data.Conduit
import qualified Data.Conduit.Attoparsec as CA
import qualified Data.Conduit.Binary as CB
import qualified Data.Conduit.List as CL
main :: IO ()
main = do (args:_) <- getArgs
writeFile "input.txt" $ unlines $ map show [1..4 :: Int]
case args of "list" -> m1
"fail" -> m2
"listlist" -> m3
"memoryleak" -> m4
--UPDATE
"bs-lines":_ -> m5
"bs":_ -> m6
_ -> putStr $ unlines ["Usage: conduit list"
," fail"
," listlist"
," memoryleak"
--UPDATE
," bs-lines"
," bs"
]
m1,m2,m3,m4 :: IO ()
m1 = do hs <- runResourceT
$ CB.sourceFile "input.txt"
$$ CB.lines
=$= CA.conduitParser (decimal :: Parser Int)
=$= CL.map snd
=$= CL.consume
print hs
m2 = do hs <- runResourceT
$ CB.sourceFile "input.txt"
$$ CA.conduitParser (decimal :: Parser Int)
=$= CL.map snd
=$= CL.consume
print hs
m3 = do hs <- runResourceT
$ CB.sourceFile "input.txt"
$$ CB.lines
=$= CA.conduitParser (decimal `sepBy` (char '\n') :: Parser [Int])
=$= CL.map snd
=$= CL.consume
print hs
m4 = do hs <- runResourceT
$ CB.sourceFile "input.txt"
$$ CA.conduitParser (decimal `sepBy` (char '\n') :: Parser [Int])
=$= CL.map snd
=$= CL.consume
print hs
-- UPDATE
m5 = do inpt <- BS.lines <$> BS.readFile "input.txt"
let Right hs = mapM (parseOnly (decimal :: Parser Int)) inpt
print hs
m6 = do inpt <- BS.readFile "input.txt"
let Right hs = (parseOnly (decimal `sepBy` (char '\n') :: Parser [Int])) inpt
print hs
Here is some example output:
$ > stack exec -- example list
[1234]
$ > stack exec -- example listlist
[[1234]]
$ > stack exec -- conduit fail
conduit: ParseError {errorContexts = [], errorMessage = "Failed reading: takeWhile1", errorPosition = 1:2}
$ > stack exec -- example memoryleak
(Ctrl+C)
-- UPDATE
$ > stack exec -- example bs-lines
[1,2,3,4]
$ > stack exec -- example bs
[1,2,3,4]
Now the questions I have is:
- Why is
m1
not producing[1,2,3,4]
? - Why is
m2
failing? - Why is
m4
behaving totally different compared to all other versions and producing a space leak?