I am struggling to remove a Element with all its children with haskell. The task is to strip all table-tags from a given xml document (maybe I have not understood the concept of a cursor or it is something else I am missing).
I have tried three different approaches:
- lenses with traversing/filtering and setting the filtered value with a new element - here only the tag is replaced but not the contents
- accessing the table element with a cursor - resetting the content there and obtaining the document root again by traversing the cursor up to the document root - nothing is filtered
- filtering the children of the document root recursively - nothing is filtered
Tools
xml-conduit
xml-lens
ghc-8.0.1
Input (test.xml
)/output
INPUT EXPECTED OUTPUT (for the filtered cases)
<?xml version="1.0"?> | <?xml version="1.0"?>
<root> | <root>
<a> | <a>
... | ...
</a> | </a>
<b> | <b>
<table> | <bb>
<!--table entries--> | ...
</table> | </bb>
<bb> | </b>
... | <c>
</bb> | <cc>
</b> | ...
<c> | </cc>
<cc> | </c>
... | </root>
</cc>
</c>
</root>
Minimal-non-working-example
{-# LANGUAGE OverloadedStrings #-}
module Minimal where
import Control.Lens
import Data.Conduit.Text as CT
import Data.Default
import qualified Data.Text.Lazy.IO as TIO
import Text.XML
import Text.XML.Cursor
import qualified Text.XML.Lens as L
import Data.Maybe (isNothing, isJust)
main :: IO ()
main = do test <- Text.XML.readFile def "./test.xml"
pput $ filterDocument test
let cursor = fromDocument test
pput $ docUp $ elemUp $ getRoot ((head $ cursor $// checkName (== "table")) {child = []} )
pput $ docUp $ elemUp $ filterChildren (checkName (/= "table")) cursor
return ()
filterChildren :: Axis -> Cursor -> Cursor
filterChildren pred c = c {child = map (filterChildren pred) (c $/ pred) }
filterDocument :: Document -> Document
filterDocument doc = doc & (L.root.L.entire.filtered (\e -> isJust $ e^?L.named "table") .~ emptyElemt)
where emptyElemt = Element "empty" mempty []
-- helper functions --
docUp :: Element -> Document
docUp e = Document {documentRoot = e, documentPrologue = Prologue [] Nothing [], documentEpilogue = [] }
elemUp :: Cursor -> Element
elemUp cursor = Element {elementName = "DOC", elementAttributes = mempty , elementNodes = [node cursor]}
elemUp' :: [Cursor] -> Element
elemUp' cursors = Element {elementName = "DOC", elementAttributes = mempty , elementNodes = map node cursors}
getRoot :: Cursor -> Cursor
getRoot c = let p = (c $| parent)
in if null p then c else getRoot $ head p
pput :: Document -> IO ()
pput = TIO.putStrLn . renderText pretty
where pretty = def {rsPretty = True}
Output
> stack ghci
. . .
Ok, modules loaded: Minimal.
λ > main
<?xml version="1.0" encoding="UTF-8"?>
<root>
<a>
...
</a>
<b>
<empty>
<!-- table entries -->
</empty>
<bb>
...
</bb>
</b>
<c>
<cc>
...
</cc>
</c>
</root>
<?xml version="1.0" encoding="UTF-8"?>
<DOC>
<root>
<a>
...
</a>
<b>
<table>
<!-- table entries -->
</table>
<bb>
...
</bb>
</b>
<c>
<cc>
...
</cc>
</c>
</root>
</DOC>
<?xml version="1.0" encoding="UTF-8"?>
<DOC>
<root>
<a>
...
</a>
<b>
<table>
<!-- table entries -->
</table>
<bb>
...
</bb>
</b>
<c>
<cc>
...
</cc>
</c>
</root>
</DOC>