My Source data is like this, and it's a really large xml of 2+ GB.
<?xml version="1.0" encoding="UTF-8"?>
<Journal_Lines>
<jrnl1 CY="USD" CCD="1001" CC="11062" IsPyJrl="1" AID="11382" LAI="107709"
TLCCr="11062" TCAmt="222.85" TDAmt="0" CDI="C" CDAmt="222.85" DN=""
EDt="2019-06-16-07:00" SCd="" HURCl="0"/>
<jrnl1 CY="USD" CCD="1001" CC="11062" IsPyJrl="1" AID="11382" LAI="240997"
TLCCr="11062" TCAmt="0" TDAmt="222.85" CDI="D" CDAmt="222.85" DN=""
EDt="2019-06-16-07:00" SCd="" HURCl="0"/>
<jrnl1 CY="USD" CCD="1001" CC="16835" IsPyJrl="1" AID="12661" LAI="107769"
TLCCr="16835" TCAmt="94.06" TDAmt="0" CDI="C" CDAmt="94.06" DN="" EDt="2019-06-16-07:00"
SCd="" HURCl="0"/>
<jrnl1 CY="USD" CCD="1001" CC="16835" IsPyJrl="1" AID="12661" LAI="240997"
TLCCr="16835" TCAmt="0" TDAmt="94.06" CDI="D" CDAmt="94.06" DN="" EDt="2019-06-16-07:00"
SCd="" HURCl="0"/>
<jrnl1 CY="USD" CCD="1001" CC="19655" IsPyJrl="1" AID="12731" LAI="240997"
TLCCr="19655" TCAmt="0" TDAmt="899.11" CDI="D" CDAmt="899.11" DN=""
EDt="2019-06-16-07:00" SCd="" HURCl="0"/>
<jrnl1 CY="USD" CCD="1001" CC="19655" IsPyJrl="1" AID="12731" LAI="107709"
TLCCr="19655" TCAmt="899.11" TDAmt="0" CDI="C" CDAmt="899.11" DN=""
EDt="2019-06-16-07:00" SCd="" HURCl="0"/>
</Journal_Lines>
My Output is
<Journal_Lines xmlns:xs="http://www.w3.org/2001/XMLSchema"
xmlns:map="http://www.w3.org/2005/xpath-functions/map">
<Group CCD="1001" CC="11062">
<Jrnln CY="USD" CCD="1001" CC="11062" IsPyJrl="1" AID="11382" LAI="107709" TLCCr="11062"
TCAmt="222.85" TDAmt="0" CDI="C" CDAmt="222.85" DN="" EDt="2019-06-16-07:00" SCd=""
HURCl="0"/>
<Jrnln CY="USD" CCD="1001" CC="11062" IsPyJrl="1" AID="11382" LAI="240997" TLCCr="11062"
TCAmt="0" TDAmt="222.85" CDI="D" CDAmt="222.85" DN="" EDt="2019-06-16-07:00" SCd=""
HURCl="0"/>
</Group>
<Group CCD="1001" CC="16835">
<Jrnln CY="USD" CCD="1001" CC="16835" IsPyJrl="1" AID="12661" LAI="107769" TLCCr="16835"
TCAmt="94.06" TDAmt="0" CDI="C" CDAmt="94.06" DN="" EDt="2019-06-16-07:00" SCd=""
HURCl="0"/>
<Jrnln CY="USD" CCD="1001" CC="16835" IsPyJrl="1" AID="12661" LAI="240997" TLCCr="16835"
TCAmt="0" TDAmt="94.06" CDI="D" CDAmt="94.06" DN="" EDt="2019-06-16-07:00" SCd=""
HURCl="0"/>
</Group>
<Group CCD="1001" CC="19655">
<Jrnln CY="USD" CCD="1001" CC="19655" IsPyJrl="1" AID="12731" LAI="240997" TLCCr="19655"
TCAmt="0" TDAmt="899.11" CDI="D" CDAmt="899.11" DN="" EDt="2019-06-16-07:00" SCd=""
HURCl="0"/>
<Jrnln CY="USD" CCD="1001" CC="19655" IsPyJrl="1" AID="12731" LAI="107709" TLCCr="19655"
TCAmt="899.11" TDAmt="0" CDI="C" CDAmt="899.11" DN="" EDt="2019-06-16-07:00" SCd=""
HURCl="0"/>
</Group>
</Journal_Lines>
I'm grouping and sorting by CC and CCD, my current code is below and it works. But takes a very log time.
<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet version="3.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:xs="http://www.w3.org/2001/XMLSchema"
xmlns:map="http://www.w3.org/2005/xpath-functions/map">
<xsl:output indent="no"/>
<xsl:mode streamable="yes" on-no-match="shallow-skip"/>
<xsl:variable name="vElementMap" as="map(*)"
select="map {
1:'CY', 2:'CCD', 3:'CC', 4:'IsPyJrl', 5:'AID',
6:'LAI', 7:'TLCCr', 8:'TCAmt', 9:'TDAmt', 10:'CDI',
11:'CDAmt', 12:'DN', 13:'EDt', 14:'SCd', 15:'HURCl' }"
/>
<xsl:template match="/">
<xsl:iterate select="Journal_Lines/jrnl1">
<xsl:param name="mapJournalLines" as="map(xs:string, xs:string)" select="map{}"/>
<xsl:on-completion>
<Journal_Lines>
<!-- Sort data -->
<xsl:for-each select="map:for-each($mapJournalLines, function ($k, $v) {$k})">
<xsl:sort select="."/>
<Group CCD="{substring-before(.,'^')}" CC="{substring-after(.,'^')}">
<xsl:for-each select="tokenize($mapJournalLines(.),'\^')">
<Jrnln>
<xsl:for-each select="tokenize(.,'\|')">
<xsl:attribute name="{$vElementMap(position())}">
<xsl:value-of select="."/>
</xsl:attribute>
</xsl:for-each>
</Jrnln>
</xsl:for-each>
</Group>
</xsl:for-each>
</Journal_Lines>
</xsl:on-completion>
<xsl:variable name="current-entry" select="copy-of()"/>
<xsl:variable name="vKey" select="$current-entry/@CCD || '^' || $current-entry/@CC"/>
<xsl:variable name="vValue">
<xsl:for-each select="$current-entry/@*">
<xsl:if test="position() ne 1">|</xsl:if>
<xsl:value-of select="."/>
</xsl:for-each>
</xsl:variable>
<xsl:next-iteration>
<xsl:with-param name="mapJournalLines"
select="
if (map:contains($mapJournalLines, xs:string($vKey))) then
map:put($mapJournalLines, xs:string($vKey), $mapJournalLines(xs:string($vKey)) || '^' || xs:string($vValue))
else
map:put($mapJournalLines, xs:string($vKey), xs:string($vValue))"
/>
</xsl:next-iteration>
</xsl:iterate>
</xsl:template>
</xsl:stylesheet>
I'm converting a single jrnl1 node to a single pipe delimited line, and multiple grouped lines are delimited by ^ This works with small load, but takes forever with large data.
Any help is appreciated.