0

XML:

<sample>
    <test>
        <Cell1>John</Cell1>
        <Cell2>A</Cell2>
        <Cell4>xy</Cell4>
    </test>
    <test>
        <Cell1>John</Cell1>
        <Cell2>B</Cell2>
        <Cell6>10</Cell6>
    </test>
    <test>
        <Cell1>John,Jade</Cell1>
        <Cell2>A,Y</Cell2>
        <Cell4>1</Cell4>
    </test>
    <test>
        <Cell1>John,Jade</Cell1>
        <Cell2>A C,X</Cell2>
    </test>
    <test>
        <Cell1>John,Jade</Cell1>
        <Cell2>C D,Y</Cell2>
    </test>
    <test>
        <Cell1>John</Cell1>
        <Cell2>A B</Cell2>
        <Cell4>xy</Cell4>
    </test>
</sample>

XSLT:

<xsl:stylesheet version="2.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:xs="http://www.w3.org/2001/XMLSchema" exclude-result-prefixes="xs">
    <xsl:output method="xml" encoding="UTF-8" indent="no"/>
    <!-- Entry point: pass the <sample> document element on to its own template. -->
    <xsl:template match="/">
        <xsl:apply-templates select="child::sample"/>
    </xsl:template>
    <!--
        Handles the <sample> element.

        1. Collects the "atomic" rows (Cell2 holds exactly one value) into the
           temporary tree $atomictest.
        2. Applies the test template to every row, passing $atomictest along so
           the template can skip values that already exist as atomic rows.
        3. Writes the generated rows to the result tree.
    -->
    <xsl:template match="sample">
        <xsl:variable name="atomictest">
            <!-- A single value means Cell2 has neither a comma nor a space, so both
                 conditions must hold ("and"); with "or", rows such as "A,Y" or "A B"
                 would be collected as well. -->
            <xsl:copy-of select="test[not(contains(Cell2,',')) and not(contains(Cell2,' '))]"/>
        </xsl:variable>
        <xsl:variable name="copy">
            <xsl:apply-templates select="test">
                <xsl:with-param name="atomictest" select="$atomictest"/>
            </xsl:apply-templates>
        </xsl:variable>
        <!-- Emit the rows; binding them to $copy alone produces no output. -->
        <xsl:copy-of select="$copy"/>
    </xsl:template>
    <!--
        Expands one <test> row into single-valued <test> rows that carry only
        Cell1 and Cell2.

        Parameter atomictest: temporary tree of <test> rows supplied by the caller.
        In the two splitting cases a Cell2 value is skipped when it equals (after
        normalize-space) the Cell2 text of any row in that tree.

        The case is chosen by the separators found in Cell2:
          1. comma present: Cell2 is split on ',' and each piece again on ' ';
             every value is paired with the Cell1 token at the same comma position.
          2. spaces only: Cell2 is split on ' '; every value is paired with the
             whole Cell1 text.
          3. single value: the row is emitted unconditionally.
    -->
    <xsl:template match="test">
        <xsl:param name="atomictest"/>
        <!-- Temporary tree holding a copy of this row's Cell1; used by cases 1 and 2. -->
        <xsl:variable name="owners">
            <xsl:copy-of select="Cell1"/>
        </xsl:variable>
        <xsl:choose>
            <!-- Case 1: comma-separated groups -->
            <xsl:when test="contains(Cell2,',')">
                <xsl:for-each select="tokenize(Cell2,',')">
                    <xsl:variable name="group" select="."/>
                    <!-- Position of this group; selects the matching Cell1 token below. -->
                    <xsl:variable name="slot" select="position()"/>
                    <xsl:choose>
                        <!-- The group itself holds several space-separated values. -->
                        <xsl:when test="contains($group,' ')">
                            <xsl:for-each select="tokenize($group,' ')">
                                <xsl:variable name="value" select="."/>
                                <!-- Skip values that already exist as an atomic row. -->
                                <xsl:if test="not($atomictest/test[normalize-space(Cell2/text())=normalize-space($value)])">
                                    <xsl:for-each select="tokenize($owners/Cell1,',')[position()=$slot]">
                                        <test>
                                            <Cell1><xsl:value-of select="."/></Cell1>
                                            <Cell2><xsl:value-of select="$value"/></Cell2>
                                        </test>
                                    </xsl:for-each>
                                </xsl:if>
                            </xsl:for-each>
                        </xsl:when>
                        <!-- The group is a single value. -->
                        <xsl:otherwise>
                            <xsl:if test="not($atomictest/test[normalize-space(Cell2/text())=normalize-space($group)])">
                                <xsl:for-each select="tokenize($owners/Cell1,',')[position()=$slot]">
                                    <test>
                                        <Cell1><xsl:value-of select="."/></Cell1>
                                        <Cell2><xsl:value-of select="$group"/></Cell2>
                                    </test>
                                </xsl:for-each>
                            </xsl:if>
                        </xsl:otherwise>
                    </xsl:choose>
                </xsl:for-each>
            </xsl:when>
            <!-- Case 2: space-separated values, no comma -->
            <xsl:when test="contains(Cell2,' ')">
                <xsl:for-each select="tokenize(Cell2,' ')">
                    <xsl:variable name="value" select="."/>
                    <!-- Skip values that already exist as an atomic row. -->
                    <xsl:if test="not($atomictest/test[normalize-space(Cell2/text())=normalize-space($value)])">
                        <test>
                            <Cell1><xsl:value-of select="$owners"/></Cell1>
                            <Cell2><xsl:value-of select="$value"/></Cell2>
                        </test>
                    </xsl:if>
                </xsl:for-each>
            </xsl:when>
            <!-- Case 3: Cell2 already holds a single value -->
            <xsl:otherwise>
                <test>
                    <Cell1><xsl:value-of select="Cell1"/></Cell1>
                    <Cell2><xsl:value-of select="Cell2"/></Cell2>
                </test>
            </xsl:otherwise>
        </xsl:choose>
    </xsl:template>
</xsl:stylesheet>
  1. I have stored the cell2 that contains a single value in atomictest variable
  2. Check if Cell2 contains comma. if true tokenize Cell2 based on comma and check if the tokenized Cell2 value is there in atomic test -> if no then add Cell2 and Cell1 value to the output
  3. I would like to update the newly added Cell1 and Cell2 values in the output to the atomictest variable so that if I come through the same Cell2 value the next time I need to skip it. How to do this??

The output which I get:

<test>
    <Cell1>John</Cell1>
    <Cell2>A</Cell2>
</test>
<test>
    <Cell1>John</Cell1>
    <Cell2>B</Cell2>
</test>
<test>
    <Cell1>Jade</Cell1>
    <Cell2>Y</Cell2>
</test>
<test>
    <Cell1>John</Cell1>
    <Cell2>C</Cell2>
</test>
<test>
    <Cell1>Jade</Cell1>
    <Cell2>X</Cell2>
</test>
<test>
    <Cell1>John</Cell1>
    <Cell2>C</Cell2>
</test>
<test>
    <Cell1>John</Cell1>
    <Cell2>D</Cell2>
</test>
<test>
    <Cell1>Jade</Cell1>
    <Cell2>Y</Cell2>
</test>

Resulting output should look like the following:

<test>
        <Cell1>John</Cell1>
        <Cell2>A</Cell2>
    </test>
    <test>
        <Cell1>John</Cell1>
        <Cell2>B</Cell2>
    </test>
    <test>
        <Cell1>Jade</Cell1>
        <Cell2>Y</Cell2>
    </test>
    <test>
        <Cell1>John</Cell1>
        <Cell2>C</Cell2>
    </test>
    <test>
        <Cell1>Jade</Cell1>
        <Cell2>X</Cell2>
    </test>
    <test>
        <Cell1>John</Cell1>
        <Cell2>D</Cell2>
    </test>
Shil
  • 211
  • 1
  • 3
  • 11
  • 1
    XSLT is a functional language. Among other things, this means that variables cannot be updated. Libraries of XSLT templates/functions exist that solve very challenging tasks with amazing simplicity, that are probably simpler, more understandable and maintainable and not less efficient than respective imperative language solutions. The result you want to produce can be generated with a fully functional code, not updating a variable. But please, provide a *short* example so that people would have time to read it and to work on it. – Dimitre Novatchev Oct 01 '12 at 12:42
  • Should the last expected elements be Jimmy/C, Jimmy/B and then John/B, instead of Jimmy/D ? Can you look again? You listed expected output doesn't make sense if "D" is included. Where did "D" come from? It is not even in the input document. – Sean B. Durkin Oct 01 '12 at 13:36
  • Ok. Getting better. But why is John/B excluded from your expected output? – Sean B. Durkin Oct 01 '12 at 13:47
  • forgot to update the output.Done it now! – Shil Oct 01 '12 at 14:08

2 Answers2

1

Variables are read-only in XSLT. That is, you can aasign them only once. After that they are read-only.

Luixv
  • 8,590
  • 21
  • 84
  • 121
1

This XSLT 2.0 style-sheet...

<xsl:stylesheet version="2.0"
  xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
  xmlns:temp="http://stackoverflow.com/questions/12673307"
  exclude-result-prefixes="xsl temp">
<!--
  Two-phase transform.
    Phase 1 expands every <test> row into one temp:test element per
            (Cell1 value, Cell2 value) pair.
    Phase 2 groups the pairs and emits each distinct pair once, in order of
            first appearance.
  Assumes Cell1 and Cell2 hold the same number of comma-separated items and
  that Cell1 never contains the '|' character used in the grouping key.
-->
<xsl:output omit-xml-declaration="yes" indent="yes" />
<xsl:strip-space elements="*" />

<!-- Phase 1 result: a temporary tree of temp:test pairs. -->
<xsl:variable name="phase-1-output">
  <temp:tests>
    <xsl:apply-templates select="/*/test" mode="phase-1" />
  </temp:tests>
</xsl:variable>

<!-- Phase 2 result: the de-duplicated <test> rows. -->
<xsl:variable name="phase-2-output">
  <xsl:apply-templates select="$phase-1-output" mode="phase-2" />
</xsl:variable>

<xsl:template match="/">
 <xsl:copy-of select="$phase-2-output"/>
</xsl:template>

<!-- Rows without both a non-empty Cell1 and a non-empty Cell2 produce nothing. -->
<xsl:template match="*" mode="phase-1" />

<xsl:template match="test[Cell1!=''][Cell2!='']" mode="phase-1">
  <xsl:variable name="cell2" select="tokenize(Cell2,',')" />
  <xsl:for-each select="tokenize(Cell1,',')" >
    <xsl:variable name="cell1-pos" select="position()" />
    <!-- Trim the name so that "John, Jade" yields "Jade", not " Jade". -->
    <xsl:variable name="cell1" select="normalize-space(.)" />
    <!-- normalize-space trims the item and collapses runs of whitespace, so
         splitting on ' ' cannot yield empty tokens (for example from "A  C"
         or from a space after the comma). -->
    <xsl:for-each select="tokenize(normalize-space($cell2[$cell1-pos]),' ')">
      <temp:test>
        <temp:Cell1><xsl:value-of select="$cell1" /></temp:Cell1>
        <temp:Cell2><xsl:value-of select="." /></temp:Cell2>
      </temp:test>
    </xsl:for-each>
  </xsl:for-each>
</xsl:template>

<xsl:template match="temp:tests" mode="phase-2">
  <!-- One group per distinct Cell1|Cell2 pair; the key is split again to build the row. -->
  <xsl:for-each-group select="temp:test" group-by="concat(temp:Cell1,'|',temp:Cell2)">
    <test>
      <Cell1><xsl:value-of select="substring-before(current-grouping-key(),'|')" /></Cell1>
      <Cell2><xsl:value-of select="substring-after(current-grouping-key(),'|')" /></Cell2>
    </test>
  </xsl:for-each-group>
</xsl:template>

</xsl:stylesheet>

...will transform this input...

<sample>
    <test>
        <Cell1>John</Cell1>
        <Cell2>A</Cell2>
        <Cell4>xy</Cell4>
    </test>
    <test>
        <Cell1>John</Cell1>
        <Cell2>B</Cell2>
        <Cell6>10</Cell6>
    </test>
    <test>
        <Cell1>John,Jade</Cell1>
        <Cell2>A,Y</Cell2>
        <Cell4>1</Cell4>
    </test>
    <test>
        <Cell1>John,Jade</Cell1>
        <Cell2>A C,X</Cell2>
    </test>
    <test>
        <Cell1>John,Jade</Cell1>
        <Cell2>C D,Y</Cell2>
    </test>
    <test>
        <Cell1>John</Cell1>
        <Cell2>A B</Cell2>
        <Cell4>xy</Cell4>
    </test>
</sample>

...into...

<test>
   <Cell1>John</Cell1>
   <Cell2>A</Cell2>
</test>
<test>
   <Cell1>John</Cell1>
   <Cell2>B</Cell2>
</test>
<test>
   <Cell1>Jade</Cell1>
   <Cell2>Y</Cell2>
</test>
<test>
   <Cell1>John</Cell1>
   <Cell2>C</Cell2>
</test>
<test>
   <Cell1>Jade</Cell1>
   <Cell2>X</Cell2>
</test>
<test>
   <Cell1>John</Cell1>
   <Cell2>D</Cell2>
</test>

Alternative solution

Here is an alternative single phase solution. It is simpler, but less adaptable.

<xsl:stylesheet version="2.0"
  xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<!--
  Single-pass transform: builds a "Cell1|Cell2" string for every
  (Cell1 value, Cell2 value) pair of every <test> row, keeps the distinct
  strings, and turns each one back into a <test> row.
  Assumes Cell1 and Cell2 hold the same number of comma-separated items and
  that Cell1 never contains the '|' character.
-->
<xsl:output omit-xml-declaration="yes" indent="yes" />
<xsl:strip-space elements="*" />

<xsl:template match="/">
  <!-- $p1 is the comma position shared by the Cell1 name and its Cell2 item.
       normalize-space trims each name and collapses whitespace inside each
       Cell2 item before it is split on ' ', so stray or repeated spaces
       cannot produce empty values or untrimmed names. -->
  <xsl:for-each select="
     distinct-values(
       for $t in /*/test,
           $p1 in 1 to  count( tokenize($t/Cell1,',')),
           $cell1 in normalize-space( tokenize($t/Cell1,',')[$p1]),
           $cell2 in tokenize( normalize-space( tokenize($t/Cell2,',')[$p1]), ' ') return
               concat($cell1,'|',$cell2))">
    <test>
      <Cell1><xsl:value-of select="substring-before(.,'|')" /></Cell1>
      <Cell2><xsl:value-of select="substring-after( .,'|')" /></Cell2>
    </test>
  </xsl:for-each>
</xsl:template>

</xsl:stylesheet>

Note

Both solutions rely on the following assumptions:

  1. Both Cell1 and Cell2 have the same count of commas.
  2. Cell1 will never contain the pipe ('|') character.
Sean B. Durkin
  • 12,659
  • 1
  • 36
  • 65