2

I am writing XSLT filters to import XML data into Libreoffice-Calc, and then export it back to XML after making modifications in Libreoffice.

In the source XML, some of the fields contain multiple lines. When I import these into Libreoffice-Calc using my custom script, these are preserved and appear correctly in the spreadsheet. However, when I export the data back to the xml file, the line breaks are removed.

These line breaks are an important part of the data that I need to preserve. How can I get them to show up in the output?

Some things I've already tried:

  • Using 'copy-of' instead of 'value-of'
  • Setting the disable-output-escaping attribute for value-of
  • Applying the fn:data() function to the select statement of copy-of, (got a "function data not found" error)

Here is an example:

import.xslt

<?xml version="1.0" encoding="UTF-8"?>

<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
  <xsl:template match="/">
    <office:document-content xmlns:office="urn:oasis:names:tc:opendocument:xmlns:office:1.0" xmlns:style="urn:oasis:names:tc:opendocument:xmlns:style:1.0" xmlns:text="urn:oasis:names:tc:opendocument:xmlns:text:1.0" xmlns:table="urn:oasis:names:tc:opendocument:xmlns:table:1.0" xmlns:draw="urn:oasis:names:tc:opendocument:xmlns:drawing:1.0" xmlns:fo="urn:oasis:names:tc:opendocument:xmlns:xsl-fo-compatible:1.0" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:meta="urn:oasis:names:tc:opendocument:xmlns:meta:1.0" xmlns:number="urn:oasis:names:tc:opendocument:xmlns:datastyle:1.0" xmlns:svg="urn:oasis:names:tc:opendocument:xmlns:svg-compatible:1.0" xmlns:chart="urn:oasis:names:tc:opendocument:xmlns:chart:1.0" xmlns:dr3d="urn:oasis:names:tc:opendocument:xmlns:dr3d:1.0" xmlns:math="http://www.w3.org/1998/Math/MathML" xmlns:form="urn:oasis:names:tc:opendocument:xmlns:form:1.0" xmlns:script="urn:oasis:names:tc:opendocument:xmlns:script:1.0" xmlns:ooo="http://openoffice.org/2004/office" xmlns:ooow="http://openoffice.org/2004/writer" xmlns:oooc="http://openoffice.org/2004/calc" xmlns:dom="http://www.w3.org/2001/xml-events" xmlns:xforms="http://www.w3.org/2002/xforms" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" office:version="1.0">

      <office:body>
        <office:spreadsheet>
          <table:table>

            <!-- Rows -->
            <xsl:for-each select="top/row">
              <table:table-row>

                <table:table-cell>
                  <text:p><xsl:value-of select="column_1"/></text:p>
                </table:table-cell>

                <table:table-cell>
                  <text:p><xsl:value-of select="column_2"/></text:p>
                </table:table-cell>

              </table:table-row>
            </xsl:for-each>

          </table:table>
        </office:spreadsheet>
      </office:body>
    </office:document-content>
  </xsl:template>
</xsl:stylesheet>

export.xslt

<?xml version="1.0" encoding="UTF-8"?>

<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
  xmlns:office="urn:oasis:names:tc:opendocument:xmlns:office:1.0"
  xmlns:table="urn:oasis:names:tc:opendocument:xmlns:table:1.0"
  xmlns:text="urn:oasis:names:tc:opendocument:xmlns:text:1.0"
  exclude-result-prefixes="office table text">

  <xsl:output method = "xml" indent = "yes" encoding = "UTF-8" omit-xml-declaration = "no"/>

  <xsl:template match="office:spreadsheet">
    <xsl:for-each select="table:table/table:table-row">
      <row>
        <xsl:for-each select="table:table-cell">
          <xsl:choose>

            <xsl:when test="position()=1">
              <column_1><xsl:value-of select="."/></column_1>
            </xsl:when>

            <xsl:when test="position()=2">
              <column_2><xsl:value-of select="."/></column_2>
            </xsl:when>

          </xsl:choose>
        </xsl:for-each>
      </row>
    </xsl:for-each>
  </xsl:template>

<xsl:template match="/">
  <top>
    <xsl:apply-templates select="//office:spreadsheet"/>
  </top>
</xsl:template>

</xsl:stylesheet>

xml_file.xml

<?xml version="1.0" encoding="UTF-8"?>
<top>
  <row>
    <column_1>datum 1</column_1>
    <column_2>datum 2</column_2>
  </row>
  <row>
    <column_1>datum 3
    datum 4 (should appear on line below datum 3)</column_1>
    <column_2> datum 5 </column_2>
  </row>
</top>

After opening xml_file.xml in Libreoffice-Calc (using the import file), making some trivial change, and then exporting it back to xml_file.xml:

expected_output.xml

<?xml version="1.0" encoding="UTF-8"?>
<top>
  <row>
    <column_1>datum 1</column_1>
    <column_2>datum 2</column_2>
  </row>
  <row>
    <column_1>datum 3
    datum 4 (should appear on line below datum 3)</column_1>
    <column_2> datum 5 </column_2>
  </row>
</top>

actual_output.xml

<?xml version="1.0" encoding="UTF-8"?>
<top>
  <row>
    <column_1>datum 1</column_1>
    <column_2>datum 2</column_2>
  </row>
  <row>
    <column_1>datum 3 datum 4 (should appear on line below datum 3)</column_1>
    <column_2> datum 5 </column_2>
  </row>
</top>

EDIT:

Ok, I have something that works:

<xsl:stylesheet version="1.0"
  xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
  xmlns:office="urn:oasis:names:tc:opendocument:xmlns:office:1.0"
  xmlns:table="urn:oasis:names:tc:opendocument:xmlns:table:1.0"
  xmlns:text="urn:oasis:names:tc:opendocument:xmlns:text:1.0"
  exclude-result-prefixes="office table text">
  <xsl:output method="xml" version="1.0" encoding="UTF-8" indent="yes"/>

  <xsl:template match="/office:document">
    <top>
      <xsl:for-each select="office:body/office:spreadsheet/table:table/table:table-row">
        <row>
          <xsl:for-each select="table:table-cell">
            <xsl:element name="column_{position()}">
              <xsl:for-each select="text:p">
                <xsl:apply-templates/>
                <xsl:if test="position() != last()">
                  <xsl:text>&#10;</xsl:text>
                </xsl:if>
              </xsl:for-each>
            </xsl:element>
          </xsl:for-each>
        </row>
      </xsl:for-each>
    </top>
  </xsl:template>

  <xsl:template match="text:line-break">
    <xsl:text>&#10;</xsl:text>
  </xsl:template>

</xsl:stylesheet>

This is a modified version of michael.hor257k's answer, which correctly solved the problem of exporting line breaks that are inserted in LibreOffice. The second part is the line-break template match, which takes care of line breaks that already existed in the imported xml.

Ananda
  • 23
  • 4
  • Does it matter that the file is imported? Do you get a different result if you insert the line breaks in LibreOffice itself? – michael.hor257k Jun 05 '19 at 21:37
  • The result of inserting the line breaks in LibreOffice is the same. The purpose of the script is to provide an easy way to edit existing xml files using LibreOffice, so the data does have to be imported. – Ananda Jun 06 '19 at 02:13
  • Okay, but if the result is the same when the line breaks are inserted in LibreOffice , then clearly the issue is on the export side and the import is irrelevant. – michael.hor257k Jun 06 '19 at 05:38
  • Yes, I guess the import is not relevant to the problem. It still helps with reproducing the issue, though. – Ananda Jun 06 '19 at 12:31
  • 1
    Actually it turns out there is an important difference between imports and inserting lines. When there are line breaks in the import, they show up as , whereas when lines are inserted in LibreOffice, they show up as separate blocks. – Ananda Jun 06 '19 at 13:37

1 Answers1

0

I am not in a position to test this now, but try this as your export XSLT:

XSLT 1.0

<xsl:stylesheet version="1.0" 
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:office="urn:oasis:names:tc:opendocument:xmlns:office:1.0"
xmlns:table="urn:oasis:names:tc:opendocument:xmlns:table:1.0"
xmlns:text="urn:oasis:names:tc:opendocument:xmlns:text:1.0"
exclude-result-prefixes="office table text">
<xsl:output method="xml" version="1.0" encoding="UTF-8" indent="yes"/>

<xsl:template match="/office:document">
    <top>
        <xsl:for-each select="office:body/office:spreadsheet/table:table/table:table-row">
            <row>
                <xsl:for-each select="table:table-cell">
                    <xsl:element name="column_{position()}">
                        <xsl:for-each select="text:p">
                            <xsl:value-of select="."/>
                            <xsl:if test="position() != last()">
                                <xsl:text>&#10;</xsl:text>
                            </xsl:if>
                        </xsl:for-each>
                    </xsl:element>
                </xsl:for-each>
            </row>
        </xsl:for-each>
    </top>
</xsl:template>

</xsl:stylesheet>
michael.hor257k
  • 113,275
  • 6
  • 33
  • 51
  • Thanks! I can't test it right now, but I'll try it tomorrow and let you know if it works. – Ananda Jun 06 '19 at 02:14
  • Unfortunately, this gives the same result as before (datum 4 is moved to the same line as datum 3) – Ananda Jun 06 '19 at 12:30
  • I think the line breaks do not divide a column element into multiple elements, so for-each is only iterating over a single element. When I do copy-of, I get: datum 3 datum 4 (should appear on line below datum 3) (after removing a long list of attributes from ) – Ananda Jun 06 '19 at 12:39
  • I spoke too soon - this solution works perfectly when line breaks are inserted in LibreOffice. It only fails to work on line breaks that already exist in the imported xml because those are represented differently ( as opposed to a separate block) – Ananda Jun 06 '19 at 13:40
  • Thanks. This was really helpful. – Ananda Jun 06 '19 at 13:52
  • Well, you have two options: either replace `` with a linefeed character on the export, or - preferably, IMHO - make the import tokenize the paragraphs into `text:p` elements, so that they match the "native" ones. – michael.hor257k Jun 06 '19 at 14:12