This transformable HTML5:
<!DOCTYPE html>
<html>
<head>
</head>
<body>
<table border="1">
<caption>Complex Table</caption>
<tbody>
<tr>
<th>Title</th>
<th>Volume</th>
<th>Chapter</th>
<th>Stds.</th>
<th>Dept.</th>
</tr>
<tr>
<td rowspan="6">STEM</td>
<td rowspan="1">1</td>
<td rowspan="2">1</td>
<td>1 to 10</td>
<td rowspan="2">Biology</td>
</tr>
<tr>
<td rowspan="1">2</td>
<td>20 to 30</td>
</tr>
<tr>
<td rowspan="1">3</td>
<td rowspan="1">2</td>
<td>40 to 60</td>
<td rowspan="1">Chemistry</td>
</tr>
<tr>
<td>4</td>
<td>3</td>
<td>70 to 80</td>
<td>Physics</td>
</tr>
<tr>
<td rowspan="4">5</td>
<td rowspan="1">4</td>
<td>80 to 120</td>
<td rowspan="1">Math</td>
</tr>
<tr>
<td rowspan="1">5</td>
<td>120 to 135</td>
<td rowspan="1">Geometry</td>
</tr>
</tbody>
</table>
<table border="1">
<caption>Simpler Table</caption>
<tbody>
<tr>
<th>Title</th>
<th>Volume</th>
<th>Chapter</th>
<th>Stds.</th>
<th>Dept.</th>
</tr>
<tr>
<td colspan="1" rowspan="3">Kinesiology</td>
<td>1</td>
<td>1</td>
<td>A to C</td>
<td>Strength</td>
</tr>
<tr>
<td>2</td>
<td>2 to 3</td>
<td>D to H</td>
<td>Agility</td>
</tr>
<tr>
<td>3</td>
<td>4</td>
<td>I to X</td>
<td>Flexibility</td>
</tr>
</tbody>
</table>
<table border="1">
<caption>Simplest Table</caption>
<tbody>
<tr>
<th>Title</th>
<th>Volume</th>
<th>Chapter</th>
<th>Stds.</th>
<th>Dept.</th>
</tr>
<tr>
<td>Skills</td>
<td>1</td>
<td>1</td>
<td>A to C</td>
<td>Keyboard</td>
</tr>
</tbody>
</table>
</body>
</html>
This desired output (if you view the rendered HTML, you can see the pattern of data wanted):
<?xml version="1.0" encoding="UTF-8"?>
<production>
<book title="STEM" volume="1"/>
<book title="STEM" volume="2"/>
<book title="STEM" volume="3"/>
<book title="STEM" volume="4"/>
<book title="STEM" volume="5"/>
<book title="Kinesiology" volume="1"/>
<book title="Kinesiology" volume="2"/>
<book title="Kinesiology" volume="3"/>
<book title="Skills" volume="1"/>
</production>
The not quite working transform:
<xsl:stylesheet
version="2.0"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:xs="http://www.w3.org/2001/XMLSchema"
exclude-result-prefixes="xs">
<xsl:output method="xml" encoding="UTF-8" indent="yes" />
<xsl:template match="/">
<catalog>
<xsl:apply-templates/>
</catalog>
</xsl:template>
<xsl:template match="text()"/>
<!-- multi-volume edition -->
<xsl:template match="table">
<xsl:variable name="title" select="descendant::td[1]"/>
<xsl:variable name="context-td" select="."/>
<!-- the following needs work -->
<xsl:for-each select="descendant::tr/td[1][matches(.,'\d+$')]">
<book>
<xsl:attribute name="title" select="$title"/>
<xsl:attribute name="volume" select="."/>
</book>
</xsl:for-each>
</xsl:template>
<!-- single-volume edition -->
<xsl:template match="table[count(descendant::tr) < 3]">
<book>
<xsl:attribute name="title" select="descendant::td[1]"/>
<xsl:attribute name="volume" select="descendant::tr[2]/td[2]"/>
</book>
</xsl:template>
</xsl:stylesheet>
The xpath in for-each
needs work. I've tried various axis but haven't found one that works across all use cases.