1

I parse from this html table:

<table align="center">
   <tbody>
      <!-- riadok -->
      <tr>
         <td valign="middle" align="right">
            <form action="130427_0i.htm" method="get">
               <input type="submit" class="button" title="uvedení do první modlitby dne" value="Inv.">
            </form>
         </td>
         <td valign="middle" align="center">
            <form action="130427_0c.htm" method="get">
               <input type="submit" class="button" title="modlitba se čtením" value="Čtení">
            </form>
         </td>
         <td valign="middle" align="left">
            <form action="130427_0r.htm" method="get">
               <input type="submit" class="button" title="ranní chvály" value="Ranní chvály">
            </form>
         </td>
      </tr>
      <!-- riadok -->
      <tr>
         <td valign="middle" align="right">
            <form action="130427_09.htm" method="get">
               <input type="submit" class="button" title="modlitba dopoledne" value="9h">
            </form>
            <form action="130427_09d.htm" method="get">
               <input type="submit" class="button" title="modlitba dopoledne (žalmy z doplňovacího cyklu)" value="(alt)">
            </form>
         </td>
         <td valign="middle" align="center">
            <form action="130427_02.htm" method="get">
               <input type="submit" class="button" title="modlitba v poledne" value="12h">
            </form>
            <form action="130427_02d.htm" method="get">
               <input type="submit" class="button" title="modlitba v poledne (žalmy z doplňovacího cyklu)" value="(alt)">
            </form>
         </td>
         <td valign="middle" align="left">
            <form action="130427_03.htm" method="get">
               <input type="submit" class="button" title="modlitba odpoledne" value="15h">
            </form>
            <form action="130427_03d.htm" method="get">
               <input type="submit" class="button" title="modlitba odpoledne (žalmy z doplňovacího cyklu)" value="(alt)">
            </form>
         </td>
      </tr>
      <!-- riadok -->
      <tr>
         <td align="right">
            <form action="130427_0v.htm" method="get">
               <input type="submit" class="button" title="nešpory" value="Nešpory">
            </form>
         </td>
         <td valign="middle" align="center">
            <form action="130427_0k.htm" method="get">
               <input type="submit" class="button" title="kompletář" value="Kompl.">
            </form>
         </td>
      </tr>
      <!-- riadok -->
      <tr>
         <td align="right"></td>
      </tr>
   </tbody>
</table>

And I need to get every form (with input) in one HtmlNode. For example this:

<form action="130427_0c.htm" method="get">
               <input type="submit" class="button" title="modlitba se čtením" value="Čtení">
 </form>

With my code I get only this:

<form action="130427_0c.htm" method="get">

My code:

public static class FromHtmlTableToHtmlNodeList
    {
        static List<List<HtmlNode>> tableOfNode = new List<List<HtmlNode>>();

        public static List<List<HtmlNode>> Do(string htmltable)
        {
            var doc = new HtmlDocument();
            doc.LoadHtml(htmltable);

            HtmlNodeCollection rows = doc.DocumentNode.SelectNodes(".//tr");
            for (int i = 0; i < rows.Count; i++)
            {
                int i2 = tableOfNode.Count;
                HtmlNodeCollection cols = rows[i].SelectNodes("./td");

                for (int j = 0; j < cols.Count; j++)
                {

                    HtmlNodeCollection inCols = cols[j].SelectNodes("./form/descendant-or-self::*");
                    List<HtmlNode> nextRow = new List<HtmlNode>();

                    if (inCols != null)
                    {
                        for (int k = 0; k < inCols.Count; k++)
                        {
                            if (tableOfNode.Count < i2+k + 1)
                            {
                                tableOfNode.Add(nextRow);

                            }
                            if (tableOfNode[i2 + k].Count < j + 1) tableOfNode[i2 + k].Insert(j, inCols[k]);

                        }
                    }                                   
                }


            }

            return tableOfNode;
        }



    }

I know that problem is there:

HtmlNodeCollection inCols = cols[j].SelectNodes("./form/descendant-or-self::*");

How should XPath looks like for what I want?

Dimitre Novatchev
  • 240,661
  • 26
  • 293
  • 431
Mikaèl
  • 233
  • 3
  • 13

2 Answers2

0

You're looking for the XPath expression

./form[input]

This returns all <form/> elements including their subtrees which contain at least one <input/> element.

Jens Erat
  • 37,523
  • 16
  • 80
  • 96
0

The FORM is treated specially by default by the Html Agility Pack. See here why: HtmlAgilityPack -- Does <form> close itself for some reason?

This code should get all the FORM elements:

HtmlDocument doc = new HtmlDocument();
HtmlNode.ElementsFlags.Remove("form");
doc.Load(myTestHtm);

foreach (var v in doc.DocumentNode.SelectNodes("//form"))
{
    Console.WriteLine(v.OuterHtml);
}
Community
  • 1
  • 1
Simon Mourier
  • 132,049
  • 21
  • 248
  • 298