1

I'm trying to a make nested list based on list-item's (same level), but having different label text, which will start-text of each list-item. I have done with some regex replaces to meet the nesting. I assume my code is not up to mark to meet the grouping|nesting.

IN.xml:

<?xml version="1.0" encoding="UTF-8"?>
<article>
   <p>The Simple list sample</p>
   <list-item>1. First</list-item>
   <list-item>2. Second</list-item>
   <list-item>3. Third</list-item>
   <p>The Nested list sample</p>
   <list-item>1. FirstLevel First Text</list-item>
   <list-item>1.1 SecondLevel First Text</list-item>
   <list-item>1.1.1 ThirdLevel First Text</list-item>
   <list-item>1.1.2 ThirdLevel Second Text</list-item>
   <list-item>1.2 SecondLevel Second Text</list-item>
   <list-item>2. FirstLevel Second Text</list-item>
   <list-item>2.1 SecondLevel First Text</list-item>
   <list-item>2.2 SecondLevel Second Text</list-item>
   <list-item>3. FirstLevel Third Text</list-item>
   <list-item>4. FirstLevel Fourth Text</list-item>
</article>

C# (tried code):

using System;
using System.Collections;
using System.Collections.Generic;
using System.IO;
using System.Text.RegularExpressions;
using System.Xml;
using System.Xml.Linq;
using System.Xml.XPath;
using System.Linq;
using System.Linq.Expressions;

namespace ListNesting1
{
    class Program
    {
        static void Main(string[] args)
        {
            XmlDocument XMLDoc1 = new XmlDocument();
            XmlNodeList NDL1;
            XmlElement XEle1;
           
            String S1, S2, StrFinal, StrEle1;
            StreamReader SR1;
            StreamWriter SW1;

            try
            {
                SR1 = new StreamReader(args[0]);
                S1 = SR1.ReadToEnd();
                SR1.Close();
            }
            catch (Exception e)
            {
                Console.WriteLine(e.Message);
                return;
            }

            XMLDoc1.LoadXml(S1);

            NDL1 = XMLDoc1.SelectNodes("//list-item");

            for(int i=0; i<NDL1.Count; i++)
            {
                if (Regex.IsMatch(NDL1[i].InnerText, @"^[0-9]\. "))
                    {
                    StrEle1 = "List1";
                }
                
                else if (Regex.IsMatch(NDL1[i].InnerText, @"^[0-9]\.[0-9] "))
                {
                    StrEle1 = "List2";
                }
                else if (Regex.IsMatch(NDL1[i].InnerText, @"^[0-9]\.[0-9]\.[0-9] "))
                {
                    StrEle1 = "List3";
                }
                else
                {
                    StrEle1 = "List4";
                }
                XEle1 = XMLDoc1.CreateElement(StrEle1);
                S2 = NDL1[i].OuterXml;
                XEle1.InnerXml = S2;
                
                NDL1[i].ParentNode.InsertAfter(XEle1, NDL1[i]);
                NDL1[i].ParentNode.RemoveChild(NDL1[i]);
            }

            StrFinal = XMLDoc1.OuterXml;
            StrFinal = StrFinal.Replace("</List1><List1>", "");
            StrFinal = StrFinal.Replace("</List2><List2>", "");
            StrFinal = StrFinal.Replace("</List3><List3>", "");
            StrFinal = StrFinal.Replace("</List4><List4>", "");

            StrFinal = StrFinal.Replace("</list-item></List1><List2>", "<List2>");
            StrFinal = StrFinal.Replace("</list-item></List2><List3>", "<List3>");
            StrFinal = StrFinal.Replace("</list-item></List3><List4>", "<List4>");

            StrFinal = StrFinal.Replace("</List2><List1>", "</List2></list-item>");
            StrFinal = StrFinal.Replace("</List3><List2>", "</List3></list-item>");
            StrFinal = StrFinal.Replace("</List4><List3>", "</List4></list-item>");

            StrFinal = StrFinal.Replace("><", ">\n<");

            SW1 = new StreamWriter(args[1]);
            SW1.Write(StrFinal);
            SW1.Close();
        }
    }
}

Required XML:

<?xml version="1.0" encoding="UTF-8"?>
<article>
   <p>The Simple list sample</p>
   <List1>
      <list-item>1. First</list-item>
      <list-item>2. Second</list-item>
      <list-item>3. Third</list-item>
   </List1>
   <p>The Nested list sample</p>
   <List1>
      <list-item>1. FirstLevel First Text
         <List2>
            <list-item>1.1 SecondLevel First Text
               <List3>
                  <list-item>1.1.1 ThirdLevel First Text</list-item>
                  <list-item>1.1.2 ThirdLevel Second Text</list-item>
               </List3>
            </list-item>
            <list-item>1.2 SecondLevel Second Text</list-item>
         </List2>
      </list-item>
      <list-item>2. FirstLevel Second Text
         <List2>
            <list-item>2.1 SecondLevel First Text</list-item>
            <list-item>2.2 SecondLevel Second Text</list-item>
         </List2>
      </list-item>
      <list-item>3. FirstLevel Third Text</list-item>
      <list-item>4. FirstLevel Fourth Text</list-item>
   </List1>
</article>
Rudramuni TP
  • 1,268
  • 2
  • 16
  • 26
  • 6
    string.Replace!? No, solve it in an XML way. You need a stack of wrapper nodes (ListN) The logic is when a `list-item` found and its level is detected, compare the level with the level of top node in the stack, 1) if match insert the item to the node. 2) if greater (or stack is empty) push a new node at the level. 3) if less pop from the stack. – shingo Feb 08 '23 at 05:20
  • 2
    What is your question? – Peter Csala Feb 13 '23 at 07:20
  • 1
    See the first comment, or if it suits you better: deserialize into an object model, write mapping code to convert to the desired model and serialize again. – Gert Arnold Feb 14 '23 at 08:00

3 Answers3

2

C# code

using System;
using System.Text.RegularExpressions;

public class Example
{
    public static void Main()
    {
        string pattern1 = @"(<list-item>1\. [\s\S]*?</list-item>(?!\s+<list-item>\d))";
        string substitution1 = @"<list1>$1</list1>";

        string pattern2 = @"(<list-item>\d\.1 [\s\S]*?</list-item>(?!\s+<list-item>\d.\d))";
        string substitution2 = @"<list2>$1</list2>";


        string pattern3 = @"(<list-item>\d.\d\.1 [\s\S]*?</list-item>(?!\s+<list-item>\d.\d.\d))";
        string substitution3 = @"<list3>$1</list3>";


        string input = @"<?xml version=""1.0"" encoding=""UTF-8""?>
<article>
   <p>The Simple list sample</p>
   <list-item>1. First</list-item>
   <list-item>2. Second</list-item>
   <list-item>3. Third</list-item>
   <p>The Nested list sample</p>
   <list-item>1. FirstLevel First Text</list-item>
   <list-item>1.1 SecondLevel First Text</list-item>
   <list-item>1.1.1 ThirdLevel First Text</list-item>
   <list-item>1.1.2 ThirdLevel Second Text</list-item>
   <list-item>1.2 SecondLevel Second Text</list-item>
   <list-item>2. FirstLevel Second Text</list-item>
   <list-item>2.1 SecondLevel First Text</list-item>
   <list-item>2.2 SecondLevel Second Text</list-item>
   <list-item>3. FirstLevel Third Text</list-item>
   <list-item>4. FirstLevel Fourth Text</list-item>
</article>";

        Regex regex = new Regex(pattern1);
        input = regex.Replace(input, substitution1);


        Regex regex2 = new Regex(pattern2);
        input = regex2.Replace(input, substitution2);


        Regex regex3 = new Regex(pattern3);
        input = regex3.Replace(input, substitution3);
    }
}

output

<?xml version="1.0" encoding="UTF-8"?>
<article>
    <p>The Simple list sample</p>
    <list1>
        <list-item>1. First</list-item>
        <list-item>2. Second</list-item>
        <list-item>3. Third</list-item>
    </list1>
    <p>The Nested list sample</p>
    <list1>
        <list-item>1. FirstLevel First Text</list-item>
        <list2>
            <list-item>1.1 SecondLevel First Text</list-item>
            <list3>
                <list-item>1.1.1 ThirdLevel First Text</list-item>
                <list-item>1.1.2 ThirdLevel Second Text</list-item>
            </list3>
            <list-item>1.2 SecondLevel Second Text</list-item>
        </list2>
        <list-item>2. FirstLevel Second Text</list-item>
        <list2>
            <list-item>2.1 SecondLevel First Text</list-item>
            <list-item>2.2 SecondLevel Second Text</list-item>
        </list2>
        <list-item>3. FirstLevel Third Text</list-item>
        <list-item>4. FirstLevel Fourth Text</list-item>
    </list1>
</article>
Oliver Hao
  • 715
  • 3
  • 5
  • Thanks for the suggestion, I am expecting the answer from STACK or some other object methods to get the result. Even my Regex also getting the required results. Thank u, and plus one. – Rudramuni TP Feb 14 '23 at 10:05
  • Using RE to change the structure of structured data like XML is a terrible idea. As an example, an XML input file with no line breaks between `list-item` elements (e.g. `

    The Simple list sample

    1. First2. Second3. Third`) would be valid and equivalent and, as currently written, the RE implementation in this answer would fail to format it correctly.
    – Jonathan Dodds Feb 15 '23 at 13:55
  • @Oliver Hao, thanks for the answer in RE formats (I expected in STACKING or other object oriented methods). – Rudramuni TP Feb 16 '23 at 04:49
1

This is a task for XSLT e.g. XSLT 3 with

<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
    xmlns:xs="http://www.w3.org/2001/XMLSchema"
    xmlns:mf="http://example.com/mf"
    exclude-result-prefixes="#all"
    version="3.0">
  
  <xsl:function name="mf:group" as="node()*">
    <xsl:param name="items" as="map(*)*"/>
    <xsl:param name="level" as="xs:integer"/>
    <xsl:choose>
      <xsl:when test="exists($items[count(?levels) ge $level])">
        <xsl:element name="List{$level}">
        <xsl:for-each-group select="$items" group-starting-with=".[count(?levels) eq $level]">
          <xsl:copy select="?item">
            <xsl:apply-templates select="node()"/>
            <xsl:sequence select="mf:group(tail(current-group()), $level + 1)"/>
          </xsl:copy>
        </xsl:for-each-group>
        </xsl:element>        
      </xsl:when>
      <xsl:otherwise>
        <xsl:apply-templates select="$items?item"/>
      </xsl:otherwise>
    </xsl:choose>
  </xsl:function>

  <xsl:mode on-no-match="shallow-copy"/>

  <xsl:output method="xml" indent="yes"/>

  <xsl:template match="article">
    <xsl:copy>
      <xsl:for-each-group select="*" group-adjacent="boolean(self::list-item)">
        <xsl:choose>
          <xsl:when test="current-grouping-key()">
              <xsl:sequence select="mf:group(current-group()!map { 'item' : ., 'levels' : (. => substring-before(' ') => tokenize('\.'))[normalize-space()]}, 1)"/>
          </xsl:when>
          <xsl:otherwise>
            <xsl:apply-templates select="current-group()"/>
          </xsl:otherwise>
        </xsl:choose>
      </xsl:for-each-group>
    </xsl:copy>
  </xsl:template>
  
</xsl:stylesheet>

For the .NET framework Saxon HE (latest version for .NET framework is Saxon HE 10.8) is available as an open-source package https://www.nuget.org/packages/Saxon-HE on NuGet as well as an executable download https://github.com/Saxonica/Saxon-HE/tree/main/10/Dotnet to run XSLT 3.

On .NET Core 6/7 Saxonica currently only has its commercial SaxonCS enterprise version (https://www.nuget.org/packages/SaxonCS) available but I have managed to cross compile both Saxon HE 10.8 as well as Saxon HE 11 using IKVM to .NET Core so even there you have the option to run XSLT 3.0 without needing to buy a commercial license:

Martin Honnen
  • 160,499
  • 6
  • 90
  • 110
  • 2
    The primary answer I have given is an XSLT 3.0 stylesheet to produce the nested list. As the poster asked in the context of C#/.NET then I pointed out how to run XSLT 3.0 with .NET, with links to Saxonica's Saxon HE as the established way to run XSLT 3 with the .NET framework. On the other hand some people these days are wanting to run their C#/.NET code with .NET Core or .NET 6/7, as there the only established way is the commercial enterprise package SaxonCS from Saxonica I also posted links to alternative ways using open-source tools (I am affiliated with as the creator). – Martin Honnen Feb 14 '23 at 08:13
  • Thanks for the XSLT3 answer. I am looking for C# with some object methods. – Rudramuni TP Feb 14 '23 at 10:06
  • @RudramuniTP, the API you are using (XmlDocument) doesn't offer any particular grouping features; LINQ to XML has some to group by a certain value and it is also possible with some more effort implement something similar to XSLT 2/3's group-starting-with and/or group-adjacent, see https://blogs.msmvps.com/martin-honnen/2009/11/27/grouping-with-linq-to-xml/, for instance. – Martin Honnen Feb 14 '23 at 10:29
  • On the other hand, XSLT 2 and 3 or XQuery 3.1 have powerful grouping and/or windowing constructs for such tasks so you might consider delegating the grouping to them and using them from C#: https://www.saxonica.com/html/documentation10/dotnet/dotnetapi.html – Martin Honnen Feb 14 '23 at 10:29
1

Here's something you might want to play with. It doesn't match your required output (the <Listx> elements are not inside the previous <list-item> but it's pretty close.

using System.Xml.Linq;

var xml = XmlString();

var s = new MemoryStream(System.Text.Encoding.UTF8.GetBytes(xml));
var x = XElement.Load(s);

XElement rootArticle = new ("article");
XElement parentElement = rootArticle;

List<int> currentLevel = new ();
foreach (var currentElement in x.Descendants())
{
  // When not a list-item, unwind
  if (currentElement.Name != "list-item")
  {
    while (currentLevel.Count > 0)
    {
      parentElement = parentElement.Parent;
      currentLevel.RemoveAt(currentLevel.Count - 1);
    }
    parentElement!.Add(currentElement);
    continue;
  }

  var headertext = (currentElement.FirstNode as XText)?.Value ?? string.Empty;
  List<int> previousLevel = currentLevel;
  currentLevel = headertext[..(headertext + " ").IndexOf(' ')].TrimEnd('.').Split('.').Select(x => { var _ = int.TryParse(x, out var n); return n; }).ToList();
  // If current level is in same sequence
  if (currentLevel.Count > 0 && currentLevel.Count >= previousLevel.Count && Enumerable.Range(0, previousLevel.Count).All(i => currentLevel[i] >= previousLevel[i]))
  {
    // Add required lists to match header depth
    for (int i = 0; i < currentLevel.Count - previousLevel.Count; i++)
    {
      XElement listElement = new ($"List{i + previousLevel.Count + 1}");
      parentElement.Add(listElement);
      parentElement = listElement;
    }
    parentElement.Add(currentElement);
    continue;
  }

  // Go back to parent with matching depth
  var depth = previousLevel.Take(currentLevel.Count).Where((n, i) => n <= currentLevel[i]).Count();
  for (int i=depth; i < previousLevel.Count; i++)
  {
    parentElement = parentElement!.Parent;
  }

  // Add required lists to match header depth
  for (int i = depth; i < currentLevel.Count; i++)
  {
    XElement listElement = new ($"List{i + 1}");
    parentElement!.Add(listElement);
    parentElement = listElement;
  }
  parentElement!.Add(currentElement);
}

Console.WriteLine(rootArticle.ToString());

// <article>
//   <p>The Simple list sample</p>
//   <List1>
//     <list-item>1. First</list-item>
//     <list-item>2. Second</list-item>
//     <list-item>3. Third</list-item>
//   </List1>
//   <p>The Nested list sample</p>
//   <List1>
//     <list-item>1. FirstLevel First Text</list-item>
//     <List2>
//       <list-item>1.1 SecondLevel First Text</list-item>
//       <List3>
//         <list-item>1.1.1 ThirdLevel First Text</list-item>
//         <list-item>1.1.2 ThirdLevel Second Text</list-item>
//       </List3>
//       <list-item>1.2 SecondLevel Second Text</list-item>
//     </List2>
//     <list-item>2. FirstLevel Second Text</list-item>
//     <List2>
//       <list-item>2.1 SecondLevel First Text</list-item>
//       <list-item>2.2 SecondLevel Second Text</list-item>
//     </List2>
//     <list-item>3. FirstLevel Third Text</list-item>
//     <list-item>4. FirstLevel Fourth Text</list-item>
//   </List1>
// </article>

static string XmlString() => @"<?xml version=""1.0"" encoding=""UTF-8""?>
<article>
   <p>The Simple list sample</p>
   <list-item>1. First</list-item>
   <list-item>2. Second</list-item>
   <list-item>3. Third</list-item>
   <p>The Nested list sample</p>
   <list-item>1. FirstLevel First Text</list-item>
   <list-item>1.1 SecondLevel First Text</list-item>
   <list-item>1.1.1 ThirdLevel First Text</list-item>
   <list-item>1.1.2 ThirdLevel Second Text</list-item>
   <list-item>1.2 SecondLevel Second Text</list-item>
   <list-item>2. FirstLevel Second Text</list-item>
   <list-item>2.1 SecondLevel First Text</list-item>
   <list-item>2.2 SecondLevel Second Text</list-item>
   <list-item>3. FirstLevel Third Text</list-item>
   <list-item>4. FirstLevel Fourth Text</list-item>
</article>";

agbinfo
  • 793
  • 5
  • 17