2

I am looking for a way to find the first (topmost) level of matching results in an XML tree using LINQ to XML. The XML I have looks like the following:

<column>
    <row>
        <object/>
        <column column-id="1" column-name="abc">
            <row>
                <column>
                    <row>
                        <column column-id="2" column-name="abc"/>
                    </row>
                </column>
            </row>
        </column>
    </row>
    <row>
        <column column-id="3" column-name="abc">
            <row>
                <column/>
            </row>
        </column>
    </row>
</column>

Now I want to get all the first level columns where the column-name is abc. So the result should be:

<column column-id="1" column-name="abc">...</column>
<column column-id="3" column-name="abc">...</column>

I have tried already the following code:

// Selects every descendant <column> of layout whose column-name equals "abc" and that has
// no ancestor named "column". The ancestor check is not limited to elements below layout:
// it also sees layout itself and anything above it, which is the problem described below.
layout.Descendants("column")
      .Where(x => x.Attribute("column-name").Value.Equals("abc") && !x.Ancestors("column").Any());

This works fine when the XElement layout being searched is not named "column" and is not nested inside any container elements named "column". But my XElement does, in fact, belong inside a document whose root element is named "column", so the x.Ancestors("column").Any() expression wrongly filters out all matches. I.e. the problem can be reproduced using the XML string above by initializing layout as follows:

// Parse the sample XML so that layout is the root <column> element itself.
var layout = XElement.Parse(xmlString);

I want to keep the relation in the variable because of changes I have to make later on.

Is there maybe a way to limit the ancestors selector?

dbc
  • 104,963
  • 20
  • 228
  • 340
P.Ms.
  • 63
  • 7

2 Answers2

1

Another way to state the question is that you want your predicate to include the distance from the document root.

Here is a function that does that:

/// <summary>
/// Counts how many parent links separate <paramref name="elem"/> from <paramref name="root"/>.
/// </summary>
/// <param name="elem">The element to start from.</param>
/// <param name="root">The ancestor at which counting stops.</param>
/// <returns>
/// 0 when <paramref name="elem"/> is <paramref name="root"/>; otherwise the number of steps up the
/// parent chain needed to reach <paramref name="root"/>, or -1 when <paramref name="root"/> is not
/// reached (for example because <paramref name="elem"/> is not inside it).
/// </returns>
static int DistanceToRoot(XElement elem, XElement root)
{
    var dist = 0;

    for (var curr = elem; curr != root; curr = curr.Parent)
    {
        // Ran off the top of the tree without meeting root: report "not a descendant"
        // instead of dereferencing a null parent.
        if (curr == null)
            return -1;

        dist++;
    }

    return dist;
}

And you use it like so (based on your example, the distance that we want is 2):

// Keep only the <column> elements that sit exactly two levels below the document root
// and whose column-name attribute is "abc". The (string) cast yields null instead of
// throwing when an element has no column-name attribute.
var columns = from column in xml.Descendants("column")
              where
                  DistanceToRoot(column, xml.Root) == 2 &&
                  (string)column.Attribute("column-name") == "abc"
              select column;

// Print each match together with its distance from the root.
foreach (var match in columns)
{
    Console.WriteLine(match);
    Console.Write("Distance is: ");
    Console.WriteLine(DistanceToRoot(match, xml.Root));
    Console.ReadLine();
}

Which results in:

<column column-id="1" column-name="abc">
  <row>
    <column>
      <row>
        <column column-id="2" column-name="abc" />
      </row>
    </column>
  </row>
</column>
Distance is: 2

<column column-id="3" column-name="abc">
  <row>
    <column />
  </row>
</column>
Distance is: 2

Rextester Demo.

Rodrick Chapman
  • 5,437
  • 2
  • 31
  • 32
1

Assuming you don't know in advance the precise depth of the elements for which you are querying, what you want to do is to descend the element hierarchy underneath a specified element, and return the topmost elements that match a given condition, in this case having the name "column".

As a quick-and-dirty way to do this, you can restrict the check to only those ancestors of the candidate column that are still descendants of layout, by using TakeWhile():

// Candidate columns named "abc" that have no other "column" element between them and layout.
var matches = layout
    .Descendants("column")
    .Where(candidate => (string)candidate.Attribute("column-name") == "abc")
    .Where(candidate => !candidate
        .Ancestors()
        // Stop walking upwards once layout is reached, so layout and anything above it are ignored.
        .TakeWhile(ancestor => ancestor != layout)
        .Any(ancestor => ancestor.Name == "column"));

A more performant, general solution would be to introduce an extension method on XElement that enumerates through all descendants of the given element, returning the topmost elements that match a given predicate. This would be generally useful e.g. in cases where one wants to query for descendants that are going to be near the top of a deep XML hierarchy, as it avoids descending unnecessarily into matched nodes:

public static partial class XElementExtensions
{
    /// <summary>
    /// Enumerates the descendants of <paramref name="root"/> in document order, returning the topmost
    /// elements that match <paramref name="predicate"/>. The search does not descend into an element
    /// once that element has matched.
    /// </summary>
    /// <param name="root">The element whose descendants are searched. When null, nothing is returned.</param>
    /// <param name="predicate">The test applied to each candidate element.</param>
    /// <param name="includeSelf">
    /// When true, <paramref name="root"/> itself is tested first; if it matches, it is the only element returned.
    /// </param>
    /// <returns>A lazily evaluated sequence of the topmost matching elements.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="predicate"/> is null.</exception>
    public static IEnumerable<XElement> DescendantsUntil(this XElement root, Func<XElement, bool> predicate, bool includeSelf = false)
    {
        // Validate eagerly here; the iterator below only runs once enumeration starts.
        if (predicate == null)
            throw new ArgumentNullException("predicate");
        return GetDescendantsUntil(root, predicate, includeSelf);
    }

    /// <summary>
    /// Iterator behind <see cref="DescendantsUntil"/>: a non-recursive walk that skips the subtree of every match.
    /// </summary>
    static IEnumerable<XElement> GetDescendantsUntil(XElement root, Func<XElement, bool> predicate, bool includeSelf)
    {
        if (root == null)
            yield break;
        if (includeSelf && predicate(root))
        {
            yield return root;
            yield break;
        }
        var current = root.FirstChild<XElement>();
        while (current != null)
        {
            var isMatch = predicate(current);
            if (isMatch)
                yield return current;

            // If not a match, get the first child of the current element.
            XElement next = (isMatch ? null : current.FirstChild<XElement>());

            if (next == null)
                // If no first child, get the next sibling of the current element.
                next = current.NextSibling<XElement>();

            // If no more siblings, crawl up the list of parents until hitting the root, getting the next sibling of the lowest parent that has more siblings.
            if (next == null)
            {
                for (var parent = current.Parent as XElement; parent != null && parent != root && next == null; parent = parent.Parent as XElement)
                {
                    next = parent.NextSibling<XElement>();
                }
            }

            current = next;
        }
    }

    /// <summary>
    /// Returns the first child node of <paramref name="node"/> that is of type <typeparamref name="TNode"/>,
    /// or null when <paramref name="node"/> is not a container or has no such child.
    /// </summary>
    public static TNode FirstChild<TNode>(this XNode node) where TNode : XNode
    {
        var container = node as XContainer;
        if (container == null)
            return null;
        // FirstNode may be null for an empty container; NextSibling tolerates a null receiver.
        return container.FirstNode.NextSibling<TNode>(true);
    }

    /// <summary>
    /// Returns the first sibling after <paramref name="node"/> that is of type <typeparamref name="TNode"/>, or null if there is none.
    /// </summary>
    public static TNode NextSibling<TNode>(this XNode node) where TNode : XNode
    {
        return node.NextSibling<TNode>(false);
    }

    /// <summary>
    /// Returns the first node of type <typeparamref name="TNode"/> found by following NextNode links,
    /// starting at <paramref name="node"/> itself when <paramref name="includeSelf"/> is true and at
    /// the node after it otherwise. Returns null when <paramref name="node"/> is null or no such node exists.
    /// </summary>
    public static TNode NextSibling<TNode>(this XNode node, bool includeSelf) where TNode : XNode
    {
        if (node == null)
            return null;
        for (node = (includeSelf ? node : node.NextNode); node != null; node = node.NextNode)
        {
            var nextTNode = node as TNode;
            if (nextTNode != null)
                return nextTNode;
        }
        return null;
    }
}

Then use it like:

// Take the topmost "column" elements below layout, then keep those named "abc".
var matches =
    from topColumn in layout.DescendantsUntil(element => element.Name == "column")
    where (string)topColumn.Attribute("column-name") == "abc"
    select topColumn;

The extension method should be reasonably performant as it avoids recursion and complex nested linq queries.

Sample .Net fiddle showing both options.

dbc
  • 104,963
  • 20
  • 228
  • 340