An XML document is composed of nodes, and there are many different kinds of nodes (elements, attributes, text, namespaces, processing instructions, comments, documents, etc.).
An XML element node that contains text content will have a child node named `#text`. This is dictated by the DOM specification, which represents an element's character data as a separate text node. So, in your example, `grandchild1`, `grandchild2`, `grandchild3`, and `grandchild4` all have a child `#text` node, e.g.:
Document
 |
 |_ PI: <?xml version="1.0" encoding="utf-8"?>
 |
 |_ Element: "Parent"
     |
     |_ Element: "child1"
     |   |
     |   |_ Element: "grandchild1"
     |   |   |
     |   |   |_ #text: "someinfo1"
     |   |
     |   |_ Element: "grandchild2"
     |       |
     |       |_ #text: "someinfo2"
     |
     |_ Element: "child2"
         |
         |_ Element: "grandchild3"
         |   |
         |   |_ #text: "someinfo3"
         |
         |_ Element: "grandchild4"
             |
             |_ #text: "someinfo4"
Even whitespace between elements, even if it is just line breaks, gets stored as extra text nodes (because you are setting the `preserveWhiteSpace` option to true), e.g.:
Document
 |
 |_ PI: <?xml version="1.0" encoding="utf-8"?>
 |
 |_ #text: "\r\n"
 |
 |_ Element: "Parent"
     |
     |_ #text: "\r\n "
     |
     |_ Element: "child1"
     |   |
     |   |_ #text: "\r\n "
     |   |
     |   |_ Element: "grandchild1"
     |   |   |
     |   |   |_ #text: "someinfo1"
     |   |
     |   |_ #text: "\r\n "
     |   |
     |   |_ Element: "grandchild2"
     |       |
     |       |_ #text: "someinfo2"
     |
     |_ #text: "\r\n "
     |
     |_ Element: "child2"
     |   |
     |   |_ #text: "\r\n "
     |   |
     |   |_ Element: "grandchild3"
     |   |   |
     |   |   |_ #text: "someinfo3"
     |   |
     |   |_ #text: "\r\n "
     |   |
     |   |_ Element: "grandchild4"
     |   |   |
     |   |   |_ #text: "someinfo4"
     |   |
     |   |_ #text: "\r\n "
     |
     |_ #text: "\r\n"
XPath searches all nodes, but the `*` wildcard only matches element nodes. However, you are manually drilling into the children of the found elements, so you are going to encounter the `#text` nodes. For what you are attempting to do, turn OFF whitespace preservation to remove the unwanted whitespace-only text nodes, and then focus only on the element child nodes, e.g.:
// Lists every element matched by "//Parent/*" and, beneath each one, the
// names of its direct element children (i.e. a two-level walk).
IXMLDOMDocument *pXMLDom = NULL;
IXMLDOMNodeList *pNodes = NULL;
IXMLDOMNode *pNode = NULL;
long length = 0;

// create pXMLDom as needed ...

pXMLDom->put_async(VARIANT_FALSE);
pXMLDom->put_validateOnParse(VARIANT_TRUE);
pXMLDom->put_resolveExternals(VARIANT_FALSE);
pXMLDom->put_preserveWhiteSpace(VARIANT_FALSE); // <-- do not keep whitespace-only text nodes

BSTR parentNode = SysAllocString(L"//Parent/*");
HRESULT hRes = pXMLDom->selectNodes(parentNode, &pNodes);
SysFreeString(parentNode);

if (SUCCEEDED(hRes))
{
    pNodes->get_length(&length);

    for (int i = 0; i < length; ++i)
    {
        hRes = pNodes->get_item(i, &pNode);
        if (SUCCEEDED(hRes))
        {
            BSTR name = NULL;
            hRes = pNode->get_nodeName(&name);
            if (SUCCEEDED(hRes))
            {
                printf("Node (%d), <%S>:\n", i, name);
                SysFreeString(name);
            }

            // Walk the sibling chain of pNode's children. Anything other than
            // S_OK (S_FALSE = no such node, or a failure) ends the walk.
            IXMLDOMNode *pChild = NULL;
            hRes = pNode->get_firstChild(&pChild);
            if (hRes != S_OK)
                pChild = NULL;

            while (pChild != NULL)
            {
                DOMNodeType type;
                hRes = pChild->get_nodeType(&type);
                if (SUCCEEDED(hRes) && (type == NODE_ELEMENT))
                {
                    // Query the CHILD's name (not the parent's).
                    BSTR childName = NULL;
                    hRes = pChild->get_nodeName(&childName);
                    if (SUCCEEDED(hRes))
                    {
                        printf(" %S\n", childName);
                        SysFreeString(childName);
                    }
                }

                // Fetch the next sibling before releasing the current child,
                // so every node obtained is released exactly once.
                IXMLDOMNode *pSibling = NULL;
                hRes = pChild->get_nextSibling(&pSibling);
                pChild->Release();
                pChild = (hRes == S_OK) ? pSibling : NULL;
            }

            pNode->Release();
        }
    }

    pNodes->Release();
}
...
pXMLDom->Release();
If you need to go more than 2 levels deep, you should set up a recursive function instead, e.g.:
// Prints the name of pNode, then recurses into each of its element children
// (depth-first). Non-element children (text, comments, ...) are skipped.
// The caller keeps ownership of pNode; this function does not Release() it.
void processNode(IXMLDOMNode *pNode)
{
    BSTR name = NULL;
    HRESULT hRes = pNode->get_nodeName(&name);
    if (SUCCEEDED(hRes))
    {
        printf("%S\n", name);
        SysFreeString(name);
    }

    // Walk the sibling chain of pNode's children. Anything other than S_OK
    // (S_FALSE = no such node, or a failure) ends the walk.
    IXMLDOMNode *pChild = NULL;
    hRes = pNode->get_firstChild(&pChild);
    if (hRes != S_OK)
        pChild = NULL;

    while (pChild != NULL)
    {
        DOMNodeType type;
        hRes = pChild->get_nodeType(&type);
        if (SUCCEEDED(hRes) && (type == NODE_ELEMENT))
            processNode(pChild);

        // Fetch the next sibling before releasing the current child, so
        // every node obtained here is released exactly once.
        IXMLDOMNode *pSibling = NULL;
        hRes = pChild->get_nextSibling(&pSibling);
        pChild->Release();
        pChild = (hRes == S_OK) ? pSibling : NULL;
    }
}
...
// Driver: select every element matched by "//Parent/*" and hand each one to
// processNode().
IXMLDOMDocument *pXMLDom = NULL;
IXMLDOMNodeList *pMatches = NULL;
IXMLDOMNode *pItem = NULL;
long count = 0;

// create pXMLDom as needed ...

// Parser options.
pXMLDom->put_async(VARIANT_FALSE);
pXMLDom->put_validateOnParse(VARIANT_TRUE);
pXMLDom->put_resolveExternals(VARIANT_FALSE);
pXMLDom->put_preserveWhiteSpace(VARIANT_FALSE); // <-- whitespace preservation turned off

// The query string is only needed for the duration of the call.
BSTR xpath = SysAllocString(L"//Parent/*");
HRESULT hRes = pXMLDom->selectNodes(xpath, &pMatches);
SysFreeString(xpath);

if (SUCCEEDED(hRes))
{
    pMatches->get_length(&count);

    for (int idx = 0; idx < count; ++idx)
    {
        hRes = pMatches->get_item(idx, &pItem);
        if (!SUCCEEDED(hRes))
            continue; // skip items that could not be retrieved

        processNode(pItem);
        pItem->Release();
    }

    pMatches->Release();
}
...
pXMLDom->Release();