I am reading a .docx file using OpenXML in C#. It reads everything correctly but strangely, the content of textbox is being read thrice. What could be wrong? Here is the code to read .docx:
public static string TextFromWord(String file)
{
const string wordmlNamespace = "http://schemas.openxmlformats.org/wordprocessingml/2006/main";
StringBuilder textBuilder = new StringBuilder();
using (WordprocessingDocument wdDoc = WordprocessingDocument.Open(file, false))
{
// Manage namespaces to perform XPath queries.
NameTable nt = new NameTable();
XmlNamespaceManager nsManager = new XmlNamespaceManager(nt);
nsManager.AddNamespace("w", wordmlNamespace);
// Get the document part from the package.
// Load the XML in the document part into an XmlDocument instance.
XmlDocument xdoc = new XmlDocument(nt);
xdoc.Load(wdDoc.MainDocumentPart.GetStream());
XmlNodeList paragraphNodes = xdoc.SelectNodes("//w:p", nsManager);
foreach (XmlNode paragraphNode in paragraphNodes)
{
XmlNodeList textNodes = paragraphNode.SelectNodes(".//w:t", nsManager);
foreach (System.Xml.XmlNode textNode in textNodes)
{
textBuilder.Append(textNode.InnerText);
}
textBuilder.Append(Environment.NewLine);
}
}
return textBuilder.ToString();
}
The part of file I am talking about is:
The result is: I read it in a test application like this:
What's wrong here?