If you are parsing a Word document, I suggest using Open-Xml-PowerTools (OpenXmlPowerTools) by Eric White. Install it through the NuGet package manager, or download it directly from the web.
Here is the code snippet I have used to parse a document; it is small, clean, and stable. Step through it in the debugger first to understand how it works, which will help you customize it for your own needs. It reads all the text, paragraphs, bullets, and other content. Go through Eric White's documentation for more details, but the snippet below is most of what you'll need for parsing, and you can build your own functionality on top of it.
using DocumentFormat.OpenXml.Packaging;
using OpenXmlPowerTools;
// Handle to the Word document being parsed; the methods below read it.
private static WordprocessingDocument _wordDocument;
// Open the package from wordFileStream. The second argument (false) is the
// SDK's isEditable flag, i.e. the document is opened for reading only.
_wordDocument = WordprocessingDocument.Open(wordFileStream, false); // wordFileStream is the Stream passed to the constructor
// Headers and footers are separate parts hanging off the main document part;
// materialize each collection into a list.
var headerList = _wordDocument.MainDocumentPart.HeaderParts.ToList();
var footerList = _wordDocument.MainDocumentPart.FooterParts.ToList();
/// <summary>
/// Locates the body of <c>_wordDocument</c> and passes it to
/// <c>OutputBlockLevelContent</c> for block-by-block processing.
/// </summary>
/// <remarks>
/// Parsing is best-effort: a failure is reported through
/// <c>System.Diagnostics.Trace</c> and does not propagate to the caller.
/// A document that is not open, has no main part, or has no body is
/// treated as having nothing to parse.
/// </remarks>
private void GetDocumentBodyContents()
{
    try
    {
        if (_wordDocument == null || _wordDocument.MainDocumentPart == null)
        {
            return;
        }

        // Uncomment to accept tracked revisions before reading the content.
        //RevisionAccepter.AcceptRevisions(_wordDocument);
        XElement root = _wordDocument.MainDocumentPart.GetXDocument().Root;
        if (root == null)
        {
            return;
        }

        // The first logical child of the root is taken as the body element;
        // FirstOrDefault avoids throwing when there is none.
        XElement body = root.LogicalChildrenContent().FirstOrDefault();
        if (body == null)
        {
            return;
        }

        OutputBlockLevelContent(_wordDocument, body);
    }
    catch (Exception ex)
    {
        // Keep the original non-throwing contract, but leave a diagnostic
        // trail instead of discarding the failure silently.
        System.Diagnostics.Trace.TraceError("GetDocumentBodyContents failed: {0}", ex);
    }
}
/// <summary>
/// Walks the block-level children of a container (the document body or a
/// table cell). For each paragraph it retrieves the list-item text and the
/// concatenated run text; for any other element it descends through its
/// rows and cells and processes every cell recursively.
/// </summary>
/// <param name="wordDoc">The open document, passed to <c>ListItemRetriever</c>.</param>
/// <param name="blockLevelContentContainer">
/// The element whose logical children are processed; <c>null</c> is ignored.
/// </param>
/// <remarks>
/// Processing is best-effort: a failure is reported through
/// <c>System.Diagnostics.Trace</c> and ends processing of this container
/// only, so a recursive call that fails does not stop the caller's loop.
/// Note that the in-memory XML of each <c>w:br</c> element is overwritten
/// with the text <c>&lt;br /&gt;</c> as a side effect.
/// </remarks>
private void OutputBlockLevelContent(WordprocessingDocument wordDoc, XElement blockLevelContentContainer)
{
    if (blockLevelContentContainer == null)
    {
        return;
    }

    try
    {
        foreach (XElement blockLevelContentElement in
            blockLevelContentContainer.LogicalChildrenContent())
        {
            if (blockLevelContentElement.Name == W.p)
            {
                // List-item text for this paragraph as reported by PowerTools
                // (presumably the number or bullet; verify against the
                // PowerTools version in use).
                string currentItem = ListItemRetriever.RetrieveListItem(wordDoc, blockLevelContentElement);

                string currentItemText = blockLevelContentElement
                    .LogicalChildrenContent(W.r)
                    .Select(run =>
                    {
                        // A w:br element has no text, so casting the run to
                        // string would drop the line break. Give EVERY break
                        // in the run a textual marker (SetElementValue would
                        // only touch the first one). The list is materialized
                        // before the tree is modified.
                        foreach (XElement lineBreak in run.LogicalChildrenContent(W.br).ToList())
                        {
                            lineBreak.Value = "<br />";
                        }

                        return (string)run;
                    })
                    .StringConcatenate();

                // TODO: consume currentItem and currentItemText here (for
                // example, add them to a result collection); this snippet
                // only computes them.
                continue;
            }

            // If element is not a paragraph, it must be a table.
            foreach (XElement row in blockLevelContentElement.LogicalChildrenContent())
            {
                foreach (XElement cell in row.LogicalChildrenContent())
                {
                    // Cells are a block-level content container, so can call this method recursively.
                    OutputBlockLevelContent(wordDoc, cell);
                }
            }
        }
    }
    catch (Exception ex)
    {
        // Keep the original non-throwing contract, but leave a diagnostic
        // trail instead of discarding the failure silently.
        System.Diagnostics.Trace.TraceError("OutputBlockLevelContent failed: {0}", ex);
    }
}