0

I'm trying to add HTML content to a DOCX file using the OpenXML altChunk approach in C#. The sample code below works fine and appends the HTML content to the end of the document. My requirement is to add HTML content at a specific place in the document: inside a table cell or a paragraph, in place of a specific search string, or at placeholders marked with content controls. Can you please point me to a sample or share a few suggestions? Please let me know if you need more info.

using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Xml.Linq;
using DocumentFormat.OpenXml.Packaging;
using OpenXmlPowerTools;
using DocumentFormat.OpenXml.Wordprocessing;
using DocumentFormat.OpenXml;
using System.Xml;

namespace Docg2
{
    /// <summary>
    /// Console sample that imports an HTML fragment into an existing DOCX file by adding
    /// an alternative-format import part and a <c>w:altChunk</c> element that references it.
    /// The main document part is edited as raw XML through LINQ to XML.
    /// </summary>
    class Program
    {
        static void Main(string[] args)
        {
            testaltchunk();
        }

        /// <summary>
        /// Opens <c>../../Test3.docx</c> for editing, stores a small HTML document in a new
        /// alternative-format import part, and places a <c>w:altChunk</c> element pointing at
        /// that part after the last top-level paragraph of the body. When the body has no
        /// top-level paragraph, the element is placed before the trailing <c>w:sectPr</c>,
        /// or appended to the body if there is none.
        /// </summary>
        /// <exception cref="InvalidOperationException">
        /// The main document part has no <c>w:body</c> element.
        /// </exception>
        public static void testaltchunk()
        {
            XNamespace w = "http://schemas.openxmlformats.org/wordprocessingml/2006/main";
            XNamespace r = "http://schemas.openxmlformats.org/officeDocument/2006/relationships";
            using (WordprocessingDocument myDoc = WordprocessingDocument.Open("../../Test3.docx", true))
            {
                string html =
                @"<html>
                    <head/>
                    <body>
                        <h1>Html Heading</h1>
                        <p>This is an html document in a string literal.</p>
                    </body>
                </html>";

                // Load and validate the document XML before adding anything to the package,
                // so a malformed document does not leave an unreferenced import part behind.
                XDocument mainDocumentXDoc = GetXDocument(myDoc);
                XElement body = mainDocumentXDoc.Root.Element(w + "body");
                if (body == null)
                {
                    throw new InvalidOperationException("The main document part does not contain a w:body element.");
                }

                // A fixed id such as "AltChunkId1" clashes with the relationship left behind
                // by an earlier run against the same file; generate a fresh one every time.
                // The "AltChunkId" prefix keeps the id starting with a letter.
                string altChunkId = "AltChunkId" + Guid.NewGuid().ToString("N");
                MainDocumentPart mainPart = myDoc.MainDocumentPart;
                AlternativeFormatImportPart chunk = mainPart.AddAlternativeFormatImportPart("application/xhtml+xml", altChunkId);

                using (Stream chunkStream = chunk.GetStream(FileMode.Create, FileAccess.Write))
                using (StreamWriter stringStream = new StreamWriter(chunkStream))
                {
                    stringStream.Write(html);
                }

                XElement altChunk = new XElement(w + "altChunk", new XAttribute(r + "id", altChunkId));

                XElement lastParagraph = body.Elements(w + "p").LastOrDefault();
                if (lastParagraph != null)
                {
                    lastParagraph.AddAfterSelf(altChunk);
                }
                else
                {
                    // No top-level paragraph (for example a table-only body): keep the
                    // chunk ahead of the section properties, which close the body.
                    XElement sectionProperties = body.Elements(w + "sectPr").LastOrDefault();
                    if (sectionProperties != null)
                    {
                        sectionProperties.AddBeforeSelf(altChunk);
                    }
                    else
                    {
                        body.Add(altChunk);
                    }
                }

                SaveXDocument(myDoc, mainDocumentXDoc);
            }
        }

        /// <summary>
        /// Serializes <paramref name="mainDocumentXDoc"/> back into the main document part of
        /// <paramref name="myDoc"/>, replacing the part's previous content.
        /// </summary>
        /// <param name="myDoc">Open package whose main document part is overwritten.</param>
        /// <param name="mainDocumentXDoc">XML tree to write into the part.</param>
        private static void SaveXDocument(WordprocessingDocument myDoc, XDocument mainDocumentXDoc)
        {
            // FileMode.Create truncates the part stream so no stale bytes remain
            // when the new XML is shorter than the old one.
            using (var str = myDoc.MainDocumentPart.GetStream(FileMode.Create, FileAccess.Write))
            using (var xw = XmlWriter.Create(str))
            {
                mainDocumentXDoc.Save(xw);
            }
        }

        /// <summary>
        /// Loads the main document part of <paramref name="myDoc"/> into an <see cref="XDocument"/>.
        /// </summary>
        /// <param name="myDoc">Open package to read from.</param>
        /// <returns>The parsed XML of the main document part.</returns>
        private static XDocument GetXDocument(WordprocessingDocument myDoc)
        {
            using (var str = myDoc.MainDocumentPart.GetStream())
            using (var xr = XmlReader.Create(str))
            {
                return XDocument.Load(xr);
            }
        }
    }
}
caesay
  • 16,932
  • 15
  • 95
  • 160
dontnetnewbie
  • 159
  • 4
  • 15
  • If you insist on manipulating the XML directly like you do here (instead of using the OpenXML API), then treat it just like a regular XML document and insert your element, as you do now, wherever you want. If you choose to use the OpenXML API, then you will be able to do [much more advanced searching](https://stackoverflow.com/questions/15520585/find-a-specific-table-after-a-bookmark-in-open-xml), and insert an [AltChunk](https://msdn.microsoft.com/en-gb/library/documentformat.openxml.wordprocessing.altchunk(v=office.14).aspx) instead of inserting an `XElement`. – caesay Dec 28 '17 at 18:50

1 Answers1

3

To expand on my comment a little bit: You really shouldn't be manipulating the document XML yourself. You lose all the benefits of using OpenXML in the first place. Thus, your code could be re-written like this:

/// <summary>
/// Opens <c>../../Test3.docx</c> for editing, stores a small HTML document in a new
/// alternative-format import part, and inserts an <c>AltChunk</c> element referencing that
/// part after the last top-level paragraph of the body. When the body has no top-level
/// paragraph, the chunk goes before the section properties, or at the end of the body
/// if there are none.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    using (WordprocessingDocument myDoc = WordprocessingDocument.Open("../../Test3.docx", true))
    {
        string html =
        @"<html>
            <head/>
            <body>
                <h1>Html Heading</h1>
                <p>This is an html document in a string literal.</p>
            </body>
        </html>";

        // A fixed id such as "AltChunkId1" clashes with the relationship left behind
        // by an earlier run against the same file; generate a fresh one every time.
        // The "AltChunkId" prefix keeps the id starting with a letter.
        string altChunkId = "AltChunkId" + Guid.NewGuid().ToString("N");
        MainDocumentPart mainPart = myDoc.MainDocumentPart;
        AlternativeFormatImportPart chunk = mainPart.AddAlternativeFormatImportPart("application/xhtml+xml", altChunkId);

        using (Stream chunkStream = chunk.GetStream(FileMode.Create, FileAccess.Write))
        using (StreamWriter stringStream = new StreamWriter(chunkStream))
        {
            stringStream.Write(html);
        }

        AltChunk altChunk = new AltChunk();
        altChunk.Id = altChunkId;

        var body = mainPart.Document.Body;

        // Insert after the last top-level paragraph when there is one.
        Paragraph lastParagraph = body.Elements<Paragraph>().LastOrDefault();
        if (lastParagraph != null)
        {
            body.InsertAfter(altChunk, lastParagraph);
        }
        else
        {
            // No top-level paragraph (for example a table-only body): keep the
            // chunk ahead of the section properties, which close the body.
            SectionProperties sectionProperties = body.Elements<SectionProperties>().LastOrDefault();
            if (sectionProperties != null)
            {
                body.InsertBefore(altChunk, sectionProperties);
            }
            else
            {
                body.AppendChild(altChunk);
            }
        }

        mainPart.Document.Save();
    }
}

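Now it becomes clear that you can insert your AltChunk after, before, or inside any element in the document that accepts block-level content (for example the body or a table cell; an AltChunk cannot be nested inside a paragraph), as long as you can find the element. That part will depend on what you're searching for.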

If you're searching for a specific table, then search for a `DocumentFormat.OpenXml.Wordprocessing.Table`, and so on. Here is one example of how to search for a specific table in a document: [Find a specific Table (after a bookmark) in open xml](https://stackoverflow.com/questions/15520585/find-a-specific-table-after-a-bookmark-in-open-xml)

Here's an example of replacing a content control: https://msdn.microsoft.com/en-us/library/cc197932(v=office.12).aspx

caesay
  • 16,932
  • 15
  • 95
  • 160