7

I have an input HTML file with header and footer. It needs to be converted to RTF. The header/footer of HTML should be repeated in the resultant RTF file.

Is there any plugin that converts HTML to RTF using only JavaScript?

Gireesh SB
  • 106
  • 1
  • 1
  • 8

4 Answers

9

You can use this converter

However, it does not handle bullet points (ul and li elements).

/**
 * Converts a fragment of HTML markup into an RTF document string using a
 * fixed sequence of regular-expression substitutions.
 *
 * Handled markup: hr, br, p/div/section/article, a (hyperlinks), b/strong,
 * i/em, u/ins, strike/del, sup and sub. HTML comments are removed, and every
 * other tag is stripped while its text content is kept. Lists (ul/li) are
 * not converted; HTML entities are passed through unchanged.
 *
 * @param {string} html - The HTML markup to convert.
 * @returns {?string} The RTF document, or null when `html` is not a
 *     non-empty string.
 */
function convertHtmlToRtf(html) {
  if (!(typeof html === "string" && html)) {
      return null;
  }

  var tmpRichText, hasHyperlinks;
  var richText = html;

  // Delete HTML comments before anything else: a comment may contain ">" or
  // markup, which the tag-based substitutions below would otherwise turn
  // into stray output text.
  richText = richText.replace(/<!--[\s\S]*?-->/g, "");

  // Escape the RTF control characters (backslash and braces) found in the
  // input. This must happen before any RTF syntax is generated so that only
  // characters coming from the HTML are escaped.
  richText = richText.replace(/[\\{}]/g, "\\$&");

  // Singleton tags
  richText = richText.replace(/<(?:hr)(?:\s+[^>]*)?\s*[\/]?>/ig, "{\\pard \\brdrb \\brdrs \\brdrw10 \\brsp20 \\par}\n{\\pard\\par}\n");
  richText = richText.replace(/<(?:br)(?:\s+[^>]*)?\s*[\/]?>/ig, "{\\pard\\par}\n");

  // Empty (self-closing) tags: block-level ones become an empty paragraph,
  // any other self-closing tag is dropped.
  richText = richText.replace(/<(?:p|div|section|article)(?:\s+[^>]*)?\s*[\/]>/ig, "{\\pard\\par}\n");
  richText = richText.replace(/<(?:[^>]+)\/>/g, "");

  // Hyperlinks
  // Anchors whose href is empty or a no-op ("#", "javascript:void(0)", ...)
  // become plain groups; three braces are opened so that the shared "}}}"
  // emitted for </a> balances them just like a real hyperlink field.
  richText = richText.replace(
      /<a(?:\s+[^>]*)?(?:\s+href=(["'])(?:javascript:void\(0?\);?|#|return false;?|void\(0?\);?|)\1)(?:\s+[^>]*)?>/ig,
      "{{{\n");
  tmpRichText = richText;
  // The href value is captured lazily so that it stops at the first matching
  // closing quote instead of swallowing following attributes or later links
  // on the same line.
  richText = richText.replace(
      /<a(?:\s+[^>]*)?(?:\s+href=(["'])(.+?)\1)(?:\s+[^>]*)?>/ig,
      "{\\field{\\*\\fldinst{HYPERLINK\n \"$2\"\n}}{\\fldrslt{\\ul\\cf1\n");
  // The colour table is only needed when at least one real link was emitted.
  hasHyperlinks = richText !== tmpRichText;
  // Remaining anchors (no href at all) are treated like no-op links.
  richText = richText.replace(/<a(?:\s+[^>]*)?>/ig, "{{{\n");
  richText = richText.replace(/<\/a(?:\s+[^>]*)?>/ig, "\n}}}");

  // Start tags
  richText = richText.replace(/<(?:b|strong)(?:\s+[^>]*)?>/ig, "{\\b\n");
  richText = richText.replace(/<(?:i|em)(?:\s+[^>]*)?>/ig, "{\\i\n");
  richText = richText.replace(/<(?:u|ins)(?:\s+[^>]*)?>/ig, "{\\ul\n");
  richText = richText.replace(/<(?:strike|del)(?:\s+[^>]*)?>/ig, "{\\strike\n");
  richText = richText.replace(/<sup(?:\s+[^>]*)?>/ig, "{\\super\n");
  richText = richText.replace(/<sub(?:\s+[^>]*)?>/ig, "{\\sub\n");
  richText = richText.replace(/<(?:p|div|section|article)(?:\s+[^>]*)?>/ig, "{\\pard\n");

  // End tags
  richText = richText.replace(/<\/(?:p|div|section|article)(?:\s+[^>]*)?>/ig, "\n\\par}\n");
  richText = richText.replace(/<\/(?:b|strong|i|em|u|ins|strike|del|sup|sub)(?:\s+[^>]*)?>/ig, "\n}");

  // Strip any other remaining HTML tags [but leave their contents]
  richText = richText.replace(/<(?:[^>]+)>/g, "");

  // Prefix and suffix the rich text with the necessary syntax; the colour
  // table defines the blue (\cf1) used for hyperlink text.
  richText =
      "{\\rtf1\\ansi\n" + (hasHyperlinks ? "{\\colortbl\n;\n\\red0\\green0\\blue255;\n}\n" : "") + richText +  "\n}";

  return richText;
}
Samra
  • 1,815
  • 4
  • 35
  • 71
5

After a bit of search I found a working solution:

https://www.npmjs.com/package/html-to-rtf

With html-to-rtf the conversion is easy (here's a piece of code based on browserify):

// Load the html-to-rtf npm package (CommonJS require; bundled for the browser with browserify).
var htmlToRtf = require('html-to-rtf');
var htmlText = "<div>...</div>"; // placeholder: replace with the HTML markup you want to convert
var htmlAsRtf = htmlToRtf.convertHtmlToRtf(htmlText); // the result of converting htmlText to RTF

This solution worked for me. Without browserify, you'll have to find the required JS files inside the modules downloaded with npm and link them to your HTML page.

Cindy Meister
  • 25,071
  • 21
  • 34
  • 43
Francisco Valle
  • 613
  • 10
  • 10
0

I applied @Samra's solution and it was working well. But then I spotted a bug in the output: some text was cut off. After a lot of investigation, it turned out that HTML comments (<!-- xxxx -->) weren't being handled properly. My solution was to add this richText transformation as the first one:

// Delete HTML comments. [\s\S]*? matches any characters, including newlines,
// lazily, so each comment ends at its own first "-->".
richText = richText.replace(/<!--[\s\S]*?-->/ig,"");
Dertalai
  • 198
  • 1
  • 9
0

Based on Francisco Valle's answer, I created a bundle to easily include it in your web project:

https://github.com/geraphl/javascript-html-to-rtf-browser

<script src="~/js/html-to-rtf-browser.min.js"></script>

Then you can convert HTML to Microsoft's RTF format by adding

// Create a converter from the window.htmlToRtf constructor
// (presumably defined by the html-to-rtf-browser bundle script; verify against the bundle).
var htmlToRtfLocal = new window.htmlToRtf();
// Convert the markup held in htmlContent (defined by the caller).
var rtfContent = htmlToRtfLocal.convertHtmlToRtf(htmlContent);

to your JavaScript.

geraphl
  • 305
  • 4
  • 10