The tricky part is to pass encoding as null to get a Buffer instead of a string.
encoding - encoding to be used on setEncoding of response data. If null, the body is returned as a Buffer.
—request
var request = require('request');
var legacy = require('legacy-encoding');
// Ask `request` for the raw bytes: with `encoding: null` the body is handed to
// the callback as a Buffer instead of an already-decoded string.
var requestSettings = {
  method: 'GET',
  url: 'http://www.chinanews.com/rss/scroll-news.xml',
  encoding: null,
};

request(requestSettings, function(error, response, body) {
  // If the request itself failed there is no body to decode, so report the
  // error instead of passing an undefined value to the decoder.
  if (error) {
    console.error(error);
    return;
  }

  // Decode the raw bytes as gb2312 (the encoding this feed is served in).
  var text = legacy.decode(body, 'gb2312');
  console.log(text);
});
Again, in the context of the follow-up question, "Is there a way I could detect encoding?"
By "detect" I hope you mean, find the declaration. (…as opposed to guessing. If you have to guess then you have a failed communication.) The HTTP response header Content-Type is the primary way to communicate the encoding (if applicable to the MIME type). Some MIME types allow the encoding to be declared within the content, and servers quite rightly defer to that.
In the case of your RSS response, the server sends Content-Type: text/xml, which is without an encoding override, and the content's XML declaration is <?xml version="1.0" encoding="gb2312"?>.
The XML specification has procedures for finding such a declaration. It basically amounts to reading with different encodings until the XML declaration becomes intelligible, and then re-reading with the declared encoding.
var request = require('request');
var legacy = require('legacy-encoding');
var convert = require('xml-js');
// specials listed here: https://www.w3.org/Protocols/rfc1341/4_Content-Type.html
// Captures the value of the charset parameter of a Content-Type header.
// The literal is used as-is: calling .compile() with no arguments would
// re-initialise the regex with an empty pattern, which matches any input and
// has no capture group, so the header charset would never be picked up.
var charsetFromContentTypeRegex = /charset=([^()<>@,;:\"/[\]?.=\s]*)/i;

// `encoding: null` makes `request` deliver the body as a Buffer so that it can
// be decoded here with whichever encoding turns out to be declared.
var requestSettings = {
  method: 'GET',
  url: 'http://www.chinanews.com/rss/scroll-news.xml',
  encoding: null,
};

request(requestSettings, function(error, response, body) {
  // Without a response there is nothing to inspect or decode.
  if (error) {
    console.error(error);
    return;
  }

  // exec() returns null when the header has no charset parameter, and the
  // header may be missing altogether; both cases mean "no override".
  var contentType = charsetFromContentTypeRegex.exec(response.headers['content-type'] || '');
  var encodingFromHeader = contentType && contentType[1] ? contentType[1] : null;

  var encoding = encodingFromHeader;
  if (!encoding) {
    // No override in the header: parse once just to read the encoding named in
    // the XML declaration, then decode and parse again with that encoding.
    var prolog = convert.xml2js(body);
    var declared = prolog.declaration && prolog.declaration.attributes;
    // XML without an encoding declaration is treated as UTF-8.
    encoding = (declared && declared.encoding) || 'utf-8';
  }

  var doc = convert.xml2js(legacy.decode(body, encoding));
  // xpath /rss/channel/title
  console.log(doc.elements[1].elements[0].elements[0].elements[0].text);
});