I am trying to parse hebrew rss like this one: http://rss.walla.co.il/?w=/3/0/12/@rss.e
I am using feedparser and request, and the problem is that the encoding is windows-1255 and not UTF-8
so I see the text like: ����� ������� , and not like a regular hebrew text.
I tried some converts (like iconv-lite) but I did not succeed.
This is my code:
function getAll(URL) {
var request = require('request');
request(URL, function (error, response, body) {
if (!error && response.statusCode == 200) {
var allXML = body.substring(body.indexOf('<title>') + ('<title>').length, body.indexOf('</title>'));
var text = iconv.decode(new Buffer(allXML), 'win1255');
console.log("text = ", text);
}
})
}
and this is what it print: text = ן¿½ן¿½ן¿½ן¿½ן¿½! ן¿½ן¿½ן¿½ן¿½ן¿½ - ן¿½ן¿½ן¿½ן¿½ן¿½