It seems i've run a problem with the encoding itself in where i need to pass Bing translation junks..
def _unicode_urlencode(params):
if isinstance(params, dict):
params = params.items()
return urllib.urlencode([(k, isinstance(v, unicode) and v.encode('utf-8') or v) for k, v in params])
def _run_query(args):
data = _unicode_urlencode(args)
sock = urllib.urlopen(api_url + '?' + data)
result = sock.read()
if result.startswith(codecs.BOM_UTF8):
result = result.lstrip(codecs.BOM_UTF8).decode('utf-8')
elif result.startswith(codecs.BOM_UTF16_LE):
result = result.lstrip(codecs.BOM_UTF16_LE).decode('utf-16-le')
elif result.startswith(codecs.BOM_UTF16_BE):
result = result.lstrip(codecs.BOM_UTF16_BE).decode('utf-16-be')
return json.loads(result)
def set_app_id(new_app_id):
global app_id
app_id = new_app_id
def translate(text, source, target, html=False):
"""
action=opensearch
"""
if not app_id:
raise ValueError("AppId needs to be set by set_app_id")
query_args = {
'appId': app_id,
'text': text,
'from': source,
'to': target,
'contentType': 'text/plain' if not html else 'text/html',
'category': 'general'
}
return _run_query(query_args)
...
text = translate(sys.argv[2], 'en', 'tr')
HOST = '127.0.0.1'
PORT = 894
s = socket.socket()
s.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
s.connect((HOST, PORT))
s.send("Bing translation: " + text.encode('utf8') + "\r");
s.close()
As you can see, if the translated text contains some turkish characters, the script fails to send the text to the socket..
Do you have any idea on how to get rid of this?
Regards.