2

I find next code, by using it i can get text comments from groups. With vk api no problems, all is good. But there something wrong with printing in excel:

import vk_requests
import time
import xlsxwriter
import datetime
import codecs

api = vk_requests.create_api(app_id='', 
                             login='', 
                             password='', 
                             phone_number='') 


Group_id = 1
Topic_id = 2

#первое цифровое значение по ссылке - группа, второе - обсуждение

a = api.board.getComments(group_id=Group_id, topic_id=Topic_id)
CommentsCount = a['count']

print('N = ',CommentsCount)

num = 10 # input(':\n')
count = num


def GetTopicInfo(Offset):
    r = api.board.getComments(group_id=Group_id,
                              topic_id=Topic_id,
                              count=num,
                              offset=Offset)
    return r

# Create a workbook and add a worksheet.

'''
workbook = xlsxwriter.Workbook('data.xlsx')

worksheet = workbook.add_worksheet()

worksheet.write('A1', 'Hello world')

workbook.close()

'''

workbook  = xlsxwriter.Workbook('unicode_data.xlsx',{'strings_to_urls': False})

worksheet = workbook.add_worksheet()

bold = workbook.add_format({'bold': True})


#Создание колонок
worksheet.write(0, 0, u'Date', bold) 
worksheet.write(0, 1, u'Author', bold)
worksheet.write(0, 2, u'id', bold)
worksheet.write(0, 3, u'Text', bold)

Offset = 0
row = 1
col = 0

while Offset < CommentsCount:
    r = GetTopicInfo(Offset)
    i = 0
    print(Offset)
    try:
        while i < num:
            Topic_info = r['items'][i]
            Topic_info_from = r['items'][i]['from_id']
            Topic_info_id = r['items'][i]['id']
            Topic_info_text = r['items'][i]['text']
            Topic_info_date =  datetime.datetime.fromtimestamp(
                int(r['items'][i]['date'])
                ).strftime('%Y-%m-%d %H:%M:%S')

            worksheet.write(row, col,     Topic_info_date)
            worksheet.write(row, col + 1, Topic_info_from)
            worksheet.write(row, col + 2, Topic_info_id)
            worksheet.write(row, col + 3, Topic_info_text)
            row += 1
            print(i, Topic_info)
            i = i + 1
    except IndexError:
        print(u'Конец')
    Offset = Offset + num
    time.sleep(.35)

workbook.close()

And i have that error's:

Traceback (most recent call last):
  File "vk.py", line 90, in <module>
    print(i, Topic_info)

  File "C:\Program Files\Anaconda3\lib\encodings\cp866.py", line 19, in encode
    return codecs.charmap_encode(input,self.errors,encoding_map)[0]
UnicodeEncodeError: 'charmap' codec can't encode character '\u0306' in position
497: character maps to <undefined>

Exception ignored in: <bound method Workbook.__del__ of <xlsxwriter.workbook.Wor
kbook object at 0x00000064530D6518>>

Traceback (most recent call last):

  File "C:\Program Files\Anaconda3\lib\site-packages\xlsxwriter\workbook.py", li
ne 148, in __del__

Exception: Exception caught in workbook destructor. Explicit close() may be requ
ired for workbook.

Code in comments works well, but in while operator does not. What i must to rewrite for parsing?

  • It looks like a character encoding issue. The strings that you write to the Excel/XlsxWriter file must be UTF-8 encoded. You probably need to decode the strings that you are reading from the `getComments()` api before writing them with XlsxWriter. That is why the simpler, commented out, version works. – jmcnamara Nov 24 '16 at 08:06
  • a = a.decode('utf8') dont work. What correct function to decode is? – Ivan Blagopoluchnyi Nov 24 '16 at 11:38
  • board.getComments returns a list of messages in vk topic. – Ivan Blagopoluchnyi Nov 24 '16 at 12:09
  • At a guess I would say `decode('cp866')` if the input text is Cyrillic. It depends on the encoding of the input text. Maybe try `latin1` as well. – jmcnamara Nov 25 '16 at 09:59
  • What i get: Traceback (most recent call last): File "vk.py", line 17, in a = a.decode('cp866') AttributeError: 'dict' object has no attribute 'decode', with latin1 pointer in have similar error. It also works if i comment/delete print function. – Ivan Blagopoluchnyi Nov 30 '16 at 23:20

0 Answers0