It is a code made for identifying CEFR level of typing words. Basically frame see as below picture, I used QRegExp and QSyntaxHighlighter. The mainly function logic is through QSyntaxHighlighter to highlight specific words in the QPlaintextedit window, that specific words is a "dataframe" file storage around 46k vocabularies.
The problem is the more words I typed, the more slower this code will perform. like a few seconds to perform after you typed over 20 words.
I have tried using QThread(maybe used wrongly),change faster read method(not .csv instead .jay),change QReExp to QRegularExpression code, but none of them is worked as expected.
Now have no idea to keep through, can you help me to find out what is wrong with the code?
------------
# Main code
------------
from PySide2.QtWidgets import QApplication, QMessageBox, QDialogButtonBox
from PySide2.QtUiTools import QUiLoader #Qt designer ui loader
from PySide2.QtGui import QIcon #load icon library
from PySide2 import QtWidgets, QtCore
from PySide2.QtCore import QThread,Signal
import enchant
import English_syntax
import PySide2
class Stats:
def __init__(self):
#self.ui = QUiLoader().load('ui/main_page.ui')
self.ui =Uidocument
self.ui.comboBox.addItems(['Standard English', 'British English', 'American English'])
self.ui.plainTextEdit.textChanged.connect(self.cal_num)
def Highlight(self):
highlight = English_syntax.PythonHighlighter(Uidocument.plainTextEdit.document())
def cal_num(self):
text = self.ui.plainTextEdit.toPlainText() # get text
num = len(text)
words = len(text.split(' '))-1 #empty is equal to 1, need to -1
self.ui.textCount.setText(str(words)+" token(s) " +str(num)+ " bytes")
def handleDisplay(self,data):
self.ui.plainTextEdit.setPlainText(data)
class Worker(QThread):
#sig = Signal(str)
def __init__(self, parent=None):
super(Worker, self).__init__(parent)
self.run()
def run(self):
#while True:
highlight = English_syntax.PythonHighlighter(Uidocument.plainTextEdit.document())
print('Connected successfully')
if __name__ == "__main__":
app = 0
app = QApplication([])
app.setWindowIcon((QIcon('ui/images.png'))) #show icon
Uidocument = QUiLoader().load('ui/main_page.ui')
stats = Stats()
#highlight = English_syntax.PythonHighlighter(Uidocument.plainTextEdit.document())
thread = Worker()
thread.start()
Uidocument.show()
# stats.ui.show()
app.exec_()
----------------
# English_syntax
----------------
import pandas as pd
import numpy as np
from PySide2 import QtGui, QtWidgets
from PySide2.QtCore import QRegExp, Qt
import time
def format(color, style=''):
"""Return a QTextCharFormat with the given attributes.
"""
_color = QtGui.QColor()
_color.setNamedColor(color)
_format = QtGui.QTextCharFormat()
_format.setForeground(_color)
if 'bold' in style:
_format.setFontWeight(QtGui.QFont.Bold)
if 'italic' in style:
_format.setFontItalic(True)
return _format
# Syntax styles that can be shared by all languages
STYLES = {
'A1': format('#79C740'),
'A2': format('#CDE234'),
'B1': format('#F0B044'),
'B2': format('#E68237'),
'C1': format('#D95952'),
'C2': format('#D95999'),
'Unrecorded': format('#9a9a9a'), #gray
'string': format('magenta'),
}
class PythonHighlighter (QtGui.QSyntaxHighlighter):
"""Syntax highlighter for the CEFR words.
"""
start = time.perf_counter()
#data storage type: feather
pathlist = "data.feather"
pd_datas = pd.read_feather(pathlist)
A1s = pd_datas['A1'].tolist()
A2s = pd_datas['A2'].tolist()
B1s = pd_datas['B1'].tolist()
B2s = pd_datas['B2'].tolist()
C1s = pd_datas['C1'].tolist()
C2s = pd_datas['C2'].tolist()
dur = time.perf_counter()-start
print("data reading used: {:.6f}s".format(dur))
def __init__(self, parent: QtGui.QTextDocument) -> None:
super().__init__(parent)
start_1 = time.perf_counter()
rules = []
# Combine rules
rules += [(r'\b%s\b' % q, 0, STYLES['A1'])
for q in PythonHighlighter.A1s]
rules += [(r'\b%s\b' % w, 0, STYLES['A2'])
for w in PythonHighlighter.A2s]
rules += [(r'\b%s\b' % e, 0, STYLES['B1'])
for e in PythonHighlighter.B1s]
rules += [(r'\b%s\b' % r, 0, STYLES['B2'])
for r in PythonHighlighter.B2s]
rules += [(r'\b%s\b' % t, 0, STYLES['C1'])
for t in PythonHighlighter.C1s]
rules += [(r'\b%s\b' % y, 0, STYLES['C2'])
for y in PythonHighlighter.C2s]
self.rules = [(QRegExp(pat,cs=Qt.CaseInsensitive), index, fmt)
for (pat, index, fmt) in rules ]
end = time.perf_counter()-start_1
print("Analysing used: {:.6f}s".format(end))
def highlightBlock(self, text):
"""Apply syntax highlighting to the given block of text.
"""
# Do syntax formatting
for expression, nth, format in self.rules:
index = expression.indexIn(text, 0)
#print (index)
while index >= 0:
s = time.perf_counter()
# We actually want the index of the nth match
index = expression.pos(nth)
length = len(expression.cap(nth))
self.setFormat(index, length, format) #render No."index",word lengh is "length",to "format" type
index = expression.indexIn(text, index + length)
d = time.perf_counter() - s
print("Vocabulary match used:{:.6f}s ".format(d))
#print(index,length,format)
self.setCurrentBlockState(0)