0

I am stuck on the last bit of the code to create a small search engine. So far I have been able to let users do some actions as select a folder where the files to search are stored, create an index, search for keyword and then export excerpt of the text around the keyword to a txt file. This is the layout enter image description here

And this is the code I have used:

from PyQt5 import QtCore, QtGui, QtWidgets, QtWidgets
from PyQt5.QtWidgets import QHeaderView
import os, os.path
import glob
import os
from PyPDF2 import PdfFileReader, PdfFileWriter
import pdftotext
from whoosh import index
from whoosh.fields import Schema, TEXT, ID, STORED
from whoosh.analysis import RegexTokenizer
from whoosh.analysis import StopFilter
from whoosh import scoring 
from whoosh.index import open_dir
from whoosh import qparser

class Ui_MainWindow(object):
    def setupUi(self, MainWindow):
        MainWindow.setObjectName("MainWindow")
        MainWindow.resize(1126, 879)
        self.centralwidget = QtWidgets.QWidget(MainWindow)
        self.centralwidget.setObjectName("centralwidget")
        self.pushButton = QtWidgets.QPushButton(self.centralwidget)
        self.pushButton.setGeometry(QtCore.QRect(40, 30, 100, 30))
        self.pushButton.setObjectName("pushButton")
        self.pushButton_2 = QtWidgets.QPushButton(self.centralwidget)
        self.pushButton_2.setGeometry(QtCore.QRect(180, 30, 120, 30))
        self.pushButton_2.setObjectName("pushButton_2")
        self.pushButton_3 = QtWidgets.QPushButton(self.centralwidget)
        self.pushButton_3.setGeometry(QtCore.QRect(620, 30, 80, 30))
        self.pushButton_3.setObjectName("pushButton_3")
        self.lineEdit = QtWidgets.QLineEdit(self.centralwidget)
        self.lineEdit.setGeometry(QtCore.QRect(380, 60, 191, 21))
        self.lineEdit.setObjectName("lineEdit")
        self.lineEdit_2 = QtWidgets.QLineEdit(self.centralwidget)
        self.lineEdit_2.setGeometry(QtCore.QRect(40, 90, 50, 21))
        self.lineEdit_2.setObjectName("lineEdit_2")
        self.label = QtWidgets.QLabel(self.centralwidget)
        self.label.setGeometry(QtCore.QRect(380, 30, 50, 35))
        font = QtGui.QFont()
        font.setPointSize(10)
        self.label.setFont(font)
        self.label.setObjectName("label")
        self.label2 = QtWidgets.QLabel(self.centralwidget)
        self.label2.setGeometry(QtCore.QRect(40, 70, 150, 16))
        font = QtGui.QFont()
        font.setPointSize(10)
        self.label2.setFont(font)
        self.label2.setObjectName("label")
        self.tableView = QtWidgets.QTableView(self.centralwidget)
        self.tableView.setGeometry(QtCore.QRect(0, 120, 1121, 721))
        self.tableView.setObjectName("tableView")
        self.horizontal_header = self.tableView.horizontalHeader()
        self.vertical_header = self.tableView.verticalHeader()
        self.horizontal_header.setSectionResizeMode(
                               QHeaderView.ResizeToContents
                               )
        self.vertical_header.setSectionResizeMode(
                             QHeaderView.ResizeToContents
                             )
        self.horizontal_header.setStretchLastSection(True)
        self.tableView.showGrid()
        self.tableView.wordWrap()
        MainWindow.setCentralWidget(self.centralwidget)
        self.menubar = QtWidgets.QMenuBar(MainWindow)
        self.menubar.setGeometry(QtCore.QRect(0, 0, 1126, 21))
        self.menubar.setObjectName("menubar")
        MainWindow.setMenuBar(self.menubar)
        self.statusbar = QtWidgets.QStatusBar(MainWindow)
        self.statusbar.setObjectName("statusbar")
        MainWindow.setStatusBar(self.statusbar)

        self.retranslateUi(MainWindow)
        QtCore.QMetaObject.connectSlotsByName(MainWindow)

        self.pushButton.clicked.connect(self.open_directory)
        self.pushButton_2.clicked.connect(self.createindex)
        self.pushButton_3.clicked.connect(self.export)
        self.lineEdit.returnPressed.connect(self.search)


    def open_directory(self):
        self.dialog = QtWidgets.QFileDialog()
        self.folder_path = self.dialog.getExistingDirectory(None, "Select Folder")
        return self.folder_path

    def createindex(self):
        os.chdir(self.folder_path)
        self.mypdfiles = glob.glob("*.pdf")

#creation of folder for splitted files
        MYDIR = ("Splitted")
        CHECK_FOLDER = os.path.isdir(MYDIR)
        if not CHECK_FOLDER:
            os.makedirs(MYDIR)

# save split downloaded file and save into new folder
        for self.file in self.mypdfiles:
            self.fname = os.path.splitext(os.path.basename(self.file))[0]
            self.pdf = PdfFileReader(self.file)
            for self.page in range(self.pdf.getNumPages()):
                self.pdfwrite = PdfFileWriter()
                self.pdfwrite.addPage(self.pdf.getPage(self.page))
                self.outputfilename = '{}_page_{}.pdf'.format(self.fname, self.page+1)
                with open(os.path.join("./Splitted", self.outputfilename), 'wb') as out:
                     self.pdfwrite.write(out)

        print('Created: {}'.format(self.outputfilename))

#set working directory 
        os.chdir(self.folder_path + "/Splitted")

        self.spltittedfiles = glob.glob("*.pdf")
        MYDIR = ("Txt")
        CHECK_FOLDER = os.path.isdir(MYDIR)
        if not CHECK_FOLDER:
            os.makedirs(MYDIR)
# Load your PDF
        for self.file in self.spltittedfiles:
            with open(self.file, "rb") as f:
                self.pdf = pdftotext.PDF(f)

#creation of folder for splitted files


# Save all text to a txt file.
            with open(os.path.join("./TXT", os.path.splitext(os.path.basename(self.file))[0] + ".txt") , 'w', encoding = 'utf-8') as f:
                f.write("\n\n".join(self.pdf))
            f.close()

        os.chdir(self.folder_path)
        MYDIR = ("indexdir")
        CHECK_FOLDER = os.path.isdir(MYDIR)
        if not CHECK_FOLDER:
            os.makedirs(MYDIR)

        self.my_analyzer = RegexTokenizer()| StopFilter(lang = "en")
        self.schema = Schema(title=TEXT(stored=True),path=ID(stored=True), 
                        content=TEXT(analyzer = self.my_analyzer),
                        textdata=TEXT(stored=True))

# set an index writer to add document as per schema
        self.ix = index.create_in("indexdir",self.schema)
        self.writer = self.ix.writer()

        self.filepaths = [os.path.join("./Splitted/Txt",i) for i in os.listdir("./Splitted/Txt")]
        for path in self.filepaths:
            self.fp = open(path, "r", encoding='utf-8')
            self.text = self.fp.read()
            self.writer.add_document(title = os.path.splitext(os.path.basename(path))[0] , path=path, content=self.text,textdata=self.text)
            self.fp.close()
        self.writer.commit()

    def search(self):
        os.chdir(self.folder_path)
        self.ix = open_dir("indexdir")
        MYDIR = ("Results")
        CHECK_FOLDER = os.path.isdir(MYDIR)
        if not CHECK_FOLDER:
            os.makedirs(MYDIR) 
        self.text = self.lineEdit.text()
        self.query_str = self.text
        self.query = qparser.QueryParser("textdata", schema = self.ix.schema)
        self.q = self.query.parse(self.query_str)
        self.topN = self.lineEdit_2.text()
        if self.lineEdit_2.text() == "":
            self.topN = 1000           
        else:
            self.topN = int(self.lineEdit_2.text())
        with self.ix.searcher(weighting=scoring.Frequency) as searcher:
            self.results = searcher.search(self.q, terms=True, limit=self.topN)
            for self.i in range(self.topN):
                    print(self.results[self.i]['title'], self.results[self.i]['textdata']) 

    def export(self):
        with self.ix.searcher(weighting=scoring.Frequency) as searcher:
            self.results = searcher.search(self.q, terms=True, limit= None)
            for self.i in range(self.topN):
                with open(os.path.join(self.folder_path, self.text + ".txt"), 'a') as f:
                    print(self.results[self.i]['title'], self.results[self.i]['textdata'], file=f)         


    def retranslateUi(self, MainWindow):
        _translate = QtCore.QCoreApplication.translate
        MainWindow.setWindowTitle(_translate("MainWindow", "Search Text"))
        self.pushButton.setText(_translate("MainWindow", "Select Folder"))
        self.pushButton_2.setText(_translate("MainWindow", "Create Database"))
        self.pushButton_3.setText(_translate("MainWindow", "Export"))
        self.label.setText(_translate("MainWindow", "Search"))
        self.label2.setText(_translate("MainWindow", "Top Results"))

if __name__ == "__main__":
    import sys
    app = QtWidgets.QApplication(sys.argv)
    MainWindow = QtWidgets.QMainWindow()
    ui = Ui_MainWindow()
    ui.setupUi(MainWindow)
    MainWindow.show()
    sys.exit(app.exec_())

What I want to do now is to show the results also in the table. I have been trying to understand how to "send" the returned value of the search function to the table and how to show it. It should have two columns: File_Page and Content and as many rows as top results selected. Each row should then show the file with hit and the text of interest like this:

enter image description here

So far I have been able to just set to parameter of the table, but not much more. Is there any mean to let the table how the results without pushing any other button? As I have so far understood, is it possible to trigger the same function from different places of the code but I did not find the opposite, that is to activate two functions with just one signal

I have found lots of examples but none suits the objective. I am still learning how to use Python and I have never used C++.

  • Your code doesn't make a lot of sense (especially the return in the for cycle) and it's not very clear what you're asking. If you want to know how to fill the data of a table, you should study the documentation about [QTableWidget](https://doc.qt.io/qt-5/qtablewidget.html) (which will probably suit your needs), its base class [QTableView](https://doc.qt.io/qt-5/qtableview.html) and, generally, how the [model/view](https://doc.qt.io/qt-5/model-view-programming.html) pattern works with Qt. – musicamante Jun 13 '20 at 09:58
  • I have added a new function and called it from setup UI – Vito Piepoli Jul 05 '20 at 10:42

2 Answers2

1

Use other signal to trigger layoutChanged. i.e. QLineEdit's signal.

Mind me if I understood your question wrong, assuming you want to update search results as soon as you type something in Search field.

In that case, here's neat working example demonstrating immediate searching on 5 entries:

enter image description here

from PySide2.QtWidgets import QWidget, QApplication, QPushButton, QVBoxLayout, QLineEdit
from PySide2.QtCore import QAbstractTableModel, QModelIndex, Qt, QObject
from PySide2.QtWidgets import QTableView
import sys


class Table(QAbstractTableModel):
    def __init__(self, data):
        super().__init__()
        self._data = data

    def data(self, index: QModelIndex, role: int = ...):
        if role == Qt.DisplayRole:
            return self._data[index.row()][index.column()]

    def rowCount(self, parent: QModelIndex = ...) -> int:
        return len(self._data)

    def columnCount(self, parent: QModelIndex = ...) -> int:
        return len(self._data[0])

    def overWriteData(self, new_list):
        self._data = new_list


class MainWindow(QWidget):
    def __init__(self):
        super(MainWindow, self).__init__()
        self.table = QTableView()
        self.line = QLineEdit()
        self.layout = QVBoxLayout()
        self.layout.addWidget(self.table)
        self.layout.addWidget(self.line)

        self.data = [('stack overflow', 'some_fancy_data'),
                     ('stack overflow', 'some_fancy_data'),
                     ('stack underflow', 'some_fancy_data'),
                     ('Server Fault', 'some_fancy_data'),
                     ('Ask Ubuntu', 'some_fancy_data')]

        self.model = Table(self.data)
        self.table.setModel(self.model)

        self.setLayout(self.layout)
        self.line.textChanged.connect(self.update)


    def update(self):
        filtered = [i for i in self.data if self.line.text() in i[0]]
        if filtered:
            self.model.overWriteData(filtered)
            self.model.layoutChanged.emit()


if __name__ == "__main__":
    app = QApplication(sys.argv)
    window = MainWindow()
    window.show()
    sys.exit(app.exec_())
jupiterbjy
  • 2,882
  • 1
  • 10
  • 28
  • your reply very good and could be useful if applied to a single file. But I am working with multiple files stores in an index table. I need to get the QTable working within the bit of code already working. – Vito Piepoli Jun 13 '20 at 19:48
  • Meaning that you want to load up new file onto index table? Or want to hot-swap index table on the fly? – jupiterbjy Jun 14 '20 at 04:04
  • it means that I want to use the QtableView to show the results of the search function. I have uploaded the entire code on the question space. – Vito Piepoli Jun 14 '20 at 06:47
0

I have shifted to Qtable Widget, created a new function (datatable) and called it from setup UI. This is the new setupUI:

class Ui_MainWindow(object):
    def setupUi(self, MainWindow):
        MainWindow.setObjectName("MainWindow")
        MainWindow.resize(1126, 879)
        self.centralwidget = QtWidgets.QWidget(MainWindow)
        self.centralwidget.setObjectName("centralwidget")
        self.pushButton = QtWidgets.QPushButton(self.centralwidget)
        self.pushButton.setGeometry(QtCore.QRect(40, 30, 100, 30))
        self.pushButton.setObjectName("pushButton")
        self.pushButton_2 = QtWidgets.QPushButton(self.centralwidget)
        self.pushButton_2.setGeometry(QtCore.QRect(180, 30, 120, 30))
        self.pushButton_2.setObjectName("pushButton_2")
        self.pushButton_3 = QtWidgets.QPushButton(self.centralwidget)
        self.pushButton_3.setGeometry(QtCore.QRect(620, 30, 80, 30))
        self.pushButton_3.setObjectName("pushButton_3")
        self.pushButton_4 = QtWidgets.QPushButton(self.centralwidget)
        self.pushButton_4.setGeometry(QtCore.QRect(180, 80, 80, 30))
        self.pushButton_4.setObjectName("pushButton_4")
        self.lineEdit = QtWidgets.QLineEdit(self.centralwidget)
        self.lineEdit.setGeometry(QtCore.QRect(380, 60, 191, 21))
        self.lineEdit.setObjectName("lineEdit")
        self.lineEdit_2 = QtWidgets.QLineEdit(self.centralwidget)
        self.lineEdit_2.setGeometry(QtCore.QRect(40, 90, 50, 21))
        self.lineEdit_2.setObjectName("lineEdit_2")
        self.label = QtWidgets.QLabel(self.centralwidget)
        self.label.setGeometry(QtCore.QRect(380, 30, 50, 35))
        font = QtGui.QFont()
        font.setPointSize(10)
        self.label.setFont(font)
        self.label.setObjectName("label")
        self.label2 = QtWidgets.QLabel(self.centralwidget)
        self.label2.setGeometry(QtCore.QRect(40, 70, 150, 16))
        font = QtGui.QFont()
        font.setPointSize(10)
        self.label2.setFont(font)
        self.label2.setObjectName("label")
        self.tableWidget = QtWidgets.QTableWidget(self.centralwidget)
        self.tableWidget.setGeometry(QtCore.QRect(0, 120, 1121, 721))
        self.tableWidget.setObjectName("tableWidget")
        MainWindow.setCentralWidget(self.centralwidget)
        self.menubar = QtWidgets.QMenuBar(MainWindow)
        self.menubar.setGeometry(QtCore.QRect(0, 0, 1126, 21))
        self.menubar.setObjectName("menubar")
        MainWindow.setMenuBar(self.menubar)
        self.statusbar = QtWidgets.QStatusBar(MainWindow)
        self.statusbar.setObjectName("statusbar")
        MainWindow.setStatusBar(self.statusbar)

        self.retranslateUi(MainWindow)
        QtCore.QMetaObject.connectSlotsByName(MainWindow)

        self.pushButton.clicked.connect(self.open_directory)
        self.pushButton_2.clicked.connect(self.createindex)
        self.pushButton_3.clicked.connect(self.export)
        self.pushButton_4.clicked.connect(self.OCR)
        self.lineEdit.returnPressed.connect(self.search)
        self.lineEdit.returnPressed.connect(self.datatable)

And this is the function that takes the data returned from another function within the class.

              
        numrows = len(self.data)
        numcols = len(self.data[0])
        self.tableWidget.setColumnCount(numcols)
        self.tableWidget.setRowCount(numrows)
        self.tableWidget.setHorizontalHeaderLabels((list(self.data[0].keys())))
        self.tableWidget.resizeColumnsToContents()
        self.tableWidget.horizontalHeader().setSectionResizeMode(QHeaderView.ResizeToContents)
        self.tableWidget.verticalHeader().setSectionResizeMode(QHeaderView.ResizeToContents)
        for row in range(numrows):
            for column in range(numcols):
                item = (list(self.data[row].values())[column])
                self.tableWidget.setItem(row, column, QTableWidgetItem(item))  ```