0

The code below is supposed to run through a list of Word documents and based on a file containing keywords find the paragraphs containing any of those words, then subsequently copy the corresponding paragraphs over to a destination Word file. The code works but there is something strange with the execution depending on the number of files it has to search through. If the number of files is small (say 5 files), it works ok. If however the number of files is increased, the program hangs up at the initiation of the first Word object (wordAppx).

Now, if I hardcode the path to the keyword file, the program runs ok. It somehow doesn't want to work with the FileDialog methods. I find this weird since the FileDialog method has nothing to do with the number of files, it's purely there to select the parent folder path and the keyword file path.

Perhaps someone would be so kind and point me in the right directon, please? Kind regards, Krystof

Sub Main2()

Dim i As Integer, j As Integer
Dim r1 As Range
Dim s1 As Worksheet, s2 As Worksheet
Dim wb1 As Workbook
Dim str1 As String, str2 As String, str3 As String
Dim dir1 As String

TextParss

End Sub

Sub SubFold(fold1 As Object)

Dim subfold1 As Object
Dim s1 As Worksheet
Dim f As Object, f1 As Object
Dim fstr As String

Set s1 = Sheets("Arkusz1")

For Each subfold1 In fold1.subfolders
    SubFold subfold1
    s1.Cells(lrow(s1, 1) + 1, 1) = subfold1.Path

Next

For Each file1 In fold1.Files
    s1.Cells(lrow(s1, 5) + 1, 5) = file1.Name
    s1.Cells(lrow(s1, 6) + 1, 6) = file1.Path
    s1.Hyperlinks.Add Anchor:=s1.Cells(lrow(s1, 7) + 1, 7), Address:=file1.Path
Next

s1.Columns(1).AutoFit
s1.Columns(5).AutoFit
s1.Columns(6).AutoFit
s1.Columns(7).AutoFit

End Sub

Sub TextParss()

Dim docxName As String, excelTargetF As String, filename1 As String, str2 As String
Dim textt As String, textt2 As String
Dim wordAppx As Word.Application, wordAppx2 As Word.Application, wordDocx As Document, worDocx2 As Document
Dim wordApppx3 As Word.Application, wordDocx3 As Document
Dim r As Word.Range, r1 As Word.Range, r2 As Word.Range, r3 As Word.Range
Dim pr As Word.Paragraph, pr2 As Word.Paragraph
Dim lin As Word.Words
Dim i As Integer, j As Integer, k As Integer, l As Integer, m As Integer, n As Integer
Dim strC As String, strB As String
Dim wb As Workbook
Dim ws As Worksheet
Dim isChecked() As Boolean

str2 = FolderLocat("Select the Main Folder")
excelTargetF = FPicker("Select a Target Excel File")
Set wb = Workbooks.Open(excelTargetF)
Set ws = wb.Sheets(1)
ws.UsedRange.Clear
ws.Columns.ColumnWidth = ws.StandardWidth

FolderStruct (str2)
ExtensionSelect ws

Set wordAppx = CreateObject("Word.Application")
wordAppx.Visible = False
docxName = FPicker("Select a KeyWord File")
'Set wordDocx = wordAppx.Documents.Open("C:\Input\KeyWords.docx")
Set wordDocx = wordAppx.Documents.Open(docxName)
Set wordDocx = wordAppx.ActiveDocument                      'keyword file r
Set r = wordDocx.Content

Set wordAppx2 = CreateObject("Word.Application")
wordAppx2.Visible = True
Set wordDocx2 = wordAppx2.Documents.Add                     'parsing results file r2
Set r2 = wordDocx2.Content

For k = 2 To lrow(ws, 3)
    Set r3 = Nothing
    Set wordAppx3 = CreateObject("Word.application")
    Set wordDocx3 = wordAppx3.Documents.Open(ws.Cells(k, 3).Value)    'parsed file object r3
    Set r3 = wordDocx3.Content
    
    ReDim isChecked(1 To r3.Paragraphs.Count) As Boolean
    
    For m = 1 To UBound(isChecked)
        isChecked(m) = False
    Next
    
    For l = 1 To r.Document.Paragraphs.Count
        strC = Trim(r.Paragraphs(l).Range.Words(1))
        For i = 1 To r3.Document.Paragraphs.Count
            Set pr = r3.Document.Paragraphs(i)
            strB = pr.Range.Text
            For Each wordx In r3.Document.Paragraphs(i).Range.Words
              strB = Trim(CStr(wordx))
                If (strC = strB) Then
                    If isChecked(i) = False Then
                        r2.InsertAfter (wordDocx3.Name)
                        r2.Paragraphs.Add
                        parCount = r2.Paragraphs.Count
                        r2.Hyperlinks.Add Anchor:=r2.Paragraphs(parCount).Range, Address:=wordDocx3.Path
                        r2.Paragraphs.Add
                        parCount = r2.Paragraphs.Count
                        r3.Paragraphs(i).Range.Copy
                        r2.Paragraphs(parCount).Range.Paste
                        r2.Paragraphs.Add
                        r2.Paragraphs.Add
                        j = j + 1
                        ws.Cells(j, 1) = r3.Document.Paragraphs(i)
                        ws.Cells(j, 2) = strC
                        isChecked(i) = True
                        Exit For
                    End If
                End If
            Next
        Next
    Next
    wordAppx3.Documents.Close
    wordAppx3.Quit
Next

wordDocx2.Range.Font.Name = "Calibri"
wordDocx2.SaveAs Filename:=wordDocx.Path & "\ParsingResults.docx"

wordAppx.Documents.Close 'savechanges:=wdSaveChanges
wordAppx.Quit

'wordAppx2.Documents.Close savechanges:=wdPromptToSaveChanges
wordAppx2.Documents.Close
wordAppx2.Quit

wb.Save
'wb.SaveAs Filename:=excelTargetF
'wb.Close
'Workbooks.Close

End Sub

Function FPicker(str As String)

Dim fd As Object
'
'Set fd = new Application.FileDialog(msoFileDialogOpen)
'fd.Title = str
'fd.Show
'FPicker = fd.SelectedItems(1)
'Set fd = Nothing

With Application.FileDialog(msoFileDialogOpen)
    .Title = str
    .Show
    FPicker = .SelectedItems(1)
End With

End Function

Function FolderLocat(str As String) As String

'Dim fd As Object
'
'Set fd = Application.FileDialog(msoFileDialogFolderPicker)
'fd.Title = str
'fd.Show
'FolderLocat = fd.SelectedItems(1)
'Set fd = Nothing

With Application.FileDialog(msoFileDialogFolderPicker)
    .Title = str
    .Show
    FolderLocat = .SelectedItems(1)
End With

End Function

Sub FolderStruct(a As String)

Dim fileobj As Object
Dim fold As Object

Set fileobj = CreateObject("Scripting.FileSystemObject")
Set fold = fileobj.getfolder(a)

SubFold fold

End Sub

Function lrow(s As Worksheet, r As Integer) As Integer

lrow = s.Cells(Rows.Count, r).End(xlUp).Row

End Function

Sub ExtensionSelect(s As Worksheet)

Dim f As Object, f1 As Object
Dim wb1 As Workbook
Dim lrowx As Integer, i As Integer, j As Integer

Set wb1 = s.Application.ActiveWorkbook
lrowx = lrow(s, 5)

j = 2

For i = 2 To lrowx
    If (Mid(s.Cells(i, 5), InStr(s.Cells(i, 5), ".")) = ".xl" Or _
    Mid(s.Cells(i, 5), InStr(s.Cells(i, 5), ".")) = ".docx") And Left(s.Cells(i, 5), 1) <> "~" Then
        Range(s.Cells(i, 5), s.Cells(i, 5)).Copy s.Cells(j, 2)
        Range(s.Cells(i, 6), s.Cells(i, 6)).Copy s.Cells(j, 3)
        Range(s.Cells(i, 7), s.Cells(i, 7)).Copy s.Cells(j, 4)
        j = j + 1
    End If
Next

s.Columns(2).AutoFit
s.Columns(3).AutoFit
s.Columns(4).AutoFit
End Sub
chris neilsen
  • 52,446
  • 10
  • 84
  • 123
Chris
  • 11
  • 2

1 Answers1

0

I've just identified the problem. This line was reduntant:

Set wordDocx = wordAppx.ActiveDocument 

I just implemented Timothy's suggestion and the code seems to run much faster. I need to run it across a larger set of files but thanks for the idea, indeed. Much appreciated.

Chris
  • 11
  • 2
  • It will be horribly slow, at least in part, because of the multiple instances of Word that you are creating. You only need one. – Timothy Rylatt Sep 21 '21 at 21:14
  • Yes, it is intolerably slow. Approximately 250 files was processed by this code and it took 10 days to do that. Each file had roughly 1500 paragraphs and there were 20 keywords. – Chris Oct 05 '21 at 19:53
  • What I learnt though the process was that there is a glitch in the code which causes execution hangout when callin up the first Word object. But, when I introduced a one second delay there this problem was cured. Another one, using ranges to copy text across the files sometimes throws an error as if the data transfer wasn't quick enough. – Chris Oct 05 '21 at 20:05
  • You can also speed things up by not using the clipboard. Instead you can use `range1.FormattedText = range2.FormattedText`, see https://learn.microsoft.com/en-us/office/vba/api/word.range.formattedtext – Timothy Rylatt Oct 05 '21 at 21:34