I have built a network keyword searcher to assist with my job. It opens Word documents and inspects their content for keywords. Processing the files one at a time in a classic "foreach" loop took far too long, so I developed the code below to run each file's search in its own runspace within a runspace pool. However, when it executes, it still seems to run sequentially. I assume I have made a mistake in how the runspace pool is constructed. I would appreciate any feedback on how to improve this function.
I have already reviewed the following articles but still cannot find my issue: https://devblogs.microsoft.com/scripting/beginning-use-of-powershell-runspaces-part-1/ and https://adamtheautomator.com/powershell-multithreading/
# getWordMatches
# Searches Word documents for keywords, processing the files concurrently in a
# runspace pool (one Word instance per file being processed).
#
# Parameters:
#   $wordFiles - the Word files to inspect; FileInfo objects (e.g. from
#                Get-ChildItem) or plain path strings.
#   $TermArray - the terms to look for. The search is whole-word and
#                case-insensitive, and stops at the FIRST matching term per file.
# Output:
#   One object per file that contained a term, with the properties
#   File (full path), Term_Match (the first term found) and Hash (SHA256).
#   Files that cannot be opened are skipped with a warning.
Function getWordMatches ($wordFiles, $TermArray)
{
    # Results are appended from several runspaces at once, so the list itself
    # must be synchronized. ('+=' on a list builds a new array and reassigns it,
    # which lets concurrent writers overwrite each other's results.)
    $dataSync = [hashtable]::Synchronized(@{})
    $dataSync.returns = [System.Collections.ArrayList]::Synchronized(
        (New-Object -TypeName "System.Collections.ArrayList"))

    # Bookkeeping for the asynchronous invocations that are still in flight.
    $Jobs = New-Object -TypeName "System.Collections.ArrayList"

    # One runspace per logical processor. ProcessorCount is always a single
    # integer, whereas a WMI query returns one value per socket.
    $maxThreads = [Math]::Max(1, [Environment]::ProcessorCount)
    $RunspacePool = [RunspaceFactory]::CreateRunspacePool(1, $maxThreads)
    $RunspacePool.Open()

    # Work performed for a single file inside a pool runspace.
    $WordScriptBlock = {
        Param($file, $TermArray, $dataSync)

        # Accept both FileInfo objects and plain path strings.
        $path = if ($file -is [System.IO.FileSystemInfo]) { $file.FullName } else { [string]$file }

        # -LiteralPath so that '[' and ']' in file names are not treated as wildcards.
        if (-not (Test-Path -LiteralPath $path -PathType Leaf)) { return $false }

        # Arguments for Find.Execute
        $matchCase = $false
        $matchWholeWord = $true
        $matchWildCards = $false
        $matchSoundsLike = $false
        $matchAllWordForms = $false
        $forward = $true
        $wrap = 1   # wdFindContinue

        # Placeholder for optional COM arguments that are left at their defaults.
        $missing = [Type]::Missing

        $Word = $null
        $document = $null
        $matchedTerm = $null
        try
        {
            $Word = New-Object -ComObject Word.Application
            # Keep Word hidden and suppress its dialogs so a prompt cannot
            # block an unattended runspace.
            $Word.Visible = $false
            $Word.DisplayAlerts = 0   # wdAlertsNone

            # PowerShell has no VBA-style named arguments ('ReadOnly:=$true' is
            # just a string), so the arguments must be given by position:
            #   FileName, ConfirmConversions, ReadOnly, AddToRecentFiles,
            #   PasswordDocument, PasswordTemplate, Revert, WritePasswordDocument,
            #   WritePasswordTemplate, Format, Encoding, Visible
            # The dummy password 'ttt' makes password-protected files fail
            # immediately instead of prompting for a password.
            $document = $Word.Documents.Open($path, $false, $true, $false, 'ttt',
                $missing, $false, $missing, $missing, $missing, $missing, $false)
            $range = $document.Content

            foreach ($term in $TermArray)
            {
                $wordFound = $range.Find.Execute($term, $matchCase, $matchWholeWord, $matchWildCards,
                    $matchSoundsLike, $matchAllWordForms, $forward, $wrap)
                if ($wordFound)
                {
                    # Only the first matching term is reported for a file.
                    $matchedTerm = $term
                    break
                }
            }
        }
        catch
        {
            # Re-throw so the caller's EndInvoke can report the file as skipped;
            # the finally block below still shuts Word down.
            throw
        }
        finally
        {
            # Best-effort cleanup: a failure here must not hide the original error.
            if ($null -ne $document) { try { $document.Close($false) } catch { } }
            if ($null -ne $Word)
            {
                try { $Word.Quit() } catch { }
                try { [void][System.Runtime.InteropServices.Marshal]::ReleaseComObject($Word) } catch { }
            }
        }

        if ($null -ne $matchedTerm)
        {
            # Hash only the files that matched, after Word has closed the
            # document, so non-matching files are not read a second time.
            $hash = (Get-FileHash -LiteralPath $path -Algorithm SHA256).Hash
            $result = [PSCustomObject]@{ File = $path; Term_Match = $matchedTerm; Hash = $hash }
            [void]$dataSync.returns.Add($result)
        }
    } #/WordScriptBlock

    try
    {
        # Queue one asynchronous invocation per file; the pool limits how many
        # of them run at the same time.
        foreach ($file in $wordFiles)
        {
            $Powershell = [Powershell]::Create().AddScript($WordScriptBlock)
            $Powershell.RunspacePool = $RunspacePool
            [void]$Powershell.AddArgument($file).AddArgument($TermArray).AddArgument($dataSync)
            $JobObj = New-Object -TypeName PSObject -Property @{
                Runspace = $Powershell.BeginInvoke()
                Powershell = $Powershell
                File = $file
            } #/New-Object
            [void]$Jobs.Add($JobObj)
        }

        # Poll until every invocation has completed, reaping finished ones.
        $i = 0
        $total = $Jobs.Count
        while ($Jobs.Count -gt 0)
        {
            Write-Progress -Activity "Processing Word jobs" -Status "$($Jobs.Count) Jobs Remaining" -PercentComplete ($i/$total * 100)
            foreach ($job in $Jobs.ToArray())
            {
                if ($job.Runspace.IsCompleted)
                {
                    try
                    {
                        # EndInvoke surfaces any error raised inside the runspace;
                        # its pipeline output is not part of this function's result.
                        $null = $job.Powershell.EndInvoke($job.Runspace)
                    }
                    catch
                    {
                        Write-Warning "Skipped '$($job.File)': $($_.Exception.Message)"
                    }
                    finally
                    {
                        $job.Powershell.Dispose()
                    }
                    $Jobs.Remove($job)
                    $i++
                }
            }
            # Yield between polls instead of spinning a CPU core that the
            # worker runspaces could be using.
            if ($Jobs.Count -gt 0) { Start-Sleep -Milliseconds 200 }
        }
        Write-Progress -Activity "Processing Word jobs" -Completed
    }
    finally
    {
        # Release the pool's runspaces even if the run is interrupted.
        $RunspacePool.Close()
        $RunspacePool.Dispose()
    }

    $wordResults = $dataSync.returns
    return $wordResults
}