You can use an external command line program pdftotext to retrieve the text to a (temporary) text file as in this example:
Public Function PdfToText( _
ByVal PdfFile As String, _
ByVal Id As Long) _
As String
Const PdfPathMask As String = "{0}\pdftotext.exe"
Const FileNameMask As String = "{0}\{1}.txt"
Const CommandMask As String = "{0} -simple -eol dos ""{1}"" ""{2}"""
' Emperic value for minimum size of a useful text file.
Const MinFileSize As Long = 80
Dim PdfPath As String
Dim Command As String
Dim TextFile As String
Dim Result As Long
PdfPath = Replace(PdfPathMask, "{0}", CurrentProject.Path)
If Dir(PdfPath, vbNormal) <> "" Then
' pfdtotext.exe is present.
' It will be safe to call ShellWait.
TextFile = Replace(FileNameMask, "{0}", Environ("temp"))
TextFile = Replace(TextFile, "{1}", CStr(Id))
If Dir(TextFile, vbNormal) <> "" Then
' File has been created in this session.
Else
Command = Replace(CommandMask, "{0}", PdfPath)
Command = Replace(Command, "{1}", PdfFile)
Command = Replace(Command, "{2}", TextFile)
Result = ShellWait(Command, vbMinimizedFocus)
If Result <> 0 Or Dir(TextFile, vbNormal) = "" Then
TextFile = ""
ElseIf FileLen(TextFile) < MinFileSize Then
TextFile = ""
End If
End If
If TextFile = "" Then
MsgBox "No text information in this file.", vbInformation + vbOKOnly, "Text in file"
End If
Else
MsgBox "The file: " & vbCrLf & PdfPath & vbCrLf & "is missing.", vbInformation + vbOKOnly, "Text in file"
End If
PdfToText = TextFile
End Function
You can read the created text file with the usual methods.
ShellWait
is lengthy but useful, as it will pause the code while pdftotext
is running:
' General constants.
'
' Wait forever.
Private Const Infinite As Long = &HFFFF
' Process Security and Access Rights.
'
' The right to use the object for synchronization.
' This enables a thread to wait until the object is in the signaled state.
Private Const Synchronize As Long = &H100000
' Constants for WaitForSingleObject.
'
' The specified object is a mutex object that was not released by the thread
' that owned the mutex object before the owning thread terminated.
' Ownership of the mutex object is granted to the calling thread and the
' mutex state is set to nonsignaled.
Private Const StatusAbandonedWait0 As Long = &H80
Private Const WaitAbandoned As Long = StatusAbandonedWait0 + 0
' The state of the specified object is signaled.
Private Const StatusWait0 As Long = &H0
Private Const WaitObject0 As Long = StatusWait0 + 0
' The time-out interval elapsed, and the object's state is nonsignaled.
Private Const WaitTimeout As Long = &H102
' The function has failed. To get extended error information, call GetLastError.
Private Const WaitFailed As Long = &HFFFFFFFF
' Missing enum when using late binding.
'
#If EarlyBinding = False Then
Public Enum IOMode
ForAppending = 8
ForReading = 1
ForWriting = 2
End Enum
#End If
' API declarations.
' Opens an existing local process object.
' If the function succeeds, the return value is an open handle
' to the specified process.
' If the function fails, the return value is NULL (0).
' To get extended error information, call GetLastError.
'
#If VBA7 Then
Private Declare PtrSafe Function OpenProcess Lib "kernel32" ( _
ByVal dwDesiredAccess As Long, _
ByVal bInheritHandle As Long, _
ByVal dwProcessId As Long) _
As LongPtr
#Else
Private Declare Function OpenProcess Lib "kernel32" ( _
ByVal dwDesiredAccess As Long, _
ByVal bInheritHandle As Long, _
ByVal dwProcessId As Long) _
As Long
#End If
' The WaitForSingleObject function returns when one of the following occurs:
' - the specified object is in the signaled state.
' - the time-out interval elapses.
'
' The dwMilliseconds parameter specifies the time-out interval, in milliseconds.
' The function returns if the interval elapses, even if the object's state is
' nonsignaled.
' If dwMilliseconds is zero, the function tests the object's state and returns
' immediately.
' If dwMilliseconds is Infinite, the function's time-out interval never elapses.
'
#If VBA7 Then
Private Declare PtrSafe Function WaitForSingleObject Lib "kernel32" ( _
ByVal hHandle As LongPtr, _
ByVal dwMilliseconds As Long) _
As Long
#Else
Private Declare Function WaitForSingleObject Lib "kernel32" ( _
ByVal hHandle As Long, _
ByVal dwMilliseconds As Long) _
As Long
#End If
' Closes an open object handle.
' If the function succeeds, the return value is nonzero.
' If the function fails, the return value is zero.
' To get extended error information, call GetLastError.
'
#If VBA7 Then
Private Declare PtrSafe Function CloseHandle Lib "kernel32" ( _
ByVal hObject As LongPtr) _
As Long
#Else
Private Declare Function CloseHandle Lib "kernel32" ( _
ByVal hObject As Long) _
As Long
#End If
' Shells out to an external process and waits until the process ends.
' Returns 0 (zero) for no errors, or an error code.
'
' The call will wait for an infinite amount of time for the process to end.
' The process will seem frozen until the shelled process terminates. Thus,
' if the shelled process hangs, so will this.
'
' A better approach could be to wait a specific amount of time and, when the
' time-out interval expires, test the return value. If it is WaitTimeout, the
' process is still not signaled. Then either wait again or continue with the
' processing.
'
' Waiting for a DOS application is different, as the DOS window doesn't close
' when the application is done.
' To avoid this, prefix the application command called (shelled to) with:
' "command.com /c " or "cmd.exe /c ".
'
' For example:
' Command = "cmd.exe /c " & Command
' Result = ShellWait(Command)
'
' 2018-04-06. Gustav Brock. Cactus Data ApS, CPH.
'
Public Function ShellWait( _
ByVal Command As String, _
Optional ByVal WindowStyle As VbAppWinStyle = vbNormalNoFocus) _
As Long
Const InheritHandle As Long = &H0
Const NoProcess As Long = 0
Const NoHandle As Long = 0
#If VBA7 Then
Dim ProcessHandle As LongPtr
#Else
Dim ProcessHandle As Long
#End If
Dim DesiredAccess As Long
Dim ProcessId As Long
Dim WaitTime As Long
Dim Closed As Boolean
Dim Result As Long
If Len(Trim(Command)) = 0 Then
' Nothing to do. Exit.
Else
ProcessId = Shell(Command, WindowStyle)
If ProcessId = NoProcess Then
' Process could not be started.
Else
' Get a handle to the shelled process.
DesiredAccess = Synchronize
ProcessHandle = OpenProcess(DesiredAccess, InheritHandle, ProcessId)
' Wait "forever".
WaitTime = Infinite
' If successful, wait for the application to end and close the handle.
If ProcessHandle = NoHandle Then
' Should not happen.
Else
' Process is running.
Result = WaitForSingleObject(ProcessHandle, WaitTime)
' Process ended.
Select Case Result
Case WaitObject0
' Success.
Case WaitAbandoned, WaitTimeout, WaitFailed
' Know error.
Case Else
' Other error.
End Select
' Close process.
Closed = CBool(CloseHandle(ProcessHandle))
If Result = WaitObject0 Then
' Return error if not closed.
Result = Not Closed
End If
End If
End If
End If
ShellWait = Result
End Function