-2

I'm trying for a project to read the content of a pdf file via VBA Access and put it in a variable.

Here is my Code:

Dim fso As New FileSystemObject
Dim tStream As TextStream
Dim vLine As String, vKey As String
vKey = "stream"

Set tStream = fso.OpenTextFile(form_filename, ForReading, False)

Do While Not tStream.AtEndOfStream
  vLine = tStream.ReadLine
  
  If InStr(vLine, vKey) > 0 Then
    MsgBox vLine
  End If
  
Loop

Set tStream = Nothing
Set fso = Nothing
me ke
  • 1
  • 1
  • 4
    Your [previous question](https://stackoverflow.com/questions/74367245/read-pdf-via-vba-access) was closed for a reason. Asking the same question under a new account is not a very good style. – Andre Nov 09 '22 at 10:29

1 Answers1

1

You can use an external command line program pdftotext to retrieve the text to a (temporary) text file as in this example:

Public Function PdfToText( _
    ByVal PdfFile As String, _
    ByVal Id As Long) _
    As String

    Const PdfPathMask   As String = "{0}\pdftotext.exe"
    Const FileNameMask  As String = "{0}\{1}.txt"
    Const CommandMask   As String = "{0} -simple -eol dos ""{1}"" ""{2}"""
    ' Emperic value for minimum size of a useful text file.
    Const MinFileSize   As Long = 80
    
    Dim PdfPath     As String
    Dim Command     As String
    Dim TextFile    As String
    Dim Result      As Long
    
    PdfPath = Replace(PdfPathMask, "{0}", CurrentProject.Path)
    If Dir(PdfPath, vbNormal) <> "" Then
        ' pfdtotext.exe is present.
        ' It will be safe to call ShellWait.
        TextFile = Replace(FileNameMask, "{0}", Environ("temp"))
        TextFile = Replace(TextFile, "{1}", CStr(Id))
        
        If Dir(TextFile, vbNormal) <> "" Then
            ' File has been created in this session.
        Else
            Command = Replace(CommandMask, "{0}", PdfPath)
            Command = Replace(Command, "{1}", PdfFile)
            Command = Replace(Command, "{2}", TextFile)
            
            Result = ShellWait(Command, vbMinimizedFocus)
            If Result <> 0 Or Dir(TextFile, vbNormal) = "" Then
                TextFile = ""
            ElseIf FileLen(TextFile) < MinFileSize Then
                TextFile = ""
            End If
        End If
        
        If TextFile = "" Then
            MsgBox "No text information in this file.", vbInformation + vbOKOnly, "Text in file"
        End If
    Else
        MsgBox "The file: " & vbCrLf & PdfPath & vbCrLf & "is missing.", vbInformation + vbOKOnly, "Text in file"
    End If
    
    PdfToText = TextFile

End Function

You can read the created text file with the usual methods.

ShellWait is lengthy but useful, as it will pause the code while pdftotext is running:

' General constants.
'
' Wait forever.
Private Const Infinite              As Long = &HFFFF

' Process Security and Access Rights.
'
' The right to use the object for synchronization.
' This enables a thread to wait until the object is in the signaled state.
Private Const Synchronize           As Long = &H100000

' Constants for WaitForSingleObject.
'
' The specified object is a mutex object that was not released by the thread
' that owned the mutex object before the owning thread terminated.
' Ownership of the mutex object is granted to the calling thread and the
' mutex state is set to nonsignaled.
Private Const StatusAbandonedWait0  As Long = &H80
Private Const WaitAbandoned         As Long = StatusAbandonedWait0 + 0
' The state of the specified object is signaled.
Private Const StatusWait0           As Long = &H0
Private Const WaitObject0           As Long = StatusWait0 + 0
' The time-out interval elapsed, and the object's state is nonsignaled.
Private Const WaitTimeout           As Long = &H102
' The function has failed. To get extended error information, call GetLastError.
Private Const WaitFailed            As Long = &HFFFFFFFF


' Missing enum when using late binding.
'
#If EarlyBinding = False Then
    Public Enum IOMode
        ForAppending = 8
        ForReading = 1
        ForWriting = 2
    End Enum
#End If


' API declarations.

' Opens an existing local process object.
' If the function succeeds, the return value is an open handle
' to the specified process.
' If the function fails, the return value is NULL (0).
' To get extended error information, call GetLastError.
'
#If VBA7 Then
    Private Declare PtrSafe Function OpenProcess Lib "kernel32" ( _
        ByVal dwDesiredAccess As Long, _
        ByVal bInheritHandle As Long, _
        ByVal dwProcessId As Long) _
        As LongPtr
#Else
    Private Declare Function OpenProcess Lib "kernel32" ( _
        ByVal dwDesiredAccess As Long, _
        ByVal bInheritHandle As Long, _
        ByVal dwProcessId As Long) _
        As Long
#End If

' The WaitForSingleObject function returns when one of the following occurs:
' - the specified object is in the signaled state.
' - the time-out interval elapses.
'
' The dwMilliseconds parameter specifies the time-out interval, in milliseconds.
' The function returns if the interval elapses, even if the object's state is
' nonsignaled.
' If dwMilliseconds is zero, the function tests the object's state and returns
' immediately.
' If dwMilliseconds is Infinite, the function's time-out interval never elapses.
'
#If VBA7 Then
    Private Declare PtrSafe Function WaitForSingleObject Lib "kernel32" ( _
        ByVal hHandle As LongPtr, _
        ByVal dwMilliseconds As Long) _
        As Long
#Else
    Private Declare Function WaitForSingleObject Lib "kernel32" ( _
        ByVal hHandle As Long, _
        ByVal dwMilliseconds As Long) _
        As Long
#End If

' Closes an open object handle.
' If the function succeeds, the return value is nonzero.
' If the function fails, the return value is zero.
' To get extended error information, call GetLastError.
'
#If VBA7 Then
    Private Declare PtrSafe Function CloseHandle Lib "kernel32" ( _
        ByVal hObject As LongPtr) _
        As Long
#Else
    Private Declare Function CloseHandle Lib "kernel32" ( _
        ByVal hObject As Long) _
        As Long
#End If


' Shells out to an external process and waits until the process ends.
' Returns 0 (zero) for no errors, or an error code.
'
' The call will wait for an infinite amount of time for the process to end.
' The process will seem frozen until the shelled process terminates. Thus,
' if the shelled process hangs, so will this.
'
' A better approach could be to wait a specific amount of time and, when the
' time-out interval expires, test the return value. If it is WaitTimeout, the
' process is still not signaled. Then either wait again or continue with the
' processing.
'
' Waiting for a DOS application is different, as the DOS window doesn't close
' when the application is done.
' To avoid this, prefix the application command called (shelled to) with:
' "command.com /c " or "cmd.exe /c ".
'
' For example:
'   Command = "cmd.exe /c " & Command
'   Result = ShellWait(Command)
'
' 2018-04-06. Gustav Brock. Cactus Data ApS, CPH.
'
Public Function ShellWait( _
    ByVal Command As String, _
    Optional ByVal WindowStyle As VbAppWinStyle = vbNormalNoFocus) _
    As Long

    Const InheritHandle As Long = &H0
    Const NoProcess     As Long = 0
    Const NoHandle      As Long = 0
    
#If VBA7 Then
    Dim ProcessHandle   As LongPtr
#Else
    Dim ProcessHandle   As Long
#End If
    Dim DesiredAccess   As Long
    Dim ProcessId       As Long
    Dim WaitTime        As Long
    Dim Closed          As Boolean
    Dim Result          As Long
  
    If Len(Trim(Command)) = 0 Then
        ' Nothing to do. Exit.
    Else
        ProcessId = Shell(Command, WindowStyle)
        If ProcessId = NoProcess Then
            ' Process could not be started.
        Else
            ' Get a handle to the shelled process.
            DesiredAccess = Synchronize
            ProcessHandle = OpenProcess(DesiredAccess, InheritHandle, ProcessId)
            ' Wait "forever".
            WaitTime = Infinite
            ' If successful, wait for the application to end and close the handle.
            If ProcessHandle = NoHandle Then
                ' Should not happen.
            Else
                ' Process is running.
                Result = WaitForSingleObject(ProcessHandle, WaitTime)
                ' Process ended.
                Select Case Result
                    Case WaitObject0
                        ' Success.
                    Case WaitAbandoned, WaitTimeout, WaitFailed
                        ' Know error.
                    Case Else
                        ' Other error.
                End Select
                ' Close process.
                Closed = CBool(CloseHandle(ProcessHandle))
                If Result = WaitObject0 Then
                    ' Return error if not closed.
                    Result = Not Closed
                End If
            End If
        End If
    End If
  
    ShellWait = Result

End Function
Gustav
  • 53,498
  • 7
  • 29
  • 55