0

With Delphi 10 Seattle, I need to get the text content of any file.

I've tried with GetFileContentsFromIFilter in SPFilter.pas from http://develop.shorterpath.com/spfree/default.asp, but I could not get it to work:

uses SPFilter;

procedure TForm1.btnTestClick(Sender: TObject);
var
  FilterStream: TStringStream;
begin
  FilterStream := TStringStream.Create;
  try
    if SPFilter.GetFileContentsFromIFilter(Trim(edtFile.Text), FilterStream) then // D:\Readme.txt
    begin
      FilterStream.Seek(0, soFromBeginning);
      Memo1.Lines.LoadFromStream(FilterStream);
    end;
  finally
    FilterStream.Free;
  end;
end;

This is the SPFilter.pas unit:

(******************************************************************************)
(* SPFilter - Read file content using IFilter interface                       *)
(* Shorter Path Free Components 1.0                                           *)
(*                                                                            *)
(* Copyright (c) 2003 Shorter Path Software                                   *)
(* http://develop.shorterpath.com                                             *)
(******************************************************************************)

unit SPFilter;

interface

uses
  CodeSiteLogging,
  Classes;

function GetFileContentsFromIFilter(const FileName: string; OutData: TStream): Boolean;

implementation

uses
  Windows, SysUtils, Registry, ActiveX, Filter;

function GetFileContentsFromIFilter(const FileName: string;
  OutData: TStream): Boolean;
var
  Reg: TRegistry;
  DocType, DocClass, HandlerClass, PersistClass, FilterDLL: string;
  DLLHandle: THandle;
  ClassFactory: IClassFactory;
  FilterObj: IFilter;
  PersistFile: IPersistFile;
  DllGetClassObject: TDllGetClassObject;
  DllCanUnloadNow: TDLLCanUnloadNow;
  Res, ChunkRes: HResult;
  pFlags: ULONG;
  WFileName: WideString;
  StatChunk: TStatChunk;
  cwcBuffer: ULONG;
  awcBuffer: PWideChar;
  Txt: WideString;
  AnsiTxt: string;
  t: string;
  EndOfChunksCount: Integer;
begin
  Result := False;

  { Find filter DLL }
  FilterDLL := EmptyStr;

  { Step 1: Determine the CLSID }
  Reg := TRegistry.Create(KEY_READ);
  Reg.RootKey := HKEY_LOCAL_MACHINE;

  { A. Locate document type }
  DocType := 'Software\Classes\' + ExtractFileExt(FileName);
  if Reg.OpenKey(DocType, False) then
  begin
    DocType := Reg.ReadString(EmptyStr);
    Reg.CloseKey;
    CodeSite.Send('document type', DocType); // -> txtfile

    if Length(DocType) > 0 then
    begin
      { B. Locate document class }
      if Reg.OpenKey('Software\Classes\' + DocType + '\CLSID', False) then // FALSE HERE!!
      begin
        CodeSite.Send('CLSID');
        DocClass := Reg.ReadString(EmptyStr);
        Reg.CloseKey;
      end;
    end;

    { Step 2: Determine the Persistent Handler }
    if Length(DocClass) > 0 then
    begin
      if Reg.OpenKey('Software\Classes\CLSID\' + DocClass + '\PersistentHandler', False) then
      begin
        HandlerClass := Reg.ReadString(EmptyStr);
        Reg.CloseKey;
      end;
    end;

    { Step 3: Determine the IFilter Persistent Handler GUID }
    if Length(HandlerClass) > 0 then
    begin
      if Reg.OpenKey('Software\Classes\CLSID\' + HandlerClass +
        '\PersistentAddinsRegistered\' + GUIDToString(IID_IFilter), False) then
      begin
        PersistClass := Reg.ReadString(EmptyStr);
        Reg.CloseKey;
      end;
    end;

    { Step 4: Determine the Filter DLL }
    if Length(PersistClass) > 0 then
    begin
      if Reg.OpenKey('Software\Classes\CLSID\' + PersistClass + '\InprocServer32', False) then
      begin
        FilterDLL := Reg.ReadString(EmptyStr);
        Reg.CloseKey;
      end;
    end;
  end;
  Reg.Free;

  { Use Filter DLL to read the file }
  if Length(FilterDLL) > 0 then
  begin
    DLLHandle := LoadLibrary(PChar(FilterDLL));
    if DLLHandle <> 0 then
    begin
      @DllGetClassObject := GetProcAddress(DLLHandle, 'DllGetClassObject');
      @DllCanUnloadNow := GetProcAddress(DLLHandle, 'DllCanUnloadNow');
      if Assigned(DllGetClassObject) then
      begin
        { Get Class Factory }
        DllGetClassObject(StringToGUID(PersistClass), IClassFactory, ClassFactory);
        if Assigned(ClassFactory) then
        begin
          { Get IFilter object }
          ClassFactory.CreateInstance(nil, IFilter, FilterObj);
          if Assigned(FilterObj) then
          begin
            FilterObj.QueryInterface(IPersistFile, PersistFile);
            if Assigned(PersistFile) then
            begin
              WFileName := FileName;
              PersistFile.Load(PWideChar(WFileName), 0);
              EndOfChunksCount := 0;
              Res := FilterObj.Init(0, 0, nil, pFlags);
              if Res = S_OK then
              //if FilterObj.Init(0, 0, nil, pFlags) = S_OK then
              begin
                repeat
                  ChunkRes := FilterObj.GetChunk(StatChunk);
                  if ChunkRes = S_OK then
                  begin
                    if (StatChunk.flags and CHUNK_TEXT) <> 0 then
                    begin
                      GetMem(awcBuffer, 16384*2);
                      repeat
                        cwcBuffer := 16384;
                        FillChar(awcBuffer^, cwcBuffer, 0);
                        Res := FilterObj.GetText(cwcBuffer, awcBuffer);
                        if cwcBuffer > 0 then
                        begin
                          SetLength(Txt, cwcBuffer*2);
                          FillChar(PWideChar(Txt)^, cwcBuffer*2, 0);
                          Move(awcBuffer^, PWideChar(Txt)^, cwcBuffer*2);

                          SetLength(AnsiTxt, cwcBuffer*2);
                          FillChar(PChar(AnsiTxt)^, cwcBuffer*2, 0);
                          WideCharToMultiByte(CP_ACP, 0, PWideChar(Txt),
                            cwcBuffer, PAnsiChar(AnsiTxt), cwcBuffer*2, ' ', nil);

                          SetLength(AnsiTxt, StrLen(PChar(AnsiTxt)));
                          t := t + AnsiTxt;
                        end;
                      until Res = FILTER_E_NO_MORE_TEXT;
                      FreeMem(awcBuffer);
                    end;
                  end;
                  if ChunkRes = FILTER_E_END_OF_CHUNKS then
                    Inc(EndOfChunksCount) else
                    EndOfChunksCount := 0;
                until EndOfChunksCount > 1;
                { Return True for success }
                Result := True;
              end;
            end;
            { Release filter }
            PersistFile := nil;
            FilterObj := nil;
          end;
          { Release Class Factory }
          ClassFactory := nil;
        end;
      end;
      if Assigned(DllCanUnloadNow) then
      begin
        if DllCanUnloadNow = S_OK then
          FreeLibrary(DLLHandle) else
      end else
        FreeLibrary(DLLHandle);
    end;
  end;

  { Write data to stream }
  if Result then
  try
    OutData.Write(t[1], Length(t));
  except
    Result := False;
  end;
end;

end.

As you can see from the comment "FALSE HERE" it fails to get the CLSID of .TXT file. However, there is no CLSID key inside the txtfile registry key. So what is wrong here?

IFilter Explorer from Citeknet however does show me that there IS a valid iFilter for .TXT files!

Does anybody know how to get the content of any file?

user1580348
  • 5,721
  • 4
  • 43
  • 105
  • I don't want to use "Indexing" service. I want to get the text content of any file with IFilter. Is IFilter part of the "Indexing" service or Windows Search? So how can I get the text content of any file? – user1580348 Apr 04 '16 at 14:08
  • The user who has previously posted a comment to which I answered with my comment above has deleted his comment without explaining the reason and then he has downvoted my question. What kind of behavior is this? Isn't this an irrational behavior? – user1580348 Apr 04 '16 at 14:30
  • I have not downvoted the question. I've closed it as a duplicate of another question which shows you how to obtain and use an `IFilter`. You're welcome. – David Heffernan Apr 04 '16 at 14:31
  • There is no accepted solution to that question you linked to. The first answer is poorly described (no indication of used units e.g. for `IFilter` and thus unusable). The second answer just indicates the very same sources I've used in my question above. So your objection of a "duplicate question" is unfounded. Please revise your objection. – user1580348 Apr 04 '16 at 14:46
  • For sure you'll have to do some work for yourself. You'll want a Delphi header translation. I believe JEDI has one. Did you read the MSDN docs for `LoadIFilter`? If I were you I'd start by using one of the C++ examples that you can find to prove the concept. That way you'll know that you aren't getting bitten by header translation problems. Then you can port to Delphi. Good luck! – David Heffernan Apr 04 '16 at 14:50
  • So you admit that my question is NOT a duplicate of another question which has a working answer. Thank you. – user1580348 Apr 04 '16 at 14:57
  • Not at all. I admit that you might need to do some work, and that Ian's answer could be better. A duplicate means that the question is the same, not that there is an answer that can be copy/pasted. Anyway, it took me about 15 minutes to get Ian's code to work. I was able to use it to read text from a .txt file. – David Heffernan Apr 04 '16 at 14:58
  • Fantastic, you are a genius! (Which unfortunately I am not). Would you please let us participate to your solution? Thank you! – user1580348 Apr 04 '16 at 15:00
  • Here is `LoadIFilter`: `function LoadIFilter(pwcsPath: PWideChar; pUnkOuter: IUnknown; out ppIUnk: IUnknown): HRESULT; stdcall; external 'query.dll'; ` The thing is though, `IFilter` support is really patchy. It's going to take you a lot of work to get it to read from a wide range of file types, and even then I expect you'll struggle to have much success. I think this is probably the wrong way to solve your actual problem. But you did say that you must solve it with `IFilter`, so there it is. – David Heffernan Apr 04 '16 at 15:02
  • David, thanks for your expert opinion. I have rephrased my question as requested so it is not restricted to `IFilter`. – user1580348 Apr 04 '16 at 15:12
  • There doesn't have to be an accepted answer to the other post to make this a duplicate. There simply has to be an answer to that other post with upvotes. – Ken White Apr 04 '16 at 15:18
  • You already have what you need at the other question. That code can read text from a text file – David Heffernan Apr 04 '16 at 15:23
  • As I said before that answer is unusable as it does not mention where e.g. `IFilter` in `ProcessFile` is declared (aka which units have to be included in the uses clause). Why don't you publish the solution with which you were able to read the content of a text file? – user1580348 Apr 04 '16 at 15:37
  • I managed to use that code, so unusable is a little strong. The code in your question uses `IFilter`, defined in the unit named `Filter`. You'd do well though to extract that declaration and put it in your own code. – David Heffernan Apr 04 '16 at 15:54
  • It is still unusable: No declaration can be found for `chunk: PSTAT_CHUNK;`. And why do you hide your working solution from the public? – user1580348 Apr 04 '16 at 19:07
  • So read the code and work out how to fix it. I'd declare GetChunk as taking `out Chunk: TStatChunk` and get rid of Ian's needless heap allocation. I'm not really one for doing work for others. I don't think you are trying hard enough. – David Heffernan Apr 04 '16 at 19:25
  • What I'm getting at is that sooner or later you are going to have to do this yourself. You can't have us write all your code for you. You need to understand how to translate interfaces, how to read msdn docs, how to translate C++ examples etc. We could do it for you but that does nothing for anyone. You learn nothing and we spend time for no gain. It will take you time, but we are here to help you learn. But not to feed you polished code. – David Heffernan Apr 04 '16 at 19:36

0 Answers0