-1

I would like to read a block of bytes from a file (e.g. 400 KB), replace some text in the buffer, and then write it to another file. Originally I tried TFileStream with a buffer declared as an array of bytes, but then I got stuck on the problem that StringReplace works only with strings. The source data is UTF-8 text. This is what I have:

// Fragment from the question: reads from the source file into Buffer, runs a
// series of tag-shortening replacements on it and writes the result to the
// target file. It is shown as posted and contains the defects flagged in the
// NOTE(review) comments below.
var
  SS,ST: TFileStream;
  Buffer: string;
  // NOTE(review): TempStr and i are not used anywhere in the visible fragment.
  sf,tf,TempStr: string;
  i: Integer;
begin
  sf := 'U:\SYSTEM\enwiktionary-latest-stub-articles\stub-articles.xml';
  tf := 'A:1.txt';
  SS := TFileStream.Create(sf, fmOpenRead);
  ST := TFileStream.Create(tf, fmCreate or fmOpenWrite);
  try
    // NOTE(review): Buffer is a string that has never been given a length, and
    // sizeof(Buffer) is the size of the string variable itself (a pointer,
    // 4 bytes here), not the size of a data block. The Read below therefore
    // writes over the string variable instead of filling character storage.
    SS.Read(Buffer, sizeof(Buffer));
    // Shorten the XML tag names; the pairs are applied in the order listed.
    Buffer := stringreplace(Buffer, '<page>','<p>', [rfReplaceAll]);
    Buffer := stringreplace(Buffer, '</page>','</p>', [rfReplaceAll]);
    Buffer := stringreplace(Buffer, '<title>','<t>', [rfReplaceAll]);
    Buffer := stringreplace(Buffer, '</title>','</t>', [rfReplaceAll]);
    Buffer := stringreplace(Buffer, '<ns>','<n', [rfReplaceAll]);
    Buffer := stringreplace(Buffer, '</ns>','>', [rfReplaceAll]);
    Buffer := stringreplace(Buffer, '<revision>','<r>', [rfReplaceAll]);
    Buffer := stringreplace(Buffer, '</revision>','</r>', [rfReplaceAll]);
    Buffer := stringreplace(Buffer, '<id>','<i', [rfReplaceAll]);
    Buffer := stringreplace(Buffer, '</id>','>', [rfReplaceAll]);
    Buffer := stringreplace(Buffer, '<parentid>','<pi', [rfReplaceAll]);
    Buffer := stringreplace(Buffer, '</parentid>','>', [rfReplaceAll]);
    Buffer := stringreplace(Buffer, '<contributor>','', [rfReplaceAll]);
    Buffer := stringreplace(Buffer, '</contributor>','', [rfReplaceAll]);
    Buffer := stringreplace(Buffer, '<username>','<u>', [rfReplaceAll]);
    Buffer := stringreplace(Buffer, '</username>','</u>', [rfReplaceAll]);
    Buffer := stringreplace(Buffer, '<comment>','<c>', [rfReplaceAll]);
    Buffer := stringreplace(Buffer, '</comment>','</c>', [rfReplaceAll]);
    Buffer := stringreplace(Buffer, '<text id="','<t =', [rfReplaceAll]);
    Buffer := stringreplace(Buffer, '" bytes="',' b=', [rfReplaceAll]);
    // NOTE(review): same problem as the Read above - this writes sizeof(Buffer)
    // bytes of the string variable, not Length(Buffer) bytes of its contents.
    ST.Write(Buffer, sizeof(Buffer));
  finally
    // NOTE(review): only SS is released here; ST is never freed in this fragment.
    SS.Free;
  end;

The line `Buffer := stringreplace(...)` causes a runtime error: Access violation.

John Boe
  • 3,501
  • 10
  • 37
  • 71

2 Answers

2

Use a UTF-8 buffer string and allocate space for the string body:

 Buffer: AnsiString; //type UTF8String = AnsiString;
 BytesRead: Integer;
 ...
 // Allocate character storage before reading; an empty string has none.
 SetLength(Buffer, BlockSize);
 // Read returns the number of bytes actually read, which is smaller than
 // BlockSize for the last block of the file.
 BytesRead := SS.Read(PAnsiChar(Buffer)^, BlockSize);
 // Trim the buffer so no stale bytes past the data are processed or written.
 SetLength(Buffer, BytesRead);
 ...
 ST.Write(PAnsiChar(Buffer)^, Length(Buffer));

But with this approach you can lose patterns that straddle the boundaries between blocks. Why not use TStringList, load the whole contents into it, and work with its lines?

MBo
  • 77,366
  • 5
  • 53
  • 86
  • 1) I do not have enough memory to load the whole file. My free memory is about 74-100MB. 2) I have some problems because my output file is very small. It contains only short ' – John Boe Jun 30 '18 at 13:11
  • Read thoroughly - You wrote `sizeof(Buffer)` but it is 4 bytes - size of pointer! I showed correct way to solve this problem – MBo Jun 30 '18 at 13:20
  • You're right, you clever guy! May I ask a question? Why is there a ^ ? – John Boe Jun 30 '18 at 13:26
  • It dereferences the pointer to supply the data for the untyped var-parameter of `Read`. Another method is to use `Buffer[1]` (Delphi long strings are 1-indexed), but the pointer cast is less error-prone – MBo Jun 30 '18 at 13:29
  • FYI Delphi 7 has an actual `UTF8String` type (alias for `AnsiString`): `Buffer: UTF8String;` – Remy Lebeau Jun 30 '18 at 15:29
0

A working example.

// Reads up to BlockSize bytes from the start of the source file, shortens the
// XML tag names in that block and writes the result to the target file.
//
// Only the first block of the file is processed. The buffer is an AnsiString
// so that the UTF-8 bytes pass through StringReplace unchanged; this assumes a
// compiler where string = AnsiString (e.g. Delphi 7) - TODO confirm if the
// code is moved to a Unicode Delphi version.
const
  BlockSize = 900000; // maximum number of bytes read from the source file
  // Search/replace pairs, applied to the buffer in the order listed.
  Replacements: array[0..19, 0..1] of string = (
    ('<page>', '<p>'),
    ('</page>', '</p>'),
    ('<title>', '<t>'),
    ('</title>', '</t>'),
    ('<ns>', '<n'),
    ('</ns>', '>'),
    ('<revision>', '<r>'),
    ('</revision>', '</r>'),
    ('<id>', '<i'),
    ('</id>', '>'),
    ('<parentid>', '<pi'),
    ('</parentid>', '>'),
    ('<contributor>', ''),
    ('</contributor>', ''),
    ('<username>', '<u>'),
    ('</username>', '</u>'),
    ('<comment>', '<c>'),
    ('</comment>', '</c>'),
    ('<text id="', '<t ='),
    ('" bytes="', ' b=')
  );
var
    ifname,ofname:string;
    sourceStream, targetStream: TFileStream;
    bytesRead, i: Integer;
    Buffer: AnsiString;
begin
  ifname := 'U:\SYSTEM\enwiktionary-latest-stub-articles\stub-articles.xml';
  ofname := 'A:1.txt';
  sourceStream := TFileStream.Create(ifname, fmOpenRead);
  try
    // Created inside the outer try so that sourceStream is still released if
    // the target file cannot be created.
    targetStream := TFileStream.Create(ofname, fmCreate);
    try
      SetLength(Buffer, BlockSize);
      // Read (unlike ReadBuffer) does not raise when the file is shorter than
      // BlockSize; it returns the number of bytes actually read.
      bytesRead := sourceStream.Read(PAnsiChar(Buffer)^, BlockSize);
      // Drop the unused tail so it is neither searched nor written out.
      SetLength(Buffer, bytesRead);

      for i := Low(Replacements) to High(Replacements) do
        Buffer := StringReplace(Buffer, Replacements[i, 0], Replacements[i, 1],
          [rfReplaceAll]);

      // WriteBuffer raises an exception if not all bytes could be written.
      if Length(Buffer) > 0 then
        targetStream.WriteBuffer(PAnsiChar(Buffer)^, Length(Buffer));
    finally
      targetStream.Free;
    end;
  finally
    sourceStream.Free;
  end;
John Boe
  • 3,501
  • 10
  • 37
  • 71