
I have a record type and a dynamic array made up of that record type. I pass it to a mergesort routine and try to set one of its fields, a Boolean, to True, but the change does not seem to take effect.

I looked into sorting an array of records by other means (see this quicksort for a custom record array: http://en.wikibooks.org/wiki/Algorithm_Implementation/Sorting/Quicksort#Delphi) and here: Best way to sort an array (I could not get any of those suggestions to work, mostly because of the difficulty of creating a comparing function). This question: Sorting of Arrays Alphabetically? was helpful and works, but that sort is excruciatingly slow.

CODE:

type    
       TCustomRecord = Record
        fLine     : AnsiString; //full line
        fsubLine     : AnsiString; // part of full line
        isDuplicate : boolean;  //is that subline duplicate in another line
        isRefrence     : boolean; // is this line from a reference file or the one being deduped
        fIndex    : Cardinal; // original order line was loaded
       end;
      TCustomRecordArray = array of TCustomRecord; 

function Merge2(var Vals: array of TCustomRecord ):Integer;
var
  AVals: array of TCustomRecord;

   //returns index of the last valid element
  function Merge(I0, I1, J0, J1: Integer):Integer;
  var
    i, j, k, LC:Integer;
  begin
    LC := I1 - I0;
    for i := 0 to LC do
      AVals[i]:=Vals[i + I0];
      //copy lower half of Vals into temporary array AVals

    k := I0;
    i := 0;
    j := J0;
    while ((i <= LC) and (j <= J1)) do
    if (AVals[i].fsubLine < Vals[j].fsubLine) then
    begin
      Vals[k] := AVals[i];
      if Vals[k].isRefrence = False then
        Vals[k].isDuplicate := False;
      inc(i);
      inc(k);
    end
    else if (AVals[i].fsubLine > Vals[j].fsubLine) then
    begin
      Vals[k]:=Vals[j];
      if Vals[k].isRefrence = False then
        Vals[k].isDuplicate := False;
      inc(k);
      inc(j);
    end else
    begin //duplicate
      Vals[k] := AVals[i];
      if Vals[k].isRefrence = False then
        Vals[k].isDuplicate := True;
      inc(i);
      inc(j);
      inc(k);
    end;

    //copy the rest
    while i <= LC do begin
      Vals[k] := AVals[i];
      inc(i);
      inc(k);
    end;

    if k <> j then
      while j <= J1 do begin
        Vals[k]:=Vals[j];
        inc(k);
        inc(j);
      end;

    Result := k - 1;
  end;

 //returns index of the last valid element

  function PerformMergeSort(ALo, AHi:Integer): Integer; //returns
  var
    AMid, I1, J1:Integer;
  begin

  //It would be wise to use Insertion Sort when (AHi - ALo) is small (about 32-100)
    if (ALo < AHi) then
    begin
      AMid:=(ALo + AHi) shr 1;
      I1 := PerformMergeSort(ALo, AMid);
      J1 := PerformMergeSort(AMid + 1, AHi);
      Result := Merge(ALo, I1, AMid + 1, J1);
    end else
      Result := ALo;
  end;

begin
  //SetLength(AVals, Length(Vals) + 1 div 2);
  SetLength(AVals, Length(Vals) div 2 + 1);
  Result := 1 + PerformMergeSort(0, High(Vals));
end;

QUESTION: How can I sort this array of records efficiently, preferably using mergesort, and set some of its fields according to that sort? Thank you.

UPDATE: I added a pointer type and ran a modified mergesort on an array of pointers. This turned out to be a very fast way of sorting the array of records. I also added a compare routine which sets the flags I need. The only part I am not able to do is to add a flag for duplicates based on whether they belonged to file A or to the reference file.

CODE:

    type    
          PCustomRecord = ^TCustomRecord; 
          TCustomRecord = Record
            fLine     : AnsiString; //full line
            fsubLine  : AnsiString; // part of full line
            isDuplicate : boolean;  //is that subline duplicate in another line
            isRefrence     : boolean; // line from a reference file or the one being deduped
            isUnique  : boolean; //flag to set if not refrence and not dupe
            fIndex    : Cardinal; // original order line was loaded
           end;
          TCustomRecordArray = array of TCustomRecord;
          PCustomRecordList = ^TCustomRecordArray;

//set up actual array
//set up pointer array to point at actual array
//sort by mergesort first
// then call compare function - this can be a procedure obviously

function Compare(var PRecords: array of PCustomRecord; iLength: int64): Integer;
var
  i : Integer;
begin
  for i := 0 to High(PRecords) - 1 do // stop one short: PRecords[i+1] is accessed below
  begin
    Result := AnsiCompareStr(PRecords[i]^.fsubline, PRecords[i+1]^.fsubline);
    if Result=0 then
    begin
      if (PRecords[i].isrefrence = False) then
        PRecords[i].isduplicate := True
      else if (PRecords[i+1].isrefrence = False) then
        PRecords[i+1].isduplicate := True;
    end;
  end;
end; 

procedure MergeSort(var Vals:array of PCustomRecord;ACount:Integer);
var AVals:array of PCustomRecord;

  procedure Merge(ALo,AMid,AHi:Integer);
  var i,j,k,m:Integer;
  begin
    i:=0;
    for j:=ALo to AMid do
    begin
      AVals[i]:=Vals[j];
      inc(i);
      //copy lower half of Vals into temporary array AVals
    end;

    i:=0;j:=AMid + 1;k:=ALo;//j could be undefined after the for loop!
    while ((k < j) and (j <= AHi)) do
    if (AVals[i].fsubline) <= (Vals[j].fsubline) then
    begin
      Vals[k]:=AVals[i];
      inc(i);inc(k);
    end
    else if (AVals[i].fsubline) > (Vals[j].fsubline) then
    begin
      Vals[k]:=Vals[j];
      inc(k);inc(j);
    end;

    {locate next greatest value in Vals or AVals and copy it to the
     right position.}

    for m:=k to j - 1 do
    begin
      Vals[m]:=AVals[i];
      inc(i);
    end;
    //copy back any remaining, unsorted, elements
  end;

  procedure PerformMergeSort(ALo,AHi:Integer);
  var AMid:Integer;
  begin
    if (ALo < AHi) then
    begin
      AMid:=(ALo + AHi) shr 1;
      PerformMergeSort(ALo,AMid);
      PerformMergeSort(AMid + 1,AHi);
      Merge(ALo,AMid,AHi);
    end;
  end;

begin
  SetLength(AVals, ACount div 2 + 1);
  PerformMergeSort(0,ACount - 1);
end;
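
For reference, the driving code for those steps looks roughly like this (a simplified sketch, not the exact code from my program; the pointer setup appears again in EDIT 3 below):

var
  Custom_array: TCustomRecordArray;
  pcustomRecords: array of PCustomRecord;
  M: Integer;
begin
  // ... load Custom_array from the files ...
  SetLength(pcustomRecords, Length(Custom_array));
  for M := Low(Custom_array) to High(Custom_array) do
    pcustomRecords[M] := @Custom_array[M];            // point at the real records
  MergeSort(pcustomRecords, Length(pcustomRecords));  // sort only the pointers
  Compare(pcustomRecords, Length(pcustomRecords));    // walk the sorted order and set the flags
end;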

This is all very fast on small files, taking less than one second. Deduping the items in the array that carry a duplicate flag and NOT a reference flag is quite challenging, though. As mergesort is a stable sort I tried re-sorting by the boolean flag, but did not get what I expected. I used a TStringList to check whether my previous flags were being set correctly, and it works perfectly, but the time went up from 1 second to 6 seconds. I know there has to be an easy way to mark the isUnique flag without a TStringList.

Here is what I tried:

function DeDupe(var PRecords: array of PCustomRecord; iLength: int64): Integer;
var
  i : Integer;
begin
  for i := 0 to High(PRecords) do
  begin
    if (PRecords[i]^.isrefrence = False) and (PRecords[i+1]^.isrefrence = false)then
    begin
      Result := AnsiCompareStr(PRecords[i]^.isduplicate, PRecords[i+1]^.isduplicate);
      if Result = 0 then PRecords[i]^.isUnique := True;
    end
    else
    begin
      Continue;
    end;
  end;
end;

This doesn't get all the values and I did not see a difference with it as I still see lots of duplicates. I think the logic is wrong.

Thanks to all the great souls helping out. Please allow me the benefit of the doubt that I already know how to derive from TObject and how to use a TStringList, so that the focus stays on arrays.

QUESTION: Help me write a function or procedure, along the lines of the one above, that handles the repeated items (those with isRefrence = False and isDuplicate = True) and marks the unique ones.
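
In other words, given the flags already set by the compare pass, and going by the field comment for isUnique above, what I am after boils down to something like this (a sketch of the intent only, not code I have working):

procedure MarkUnique(var PRecords: array of PCustomRecord);
var
  i: Integer;
begin
  // a record that is neither from the reference file nor flagged as a
  // duplicate is the one to keep, so flag it as unique
  for i := 0 to High(PRecords) do
    if (not PRecords[i]^.isRefrence) and (not PRecords[i]^.isDuplicate) then
      PRecords[i]^.isUnique := True;
end;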

EDIT 3: I was able to achieve the elimination of duplicates through the use of boolean flags. This helped keep the array stable without changing its size. I believe it is much, much faster than using a TList descendant or a TStringList. The use of a basic container such as an array has limitations in ease of coding but is very efficient, so I would not pass on it. The pointers made the sorting a breeze. One thing I am not sure about: after I set the pointers to my array, I used the pointer array exactly like I use my regular array, and it made no difference whether I dereferenced it or not. I set up the pointer array as follows:

  iLength := Length(Custom_array); //get length of actual array
  SetLength(pcustomRecords, iLength); // make pointer array the same length as the actual array

  for M := Low(Custom_array) to High(Custom_array) do //set up pointers
  begin
    pcustomRecords[M] := @Custom_array[M]; 
  end;

I tried separating the sorting from the actual data being sorted as much as I could, but I'm sure there is room for improvement.

///////////////////////////////////////////////////////////////////
function Comparesubstring(Item1, Item2: PCustomRecord): Integer;
begin
  Result := AnsiCompareStr(item1^.fsubline, item2^.fsubline);
end;
///////////////////////////////////////////////////////////////////
function CompareLine(Item1, Item2: PCustomRecord): Integer;
begin
  Result := AnsiCompareStr(item1^.fLine, item2^.fLine);
end;
///////////////////////////////////////////////////////////////////
function Compare(var PRecords: array of PCustomRecord; iLength: int64): Integer;
var
  M, i : Integer;
begin
  M := Length(PRecords);
  for i := 1 to M-1 do
  begin
    Result := Comparesubstring(PRecords[i-1], PRecords[i]);
    if Result=0 then
    begin
      if (PRecords[i-1].isRefrence = False) then
        PRecords[i-1].isduplicate := True
      else if (PRecords[i].isRefrence = False) then
        PRecords[i].isduplicate := True;
    end;
  end;
end;
///////////////////////////////////////////////////////////////////
Merlin W.
  • What is `Length(Vals) + 1 shr 1` meant to do? Do you realise that `1 shr 1` is equal to `0`? – David Heffernan Oct 11 '12 at 12:59
  • I'd expect quicksort to be the fastest. Why are you using mergesort for in-memory data? Do you need a stable sort? – David Heffernan Oct 11 '12 at 13:00
  • @Arioch'The Part of it is that my data is greater than a few gigabytes at times. I use Mergesort in other routines in this program and it works incredibly well, even when I sort files larger than half of my available memory, without complaint. In the Quick Sort routine I was not sure where to set my property to true: `Vals[k].isDuplicate := true;`. – Merlin W. Oct 11 '12 at 13:02
  • @DavidHeffernan `Length(Vals) + 1 shr 1` is `Length(Vals) + 1 div 2`. I do need a sort that will take a large amount of data. By trial and error Mergesort does exactly this. I am using it to sort a 2.19 GB file with only 4GB of RAM and it works. – Merlin W. Oct 11 '12 at 13:06
  • Yes, but only because they are both equal to `Length(Vals)`. Because `1 div 2` is equal to `0`. Anyway, it is utterly crazy using `shr` to perform division. That results in unreadable code. For what possible gain? Why are you using shifts for arithmetic? – David Heffernan Oct 11 '12 at 13:08
  • @DavidHeffernan I apologize for my bit-shift math, and certainly `div 2` is fine, but it does not change this question or the results I'm getting LOL. – Merlin W. Oct 11 '12 at 13:11
  • Well perhaps you could remove the `1 shr 1` from the question since it equals `0` and is confusing. The question is really vague though. Personally I'd do what Arioch says and avoid copying these records all over the place. And I'd use quicksort. It will be quicker. Just because your quicksort doesn't work is not a good reason to abandon quicksort. The right approach is to use quicksort correctly. Fix your bugs rather than blaming the algo. – David Heffernan Oct 11 '12 at 13:14
  • Why the downvote? Why do people simply downvote without leaving a comment about why they did it? There is no similar question here dealing with mergesort and an array of records, and I did look for information on this before I asked. – Merlin W. Oct 11 '12 at 13:16
  • @DavidHeffernan it works as `shr 1` or `div 2` and in this case it is not equal to zero, but I changed it anyway. This is minor as I am focused on the sorting part. Please note that my code runs fine but I am not getting the results I expect, hence the question. – Merlin W. Oct 11 '12 at 13:20
  • For the love of god, both `N + 1 div 2` and `N + 1 shr 1` are equal to `N` – David Heffernan Oct 11 '12 at 13:28
  • @DavidHeffernan isn't this side-stepping my question, focusing on a piece that I have no problem with, did not ask about, and made a change to as you asked? My question is not about the math here, it is about sorting an array of records and setting some of its fields accordingly, preferably using mergesort. Am I missing something here? I do not really want to discuss math, and I will make any further changes you ask to make this question more readable for others seeking answers as well, so no problem there. Okay on this? – Merlin W. Oct 11 '12 at 13:35
  • I've given you an answer to the question. The rest was just trying to correct a misunderstanding that you have. You think that `N + 1 div 2` is equal to `(N+1) div 2` and I'm here to tell you that it is not so. – David Heffernan Oct 11 '12 at 13:38
  • @ALL LOL, you think this is school work? Well, I am flattered really, as this shows my level to be at least comparable to someone studying this :). I am not studying this; in fact I am self-taught and, as David will attest based on my prior code, `not very well` LOL. But this is the reason why I need concrete examples, as I don't know a lot of the background. My code is trial and error, but I have no deadlines and I do it for myself. Still, it is flattering if that is what you thought :) – Merlin W. Oct 11 '12 at 13:52
  • @David - regarding "shr 1" - i believe that Merlin either is showing us some TurboPascal-era code or made a naive (letter-to-letter, without understanding) porting of some ancient C code - code that could not rely on the compiler optimizing "div 2^x" constants, or that tried to demonstrate every optimization ever possible. – Arioch 'The Oct 11 '12 at 14:07
  • @Arioch'The no, the goal was to optimize, as I read that bit shifting is faster than dividing, but I just got it written wrong. David explained it well, although I must say I did not understand him at first; then this explained it: `N + 1 div 2` does not equal `(N + 1) div 2` – Merlin W. Oct 11 '12 at 15:00
  • Yes, shifts are faster, but 1) this may be just an unobservably minor part of execution time, "not worth the candles" and "80/20". 2) modern compilers are smart enough to replace `div 2` with `shr 1` for you. 3) modern CPUs usually either do the same optimization or have such a fast integer DIV that it makes no difference. But generally, if you were doing ASM number-crunching, then that would be faster. But if you like optimizations - google for 0x5f3759df and lose your marbles :-D – Arioch 'The Oct 11 '12 at 15:13

3 Answers


1) Do not copy data! Work with pointers. You should make a list/array of pointers to those data records and sort the pointers instead. After the sort is complete, just create a new array of data based on the pointer array. A pointer move is a single CPU instruction; SizeOf(your record) is >> SizeOf(pointer), and moving the records is MUCH slower.

2) Mergesort rocks on HUGE data amounts that do not fit into memory. If you have 10 gigabytes of data you cannot sort it in the 2GB of memory allowed for Win32 programs, so you have to sort it while it is on disk. That is the niche of Mergesort. Why not use ready-made QuickSort routines instead, if all your data is in memory?

So make a TList, fill it with PCustomRecord = ^TCustomRecord pointers, implement a proper comparison function, and call the well-tested quicksort via the TList.Sort method.

http://docwiki.embarcadero.com/CodeExamples/XE2/en/TListSort_(Delphi)

After the list is sorted, create and populate a new array of data. After that new array is created, free the list and discard the older source array.
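
Rough sketch of that TList approach, reusing the PCustomRecord type from your question (untested, just to show the shape of it):

uses
  Classes, SysUtils;

function CompareBySubLine(Item1, Item2: Pointer): Integer;
begin
  // TList.Sort hands us untyped pointers; cast them back
  Result := AnsiCompareStr(PCustomRecord(Item1)^.fsubLine,
                           PCustomRecord(Item2)^.fsubLine);
end;

procedure SortWithTList(var Data: TCustomRecordArray);
var
  List: TList;
  i: Integer;
begin
  List := TList.Create;
  try
    List.Capacity := Length(Data);
    for i := Low(Data) to High(Data) do
      List.Add(@Data[i]);          // store pointers, never copies of the records
    List.Sort(@CompareBySubLine);  // quicksort over the pointers
    // walk the list here in sorted order: set flags or build the new array
  finally
    List.Free;
  end;
end;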


If possible, check whether the data fits in memory. Only resort to on-disk sorting if memory is not enough. It would be slower, much slower.


I did it in school... Mergesort is not recursive. It is a VERY basic loop. I implemented it due to its simplicity. I still do not have a gut feeling for QuickSort to compare with.

In pseudocode it looks like

FrameSize := 1;
Loop start:
  Phase 1: splitting
     Loop until TempMergedDataFile is empty:
        Read record by record from TempMergedDataFile 
            and write each of them into TempSplitDataFile-1
            up to FrameSize times
        Read record by record from TempMergedDataFile 
            and write each of them into TempSplitDataFile-2
            up to FrameSize times
     Loop end
     Delete TempMergedDataFile 
  Phase 2: sorting-merging
     Loop until TempSplitDataFile-1 and TempSplitDataFile-2 are both empty:
        Read record by record from both TempSplitDataFile-1 and TempSplitDataFile-2
          up to FrameSize each (2xFrameSize in total in each iteration)
          write them sorted into TempMergedDataFile
     end loop
     delete TempSplitDataFile-1 and TempSplitDataFile-2
  Phase 3: update expectations
     FrameSize := FrameSize * 2
     if FrameSize > actual number of records - then exit loop, sort complete
End loop

Be careful with the Phase 2 implementation: compare against either the actual value, or nil if the frame has been exhausted by one of the files. Well, the idea is obvious and probably demoed somewhere; just be pedantic in this part. An FSM implementation would probably be a good fit.
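
A much-simplified sketch of that phase-2 comparison in Delphi: merging two already-sorted text files line by line, with the "one side ran out" case handled explicitly. Frame counting and the record fields are left out; it is only the merge skeleton (needs SysUtils for AnsiCompareStr):

procedure MergeTwoSortedFiles(const InName1, InName2, OutName: string);
var
  F1, F2, FOut: TextFile;
  S1, S2: string;
  Has1, Has2: Boolean;

  procedure Advance(var F: TextFile; var S: string; var Has: Boolean);
  begin
    Has := not Eof(F);
    if Has then
      ReadLn(F, S);
  end;

begin
  AssignFile(F1, InName1);   Reset(F1);
  AssignFile(F2, InName2);   Reset(F2);
  AssignFile(FOut, OutName); Rewrite(FOut);
  try
    Advance(F1, S1, Has1);
    Advance(F2, S2, Has2);
    while Has1 or Has2 do
      // take from file 1 when file 2 is exhausted or its line sorts later
      if Has1 and (not Has2 or (AnsiCompareStr(S1, S2) <= 0)) then
      begin
        WriteLn(FOut, S1);
        Advance(F1, S1, Has1);
      end
      else
      begin
        WriteLn(FOut, S2);
        Advance(F2, S2, Has2);
      end;
  finally
    CloseFile(F1);
    CloseFile(F2);
    CloseFile(FOut);
  end;
end;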

Obvious optimizations:

  1. place all files on different physical dedicated HDDs, so each HDD would be in linear reading/writing mode
  2. merge phase 1 and phase 2: make TempMergedDataFile virtual, actually consisting of TempSplitDataFile-3 and TempSplitDataFile-4. Split the data into next-size frames while you are writing into it.
  3. if SSDs or flash cards are used for storage, then the data copying would wear out the hardware. Better to sort some kind of "pointers" or "indexes" instead of the actual data. There is also a small chance that, while the full data frames exceed RAM, the mere "array of indexes" would fit in. However, with an actual HDD, without testing I'd rather stick with the naive "copy and copy and copy once again" approach.
Arioch 'The
  • +1 for the suggestion of working with pointers, or alternatively indices into the original array. – David Heffernan Oct 11 '12 at 13:11
  • @Arioch that page is empty, and I am dealing with GBs of data. Plus I'm not sure how to use the pointers at all for sorting. Do you have a step-by-step example? Maybe I can use pointers in mergesort? Thank you. – Merlin W. Oct 11 '12 at 13:13
  • @David - why should one use disk indices? That would cause head thrashing. Moving the actual data would give an almost linear read-write process, especially when multiple HDDs are used. However, both approaches should be tried. – Arioch 'The Oct 11 '12 at 13:24
  • @Arioch'The This is in memory. No thrashing here. – David Heffernan Oct 11 '12 at 13:26
  • @Arioch'The this page is empty http://docwiki.embarcadero.com/CodeExamples/XE2/en/TListSort_(Delphi) – Merlin W. Oct 11 '12 at 13:27
  • @David oh, i thought you meant indices like in database files – Arioch 'The Oct 11 '12 at 13:33
  • @Merlin - fix the URL - look at the browser address bar. Obviously StackOverflow missed the ending parenthesis - recover it. – Arioch 'The Oct 11 '12 at 13:34
  • @Arioch'The No, I meant the indirection technique as per my answer. It's the exact same idea as pointers I think. – David Heffernan Oct 11 '12 at 13:36
  • @David oh, pardon me. My asm background could not believe in the heresy of multiplying and adding on each data access :-D... And seriously, if the path is speed-critical, then it should probably be avoided. However it is not speed-critical since it uses external disks :-) – Arioch 'The Oct 11 '12 at 13:41
  • @Arioch'The Once your data is bigger than the CPU cache, then I guess indices vs pointers make little difference. But yes, it's clear that the pointer has fewer ops than indices. Now, there are no external disks here. OP is using in-memory sorting. – David Heffernan Oct 11 '12 at 13:44
  • Well, then he should just use TList.Sort - if this was not school homework. In the latter case asking on SO is a crime per se :-) – Arioch 'The Oct 11 '12 at 13:46
  • @David - see that comment below the question: "part of that is my data is greater than few gigabytes at times". So he'd better implement both strategies, in-memory and on-disk sorting. But well, the fact that his original code had no explicit "data fetch" places made me wonder as well. Those are the essence of MergeSort and they were completely missed there. Also there is something that looks like recursion there. I think the topic starter uses some mutation of QuickSort, while only *thinking* he uses MergeSort, erroneously. – Arioch 'The Oct 11 '12 at 13:47
  • @Arioch'The Yeah, I know. But at the moment OP is way, way away from on-disk. First of all he/she needs to get on top of a sane separation of sorting from data, as I say in my answer. Once we get that cracked, then let's move on to on-disk sorting. – David Heffernan Oct 11 '12 at 13:50
  • Oh! disregard the recursion remark, i was wrong. There is a Frankenstein monster of two merge algorithms there. Well, it makes sense performance-wise, but it is definitely beyond the topic starter's abilities. @MerlinW - implement the most basic MergeSort you can. Understand how it works. Then start with careful optimizations. Starting by analyzing the most fused, misty, hyper-optimized implementation is beyond your capabilities today. – Arioch 'The Oct 11 '12 at 13:53
  • @Arioch'The oh, no argument there, and agreed. I am implementing already available code, so I did not write this sort; I'm just trying to modify it so I get the results that I think `should occur`, but obviously there is a big disconnect between what I think should happen and what is actually happening. Hence the question ;) – Merlin W. Oct 11 '12 at 14:02
  • @Merlin why modify? That code is essentially in-memory. Implement your own. That way you would understand it, understand sorting. And you would have a clear separation of sorting, data loading/writing and comparison, from the ground up. Implement TList+pointer based sorting when your data is less than 50% of physical RAM and less than 1.5 GB (leave some room in the 2GB frame for other Delphi code and vars). Implement merge-sort via disks. Choose the one that is needed. – Arioch 'The Oct 11 '12 at 14:12
  • @Arioch'The why is it then that a `TStringList` will not work on a 2 GB file of strings, but `Mergesort` does without problem when I use it on an array of strings from that same 2 GB file? I understand the in-memory part, but somehow mergesort works and even a custom descendant of TList does not. I will look into mergesort from disk though, as it sounds like what I need. Separating the sort from the array manipulation seems like what I need to focus on, but I will truly need to read up on this as I don't know how to even start with it. – Merlin W. Oct 11 '12 at 15:05
  • Read some implementations, like the ones David pointed to, or the examples for TList.Sort, or search the RTL/VCL sources for how TList sorting is used there. You may even dare to get yourself http://en.wikipedia.org/wiki/The_Art_of_Computer_Programming :-) – Arioch 'The Oct 11 '12 at 15:15
  • TStringList is a more feature-rich beast than an array and keeps more in memory than merely the strings. Probably it exhausts memory earlier than a primitive basic structure like an array. – Arioch 'The Oct 11 '12 at 15:16

The first comment to make is that your basic design is very weak. You have mixed the sorting code and the compare/exchange code all together. If you ever need to sort different data, you'll have to start again. You need to decouple the sorting code from the code that understands the data.

The way to achieve that decoupling is to implement a generic sort routine that knows nothing about the data. Instead all it needs to know is how to compare two elements, and how to exchange two elements. All the common in-memory sorting routines can be implemented efficiently that way.

The other problem you have, I guess, is that your code will spend a lot of time copying the data around. Instead of doing that, use a layer of indirection. What I mean by that is that you should not attempt to modify the original array. Instead create an array of indices into the data array, and sort the array of indices rather than the array of data.

To give you an idea of that, here's how it might look:

var
  Data: array of TData;
  Indices: array of Integer;

function CompareIndices(Index1, Index2: Integer): Integer;
begin
  Result := CompareData(Data[Indices[Index1]], Data[Indices[Index2]]);
end;

procedure SwapIndices(Index1, Index2: Integer);
var
  Temp: Integer;
begin
  Temp := Indices[Index1];
  Indices[Index1] := Indices[Index2];
  Indices[Index2] := Temp;
end;

Then, in order to sort the array you do something like this:

N := Length(Data);
SetLength(Indices, N);
for i := 0 to high(Indices) do 
  Indices[i] := i;
Sort(CompareIndices, SwapIndices, N);

Or, as yet another alternative, instead of an array of indices, use an array of pointers to the elements of the data array.
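
That variant might look like this (a sketch; PData is an assumed ^TData pointer type and CompareData is as before):

type
  PData = ^TData;

var
  Items: array of PData;   // set Items[i] := @Data[i] before sorting

function ComparePointers(Index1, Index2: Integer): Integer;
begin
  // same contract as CompareIndices, one array lookup less per element
  Result := CompareData(Items[Index1]^, Items[Index2]^);
end;

procedure SwapPointers(Index1, Index2: Integer);
var
  Temp: PData;
begin
  Temp := Items[Index1];
  Items[Index1] := Items[Index2];
  Items[Index2] := Temp;
end;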

Now, I've used global variables here for the sake of clarity of exposition. In reality you'd likely want to wrap this up into a class, or at least make the compare and swap functions be methods of objects. That's how I did it in my Delphi 6 code base. The interface looked like this:

type
  TCompareIndicesFunction = function(Index1, Index2: Integer): Integer of object;
  TExchangeIndicesProcedure = procedure(Index1, Index2: Integer) of object;

procedure QuickSort(Compare: TCompareIndicesFunction; 
  Exchange: TExchangeIndicesProcedure; const Count: Integer);
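
The implementation body is not shown above, but one in the spirit of the classic Delphi RTL quicksort, driven purely through those two callbacks, might look like this (a sketch):

procedure QuickSort(Compare: TCompareIndicesFunction;
  Exchange: TExchangeIndicesProcedure; const Count: Integer);

  procedure Sort(L, R: Integer);
  var
    I, J, P: Integer;
  begin
    repeat
      I := L;
      J := R;
      P := (L + R) shr 1;
      repeat
        while Compare(I, P) < 0 do Inc(I);
        while Compare(J, P) > 0 do Dec(J);
        if I <= J then
        begin
          Exchange(I, J);
          // the pivot element may have been moved by the exchange
          if P = I then
            P := J
          else if P = J then
            P := I;
          Inc(I);
          Dec(J);
        end;
      until I > J;
      if L < J then Sort(L, J);
      L := I;
    until I >= R;
  end;

begin
  if Count > 1 then
    Sort(0, Count - 1);
end;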

Once you get on top of the concept of separating the sort algo from the data then you'll make some progress. It then becomes trivial to swap out one sorting algo for another. You can compare them easily. You can readily measure whether or not the indirection approach is worthwhile. And so on.

So, my absolute number one piece of advice for you is to throw away the code in the question and separate sorting from data handling, as nature intended.

David Heffernan
  • @DavidHeffernan thank you :) I like this advice greatly as it makes sense to me. Any good examples, that you know of, that have this done? – Merlin W. Oct 11 '12 at 13:53
  • You could do worse than study the sorting code in TurboPower's systools: http://tpsystools.svn.sourceforge.net/viewvc/tpsystools/trunk/source/StSort.pas?revision=8&view=markup You could also just use the sort method that `TList` provides. Only if you need on-disk sort would you need more than that. – David Heffernan Oct 11 '12 at 13:55
  • Definitely needed to separate. Especially since he sometimes has an in-memory sort and sometimes "more than a few GBs". Either separation and re-use, or copy-paste spaghetti code, unmaintainable. – Arioch 'The Oct 11 '12 at 13:56
  • David, note please that your CompareData should have const or var parameters, otherwise he'd end up with data copying again :-) – Arioch 'The Oct 11 '12 at 13:58
  • @Arioch'The Indeed. Those const parameters are present in the version of `CompareData` that exists only in my head! ;-) – David Heffernan Oct 11 '12 at 13:59
  • and you noted it in your mind again :-) i actually meant: note it in the text of the answer :-) Who would read those pesky comments in a year? :-D – Arioch 'The Oct 11 '12 at 14:56
  • This answer also helped my thinking about separating the sorting from the data. I tried to do this as much as I could. If the mergesort took generic fields of the record, that would have been the most optimal, but I was not able to do that part. Unfortunately I can only pick one answer, and in this case Arioch was closer to the solution I ended up using. – Merlin W. Oct 13 '12 at 05:41

Not answering the question directly, I know, but this assumes the data will fit in memory - which it seems to, as you are using an array.

I would dump all that, create some objects, and put them in a TObjectList. Sort using your own comparisons with TObjectList.Sort(@myComparefunction). You can have multiple sort routines declared. During the Sort function you can set other object properties at will. It's pretty quick and will save a lot of the pain you seem to be suffering :)
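
A minimal sketch of that idea (the class and field names here are illustrative, mirroring the record in the question):

uses
  Contnrs, SysUtils;

type
  TCustomItem = class
  public
    fLine: AnsiString;
    fsubLine: AnsiString;
    isDuplicate: Boolean;
    isRefrence: Boolean;
  end;

// TObjectList.Sort takes an ordinary TListSortCompare callback
function CompareBySubLine(Item1, Item2: Pointer): Integer;
begin
  Result := AnsiCompareStr(TCustomItem(Item1).fsubLine,
                           TCustomItem(Item2).fsubLine);
end;

procedure Demo;
var
  Items: TObjectList;
begin
  Items := TObjectList.Create(True); // the list owns and frees the objects
  try
    // ... create TCustomItem instances and Items.Add(...) them ...
    Items.Sort(@CompareBySubLine);
    // other properties can be set while walking the sorted list
  finally
    Items.Free;
  end;
end;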

Despatcher