I'm working on implementing a few different sorting methods and for some reason my merge sort algorithm will not work on large data sets. The sort will work for 115,000 words but stops working when it reaches 135,000 words. Once I get this high I end up getting a segmentation fault. I do not understand where the seg fault is coming from. The sort works successfully for text files containing 5K to 125K strings.
The readFile
array gets initialized with the number of words in the text file. When debugging it seems like the last numbers that get passed into the mergeSort()
function are the following:
#0 0x0000000000402a87 in merge (inputString=0x7fffffbde790, from=0, mid=67499, to=134999) at mergeSort.cpp:102
n1 = 67500
n2 = 67500
i = 0
j = 0
k = 32767
L = <error reading variable L (value requires 2160000 bytes, which is more than max-value-size)>
R = <error reading variable R (value requires 2160000 bytes, which is more than max-value-size)>
#1 0x0000000000402921 in mergeSort (inputString=0x7fffffbde790, from=0, to=134999) at mergeSort.cpp:88
mid = 67499
void mergeSort(string readFile[], int from, int to) {
if (from < to) {
int mid = from + (to - from) / 2;
mergeSort(readFile, from, mid);
mergeSort(readFile, mid + 1, to);
merge(readFile, from, mid, to);
}
}
void merge(string readFile[], int from, int mid, int to) {
int n1 = mid - from + 1;
int n2 = to - mid;
string L[n1];
string R[n2];
for (int i = 0; i < n1; i++) {
L[i] = readFile[from + i];
}
for (int i = 0; i < n2; i++) {
R[i] = readFile[mid + i + 1];
}
int i = 0;
int j = 0;
int k = from;
while (i < n1 && j < n2) {
if (L[i] <= R[j]) {
readFile[k] = L[i];
i++;
} else {
readFile[k] = R[j];
j++;
}
k++;
}
while (i < n1) {
readFile[k] = L[i];
i++;
k++;
}
while (j < n2) {
readFile[k] = R[j];
j++;
k++;
}
}