Why does mergesort perform better than quicksort as input size grows?

Question

I'm writing some data structures in C and I thought I would benchmark mergesort vs quicksort. The following code is part of a larger code base so it's missing some functions but it is self contained should it should compile and run.

#include <time.h>
#include <stdio.h>
#include <stdlib.h>

double execution_time;
clock_t start, end;

typedef struct {
    double qs_time;
    double ms_time;
} Tuple;

typedef struct vector {
    int* vec;
    int len;
    int cap;
} Vector;

Vector* ds_new_vector() {
    Vector* new_vec = malloc(sizeof(Vector));
    new_vec->vec =  malloc(1024 * sizeof(int));
    new_vec->len = 0;
    new_vec->cap = 1024;
    return new_vec;
}

static void double_vec_cap(Vector* vec) {
    int* new_ptr = realloc(vec->vec, (sizeof(int) * (u_int64_t) vec->cap * 2));
    if (new_ptr == NULL) {
        printf("Error: realloc failed in vector_double_vec_cap\n");
    }
    else {
        vec->vec = new_ptr;
        vec->cap *= 2;
    }
    
    return;
}

void vector_push(Vector* vec, int x) {
    if (vec == NULL) {
        vec = ds_new_vector();
    } else if (vec->cap == vec->len) {
        double_vec_cap(vec);
    }
    vec->vec[vec->len] = x;
    vec->len++;
    return;
}

void vector_print(Vector* vec) {
    printf("[");
    for (int i = 0; i < vec->len - 1; i++) {
        printf("%d, ", vec->vec[i]);
    }
    printf("%d]\n", vec->vec[vec->len - 1]);
    return;
}

void vector_print_partial(Vector* vec) {
    if (vec->len <= 10) {
        vector_print(vec);
        return;
    }
    printf("[");
    for (int i = 0; i < 5; i++) {
        printf("%d, ", vec->vec[i]);
    }
    printf("... , ");
    for (int i = vec->len - 5; i < vec->len - 1; i++) {
        printf("%d, ", vec->vec[i]);
    }
    printf("%d]\n", vec->vec[vec->len - 1]);
    return;
}

void vector_destroy(Vector* vec) {
    free(vec->vec);
    vec->vec = NULL;
    free(vec);
    vec = NULL;
    return;
}

static int* merge(int* left_arr, int left_arr_len, int* right_arr, int right_arr_len) {
    int* result = malloc(sizeof(int) * (u_int64_t) (left_arr_len + right_arr_len));
    int i = 0; int l = 0; int r = 0;

    while (l < left_arr_len && r < right_arr_len) {
        if (left_arr[l] <= right_arr[r]) {
            result[i] = left_arr[l];
            i++; l++;
        } else {
            result[i] = right_arr[r];
            i++; r++;
        }
    }
    while (l < left_arr_len) {
        result[i] = left_arr[l];
        i++; l++;
    }
    while (r < right_arr_len) {
        result[i] = right_arr[r];
        i++; r++;
    }  

    free(left_arr);
    left_arr = NULL;
    free(right_arr);
    right_arr = NULL;
    return result; 
}

static int* ds_mergesort(int* arr, int length) {
    if (length <= 1) return arr;
    int mid = length / 2;
    
    int* left_arr = malloc(sizeof(int) * (u_int64_t) mid);
    int* right_arr = malloc(sizeof(int) * (u_int64_t) (length - mid));
    int j = 0;
    for (int i = 0; i < length; i++) {
        if (i < mid) {
            left_arr[i] = arr[i];
        } else {
            right_arr[j] = arr[i];
            j++;
        }
    }
    free(arr);
    arr = NULL;
    left_arr = ds_mergesort(left_arr, mid);
    right_arr = ds_mergesort(right_arr, length - mid);
    
    return merge(left_arr, mid, right_arr, (length - mid));
}

void sort_vector_mergesort(Vector* vec) {
    vec->vec = ds_mergesort(vec->vec, vec->len);
    return;
}

static void quicksort(int arr[], int left, int right) {
    if (right < left) return;
    int pivot = arr[right];
    int i = left - 1;
    for (int j = left; j < right; j++) {
        if (arr[j] < pivot) {
            i++;
            int temp = arr[j];
            arr[j] = arr[i];
            arr[i] = temp;
        }
    }
    i++;
    int temp = arr[i];
    arr[i] = arr[right];
    arr[right] = temp;
    quicksort(arr, left, i - 1);
    quicksort(arr, i + 1, right);
}

void sort_vector_quicksort(Vector* vec) {
    quicksort(vec->vec, 0, vec->len - 1);
    return;
}

static double test_mergesort(Vector* vec) {
    start = clock();
    sort_vector_mergesort(vec);
    end = clock();
    execution_time = ((double)(end - start))/CLOCKS_PER_SEC;
    return execution_time;
}

static double test_quicksort(Vector* vec) {
    start = clock();
    sort_vector_quicksort(vec);
    end = clock();
    execution_time = ((double)(end - start))/CLOCKS_PER_SEC;
    return execution_time;
}

static void test_exponential_sort(Tuple* t, int size) {
    Vector* vec1 = ds_new_vector();
    Vector* vec2 = ds_new_vector();
    srand((u_int32_t) time(NULL));
    int num;
    for (int i = 0; i < size; i++) {
        num = rand() % 1000;
        vector_push(vec1, num);
        vector_push(vec2, num);
    }
    t->ms_time = test_mergesort(vec1);
    t->qs_time = test_quicksort(vec2);
    vector_destroy(vec1);
    vector_destroy(vec2);
}

int main () {
    Tuple* t = malloc(sizeof(Tuple));
    printf("\nSorting Exponetially larger vectors\n\n");
    for (int i = 1024; i < 10000000; i = i * 2) {
        test_exponential_sort(t, i);
        printf("Vector size: %d\n", i);
        printf("Mergesort Time: %fs  Quicksort Time: %fs\n", t->ms_time, t->qs_time);
        if (t->qs_time > t->ms_time) {
            printf("Mergesort was faster than Quicksort by: %lfs\n", t->qs_time - t->ms_time);
        } else {
            printf("Quicksort was faster than Mergesort by: %lfs\n", t->ms_time - t->qs_time);
        }
        printf("----------------------------------------------------\n\n");
    }
    free(t);
}

I would think that because quicksort does its sorting in place, that it would always perform faster than mergesort since the latter would have to deal with memory allocation in addition to the sorting, and that is the case until the "vector" size gets to be about 500,000. Here are the results I got.

Sorting Exponetially larger vectors

Vector size: 1024
Mergesort Time: 0.000318s  Quicksort Time: 0.000062s
Quicksort was faster than Mergesort by: 0.000256s
----------------------------------------------------

Vector size: 2048
Mergesort Time: 0.000638s  Quicksort Time: 0.000127s
Quicksort was faster than Mergesort by: 0.000511s
----------------------------------------------------

Vector size: 4096
Mergesort Time: 0.001377s  Quicksort Time: 0.000265s
Quicksort was faster than Mergesort by: 0.001112s
----------------------------------------------------

Vector size: 8192
Mergesort Time: 0.003064s  Quicksort Time: 0.000539s
Quicksort was faster than Mergesort by: 0.002525s
----------------------------------------------------

Vector size: 16384
Mergesort Time: 0.005424s  Quicksort Time: 0.001347s
Quicksort was faster than Mergesort by: 0.004077s
----------------------------------------------------

Vector size: 32768
Mergesort Time: 0.010996s  Quicksort Time: 0.002865s
Quicksort was faster than Mergesort by: 0.008131s
----------------------------------------------------

Vector size: 65536
Mergesort Time: 0.022966s  Quicksort Time: 0.007522s
Quicksort was faster than Mergesort by: 0.015444s
----------------------------------------------------

Vector size: 131072
Mergesort Time: 0.045921s  Quicksort Time: 0.021228s
Quicksort was faster than Mergesort by: 0.024693s
----------------------------------------------------

Vector size: 262144
Mergesort Time: 0.098435s  Quicksort Time: 0.067185s
Quicksort was faster than Mergesort by: 0.031250s
----------------------------------------------------

Vector size: 524288
Mergesort Time: 0.186068s  Quicksort Time: 0.230357s
Mergesort was faster than Quicksort by: 0.044289s
----------------------------------------------------

Vector size: 1048576
Mergesort Time: 0.377109s  Quicksort Time: 0.853521s
Mergesort was faster than Quicksort by: 0.476412s
----------------------------------------------------

Vector size: 2097152
Mergesort Time: 0.765805s  Quicksort Time: 3.259530s
Mergesort was faster than Quicksort by: 2.493725s
----------------------------------------------------

Vector size: 4194304
Mergesort Time: 1.534298s  Quicksort Time: 12.558161s
Mergesort was faster than Quicksort by: 11.023863s
----------------------------------------------------

Vector size: 8388608
Mergesort Time: 3.118347s  Quicksort Time: 48.325201s
Mergesort was faster than Quicksort by: 45.206854s
----------------------------------------------------

Quicksort ended up taking much longer to sort an array of size 8,388,608 in place vs mergesort. Is there something to do with memory caching that I'm not aware of? Any thoughts on this or how I've implemented these functions would be appreciated. I did try using different pivots for quicksort, picking an index at random, medium of three and just choosing the last index. Picking the last index seemed to be the most effective presumably because the numbers in the array are all random.

What happens if the initial size is large enough to hold all the data, without reallocation? — Old Dog Programmer, Apr 07 '23 at 23:13
Where do the values to be sorted come from? Are they randomly generated? — Old Dog Programmer, Apr 07 '23 at 23:13
@OldDogProgrammer If the initial size is large enough doesn't seem to make a difference. Marginally worse. ```Mergesort Time: 3.022910s Quicksort Time: 49.164170s``` — estevao, Apr 07 '23 at 23:21
@pmacfarlane Yes, I have verified that both algorithms do in fact sort correctly and yes the the numbers are being randomly generated using ```srand((u_int32_t) time(NULL));``` then ```vector_push(vec1, rand() % 1000);``` — estevao, Apr 07 '23 at 23:24
Then can you edit your question to contain a complete program we can compile and run? — pmacfarlane, Apr 07 '23 at 23:26
Does not look like a regular quicksort to me. I don't have the energy to look at your mergesort but I doubt it is better. — pmacfarlane, Apr 07 '23 at 23:49
@pmacfarlane I edited the question so it has the necessary code that you can run on your machine. — estevao, Apr 08 '23 at 01:43
mergesort is _always_ exactly O(n log2(n)) complexity, regardless of the data. quicksort can be [slightly] better than that, but [IIRC] it can be much worse: O(n^2), depending upon the data and the pivot point chosen. See: https://www.baeldung.com/cs/quicksort-time-complexity-worst-case — Craig Estey, Apr 08 '23 at 02:05
There must be something subtle wrong with your quicksort. I'm suspicious of the pivot selection. If I substitute a different one [here](https://gist.github.com/gene-ressler/4e6c61cfe59ee6183f0fd3b76a9540bc) with some standard bad performance mitigations, it wins every time. On the final example it's almost 8x faster. — Gene, Apr 08 '23 at 05:10
@Gene I posted an answer bellow with a better pivot selection and indeed it runs much faster. In the final example it finished in less than 1 second — estevao, Apr 08 '23 at 05:40
You should also eliminate nuance of content if you're algorithm-comparing. Ex: your generator loop should be building *identical* vectors by picking *one* random value on each iteration and pushing it into *both* vectors, not two random values with one push each. — WhozCraig, Apr 08 '23 at 16:22
@WhozCraig That's a good point. I edited the code to reflect that. — estevao, Apr 08 '23 at 16:53
Your `mergesort` can also significantly improved by simply preallocating the O(n) space, and passing it down along for the ride with `arr`. [Example here](https://pastebin.com/ANrHSB69). And, of course, compiole at `-O2` or better release-level optimizations. — WhozCraig, Apr 08 '23 at 18:17
@estevao Yeah for the last, mine finished in 0.54 seconds on my 2013 MacBook. — Gene, Apr 11 '23 at 01:27
@WhozCraig Yes! A lot. With the qsort link above and mergsort (sort2) [here](https://gist.github.com/gene-ressler/21c8a28979f5445a33d0db5277b6d28b), we get on my 2013 MacBook: `Vector size: 8388608 Mergesort Time: 0.885303s Quicksort Time: 0.512540s` — Gene, Apr 11 '23 at 01:39

score 0 · Answer 1 · answered Apr 08 '23 at 04:59

As @CraigEstey pointed out choosing a bad pivot point can cause quicksort to perform in O(n^2) in its worst case. This new updated quicksort uses the middle element as the pivot and it performs better than mergesort in all cases

static void quicksort(int arr[], int left, int right) {
    int mid = (left + right) / 2;
    int pivot = arr[mid];
    int i = left;
    int j = right;

    while (i <= j) {
        while (arr[i] < pivot) i++;
        while (arr[j] > pivot) j--;
        if (i <= j) {
            int temp = arr[i];
            arr[i] = arr[j];
            arr[j] = temp;
            i++;
            j--;
        }
    }

    if (left < j)
        quicksort(arr, left, j);
    if (i < right)
        quicksort(arr, i, right);
}

Here's the result for an array of size 8,388,608:

Vector size: 8388608
Mergesort Time: 2.964791s  Quicksort Time: 0.621795s
Quicksort was faster than Mergesort by: 2.342996s
----------------------------------------------------

Quicksort went from taking about 48s to less than 1s
I found this implementation here:
https://www.algolist.net/Algorithms/Sorting/Quicksort

A 4 way merge sort using nested if|else instead of a minheap is about as fast as quicksort. The third example code in [this answer](https://stackoverflow.com/a/34845789/3282056) is a 4 way merge sort. — rcgldr, Apr 08 '23 at 09:09

Why does mergesort perform better than quicksort as input size grows?

1 Answers1