0

I'm implementing a max binary heap in C++ and it seems to work fine.

However, when I'm repeating poll operation (delMax method in my code) until the heap if empty it takes way too long, it's longer than building the heap itself, in case of 10^7 elements it's around 6 seconds while I was told it should take almost no time.

Moreover, between printing heap summary and printing removing time I'm getting "Childless element!" printed twice (from leftChild and rightChild methods) but I have no idea where that comes from as those methods have not been called there.

My code:

  #include "pch.h"
  #include <iostream>
  #include "time.h"
  #include <random>
  #include <string>
  #include <functional>
  #include <vector>
 using namespace std;
 template <typename T>
 class BinaryHeap;

template <typename T>
class BinaryHeap
{
vector <T> heap;
int getParent(int childInd)
{
    if (heap.size() == 0)
    {
        return -1;
    }
    //int parent = 0;
    int parent = floor((childInd - 1) / 2);
    if (heap.size() <= childInd)
    {
        cout << "No such element!" << endl;
        return -1;
    }
    else
        return parent;
}
int leftChild(int parentInd)
{
    int left = floor(2 * parentInd + 1);
    if (heap.size() <= parentInd)
    {
        cout << "Childless element!" << endl;
        return -1;
    }
    else
        return left;
}
int rightChild(int parentInd)
{
    int right = (2 * parentInd + 2);
    if (heap.size() <= parentInd)
    {
        cout << "Childless element!" << endl;
        return -1;
    }
    else
        return right;
}
void heapifydown(int index)
{
    int left = leftChild(index);
    int right = rightChild(index);
    int largest = index;
    if (heap.size() > left&&heap[left] > heap[index])
    {
        largest = left;
    }
    if (heap.size() > right&& heap[right] > heap[largest])
    {
        largest = right;
    }
    if (largest != index)
    {
        int temp = heap[index];
        heap[index] = heap[largest];
        heap[largest] = temp;
        heapifydown(largest);
    }
}
void heapifyup(int index)
{
    int p = getParent(index);
    if (heap.size() > index&& heap[p] < heap[index])
    {
        int temp = heap[index];
        heap[index] = heap[p];
        heap[p] = temp;
        heapifyup(p);
    }
}

public:
BinaryHeap() {};
~BinaryHeap()
{
    heap.clear();
}
void addElement(T el)
{
    heap.push_back(el);
    int index = heap.size() - 1;
    heapifyup(index);
}
void printHeap()
{
    if (heap.size() == 0)
    {
        cout << "Empty heap!" << endl;
        return;
    }
    for (int i = 0; i < heap.size(); i++)
    {

        cout << heap[i] << endl;
    }
}
T delMax()
{
    if (heap.size() == 0)
    {
        return -1;
    }
    T temp = heap[0];
    T temp2 = heap[0];
    heap[0] = heap.at(heap.size()-1);
    heap.pop_back();
    //T parent = 0;
    //T child = 2 * parent + 2;
    heapifydown(0);
    return temp;
    //delete &temp;
}
void clearHeap()
{
    for (int i = heap.size(); i > 0; --i)
    {
        heap.pop_back();
    }
}
void print()
{
    if (heap.size() == 0)
    {
        cout << "Empty heap!" << endl;
        return;
    }
    cout << "Size: " << heap.size()<<endl;
    cout << " Root: " << heap[0] << endl;
    cout << "L. child: " << heap[1] << endl;
    cout << "R. Child: " << heap[2] << endl;
    cout << "3 last elements: " << heap[heap.size()-3] <<
    " "<<heap[heap.size() - 2]<<
    " "<<heap[heap.size() - 1]<<endl;
}
};
int randomInt()
{
static default_random_engine generator{ 10 };
static uniform_int_distribution<int> rozklad{ 0, 11000000 };
static function<int()> los{ bind(rozklad, generator) };
int l = los();
return l;
}
int main()
{
BinaryHeap<int>*bh = new BinaryHeap<int>();
const int MAX_ORDER = 7; // maksymalny rzad wielkosci dodawanych danych
for (int i = 1; i <= MAX_ORDER; i++)
{
    const int n = pow(10, i);
    clock_t start1 = clock();
    for (int i = 0; i < n; i++)
    {
        int el = randomInt();
        bh->addElement(el);
    }
    clock_t stop1 = clock();
    double adding_time = (stop1 - start1) / (double)CLOCKS_PER_SEC;
    cout << "Adding time: " << adding_time << "s" << endl;
    bh->print();
    clock_t start2 = clock();
    for (int j = 0; j < n; j++)
    {
        int out = bh->delMax();
        //cout << "WyciagniEty: "<<bh->delMax()<<endl;
        //delete &out;
    }
    clock_t stop2 = clock();
    double polling_time = (stop2 - start2) / (double)CLOCKS_PER_SEC;
    cout << "Removing time: " << polling_time << "s" << endl;
    bh->print();
    bh->clearHeap();
}
delete bh;
return 0;
}
greenbanzai
  • 79
  • 2
  • 7
  • Make sure to build your code with optimizations *enabled* before benchmarking performance. – Jesper Juhl Nov 27 '19 at 17:24
  • @JesperJuhl What does that mean? I'm not an experienced programmer, it's a task for school and it's required that polling until the heap is empty takes no time. – greenbanzai Nov 27 '19 at 17:26
  • @Jurek To enable optimization in Visual Studio? Go to the menu, "Build", "Configuration Manager", and set the "Active solution configuration" to "Release". If you're not using Visual Studio then google it I suppose. – user253751 Nov 27 '19 at 18:02
  • @Jurek it means that you should make sure to tell your compiler to optimize the code when building it. How to do that varies from compiler to compiler. For GCC and clang, add the `-O2` or `-O3` option, for Visual Studio ask for a "release" build. Most/all compilers produce unoptimized binaries by default, and those can be terribly slow (but can provide a better debugging experience). – Jesper Juhl Nov 27 '19 at 18:09
  • Oh, that's what you mean, I didn't mention that but I am running my program on 'release' build. – greenbanzai Nov 27 '19 at 18:32
  • In general, it will take longer to remove 10^7 elements from the heap than it will to add 10^7 elements. How long does adding take? For your other problem, the "Childless element" message, you could simply add a breakpoint to that line in the debugger, and then examine the stack trace when the breakpoint is hit. Don't know how to use the debugger? Now is the perfect time to learn. – Jim Mischel Nov 27 '19 at 20:37
  • Also, your `leftChild` function uses `floor(2*parentInd+1)`. No need for `floor` there. And in your `getParent` function, there's no need for `floor`. Integer division automatically truncates. – Jim Mischel Nov 27 '19 at 20:43
  • Note also that running a "release" build from within the Visual Studio IDE (typically by pressing F5) can be slow because the debugger is attached. You have to "run without debugging." – Jim Mischel Nov 27 '19 at 20:47
  • I did a little research and I learned that deleting elements is usually a little longer. After some changes in the code I managed to removing time to around 4,8 seconds from 6,5. That is true I'm not good at using the debugger and I will put some effort to learn it. Thank you all for valuable tips! – greenbanzai Nov 27 '19 at 20:51

1 Answers1

1

The "Childless element" message can occur in the leftChild and rightChild functions when the parent index is less than or equal to the heap size. From examining your code, I'd say it happens when you try to delete the last node in the heap. Consider your delMax function (I've removed the dead and commented-out code):

T delMax()
{
    if (heap.size() == 0)
    {
        return -1;
    }
    T temp = heap[0];
    heap[0] = heap.at(heap.size()-1);
    heap.pop_back();
    heapifydown(0);
    return temp;
}

When the heap size is 1, then the call to heap.pop_back() removes the last element and the size is now 0. You then call heapifydown(0).

heapifydown calls leftChild and rightChild, and both of them contain this code:

if (heap.size() <= parentInd)
{
    cout << "Childless element!" << endl;
    return -1;
}
else

heap.size() returns 0, because the heap is empty. And parentInd is 0 because that's the value of the parameter you passed. Thus, "Childless element" is printed.

I'm kind of surprised that the code even works, because both leftChild and rightChild are returning -1, and heapifyDown doesn't check the return value. As a result, the code to compute the largest child will report that heap.size() > left, and it will compare heap[-1] with heap[0], and could potentially corrupt your program's memory heap. It depends on what's in memory just before the start of your array.

My recommendation would be to modify your leftChild function to, simply:

return (parentInd * 2) + 1;

And make a similar modification to rightChild.

In your heapifyDown function, return if the left child is beyond the heap's bounds:

void heapifydown(int index) {
    int left;
    int right;
    int largest = index;

    left = leftChild(index);
    if (heap.size() <= left) {
        // left child index is outside range of the heap.
        // node has no children, so return.
        return;
    }

    if (heap[left] > heap[index]) {
        largest = left;
    }

    right = rightChild(index);
    if (heap.size() > right && heap[right] > heap[largest])
    {
        largest = right;
    }
    if (largest != index)
    {
        int temp = heap[index];
        heap[index] = heap[largest];
        heap[largest] = temp;
        heapifydown(largest);
    }
}
Jim Mischel
  • 131,090
  • 20
  • 188
  • 351