-2

I'm trying to implement a divide and conquer closest points algorithm. As standard as it gets, yet my head is about to explode, because my code seems to (randomly) give incorrect answers. I wrote a random number generator using stl for testing purposes and the error I keep coming across is that every few runs the algorithm returns a pair that is clearly farther apart than the closest pair (separated by 1 unit of distance, which I've input manually).

Please forgive the global variables, but this is the 3rd time I've rewritten this and I just felt it was slightly easier to work with. Pastebin link for those who like to see more on their screens: http://pastebin.com/93dtj81z

[EDIT] The incorrect values seem to be coming from the BruteCP function... I think... This is giving me a major headache...

#include <vector>
#include <algorithm>
#include <cmath>
#include <iostream>
#include <ctime>
#include <random>

using namespace std;
using point = pair<int, int>;
double MAX = 1000000000.0;
double GLOBAL_minDist = MAX;
pair<point, point> GLOBAL_nearestPoints;

bool Xcmp ( const point &c1, const point &c2 ) {
  return ( c1.first < c2.first );
}

bool Ycmp ( const point &c1, const point &c2 ) {
  return ( c1.second < c2.second );
}

inline ostream& operator<< ( ostream& os, const point& p ) {
  return os << p.first << " " << p.second << "\n";
}

inline ostream& operator<< ( ostream& os, const vector<point> &points ) {
  for( auto itr = points.begin(); itr < points.end(); itr++ ) {
    os << *itr;
  }
  return os;
}

inline ostream& operator<< ( ostream& os, const pair<point, point> nearestPair ) {
  return os << static_cast<int> (nearestPair.second.first) << " " << static_cast<int> (nearestPair.second.second) << "\n"
            << static_cast<int> (nearestPair.first.first) << " " << static_cast<int> (nearestPair.first.second);
}

inline double distance( const point a, const point b ) {
  return sqrt( pow(( a.first - b.first ), 2 ) + pow( a.second - b.second, 2 ));
}

void bruteCP( const vector<point> &Xs ) {
  for( auto it = Xs.begin(); it < Xs.end() - 1; it++ ) {
    for( auto it2 = it + 1; it2 < Xs.end(); it2++ ) {
      double minDist = distance( *it, *it2 );
      if( minDist < GLOBAL_minDist ) {
        cout << minDist << "\n";
        GLOBAL_minDist = minDist;
        GLOBAL_nearestPoints = pair<point, point> ( *it, *it2 );
      }
    }
  }
}

void divConCP( const vector<point>& Xs, const vector<point>& Ys ) {
  int Xsize = Xs.size();
  if( Xsize <= 3 ) { bruteCP( Xs ); return; }

  int mid =  Xsize / 2;
  int median = Xs[mid].first;

  vector<point> leftYs;
  copy_if( Ys.begin(), Ys.end(), back_inserter(leftYs), [median](const point& point) 
          {return point.first <= median;} );
  vector<point>leftXs;
  leftXs.insert( leftXs.end(), Xs.begin(), Xs.begin() + mid );
  divConCP( leftXs, leftYs );

  vector<point> rightYs, rightXs;
  copy_if( Ys.begin(), Ys.end(), back_inserter(leftYs), [median](const point& point) 
          {return point.first > median;} );
  rightXs.insert( rightXs.end(), Xs.begin() + mid, Xs.end() );
  divConCP( rightXs, rightYs );

  vector<point> strip;
  copy_if( Ys.begin(), Ys.end(), back_inserter(strip), [median, GLOBAL_minDist](const point& point) 
          {return abs(point.first - median) < GLOBAL_minDist;} );

  //vector<point> uniqStrip;
  //unique_copy( strip.begin(), strip.end(), uniqStrip.begin() );

  for( auto itr = strip.begin(); itr < strip.end(); itr++ ) {
    for( auto itr2 = itr + 1; itr2 < strip.end() && (*itr2).second < GLOBAL_minDist; itr2++ ) {
      double minDist = distance( *itr, *itr2 );
      if( minDist < GLOBAL_minDist) {
        //cout << minDist << "\n";
        //cout << *itr << " " << *itr2 << "\n";
        GLOBAL_minDist = minDist;
        GLOBAL_nearestPoints = pair<point, point> ( *itr, *itr2 );
      }
    }
  }
}

int main() {
  int n, x, y;
  vector<point> Xs, Ys;
/*
  cin >> n;
  for( int i = 0; i < n; i++ ) {
    cin >> x;
    cin >> y;
   // x = -i;
   // y = -i;

    point xy( x, y );
    Xs.push_back( xy );
    Ys.push_back( xy );
  }
*/
    // DEBUG //

  n = 100000;
  srand(time(0));
  std::default_random_engine gen(time(0));
  std::uniform_int_distribution<int> dis(-20000000, 20000000);
  for( int i = 0; i < n - 2; i++ ) {
    x = dis( gen );
    y = dis( gen );
    //x = i;
    //y = i;
    point xy( x, y );
    Xs.push_back( xy );
    Ys.push_back( xy );
  }

    Xs.push_back( point( 20001, 20001 ));
    Ys.push_back( point( 20001, 20001 ));
    Xs.push_back( point( 20000, 20001 ));
    Ys.push_back( point( 20000, 20001 ));

  // DEBUG //

  sort( Xs.begin(), Xs.end(), Xcmp );
  sort( Ys.begin(), Ys.end(), Ycmp );

  divConCP( Xs, Ys );
  //bruteCP( Xs );
  cout << GLOBAL_minDist << "\n";
  cout << GLOBAL_nearestPoints << "\n";

}
A.F.K.
  • 69
  • 7
  • 2
    Suggestion: In order to make errors reproducible, you could output the value actually used as PRNG seed. Accepting a certain seed as optional argument to the program then helps you once you know a faulty seed. – Ulrich Eckhardt Apr 20 '15 at 17:59
  • Thanks! Using 1 as the seed produces a bad result :) – A.F.K. Apr 20 '15 at 18:03
  • Why does `divConCP` take **two** vectors? There's only one set of points that you're investigating. Why doesn't `divConCP` return a number? Your implementation does not resemble the recursive closest points algorithm that I'm familiar with... – Barry Apr 20 '15 at 18:06
  • The trick to getting O(n*log(n)) complexity is to keep 2 presorted vectors - one by x values, the other by y values. When the time comes to find the closes pair in a strip around a median I don't have to sort the (x-sorted) values by y coordinate again to check the 7 nearest neighbors in the ordering. all I'm doing is "chopping" up an already presorted vector. I'm not sure if I've made it very clear... – A.F.K. Apr 20 '15 at 18:11
  • @A.F.K. You don't sort *all* the points by Y-values... just *some* of the points *after* the recursive step gives you the minimum distances on the two halves. Which I don't see how you can compute since `divConCP` is `void`. – Barry Apr 20 '15 at 18:16
  • I am a beginner, but I didn't realize my code was _that_ unreadable. in main I sort both vectors before passing them to the function. The function is void because it modifies 2 global variables - minimum encountered distance, and the pair separated by that distance. Sorting both vectors before calling divConCP costs c*n*log(n). In the recursion I filter the y sorted vector in such a way that only points to the left (or right) of my midpoint are left and because the vector was sorted by y coordinate it doesn't need to be sorted again. – A.F.K. Apr 20 '15 at 18:27
  • Once the program gets to the loop at the bottom of divConCP all it has to do is to check each point in the filtered y vector against 7 next ones in that vector, exactly the way it usually does. – A.F.K. Apr 20 '15 at 18:28
  • if you think its n^2 run it and compare it to BruteCP running on the same data. 100 000 points get crunched in about 0.05 seconds by divConCP and about 20 seconds by bruteCP. – A.F.K. Apr 20 '15 at 18:30
  • @Barry I think I might have misunderstood you. I guess I could forget about the y sorted vector and just keep passing the x sorted vector all the way to the bottom of the recursion and, once there, sort the inplace_sort and then inplace_merge appropriate ranges... Correct? That is a nice observation, but I would still like to know just why my current code doesn't work. – A.F.K. Apr 20 '15 at 18:37

1 Answers1

0

I've fixed the problem by rewriting the program. This time I'm not creating a vector sorted by y coordinates, chopping it up and passing relevant pieces down into recursive calls. Instead, I start with an empty vector that gets passed all the way down to the base case of the recursion and there it gets filled, sorted and passed up where it gets merged with other vectors that have also been filled this way. Then, relevant points are selected from this vector to make the strip that we need to test for closest points that lie across a given median.

One last question, though - I kept getting segmentation faults in std::merge that I couldn't understand, so I wrote my own merging function which worked fine... Any ideas as to where these errors are coming from? To reproduce them all you have to do is replace:

Ys = mergee(leftYs, rightYs);

//merge(leftYs.begin(), leftYs.end(), rightYs.begin(), rightYs.end(), Ys.begin());

with:

//Ys = mergee(leftYs, rightYs);

merge(leftYs.begin(), leftYs.end(), rightYs.begin(), rightYs.end(), Ys.begin());

The code, with a random data generator for testing: http://pastebin.com/yQCNh2J9

I'd appreciate it if the people who have down-voted this at least left a comment explaining why.

#include <vector>
#include <algorithm>
#include <cmath>
#include <iostream>
#include <ctime>
#include <random>

using namespace std;
using point = pair<int, int>;

bool Xcmp ( const point &c1, const point &c2 ) {
  return ( c1.first < c2.first );
}

bool Ycmp ( const point &c1, const point &c2 ) {
  return ( c1.second < c2.second );
}

inline ostream& operator<< ( ostream& os, const point& p ) {
  return os << p.first << " " << p.second << "\n";
}

inline ostream& operator<< ( ostream& os, const vector<point> &points ) {
  for( auto itr = points.begin(); itr < points.end(); itr++ ) {
    os << *itr;
  }
  return os;
}

inline ostream& operator<< ( ostream& os, const pair<point, point> nearestPair ) {
  return os << nearestPair.second.first << " " << nearestPair.second.second << "\n"
            << nearestPair.first.first << " " <<  nearestPair.first.second;
}

inline double distance( const point a, const point b ) {
  return sqrt( pow(( a.first - b.first ), 2 ) + pow( a.second - b.second, 2 ));
}

pair<pair<point, point>, double> bruteCP( const vector<point> &Xs, const int l, const int r ) {
  pair<point, point> nearestPair;
  double tempDist = 1000000000.0;
  for( int i = l; i < r; i++ ) {
    for( int j = i + 1; j <= r; j++ ) {
      double minDist = distance( Xs[i], Xs[j] );
      if( minDist < tempDist ) {
        tempDist = minDist;
        nearestPair = pair<point, point> ( Xs[i], Xs[j] );
      }
    }
  }
  return pair<pair<point, point>, double> (nearestPair, tempDist);
}

vector<point> makeStrip( const vector<point> Ys, int median, double minDist ) {
  vector<point> strip;
  for( auto it = Ys.begin(); it < Ys.end(); it++ )
    if( abs((*it).first - median) < minDist ) {
      strip.push_back( *it );
    }
    return strip;
}

vector<point> mergee(const vector<point> left, const vector<point> right) {
  vector<point> result;

  auto leftIterator = left.begin();
  auto rightIterator = right.begin();

  while(leftIterator != left.end() && rightIterator != right.end()) {
    if((*leftIterator).second < (*rightIterator).second) {
        result.push_back(*leftIterator);
        ++leftIterator;
    }
    else {
        result.push_back(*rightIterator);
        ++rightIterator;
    }
  }
  while(leftIterator != left.end()) {
    result.push_back(*leftIterator);
    ++leftIterator;
  }
  while(rightIterator != right.end()) {
    result.push_back(*rightIterator);
    ++rightIterator;
  }
  return result;
}

pair<pair<point, point>, double>  divConCP( const vector<point> &Xs, vector<point> &Ys, const int l, const int r ) {
  int Xsize = r - l + 1;
  if( Xsize <= 3 ) {
    for( int i = l; i <= r; i++ ) {
      Ys.push_back( Xs[i] );
    }
    sort( Ys.begin(), Ys.end(), Ycmp );
    return bruteCP( Xs, l, r );
  }

  int mid =  l - 1 + (r - l + 1) / 2;
  int median = Xs[mid].first;

  vector<point> leftYs;
  vector<point> rightYs;

  auto leftClosest = divConCP( Xs, leftYs, l, mid );
  auto rightClosest = divConCP( Xs, rightYs, mid + 1, r );

  auto lrClosest = leftClosest.second < rightClosest.second ? leftClosest : rightClosest;

  Ys = mergee(leftYs, rightYs);

  //merge(leftYs.begin(), leftYs.end(), rightYs.begin(), rightYs.end(), Ys.begin());

  auto strip = makeStrip( Ys, median, lrClosest.second );

  for( auto itr = strip.begin(); itr < strip.end(); itr++ ) {
    for( auto itr2 = itr + 1; itr2 < strip.end() && itr2 < itr + 8; itr2++ ) {
      double tempDist = distance( *itr, *itr2 );
      if( tempDist < lrClosest.second ) {
        //cout << minDist << "\n";
        //cout << *itr << " " << *itr2 << "\n";
        lrClosest.second = tempDist;
        lrClosest.first = pair<point, point> ( *itr, *itr2 );
      }
    }
  }
  return lrClosest;
}

int main() {
  int n, x, y, MAX = 1000000000;
  vector<point> Xs, Ys;
  pair<int, int> minPair( -MAX, -MAX );
  Xs.push_back(minPair);
  //Ys.push_back(minPair);

  cin >> n;
  for( int i = 1; i <= n; i++ ) {
    cin >> x;
    cin >> y;
   // x = -i;
   // y = -i;

    point xy( x, y );
    Xs.push_back( xy );
    //Ys.push_back( xy );
  }

    // DEBUG //
/*
   n = 1000000;
   srand(time(0));
   std::default_random_engine gen(time(0));
   std::uniform_int_distribution<int> dis(-20000000, 20000000);
   for( int i = 0; i < n - 2; i++ ) {
     x = dis( gen );
     y = dis( gen );
     //x = i;
     //y = i;
     point xy( x, y );
     Xs.push_back( xy );
     //Ys.push_back( xy );
   }

     Xs.push_back( point( 20001, 20001 ));
     //Ys.push_back( point( 20001, 20001 ));
     Xs.push_back( point( 20000, 20001 ));
     //Ys.push_back( point( 20000, 20001 ));
*/
  // DEBUG //

  sort( Xs.begin(), Xs.end(), Xcmp );
  //sort( Ys.begin(), Ys.end(), Ycmp );

  auto p = divConCP( Xs, Ys, 1, n );
  cout << p.first;
  //bruteCP( Xs );
  return 0;
}
A.F.K.
  • 69
  • 7