0

I want to remove a determined amount of random elements from a vector while preserving element order. I wrote this code for that purpose and it works well when I run it for small vectors but when I run it for large ones (1000 total elements removing 200 random elements) it doesn't seem to work properly.

Could anyone give me a kick in the right direction?

#include<iostream>
#include<cmath>
#include<stdio.h>
#include<stdlib.h>
#include<fstream>
#include<string>
#include<iomanip>
#include<vector>
#include "mersenne.cpp"
#include "userintf.cpp"
#include "stocc.h"
#include "stoc1.cpp"
#include<time.h>
#include <algorithm>
#include "./Mersenne-1.1/MersenneTwister.h"



MTRand mtrand1;

using namespace std ;

int main() 
{
    vector<string> stable ;


    stable.push_back("CCAAAATCAACTCCTCGAGGAAGTAAATGCGATGGCTGTGTTACAGCGTGTATCGCGTCATGTCCTTGTTGCTGTAATTTCCACTGTCAGGACGATGAAAGCGCCGGGACGAAGGGCCATCAGGGGCTACTCCAGACCGACGAGTTCCCTCTCTGCCAGAAAATATGTTGTGGTGCGAGTTTTAACATACACTGCGGGACCAGCAAGCCG") ;
    stable.push_back("CCAAAATCAACTCCTCGAGGAAGTAAATGCGATGGCTGTGTTACAGCGTGTATCGCGTCATGTCCTTGTTGCTGTAATTTCCACTGTCAGGACGATGAAAGCGCCGGGACGAAGGGCCATCAGGGGCTACTCCAGACCGATGAGTTCCCTCTCTGCCAGAAAATATGTTGTGGTGCGAGTTTTAACATACACTGCGGGACCAGCAAGCCA") ;
    stable.push_back("CCAAAATCAACTCCTCGAGGAAGTAAATGCGATGGCTGTGTTACAGCGTGTATCGCGTCATGTCCTTGTTGCTGTAATTTCCACTGTCAGGACGATGAAAGCGCCGGGACGAAGGGCCATCAGGGGCTACTCCAGACCGACGAGTTCCCTCTCTGCCAGAAAATATGTTGTGGTGCGAGTTTTAACATACACTGCGGGACCAGCAAGCCA") ;
    stable.push_back("CCAAAATCAACTCCTCGAGGAAGTAAATGCGATGGCTGTGTTACAGCGTGTATCGCGTCATGTCCTTGTTGCTGTAATTTCCACTGTCAGGACGATGAAAGCGCCGGGACGAAGGGCCATCAGGGGCTACTCCAGACCGACGAGTTCCCTCTCTGCCAGAAAATATGTTGTGGTGCGAGTTTTAACATACACTGCGGGACCAGCAAGCCA") ;
    stable.push_back("CCAAAATCAACTCCTCGAGGAAGTAAATGCGATGGCTGTGTTACAGCGTGTATCGCGTCATGTCCTTGTTGCTGTAATTTCCACTGTCAGGACGATGAAAGCGCCGGGACGAAGGGCCATCAGGGGCTACTCCAGACCGACGAGTTCCCTCTCTGCCAGAAAATATGTTGTGGTGCTAGTTTTAACATACACTGCGGGACCAGCAAGCCA") ;
    stable.push_back("CCAAAATCAACTCCTCGAGGAAGTAAATGCGATGGCTGTGTTACAGCGTGTATCGCGTCATGTCCTTGTTGCTGTAATTTCCACTGTCAGGACGATGAAAGCGCCGGGACGAAGGGCCATCAGGGGCTACTCCAGACCGACGAGTTCCCTCTCTGCCAGAAAATATGTTGTGGTGCTAGTTTTAACATACACTGCGGGACCAGCAAGCCA") ;
    stable.push_back("CCAAAATCAACTCCTCGAGGAAGTAAATGCGATGGCTGTGTTACAGCGTGTATCGCGTCATGTCCTTGTTGCTGTAATTTCCACTGTCAGGACGATGAAAGCGCCGGGACGAAGGGCCATCAGGGGCTACTCCAGACCGACGAGTTCCCTCTCTGCCAGAAAATATGTTGTGGTGCGAGTTTTAACATACACTGCGGGACCAGCAAGCCA") ; 
    stable.push_back("CCAAAATCAACTCCTCGAGGAAGTAAATGCGATGGCTGTGTTACAGCGTGTATCGCGTCATGTCCTTGTTGCTGTAATTTCCACTGTCAGGACGATGAAAGCGCCGGGACGAAGGGCCATCAGGGGCTACTCCAGACCGACGAGTTCCCTCTCTGCCAGAAAATATGTTGTGGTGCGAGTTTTAACATACACTGCGGGACCAGCAAGCCA") ;
    stable.push_back("CCAAAATCAACTCCTCGAGGAAGTAAATGCGATGGCTGTGTTACAGCGTGTATCGCGTCATGTCCTTGTTGCTGTAATTTCCACTGTCAGGACGATGAAAGCGCCGGGACGAAGGGCCATCAGTGGCTACTCCAGACCGACGAGTTCCCTCTCTGCCGGAAAATATGTCGTGGTGCGAGTTTTAACATACACTGCGGGACCAGCAAGCCA") ;
    stable.push_back("CCAAAATCAACTCCTCGAGGAAGTAAATGCGATGGCTGTGTTACAGCGTGTATCGCGTCATGTCCTTGTTGCTGTAATTTCCACTGTCAGGACGATGAAAGCGCCGGGACGAAGGGCCATCAGGGGCTACTCCAGACCGACGAGTTCCCTCTCTGCCAGAAAATATGTTGTGGTGCGAGTTTTAACATACACTGCGGGACCAGCAAGCCA") ;


////////////////////////////////////////////////////////////



    vector<int> dict ;//Remembers random values

    dict.push_back( mtrand1.randInt( 9 ) ) ;

    int dummy = 0 ;

    bool found = false ;

    int counter = 0 ;

    int randomvalue ;

    while( counter < 5 )
    {               
        dummy = dict.size() ;

        found = false ;

        randomvalue = mtrand1.randInt( 9 ) ;    

        for ( int j = 0 ; j < dummy ; j++ )
        {
            if ( dict[j] == randomvalue )
            {
                found = true ;

                break ;
            }
        }

        if(!found)
        {           
            dict.push_back( randomvalue ) ;

            stable[randomvalue] = "flag" ;      

            counter++ ; 
        }       
    }

    stable.erase( remove( stable.begin(), stable.end(), "flag" ), stable.end() );



/////////////////////////////////////////////////////////

cout << "This is the new stable array: " << endl ;

for( int i = 0 ; i < stable.size() ; i++ )
{
    cout << stable[i] << endl ; 
}

return 0;

}
  • 4
    What do you mean by "doesn't seem to work properly"? – j_random_hacker Oct 29 '13 at 01:21
  • 1
    What does `mtrand1.randInt( 9 )` return? – Beta Oct 29 '13 at 01:28
  • @j_random_hacker This code is part of a much larger program. The program makes a mutational robustness calculation after 200 elements get deleted. This process is repeated 500 times. This quantity is supposed to change over time but with this code it remains constant over these 500 iterations. – user2171927 Oct 29 '13 at 02:08
  • @Beta It returns a random number from 0 to 9. – user2171927 Oct 29 '13 at 02:09
  • If you need to remove random elements why not remove from 200 random positions in the vector array? – Rohit Vipin Mathews Oct 29 '13 at 04:24
  • It seems almost certain that the problem is in your random number generator, `MTRand`, which is in the code you haven't shown us. Each instance of `MTRand` is producing *the same sequence* of random numbers. If this is the case (and it's easy enough to test), you must either figure out how to initialize the different instances with different seeds, or construct one instance outside the loop and use it in all 500 iterations. – Beta Oct 29 '13 at 04:38
  • This isn't an answer to your question, and you might already know this, but a std::vector allocates a contiguous block of memory meaning that erasing from the vector will often result in a reallocation of memory and relocation of the items that aren't erased. If you need to delete from the middle of the collection a lot, a list might serve you better. – sbaker Oct 29 '13 at 05:36

1 Answers1

2

I would suggest using the algorithm described in Programming Pearls for this problem (Algorithm S from Knuth's Seminumerical Algorithms). The idea is to select elements in order with probability s/r where s is the number remaining to select and r is the number of elements remaining. This selects m elements from n in order with every element having an equal chance of being selected.

This implementation uses copy_if to copy selected elements to a new vector. This will likely usually be more efficient than trying to remove elements from the original vector as you avoid all of the moving down of elements within the vector when you erase. You could use move_iterators with C++11 if you don't need to preserve the original vector to avoid additional element copies.

#include <algorithm>
#include <iostream>
#include <iterator>
#include <random>
#include <string>
#include <vector>

using namespace std;

template<typename I1, typename I2, typename Engine>
I2 copyRandomM(I1 first, I1 last, I2 dest, int m, Engine& eng) {
    int n = distance(first, last);
    return copy_if(first, last, dest, [&](decltype(*first)) { 
        return uniform_int_distribution<>(0, --n)(eng) < m ? --m, true : false; });
}

int main() {
    mt19937 engine;
    auto v = vector<string>{ "orange", "apple", "banana", "pear", "kiwi", "tangerine" };
    vector<string> selection(4);
    copyRandomM(begin(v), end(v), begin(selection), selection.size(), engine);
    copy(begin(selection), end(selection), ostream_iterator<string>(cout, " "));
}
mattnewport
  • 13,728
  • 2
  • 35
  • 39