I am creating a C++ transform function for the HP Vertica database. I want to read data from 2 columns from 2 different tables and store them in 2 vectors. The function should then output the number of occurrences of each element of one vector in the second one. The problem is that these columns do not have the same number of rows. When I added logging to my code, I found that it was reading input many more times than the table size. Here is the code I am using.
/**
 * Processes one partition of a two-column string input.
 *
 * Every row's column-0 value is collected as a search term; every non-NULL
 * column-1 value is collected as a candidate. After the whole partition has
 * been read, one integer row is written per search term (in read order)
 * holding the number of candidates that contain that term as a substring.
 *
 * @param srvInterface  Used for progress logging.
 * @param inputReader   Source of the partition's rows; must expose exactly
 *                      two columns, otherwise an error is reported.
 * @param outputWriter  Receives one integer (column 0) per search term.
 *
 * Any std::exception raised while reading or writing is reported through
 * vt_report_error instead of being swallowed, so counts are never emitted
 * from partially-read input.
 */
virtual void processPartition(ServerInterface &srvInterface,
                              PartitionReader &inputReader,
                              PartitionWriter &outputWriter)
{
    try {
        if (inputReader.getNumCols() != 2)
            vt_report_error(0, "Function only accepts 2 argument, but %zu provided", inputReader.getNumCols());

        std::vector<std::string> vectcomb;  // search terms (column 0)
        std::vector<std::string> vectrule;  // candidates (non-NULL column 1)

        // Phase 1: drain the partition into the two vectors.
        int a = 0;
        do {
            srvInterface.log("[occurence] data load %d ", a);
            a++;

            // NOTE(review): column 0 is read without a NULL check, exactly as
            // before; confirm how getStringRef(0).str() behaves on a NULL
            // value and whether such rows should be skipped.
            vectcomb.emplace_back(inputReader.getStringRef(0).str());

            // A NULL in column 1 contributes no candidate.
            if (!inputReader.isNull(1))
                vectrule.emplace_back(inputReader.getStringRef(1).str());
        } while (inputReader.next());

        // Phase 2: for each search term, count the candidates containing it.
        int i = 0;
        for (const std::string &comb : vectcomb) {
            srvInterface.log("[occurence] test %d ", i);
            ++i;

            int occ = 0;
            for (const std::string &rule : vectrule) {
                if (rule.find(comb) != std::string::npos)
                    ++occ;
            }

            outputWriter.setInt(0, occ);
            outputWriter.next();
        }
    } catch (const std::exception &e) {
        // Standard exception. Quit.
        vt_report_error(0, "Exception while processing partition: [%s]", e.what());
    }
}
};