I want to modify an recursive function from an 'ternary search tree' library (sourceforge & http://code.google.com/p/ternary-search-tree/). The default behaviour is to search in an ternary search tree for all occurences of strings that match an specified wildcard string. i.e. having 'KEY', 'KE1', 'KE2' in the tree would find all entrys if I search for 'KE*'. But I need the opposite behaviour - search in an ternary search tree (which contains wildcards) all entrys that match an specified string. i.e. having 'KE*', 'KEY', 'K*' in the tree should find all entrys if I search for 'KEY'.
A tree/node is defined as following:
typedef struct TstNode {
TstNode( char c ) : splitChar(c), left(0), right(0), mid(0){
}
char splitChar;
TstTree left, right;
union {
TstTree mid;
int index;
};
} tstNode;
And the function with the default behaviour:
template <class Object>
void TernarySearchTree<Object>::partialMatchSearch(TstTree tree, const char *key)
{
if (!tree) return;
// partial match left
if (*key == '?' || *key == '*' || *key < tree->splitChar)
{
partialMatchSearch( tree->left, key );
}
// partial match middle
if (*key == '?' || *key == '*' || *key == tree->splitChar)
{
if ( tree->splitChar && *key )
{
if ( *key == '*' )
{
partialMatchSearch( tree->mid, key );
}
else
{
partialMatchSearch( tree->mid, key+1 ); // search next pattern char
}
}
}
if ( ( *key == 0 || *key == '*' ) && tree->splitChar == 0 )
{
pmVectorPtr->add( tree->index );
}
if (*key == '?' || *key == '*' || *key > tree->splitChar)
{
partialMatchSearch( tree->right, key );
}
}
pmVectorPtr is an Pointer to an Vector of int's and the function get's called with the root-element and the searchkey as argument. I already tried to adapt that, but can't get my head around it yet. My own code:
template <class Object>
void TernarySearchTree<Object>::partialMatchSearchInverted(TstTree tree, const char *key)
{
if (!tree) return;
if((tree->splitChar == '*') && ( *key != 0 )){
partialMatchSearchInverted( tree, key+1 );
}
if( *key != 0 ){
if (*key < tree->splitChar){
partialMatchSearchInverted( tree->left, key );
}
if (*key > tree->splitChar){
partialMatchSearchInverted( tree->right, key );
}
}
if ((*key == tree->splitChar) || (tree->splitChar == '*')){
if ( tree->splitChar || *key ){
partialMatchSearchInverted( tree->mid, key+1 ); // search next pattern char
}
}
if ( ( *key == 0 ) && ( tree->splitChar == 0 ) ){
pmVectorPtr->add( tree->index );
}
}
I've coded this with extensive use of the debugger and as far as I can tell, it 'seems' to work (even if the wildcard is at beginning or mid of a string). BUT if I add to example: 'K*' and 'Ke*' to the tree, it would find only one solution (in this case Ke*) for 'Key'. If I remove 'Ke*' from the tree, it finds 'K*' for an search query of 'Key'. I still don't get why.
Any ideas about that?
Appendix (my testcase):
#include <iostream>
#include "ternarySearchTree/ternarySearchTree.h"
int main(int argc, char *argv[]){
TernarySearchTree<string> tst;
Vector< TstItem<String> > itemVector;
{
TstItem<String> item( "Ke*", "Value" );
itemVector.add( item );
}
{
TstItem<String> item( "K*", "Value" );
itemVector.add( item );
}
{
TstItem<String> item( "Ka*", "Value" );
itemVector.add( item );
}
tst.buildBalancedTree(itemVector);
Vector<int> matches = tst.partialMatchSearchInverted("Key");// will only find Ke* (wrong - it should find Ke* and K*), if I remove Ke* above it would find K* (right), if I remove that also it would find nothing (also right)
for (unsigned j=0;j<matches.count();j++)
{
std::cout<<"Matching: "<< tst.getKey( matches[j] ) <<" - "<< tst.getValue( matches[j] )->c_str()<<std::endl;
}
std::cout<<"total matches "<< matches.count()<<std::endl;
return 0;
}