0

I'm experimenting with a recursive function to do wildcard expansion. The asterisk, '*', can expand to 0-n characters.

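Example: the pattern '*b*' can expand to 'bb*' (the first asterisk replaced by 'b'); removing the remaining asterisk leaves 'bb'. So far, so good. But the pattern '*b*' can also expand to '*bb' (the second asterisk replaced by 'b'); removing the remaining asterisk again leaves 'bb'. The same word is therefore generated twice, which creates a duplicate.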

My question is whether there is something fundamentally wrong with my recursive code. My goal is for the expansion not to create duplicates. The recursive code below is the bare minimum, and I've added code that makes it easy to see the unique strings and the duplicated ones.

#include <stdio.h>
#include <string>
#include <vector>
#include <algorithm>

using namespace std;

// Forward declarations
// WildcardExpansion: expands the '*' wildcards in `key` and appends the generated
// words to g_words. Note: the definition names the second parameter `charPos`.
void WildcardExpansion(const char* key, int nodeLevel);    
// SeparateDuplicates: sorts `vec`, removes repeated entries from it while recording
// them in `dup`, and returns dup.size().
size_t SeparateDuplicates(vector<string>& vec, vector<string>& dup);

// Global variables (the length limits are assigned in main before the expansion runs)
int g_maxWordLen;        // Keys with more non-asterisk characters than this are abandoned
int g_minWordLen;        // Completed words with fewer characters than this are not stored
vector <string> g_words; // Generated words here

int main()
{
    // Only words of exactly two characters are wanted.
    g_minWordLen = 2;
    g_maxWordLen = 2;

    // Fill g_words from the pattern, then pull the repeated entries out of it.
    WildcardExpansion("*b*", 0);
    std::vector<std::string> repeated;
    SeparateDuplicates(g_words, repeated);

    // Report the distinct words first ...
    printf("Unique count: %d\n", static_cast<int>(g_words.size()));
    for (const std::string& word : g_words)
        printf("%s\n", word.c_str());

    // ... followed by the ones that were generated more than once.
    printf("Duplicate count: %d\n", static_cast<int>(repeated.size()));
    for (const std::string& word : repeated)
        printf("%s\n", word.c_str());

    return 0;
}
////////////////////////////////////////////////////////////////////////
// Recursive function to do wildcard expansion
// Key can contain one or more wildcards, '*' but not in sequence
// (** is equal to * but is not handled here)
//
void WildcardExpansion(const char* key, int charPos)
{
    int letter;
    int keyLen;
    int astCount;
    char c;
    char* p;
    char keyX[20];

    strcpy(keyX, key);
    astCount = 0;
    p = keyX;
    while (*p)                                // Count asterisks
        if (*p++ == '*')
            astCount++;
    keyLen = (int)strlen(keyX) - astCount;    // Letter count
    if (keyLen > g_maxWordLen)
        return;                               // Too long word

    do // while c 
    {
        c = key[charPos];
        switch (c)
        {
        case '*':         // -> key[nodeLevel] == '*'
        {
            //
            // Remove one asterisk
            //
            strcpy(keyX + charPos, key + charPos + 1);
            WildcardExpansion(keyX, charPos);            // Recurs same level
            strcpy(keyX, key);                           // Copy original with wildcard back for replacement below
            //
            // Replace * with letter a-z
             // *b* -> bb* -> bb AND *b* -> *bb -> bb => Duplicates!
            //
            for (letter = 0; letter < 26; letter++)
            {
                keyX[charPos] = ('a' + letter);                // Replace * with letter
                strcpy(keyX + charPos + 1, key + charPos);     // Expanded: abc -> abc* 
                WildcardExpansion(keyX, charPos + 1);          // Recurs next level
            }
            return;
        } // *
        case '\0':    // Found a complete word without wildcards
        {
            if (keyLen < g_minWordLen)
                return;
            g_words.push_back(key);
            break;
        }
        default:    // Dive deeper
        {
            charPos++;
        }
        } // switch
    } while (c);
}
///////////////////////////////////////////////////////////////////////
// Helper function that splits a word list into distinct and repeated words
//
// On return `vec` is sorted and holds each word once; every additional
// occurrence of a word has been appended to `dup`, in sorted order.
// Returns the resulting size of `dup`.
//
size_t SeparateDuplicates(vector<string>& vec, vector<string>& dup)
{
    std::sort(vec.begin(), vec.end());

    std::vector<std::string> distinct;
    distinct.reserve(vec.size());

    // After sorting, equal words are neighbours: a word that equals the last
    // one kept is a repeat, anything else starts a new run.
    for (const std::string& word : vec)
    {
        const bool repeat = !distinct.empty() && distinct.back() == word;
        if (repeat)
            dup.push_back(word);
        else
            distinct.push_back(word);
    }

    vec.swap(distinct);
    return dup.size();
}

Input "*b*", output length 2. Observed output:

Unique count: 51
ab
ba
bb
bc
bd
be
bf
bg
bh
bi
bj
bk
bl
bm
bn
bo
bp
bq
br
bs
bt
bu
bv
bw
bx
by
bz
cb
db
eb
fb
gb
hb
ib
jb
kb
lb
mb
nb
ob
pb
qb
rb
sb
tb
ub
vb
wb
xb
yb
zb
Duplicate count: 1
bb
  • Use a `std::set` instead of a vector for the words? – Some programmer dude Oct 22 '20 at 10:48
  • 2
    I don't understand the expansion rules - mainly because you don't give them. – bolov Oct 22 '20 at 10:55
  • Isn't this problem solved by always filling in asterisks from left to right? – Botje Oct 22 '20 at 10:56
  • 1
    And when you used your debugger to run your program, what did you see? This is precisely what a debugger is for. If you don't know how to use a debugger this is a good opportunity to learn how to use it to run your program one line at a time, monitor all variables and their values as they change, and analyse your program's logical execution flow. Knowing how to use a debugger is a required skill for every C++ developer, no exceptions. With your debugger's help you should be able to quickly find all problems in this and all future programs you write, without having to ask anyone for help. – Sam Varshavchik Oct 22 '20 at 10:58
  • "My question is if there is something fundamentally wrong with my recursive code". I don't understand: does your code work? If it does questions about code review are off topic; if it doesn't then you need to show exactly what the problem is complete with observed output vs expected output – bolov Oct 22 '20 at 11:00
  • Added observed output to the original question – user11698637 Oct 22 '20 at 11:55

0 Answers0