1

Good afternoon. Here is a first cut at a singly linked list C++ class that caches precompiled PCRE regexes. We want it to run as fast as possible. Is it possible to optimize this first cut, which compiles and has been briefly tested? Thank you.

/*
 * cRegexList - singly linked cache of precompiled PCRE regular expressions.
 *
 * The object created through the constructor is the list owner: it tracks
 * Head, Tail, the iteration cursor (Current) and the element count.  Each
 * element appended by Add() is a node that reuses this same class layout
 * and carries the pattern text plus its compiled regex in Item.
 */
class cRegexList {
private:
    // Link to the following node; zero in the last node.
    //
    // This MUST stay the first data member.  Add() and Iterate() store and
    // load the link with memcpy() on the first sizeof(cRegexList*) bytes of
    // a node, so whatever member sits first is overwritten when a node gets
    // a successor.  Reserving the slot here keeps Item.Regex intact.
    // Not used (and not initialized) in the list-owner object.
    cRegexList *Next;

    // Payload of one node.
    struct Internal{
        cPCRE* Regex;   // compiled form of String
        char* String;   // pattern text the regex was compiled from
    } Item;

    cRegexList *Head, *Current, *Tail;  // first node, iteration cursor, last node
    int Count;                          // number of nodes appended so far

public:
    cRegexList(void);
    ~cRegexList(void);

    // Compile string1 and append it as a new node at the tail.
    bool Add(const char *string1);

    // Return the cached regex whose pattern text equals Expression, or NULL.
    // Repositions the iteration cursor.
    cPCRE* FindRegex(const char* Expression);

    // Pattern text of the node under the cursor, or NULL when there is none.
    const char* GetCharacterField(void);

    // Move the cursor to the first node; false when the list is empty.
    bool RestartIterator(void);
    // Advance the cursor by one node; false when it cannot advance.
    bool Iterate(void);

    // Number of nodes in the list.
    int GetCount(void);

    // Pattern text of the node under the cursor, or "" when there is none.
    const char* GetString(void);

    // Print every pattern, one per line, to standard output.
    void Dump(void);
};

inline bool cRegexList::RestartIterator(void) {
    Current = Head;
    return (Current!=0);
}

inline bool cRegexList::Iterate(void) {
    if (Current==0)
        return false;
    if (Current && Current != Tail){
        memcpy(&Current,Current,sizeof(cRegexList*));
        return (Current!=0);
    }

    return false;
}

inline int cRegexList::GetCount(void) {
    return Count;
}

//  Look up the compiled regex whose pattern text equals Expression
//  (exact, case-sensitive strcmp match on the stored pattern text).
//
//  Expression : NUL-terminated pattern text to search for; may be NULL.
//  Returns    : the cached cPCRE* of the first matching node in list order,
//               or NULL when Expression is NULL, the list is empty, or no
//               node matches.
//  Side effect: moves the iteration cursor -- it is left on the matching
//               node after a hit and rewound to the head after a miss.
inline cPCRE* cRegexList::FindRegex(const char* Expression) {
    // RestartIterator() decides emptiness from Head, so the lookup works even
    // when the cursor has never been positioned (e.g. right after Add()).
    if (Expression == 0 || !RestartIterator())
        return NULL;

    // Same restart / do-while walk that Dump() uses: visits every node,
    // the head included, exactly once.
    do {
        if (strcmp(Current->Item.String, Expression) == 0)
            return Current->Item.Regex;
    } while (Iterate());

    Current = Head;   // leave the cursor rewound after a miss
    return NULL;
}

// Pattern text of the node under the iteration cursor.
// Returns NULL when there is no cursor or the node carries no text.
inline const char* cRegexList::GetCharacterField(void) {
    if (Current == 0)
        return NULL;

    const char* text = Current->Item.String;
    if (text == 0)
        return NULL;
    return text;
}

// Pattern text of the node under the iteration cursor.
// Unlike GetCharacterField(), a missing cursor yields "" rather than NULL.
inline const char *cRegexList::GetString(void) {
    const char* text = "";
    if (Current != 0)
        text = Current->Item.String;
    return text;
}

#endif

cRegexList.cpp

#include "Portability.h"

#include "cStringListTest.h"

// Construct an empty list: no nodes, no iteration cursor, zero count.
cRegexList::cRegexList(void)
    : Head(0), Current(0), Tail(0), Count(0) {
    // The owner object carries an Item of its own; zero it so it never
    // holds indeterminate pointers.
    Item.Regex = 0;
    Item.String = 0;
}

// Release everything Add() allocated: each node's compiled regex, its
// pattern text and the raw node storage itself.
//
// NOTE(review): the class declares no copy constructor / assignment operator,
// so two copies of a list would share nodes and free them twice -- confirm
// that no caller copies cRegexList objects by value.
cRegexList::~cRegexList(void) {
    cRegexList* node = Head;
    while (node != 0) {
        const bool isTail = (node == Tail);

        // The link to the following node lives in the first
        // sizeof(cRegexList*) bytes of a node.  The tail has no successor,
        // so its leading bytes are never read as a link.
        cRegexList* following = 0;
        if (!isTail)
            memcpy(&following, node, sizeof(cRegexList*));

        // If Item.Regex occupies that same leading slot, a non-tail node's
        // regex pointer has been replaced by the link and must not be
        // deleted; the tail's pointer is always genuine.
        const bool regexInLinkSlot =
            (static_cast<void*>(&node->Item.Regex) == static_cast<void*>(node));
        if (isTail || !regexInLinkSlot)
            delete node->Item.Regex;

        delete[] node->Item.String;

        // Nodes are allocated as raw char arrays, so release them as such.
        delete[] reinterpret_cast<char*>(node);

        node = following;
    }

    Head = Current = Tail = 0;
    Count = 0;
}

bool cRegexList::Add(const char *string1_){ 
    cRegexList* newElement = (cRegexList*)new char[sizeof(cRegexList) 
                                +
                               strlen(string1_)
                                +
                                1];

    memset(newElement,'\x0', sizeof(cRegexList) + strlen(string1_)); 

    newElement->Item.String = new char[strlen(string1_) + 1];

    strcpy(newElement->Item.String, string1_);

    newElement->Item.Regex = new cPCRE();
    newElement->Item.Regex->SetOptions(PCRE_CASELESS);
    newElement->Item.Regex->Compile(string1_);

    if (Tail==0) {
        Head=Tail=newElement;
    } else {
        memcpy(Tail,&newElement,sizeof(cRegexList*));
        Tail=newElement;
    }

    Count++;
    return true;
}

// Print every stored pattern to standard output, one per line, prefixed by
// its zero-based position.  Walks the list from the head, so the iteration
// cursor is moved.
void cRegexList::Dump(void) {
    int index = 0;

    for (bool more = RestartIterator(); more; more = Iterate()) {
        printf(" %d: %s\n", index, GetString());
        ++index;
    }
}
Jonas
  • 6,915
  • 8
  • 35
  • 53
Frank
  • 1,406
  • 2
  • 16
  • 42
  • undoubtedly possible to optimize it. Start by using almost anything but a linked list. – Jerry Coffin Dec 19 '12 at 19:50
  • @Jerry Coffin, I thought about using a Standard STL hash map. Would that be okay with you? Thank you for your reply. – Frank Dec 19 '12 at 19:54
  • Yes, a hash_map would probably be an improvement. More importantly, it's better to *start* with something simple, and only write a lot of code on your own when/if you see a good reason to do so. – Jerry Coffin Dec 19 '12 at 19:56
  • @Jerry Coffin, I will benchmark your Standard Template Library hash_map idea and compare it to very short linked lists. Thank you – Frank Dec 19 '12 at 19:59
  • Here is a test program. Thank you. int main(int argc, char* Argv_[]){ cRegexList* xyz(NULL); xyz = new cRegexList; xyz->Add("ABC"); xyz->Add("DEF"); xyz->Add("GHI"); xyz->Dump(); } – Frank Dec 19 '12 at 20:08
  • I will close this item after the hash_map benchmark results are obtained. Thank you. – Frank Dec 19 '12 at 20:37
  • @Jerry Coffin, I just checked with our project director. The project director says hash_maps are best suited for random access. In our case with the data profiler we are building for a customer, the project director says the linked list of PCRE regular expressions will be very small(at most 3 to 4 regexes per linked list). In that case, sequential access of small linked lists is just as fast as STL hash_map random access. Thank you. – Frank Dec 19 '12 at 21:46
  • For that few items, yes, hashing is probably overkill -- but a vector will usually be quite a bit better than a linked list. – Jerry Coffin Dec 19 '12 at 21:59
  • @JerryCoffin, I will check with our project director tomorrow at 8:00 AM about the idea of a Standard Template Library vector compared to pure hand-optimized singly linked lists. Thank you – Frank Dec 19 '12 at 22:24
  • @Jerry Coffin, Our project director says the main concern with the STL vector is cross-platform compatibility of STL libraries between Windows, Solaris UNIX, IBM AIX, HP-UX and Red Hat Linux. The linked list class which was posted yesterday has already been tested on Windows, Solaris UNIX, IBM AIX, HP-UX and Red Hat Linux. Thank you. – Frank Dec 20 '12 at 18:56
  • @Jerry Coffin Happy Holidays!!!! – Frank Dec 21 '12 at 22:14

0 Answers0