Good afternoon. We are using the latest C/C++ version of PCRE on Windows with Visual Studio 8.0 and 9.0, with the options PCRE_CASELESS, PCRE_UTF8 and PCRE_UCP. With the PCRE regex [\x{00E4}]{1} we are able to match the Latin code point U+00E4 (ä) in the string "DAS tausendschöne Jungfräulein", whose UTF-8 encoding is 44 41 53 20 74 61 75 73 65 6E 64 73 63 68 C3 B6 6E 65 20 4A 75 6E 67 66 72 C3 A4 75 6C 65 69 6E. Now we would like to match both code points U+00E4 (ä, i.e. C3 A4) and U+00F6 (ö, i.e. C3 B6) so that we can implement a simple prototype C/C++ search-and-replace operation using $1 $2. Is this possible? Thank you.
We are now using the PCRE regex [\x{00F6}\x{00E4}]{1,}
with the following C++ function:
// Applies every search/replace rule in RegExList_ to the string *Input_.
//
// For each list entry, character field 1 is compiled as the search pattern
// and character field 2 is used as the replacement template. Inside the
// template, "$N" (N = one or more decimal digits) is replaced by matched
// text; every other character is copied through unchanged. All matches of a
// rule are replaced, scanning left to right, and replaced text is never
// rescanned by the same rule.
//
// Parameters:
//   RegExList_  list of rules; iterated with ResetIterator()/Next().
//   Input_      in/out: address of a NUL-terminated string. When a
//               replacement makes the string longer, the old buffer is
//               released with delete[] and *Input_ is pointed at a new
//               new[]-allocated buffer, so *Input_ must have been allocated
//               with new char[].
//
// Nothing happens when either pointer is null, when the input is empty, or
// when ResetIterator() reports failure. Rules whose pattern fails to compile
// or whose replacement field is null are skipped.
//
// NOTE(review): assumes cPCRE::GetMatchStart()/GetMatchEnd() are byte offsets
// relative to the subject passed to Execute(), with GetMatchEnd() one past
// the last matched byte (the PCRE ovector convention) -- confirm against
// cPCRE.
void cInternational::RegExSearchReplace(cOrderedList *RegExList_,char **Input_) {
  if (!RegExList_ || !Input_ || !*Input_) {
    return;
  }
  if ((*Input_)[0]=='\x0' || !RegExList_->ResetIterator()) {
    return;
  }

  do {
    cPCRE regEx;
    // PCRE_UTF8 is required for \x{...} escapes above 0x7F to match UTF-8
    // encoded subject text instead of single bytes.
    regEx.SetOptions(PCRE_CASELESS|PCRE_UTF8);
    const char *replacement=RegExList_->GetCharacterField(2);

    if (replacement && regEx.Compile(RegExList_->GetCharacterField(1))) {
      // Byte offset into *Input_ at which the next search begins. It is an
      // offset rather than a pointer because *Input_ may be reallocated.
      size_t searchPos=0;

      while (regEx.Execute(*Input_+searchPos)>0) {
        // Capture the bounds of the whole match before any other cPCRE call.
        const int relStart=regEx.GetMatchStart();
        const int relEnd=regEx.GetMatchEnd();
        if (relStart<0 || relEnd<relStart) {
          break;
        }
        const size_t matchStart=searchPos+static_cast<size_t>(relStart);
        const size_t matchLen=static_cast<size_t>(relEnd-relStart);
        const size_t matchEnd=matchStart+matchLen;
        const size_t inputLen=strlen(*Input_);
        if (matchEnd>inputLen) {
          break;  // Offsets outside the subject: refuse to touch memory.
        }

        // Build the substitution text. Output that would overflow the fixed
        // buffer is silently truncated.
        char substitution[dMaxRegExSubstitution];
        const size_t subMax=sizeof(substitution)-1;
        size_t subLen=0;
        for (size_t i=0;replacement[i]!='\x0';i++) {
          // isdigit() is undefined for negative values, and UTF-8 bytes are
          // negative when char is signed, hence the unsigned char casts.
          if (replacement[i]=='$' &&
              isdigit(static_cast<unsigned char>(replacement[i+1]))) {
            const int subString=atoi(replacement+i+1);
            if (regEx.HasSubString(subString)) {
              // NOTE(review): GetMatchStart()/GetMatchEnd() take no group
              // index here; presumably HasSubString() selects the group they
              // report, otherwise every $N expands to the whole match --
              // confirm against cPCRE.
              const int groupStart=regEx.GetMatchStart();
              const int groupEnd=regEx.GetMatchEnd();
              if (groupStart>=0 && groupEnd>groupStart &&
                  searchPos+static_cast<size_t>(groupEnd)<=inputLen) {
                size_t groupLen=static_cast<size_t>(groupEnd-groupStart);
                if (groupLen>subMax-subLen) {
                  groupLen=subMax-subLen;
                }
                memcpy(substitution+subLen,
                       *Input_+searchPos+groupStart,
                       groupLen);
                subLen+=groupLen;
              }
            }
            // Skip every digit atoi() consumed; the loop increment then
            // steps past the last one.
            while (isdigit(static_cast<unsigned char>(replacement[i+1]))) {
              i++;
            }
          } else if (subLen<subMax) {
            substitution[subLen++]=replacement[i];
          }
        }
        substitution[subLen]='\x0';

        // Splice the substitution over the matched text.
        const size_t tailLen=inputLen-matchEnd;  // bytes after the match
        if (subLen>matchLen) {
          // The string grows: assemble prefix + substitution + tail
          // (including the terminator) in a larger buffer.
          char *newInput=new char[inputLen-matchLen+subLen+1];
          memcpy(newInput,*Input_,matchStart);
          memcpy(newInput+matchStart,substitution,subLen);
          memcpy(newInput+matchStart+subLen,*Input_+matchEnd,tailLen+1);
          delete[] *Input_;
          *Input_=newInput;
        } else {
          // Same size or shrinking: close the gap in place (the regions may
          // overlap, so memmove), then drop the substitution in.
          memmove(*Input_+matchStart+subLen,*Input_+matchEnd,tailLen+1);
          memcpy(*Input_+matchStart,substitution,subLen);
        }

        // Resume after the inserted text so it is not matched again.
        searchPos=matchStart+subLen;
        if (matchLen==0) {
          // An empty match would be found again at the same position
          // forever; step over one whole UTF-8 character instead.
          if ((*Input_)[searchPos]=='\x0') {
            break;
          }
          searchPos++;
          while ((static_cast<unsigned char>((*Input_)[searchPos])&0xC0)==0x80) {
            searchPos++;
          }
        }
      }
    }
  } while (RegExList_->Next());
}