-2

I need to check whether a string consists of a special set of characters only (ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789<).

I could either use boost regex or a combination of isupper and isdigit. Which one would be considered the better choice when it comes to performance? The string lengths I am testing are around 100 characters.

// Regex-based check: returns true when rString is non-empty and consists
// solely of the characters 0-9, A-Z and '<'.
bool IsValid(string& rString)
{
  // Compiling a regex is expensive, so build it once and reuse it on every call
  // instead of reconstructing it per invocation.
  static const boost::regex regex("[0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ<]+");

  // Match against the whole std::string rather than c_str(): a C string stops at
  // the first embedded '\0', which would let trailing invalid characters slip by.
  // No match_results object is needed because only the yes/no answer is used.
  return boost::regex_match(rString, regex);
}

// Loop-based check: returns true when every character of rString is an
// uppercase letter (per isupper), a decimal digit, or '<'.
// An empty string yields true because there is no offending character.
bool IsValid(string& rString)
{
    for (string::size_type i = 0; i < rString.size(); ++i)
    {
        // The <cctype> classifiers require a value representable as unsigned char
        // (or EOF); passing a negative plain char is undefined behaviour.
        const unsigned char ch = static_cast<unsigned char>(rString[i]);
        if (!isupper(ch) && !isdigit(ch) && ch != '<')
            return false;
    }
    return true;
}
tzippy
  • 6,458
  • 30
  • 82
  • 151

1 Answer

0

Is XXX slower than YYY?

Answer: time it

In this case (swapping boost::regex for std::regex):

#include <algorithm>
#include <cctype>
#include <cstdlib>
#include <ctime>
#include <iomanip>
#include <iostream>
#include <iterator>
#include <regex>
#include <string>
#include <vector>

// Regex-based validator: true when rString is non-empty and made up only of
// the characters 0-9, A-Z and '<'. The '+' quantifier rejects the empty string.
bool IsValid1(std::string const& rString)
{
    // Built on the first call and shared by all later calls.
    static const std::regex allowedPattern("[0-9A-Z<]+");

    // regex_match only succeeds when the entire range matches the pattern.
    return std::regex_match(rString.begin(), rString.end(), allowedPattern);
}

// Loop-based validator: true when every character of rString is an uppercase
// letter (per std::isupper), a decimal digit, or '<'.
// An empty string yields true because there is no offending character.
bool IsValid2(std::string const& rString)
{
    for (const char raw : rString)
    {
        // std::isupper / std::isdigit require a value representable as
        // unsigned char (or EOF); a negative plain char is undefined behaviour.
        const unsigned char ch = static_cast<unsigned char>(raw);
        if (!std::isupper(ch) && !std::isdigit(ch) && ch != '<')
            return false;
    }
    return true;
}

// Builds the benchmark input: 100000 strings, each chosen by one rand() call
// between an all-valid sample and one containing a lowercase 'f'.
auto make_samples = []()
{
    constexpr int kSampleCount = 100000;

    std::vector<std::string> samples;
    samples.reserve(kSampleCount);  // one allocation up front, no regrowth

    for (int i = 0; i < kSampleCount; ++i)
    {
        // Lower half of the rand() range picks the valid sample.
        const bool pickValid = rand() < (RAND_MAX / 2);
        samples.push_back(pickValid ? std::string("ABCDEF34<63DFGS")
                                    : std::string("ABCDEF34<63DfGS"));
    }
    return samples;
};

// Benchmark driver: generates the samples, then times each validator while it
// counts how many samples it accepts.
int main() {
    auto samples = make_samples();

    // Runs `work` once and prints its result together with the clock() time
    // it consumed, in seconds with two decimals.
    auto report = [](const char* label, auto&& work)
    {
        const clock_t began = clock();
        auto outcome = work();
        const clock_t finished = clock();

        const double seconds = double(finished - began) / CLOCKS_PER_SEC;
        std::cout << label << " yields " << outcome << " in "
                  << std::fixed << std::setprecision(2) << seconds << '\n';
    };

    auto count_with_regex = [&]() -> std::size_t
    {
        return std::count_if(samples.begin(), samples.end(), IsValid1);
    };

    auto count_with_loop = [&]() -> std::size_t
    {
        return std::count_if(samples.begin(), samples.end(), IsValid2);
    };

    report("regex method: ", count_with_regex);
    report("search method: ", count_with_loop);
}

sample results:

regex method:  yields 49816 in 1.29
search method:  yields 49816 in 0.04
Richard Hodges
  • 68,278
  • 7
  • 90
  • 142