0

Can I set split_regex to split based on groups instead of using lookbehind?

The code I'm using is as follows:

string data = "xyz: 111.222: k.44.4: 12345";
vector<string> data_vec;

// The pattern is written as a raw string literal: in an ordinary literal,
// "\s" and "\d" are not valid C++ escape sequences, so the regex engine would
// not receive the intended backslash classes.
boost::algorithm::split_regex( data_vec, data, boost::regex(R"((:\s*)\d)"));

My expected result is:

xyz
111.222: k.44.4
12345
Yen Dang
  • 268
  • 2
  • 9

1 Answer

0

In case you are open to other solutions, one using std::regex would be:

  • Loop searching for a :\s* separator.
  • Keep a vector of tokens. Push back the first token and any token that starts with a digit. For tokens not starting with a digit (other than first one), add them to the last element of the container (together with, and after, the separator).
  • Remember to treat the token after the last separator as well.

[Demo]

#include <cctype>  // isdigit
#include <fmt/ranges.h>
#include <regex>
#include <string>
#include <vector>

// Adds `token` to `tokens`, deciding whether it starts a new element or
// continues the previous one.
//
//   - An empty token is ignored.
//   - If `tokens` is empty, or `token` starts with a decimal digit, `token`
//     is appended as a new element.
//   - Otherwise `separator` followed by `token` is appended to the last
//     element of `tokens`.
//
// tokens:    container being built; modified in place.
// token:     text found between two separators (may be empty).
// separator: separator text that preceded `token` in the input.
void add_token(std::vector<std::string>& tokens, const std::string& token,
    const std::string& separator) {
    if (token.empty()) {
        return;
    }
    // std::isdigit has undefined behaviour for negative arguments other than
    // EOF, so a plain (possibly signed) char must be converted to
    // unsigned char before the call.
    const bool starts_with_digit =
        std::isdigit(static_cast<unsigned char>(token.front())) != 0;
    if (tokens.empty() || starts_with_digit) {
        tokens.push_back(token);
        return;
    }
    // Not a split point: glue the separator and the token back onto the
    // previous element.
    tokens.back() += separator;
    tokens.back() += token;
}

// Splits `data` on the separator `:\s*`, but only where the text after the
// separator starts with a digit; all token-placement decisions are delegated
// to add_token.
//
// data: input text to split.
// Returns the tokens collected by add_token, as a std::vector<std::string>.
auto split_regex(std::string data) {
    // Compiled once: constructing a std::regex is expensive and the pattern
    // never changes.
    static const std::regex pattern{R"(:\s*)"};

    std::vector<std::string> tokens{};
    std::string last_separator{};
    std::smatch match{};

    // Walk the input with iterators instead of re-assigning `data` to the
    // match suffix, which would copy the remaining text after every separator.
    auto token_begin = data.cbegin();
    const auto data_end = data.cend();
    while (std::regex_search(token_begin, data_end, match, pattern)) {
        last_separator = match[0].str();
        add_token(tokens, match.prefix().str(), last_separator);
        // The pattern always consumes at least the ':', so this advances.
        token_begin = match[0].second;
    }
    // Whatever follows the last separator is a token as well.
    add_token(tokens, std::string{token_begin, data_end}, last_separator);
    return tokens;
}

// Demo entry point: splits the sample string and prints one token per line.
int main() {
    const std::string sample{ "xyz: 111.222: k.44.4: 12345" };
    const auto pieces = split_regex(sample);
    fmt::print("{}", fmt::join(pieces, "\n"));
}

// Outputs:
//
//   xyz
//   111.222: k.44.4
//   12345
rturrado
  • 7,699
  • 6
  • 42
  • 62