A lookup table (initialized at program start) could be of use here. I assume you already have profiling set up, so I didn't profile this solution myself; I'm curious what the results would be, so please share them when you have some.
// Decimal text of every octet value 0..255: one to three digits with no
// leading zeros, padded with '\0' up to four bytes.
char LOOKUP_TABLE[256][4];

// Fills LOOKUP_TABLE. Every byte of every row is written, so each row is a
// NUL-terminated C string after this call.
void init_lookup_table() {
    for (int value = 0; value < 256; ++value) {
        char *entry = LOOKUP_TABLE[value];
        int pos = 0;

        // Emit only the significant digits, most significant first.
        if (value >= 100) {
            entry[pos++] = static_cast<char>('0' + value / 100);
        }
        if (value >= 10) {
            entry[pos++] = static_cast<char>('0' + (value / 10) % 10);
        }
        entry[pos++] = static_cast<char>('0' + value % 10);

        // Pad the remainder of the row with terminators.
        while (pos < 4) {
            entry[pos++] = '\0';
        }
    }
}
// Copies the decimal digits of `value` from LOOKUP_TABLE to `*buf`, writes
// `terminator` right after them, and advances `*buf` past everything written
// (digits plus terminator: 2 to 4 bytes).
void append_octet(char **buf, unsigned char value, char terminator) {
    const char *digits = LOOKUP_TABLE[value];
    // Number of significant digits; the table stores them without leading zeros.
    const int digit_count = (value < 10) ? 1 : (value < 100) ? 2 : 3;

    char *out = *buf;
    for (int i = 0; i < digit_count; ++i) {
        *out++ = digits[i];
    }
    *out++ = terminator;
    *buf = out;
}
// Formats the four bytes data[start] .. data[start + 3] as dotted-decimal
// text. No bounds checking is performed on `start`.
std::string parse_ipv4_address( const std::vector<unsigned char> & data, int start ) {
    // Longest possible result is "255.255.255.255": 15 characters plus the NUL.
    char text[16];
    char *cursor = text;
    for (int octet = 0; octet < 4; ++octet) {
        // The first three octets are followed by a dot; the last one ends the C string.
        const char separator = (octet == 3) ? '\0' : '.';
        append_octet(&cursor, data[start + octet], separator);
    }
    return std::string( text );
}
// Demo driver: builds the lookup table, then prints one sample address.
int main() {
    init_lookup_table();  // must run before any table-backed formatting
    const std::vector<unsigned char> octets = { 0xc0, 0x8, 0x20, 0x0c };
    const std::string dotted = parse_ipv4_address( octets, 0 );
    std::cout << dotted << std::endl;
    return 0;
}
Another way to improve performance would be to replace the string with a specialized object. In that case you would be able to implement the required I/O methods (my guess is that you need the string in order to print it somewhere) and you would avoid the copy made on string construction.
UPD: On second thought, I guess the lookup table is of little use in my code, so one could just copy the code used to build the lookup table into append_octet directly, making digits global.
Updated code (thanks to MikeMB and Matteo Italia), which also looks very cache-friendly:
// Writes the decimal digits of `value` (no leading zeros) to `*buf`, followed
// by `terminator`, and advances `*buf` past everything written.
inline void append_octet(char **buf, unsigned char value, char terminator) {
    char *out = *buf;

    // Emit digits most significant first, skipping the ones that would be
    // leading zeros.
    if (value >= 100) {
        *out++ = static_cast<char>('0' + value / 100);
    }
    if (value >= 10) {
        *out++ = static_cast<char>('0' + (value / 10) % 10);
    }
    *out++ = static_cast<char>('0' + value % 10);
    *out++ = terminator;

    *buf = out;
}
// Renders data[start] .. data[start + 3] as a dotted-decimal string.
// `start` is not range-checked.
std::string parse_ipv4_address( const std::vector<unsigned char> & data, int start ) {
    // Room for "255.255.255.255" plus the terminating NUL.
    char dotted[16];
    char *write_pos = dotted;

    // Dots separate the octets; the final NUL closes the C string.
    const char terminators[4] = { '.', '.', '.', '\0' };
    for (int i = 0; i < 4; ++i) {
        append_octet(&write_pos, data[start + i], terminators[i]);
    }

    std::string result( dotted );
    return result;
}
// Demo driver: formats and prints one sample address.
int main() {
    const std::vector<unsigned char> sample = { 0xc0, 0x8, 0x20, 0x0c };
    const std::string formatted = parse_ipv4_address( sample, 0 );
    std::cout << formatted << std::endl;
    return 0;
}
UPD 2: I guess I found a way to avoid the extra copy (although there's still an extra copy on return). Here are versions with a lookup table and without it:
#include <string>
#include <iostream>
#include <vector>
// For every octet value 0..255: its decimal text (no leading zeros) followed
// by a '.' separator, e.g. LUT[8] == "8." and LUT[192] == "192.".
std::string LUT[256];

// Populates LUT. Safe to call more than once: each entry is rebuilt from
// scratch rather than appended to, so repeated calls leave the table unchanged.
void init_lookup_table() {
    for (int value = 0; value < 256; ++value) {
        std::string &entry = LUT[value];
        entry.clear();     // discard anything a previous call stored here
        entry.reserve(4);  // at most three digits plus the dot

        // Emit digits most significant first, skipping leading zeros.
        if (value >= 100) {
            entry.push_back(static_cast<char>('0' + value / 100));
        }
        if (value >= 10) {
            entry.push_back(static_cast<char>('0' + (value / 10) % 10));
        }
        entry.push_back(static_cast<char>('0' + value % 10));
        entry.push_back('.');
    }
}
// Builds the dotted-decimal text for data[start] .. data[start + 3] by
// concatenating the LUT entries of the four octets. `start` is not
// range-checked.
std::string parse_ipv4_address_lut( const std::vector<unsigned char> & data, int start ) {
    std::string dotted;
    dotted.reserve(16);
    for (int offset = 0; offset < 4; ++offset) {
        dotted += LUT[data[start + offset]];
    }
    // Remove the last character of the concatenation (the separator that
    // follows the fourth octet once the table has been initialized).
    dotted.pop_back();
    return dotted;
}
// Appends the decimal digits of `value` (no leading zeros) to `*str`,
// followed by `terminator`. The terminator is appended verbatim, whatever
// character it is.
inline void append_octet_calc(std::string *str, unsigned char value, char terminator) {
    // Emit digits most significant first, skipping leading zeros.
    if (value >= 100) {
        str->push_back(static_cast<char>('0' + value / 100));
    }
    if (value >= 10) {
        str->push_back(static_cast<char>('0' + (value / 10) % 10));
    }
    str->push_back(static_cast<char>('0' + value % 10));
    str->push_back(terminator);
}

// Formats data[start] .. data[start + 3] as dotted-decimal text, e.g.
// "192.8.32.12". `start` is not range-checked.
std::string parse_ipv4_address_calc( const std::vector<unsigned char> & data, int start ) {
    std::string res;
    res.reserve(16);  // "255.255.255.255" is 15 characters
    append_octet_calc(&res, data[start + 0], '.');
    append_octet_calc(&res, data[start + 1], '.');
    append_octet_calc(&res, data[start + 2], '.');
    // A std::string stores any pushed character, including '\0'; pushing a NUL
    // terminator would leave an embedded trailing NUL in the result. Use the
    // regular separator for the last octet and drop it afterwards.
    append_octet_calc(&res, data[start + 3], '.');
    res.pop_back();
    return res;
}
// Demo driver: prints the same sample address using the computed variant and
// then the lookup-table variant.
int main() {
    init_lookup_table();  // the table-backed variant needs LUT populated first
    const std::vector<unsigned char> sample = { 0xc0, 0x8, 0x20, 0x0c };

    const std::string via_calc = parse_ipv4_address_calc( sample, 0 );
    std::cout << via_calc << std::endl;

    const std::string via_lut = parse_ipv4_address_lut( sample, 0 );
    std::cout << via_lut << std::endl;
    return 0;
}
UPD 3: I made some measurements (1 000 000 repetitions):
clang++ -O3
orig...done in 5053 ms // original implementation by OP
c_lut...done in 2083 ms // lookup table -> char[] -> std::string
c_calc...done in 2245 ms // calculate -> char[] -> std::string
cpp_lut...done in 2597 ms // lookup table + std::string::reserve + append
cpp_calc...done in 2632 ms // calculate -> std::string::reserve + push_back
hardcore...done in 1512 ms // reinterpret_cast solution by @IInspectable
g++ -O3
orig...done in 5598 ms // original implementation by OP
c_lut...done in 2285 ms // lookup table -> char[] -> std::string
c_calc...done in 2307 ms // calculate -> char[] -> std::string
cpp_lut...done in 2622 ms // lookup table + std::string::reserve + append
cpp_calc...done in 2601 ms // calculate -> std::string::reserve + push_back
hardcore...done in 1576 ms // reinterpret_cast solution by @IInspectable
Note that the 'hardcore' solution isn't equivalent because of leading zeroes.