2

I need to convert large bit string like this :

unsigned char* key = "0111010111010101010101010100101011010";

to hexadecimal strings like this :

unsigned char* string = "EBAAAA95A";

The problem is that my keys are usually longer than 50 or 60 bits, and when I try to convert them with strtoll(key, NULL, 2), the resulting number is far bigger than what a long long int can hold. Is there an efficient way to convert this into hexadecimal directly?

Thanks a lot for your help !

Thomas Dickey
  • 51,086
  • 7
  • 70
  • 105
Ouilliam
  • 61
  • 3
  • 4
    Do not use numbers for converting bits to hexadecimal. They are just strings of characters. Count the number of bits so you can group them into groups of four starting at the right, then convert each group of four (or the initial group of one-to-four) to one hexadecimal digit. – Eric Postpischil Sep 19 '21 at 12:01
  • 1
    You can divide the string into smaller numbers and parse them individually. [Here's an example I just hacked together that reads the string nibble-by-nibble](https://godbolt.org/z/h9qPqEn7f). I won't post it as an answer because I'm not completely satisfied with it (source string's length must be a multiple of 4, doesn't validate source string), but yeah, it's just an example. – mediocrevegetable1 Sep 19 '21 at 12:56
  • 1
    @mediocrevegetable1 I did [something similar](https://godbolt.org/z/h98azfYaP) but used a table instead. Both our solutions suffer from the same problem that the input must be a multiple of 4 or it will cause UB though. – Kaihaku Sep 19 '21 at 13:13
  • Even though product/library recommendations are off-topic: https://en.wikipedia.org/wiki/List_of_arbitrary-precision_arithmetic_software – Andrew Henle Sep 19 '21 at 17:15
  • If the bit string is not an exact multiple of four then add 0, 00 or 000 to the start to give an exact number of hex digits. – rossum Sep 20 '21 at 11:40

3 Answers3

1

I think you need to roll your own implementation. Here's one implementation taking inspiration from @mediocrevegetable1's comment:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Parse the first len characters of str as a binary number, most
 * significant bit first.
 *
 * Returns the parsed value, or -1 as soon as a character other than
 * '0' or '1' is seen. A len of zero (or less) yields 0.
 */
static inline int bitstr_to_int(const char *str, int len)
{
        int value = 0;
        int pos = 0;

        while (pos < len) {
                char c = str[pos++];

                if (c != '0' && c != '1')
                        return -1;
                /* shift in the next bit */
                value = value * 2 + (c == '1');
        }
        return value;
}

/*
 * Convert a string of '0'/'1' characters into a newly allocated,
 * NUL-terminated, uppercase hexadecimal string.
 *
 * str: binary digits, most significant first. Only the first len
 *      characters are read, so it need not be NUL-terminated.
 * len: number of binary digits to convert.
 *
 * When len is not a multiple of 4, the leading (len % 4) bits form the
 * first hex digit; every following hex digit covers exactly 4 bits.
 * An empty input (len == 0) yields an empty string.
 *
 * Returns the hex string, which the caller must free(), or NULL when
 * str is NULL, len is negative, the allocation fails, or a character
 * other than '0' or '1' is encountered.
 */
char * bitstr_to_hexstr(const char *str, int len)
{
        static const char hex_digit[] = "0123456789ABCDEF";

        /*
         * A negative len would make len % 4 and len / 4 negative, which
         * under-sizes the allocation below (e.g. len == -5 allocates one
         * byte and then writes two). Reject it up front.
         */
        if (str == NULL || len < 0) return NULL;

        int r = len % 4; //bits in the short leading group, 0 if none
        int q = len / 4; //number of full 4-bit groups
        int j = 0;       //traces the output chars in hexstr

        char *hexstr = malloc((size_t)q + (r != 0) + 1);
        if (hexstr == NULL) return NULL;

        if (r != 0) {
                int h = bitstr_to_int(str, r);
                if (h == -1) goto err;
                hexstr[j++] = hex_digit[h];
        }

        //i traces the input chars in bitstr, one full group at a time
        for (int i = r; i < len; i += 4) {
                int h = bitstr_to_int(str+i, 4);
                if (h == -1) goto err;
                hexstr[j++] = hex_digit[h];
        }

        hexstr[j] = '\0';
        return hexstr;
err:
        free(hexstr);
        return NULL;
}

/*
 * Demo driver: converts a sample 37-bit key to hexadecimal and prints it.
 * Exits with EXIT_FAILURE if the conversion returns NULL.
 */
int main(void)
{
    const char *key = "0111010111010101010101010100101011010";

    /* bitstr_to_hexstr takes an int length; the sample key easily fits */
    char *res = bitstr_to_hexstr(key, (int)strlen(key));
    if (res == NULL) {
        /* passing NULL to printf("%s") would be undefined behaviour */
        fprintf(stderr, "conversion failed\n");
        return EXIT_FAILURE;
    }

    printf("%s\n", res);
    free(res); /* the result is heap-allocated and owned by the caller */
    return 0;
}
Fractal
  • 816
  • 5
  • 15
1

If you need to work with big numbers, a good library (perhaps the library of choice) is GMP. Here's a simple example demonstrating the conversion you asked about:

#include <stdio.h>
#include <gmp.h>

/*
 * Demo driver: parses a binary digit string into a GMP integer and
 * prints it in base 16. Returns 1 if the input is not valid binary.
 */
int main()
{
    const char *instr = "0111010111010101010101010100101011010";
    /*
     * mpz_get_str needs mpz_sizeinbase(x, 16) + 2 bytes; 100 is ample for
     * this 37-bit sample but must be enlarged for much longer inputs.
     */
    char outstr[100];
    mpz_t x;
    int status = 0;

    mpz_init(x);
    /* mpz_set_str returns 0 on success, -1 if instr is not a valid base-2 number */
    if (mpz_set_str(x, instr, 2) != 0) {
        fprintf(stderr, "invalid binary string\n");
        status = 1;
    } else {
        printf("%s\n", mpz_get_str(outstr, 16, x));   /* convert to base 16 */
    }
    mpz_clear(x);   /* release the storage held by x */
    return status;
}
Steve Summit
  • 45,437
  • 7
  • 70
  • 103
0

If the string is longer than 64 bits, you can convert one hex digit at a time, making sure that only the first digit uses fewer than 4 bits.

Here is an implementation with test cases:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* convert len bits to a number */
/* Read len characters of str as binary digits (MSB first) and return their value.
 * No validation is done here: each character contributes (c - '0'). */
static unsigned bin2hex_val(const char *str, size_t len) {
    unsigned acc = 0;
    for (size_t k = 0; k < len; k++) {
        acc = (acc << 1) + (unsigned)(str[k] - '0');
    }
    return acc;
}

/*
 * Convert the leading run of '0'/'1' characters in str to a freshly
 * allocated uppercase hex string; conversion stops at the first character
 * that is not a binary digit. If the bit count is not a multiple of 4,
 * the first hex digit covers the short leading group.
 *
 * Returns the hex string (empty if str has no leading binary digits),
 * to be released with free(), or NULL if the allocation fails.
 */
char *bin2hex_str(const char *str) {
    static const char hex_digits[16] = "0123456789ABCDEF";
    size_t nbits = strspn(str, "01");     /* length of the binary prefix */
    size_t lead = nbits % 4;              /* bits in the short first group */
    size_t hexlen = (nbits + 3) / 4;      /* hex digits to produce */
    char *out = malloc(hexlen + 1);

    if (out == NULL) {
        return NULL;
    }

    char *w = out;
    const char *end = str + nbits;

    /* the first digit absorbs the leftover bits, if any */
    if (lead != 0) {
        *w++ = hex_digits[bin2hex_val(str, lead)];
        str += lead;
    }
    /* everything that remains is a whole number of 4-bit groups */
    for (; str < end; str += 4) {
        *w++ = hex_digits[bin2hex_val(str, 4)];
    }
    *w = '\0';
    return out;
}

/*
 * Convert str with bin2hex_str and print "input -> output" on stdout.
 * If the conversion returns NULL, a placeholder is printed instead,
 * because passing NULL to printf("%s") is undefined behaviour.
 */
void test(const char *str) {
    char *p = bin2hex_str(str);
    if (p == NULL) {
        printf("%s -> (allocation failed)\n", str);
        return;
    }
    printf("%s -> %s\n", str, p);
    free(p);
}

/* Run the converter over a fixed list of sample inputs, in order:
 * edge lengths 0..8, longer keys, and strings with non-binary characters. */
int main() {
    static const char *const inputs[] = {
        "",
        "0",
        "1",
        "01",
        "011",
        "0111",
        "01110",
        "011101",
        "0111010",
        "01110101",
        "01110101110101010101010101001010",
        "0111010111010101010101010100101011010011101011101010101010101010",
        "01110101110101010101010101001010110100111010111010101010101010100101011010",
        "ABC",
        "0.0",
        "123",
    };
    const size_t count = sizeof inputs / sizeof inputs[0];

    for (size_t idx = 0; idx < count; idx++) {
        test(inputs[idx]);
    }
    return 0;
}

Output:

 ->
0 -> 0
1 -> 1
01 -> 1
011 -> 3
0111 -> 7
01110 -> 0E
011101 -> 1D
0111010 -> 3A
01110101 -> 75
01110101110101010101010101001010 -> 75D5554A
0111010111010101010101010100101011010011101011101010101010101010 -> 75D5554AD3AEAAAA
01110101110101010101010101001010110100111010111010101010101010100101011010 -> 1D755552B4EBAAAA95A
ABC ->
0.0 -> 0
123 -> 1
chqrlie
  • 131,814
  • 10
  • 121
  • 189