Cut string into parts of 2 charactered string and then to hex format (char * to unsigned char) in C

Question

I need some help cutting a string into pairs of characters and then converting each pair to hex format.

e.g. char *ADDRESS = "0011AABB"; I want the above address to be split into "00", "11", "AA" and "BB", and then each part converted to 0x00, 0x11, 0xAA and 0xBB, which will be stored in unsigned chars.

Thanks

What language are you doing this in? C ? – Kairan Nov 17 '12 at 02:37 — Kairan, Nov 17 '12 at 02:37
Yes, the programming language is C. – eecs Nov 20 '12 at 00:25 — eecs, Nov 20 '12 at 00:25

Mike Kinghan · Accepted Answer · 2013-05-09T21:51:01.883

You suggest that your address-strings are of type char *, but I assume you want a solution that guarantees not to trash them, i.e. one that takes them as type char const *.

I assume also that the addresses they can represent are 32-bit, as per the example char *ADDRESS = "0011AABB".

In that case a solution that does exactly what you ask for in an obvious way is:

#include <assert.h>
#include <stdlib.h>
#include <string.h>

#define ADDRESS_BYTES 4 // Bytes in an address

/* Convert a hex numeral encoding an address to the unsigned chars that it
    encodes, two characters at a time.

    `addr_str` - in: a NUL-terminated hex numeral of exactly
        2 * ADDRESS_BYTES hex digits (upper or lower case).
    `bytes` - out: The unsigned char bytes of the address, high-byte first.
        Written only when the whole of `addr_str` is valid.

    return - The number of bytes output: ADDRESS_BYTES if `addr_str` is a
        valid hex numeral of the right length, otherwise 0 (wrong length,
        a non-hex character, or a null argument).
*/
unsigned address_bytes(char const *addr_str, unsigned char bytes[ADDRESS_BYTES])
{
    static char const hex_digits[] = "0123456789abcdefABCDEF";
    char buf[3] = {0}; // Two digits plus the terminating NUL.
    unsigned j;

    if (addr_str == NULL || bytes == NULL) {
        return 0;
    }
    // Validate up front instead of relying on assert() (compiled out under
    // NDEBUG) and on strtoul()'s end pointer: strtoul() also accepts leading
    // white space and a sign, so pairs such as "+1" or " 3" would otherwise
    // be taken for valid hex. The terminator is only inspected once the
    // first 2 * ADDRESS_BYTES characters are known to be hex digits.
    if (strspn(addr_str, hex_digits) != (size_t)(2 * ADDRESS_BYTES) ||
        addr_str[2 * ADDRESS_BYTES] != '\0') {
        return 0; // Failure
    }
    for (j = 0; j < ADDRESS_BYTES; ++j) { // Take chars 2 at a time
        buf[0] = addr_str[2 * j];
        buf[1] = addr_str[2 * j + 1];
        // Convert buffer from hex numeral to unsigned char in next byte.
        bytes[j] = (unsigned char)strtoul(buf, NULL, 16);
    }
    return ADDRESS_BYTES;
}

// A test program...

#include <stdio.h>

/* Demo driver: decode a sample address string and print the resulting bytes
   in hex, high byte first. */
int main(void)
{
    unsigned char bytes[ADDRESS_BYTES];
    char const *address = "0011AABB";
    unsigned done_bytes = address_bytes(address, bytes);
    unsigned i;

    // done_bytes is unsigned, so it must be printed with %u rather than %d.
    printf("The %u valid address bytes are (hex):", done_bytes);
    for (i = 0; i < done_bytes; ++i) {
        printf(" %02x", (unsigned)bytes[i]);
    }
    putchar('\n');
    return 0;
}

However, exactly what you ask for is not an efficient solution. You can accomplish your goal by simply converting an 8-char hex-numeral encoding a 32-bit address into the encoded 32-bit unsigned integer, and then getting the 4 unsigned char bytes that compose this unsigned integer in high-byte-first order. Converting the hex numeral to a uint32_t can be done with a single call to strtoul. Then getting the unsigned char bytes of this uint32_t in high-byte-first order is simply a matter of knowing whether that uint32_t is big-endian or little-endian. (Alternatively, extracting the bytes from the value with shifts, e.g. addr >> 24 for the high byte, yields the same high-byte-first order without needing to know the host's endianness.) So here is a better solution:

#include <assert.h>
#include <stdlib.h>
#include <string.h> 
#include <inttypes.h>

// ADDRESS_BYTES is defined by the first listing; the guard lets this listing
// also compile on its own.
#ifndef ADDRESS_BYTES
#define ADDRESS_BYTES 4 // Bytes in an address
#endif

/* Convert a hex numeral encoding a 32-bit address to its bytes with a single
    strtoul() call.

    `address` - in: a NUL-terminated hex numeral of exactly
        2 * ADDRESS_BYTES hex digits (upper or lower case).
    `bytes` - out: The unsigned char bytes of the address, high-byte first.
        Written only when `address` is valid.

    return - ADDRESS_BYTES on success, otherwise 0 (wrong length, a non-hex
        character, or a null argument).

    This version holds the value in a uint32_t, so it relies on
    ADDRESS_BYTES being 4.
*/
unsigned address_bytes(char const *address, unsigned char bytes[ADDRESS_BYTES])
{
    static char const hex_digits[] = "0123456789abcdefABCDEF";
    uint32_t addr;
    unsigned i;

    if (address == NULL || bytes == NULL) {
        return 0;
    }
    // strtoul() on its own is too lenient: it skips leading white space and
    // accepts a sign or a "0x" prefix, so "0x1234AB" or "-0000001" would be
    // taken for valid 8-digit addresses. Require pure hex digits of exactly
    // the right length before converting (this also replaces the assert(),
    // which disappears under NDEBUG).
    if (strspn(address, hex_digits) != (size_t)(2 * ADDRESS_BYTES) ||
        address[2 * ADDRESS_BYTES] != '\0') {
        return 0;
    }
    addr = (uint32_t)strtoul(address, NULL, 16);

    // Shifts operate on the value, not on its storage layout, so the bytes
    // come out high-byte first on any host without an endianness test.
    for (i = 0; i < ADDRESS_BYTES; ++i) {
        bytes[i] = (unsigned char)((addr >> (8 * (ADDRESS_BYTES - 1 - i))) & 0xFFu);
    }
    return ADDRESS_BYTES;
}

And if you are able and willing to make the non-portable assumption that the address-strings are encoded in ASCII, then you could avoid calling strtoul entirely and compute the output bytes directly from the input chars, using the characters' positions in the ASCII collating sequence to get the unsigned char values that they encode:

#include <assert.h>
#include <string.h> 
#include <ctype.h>

// ADDRESS_BYTES is defined by the first listing; the guard lets this listing
// also compile on its own.
#ifndef ADDRESS_BYTES
#define ADDRESS_BYTES 4 // Bytes in an address
#endif

/* Return the value (0..15) encoded by the hex digit `c`, or -1 if `c` is not
    a hex digit. Assumes 'A'..'F' and 'a'..'f' are each contiguous, as they
    are in ASCII. Plain comparisons are used so that a negative `char` value
    is simply rejected. */
static int hex_digit_value(char c)
{
    if (c >= '0' && c <= '9') {
        return c - '0';
    }
    if (c >= 'A' && c <= 'F') {
        return 10 + (c - 'A');
    }
    if (c >= 'a' && c <= 'f') {
        // Lower case must be measured from 'a', not 'A'; otherwise the
        // result overflows the nibble.
        return 10 + (c - 'a');
    }
    return -1;
}

/* Convert a hex numeral encoding an address to its bytes by computing each
    byte directly from the two characters that encode it.

    `address` - in: a NUL-terminated hex numeral of exactly
        2 * ADDRESS_BYTES hex digits (upper or lower case).
    `bytes` - out: The unsigned char bytes of the address, high-byte first.
        May be partially written when 0 is returned.

    return - ADDRESS_BYTES on success, otherwise 0 (wrong length, a non-hex
        character, or a null argument).
*/
unsigned address_bytes(char const *address, unsigned char bytes[ADDRESS_BYTES])
{
    unsigned j;

    if (address == NULL || bytes == NULL) {
        return 0;
    }
    for (j = 0; j < ADDRESS_BYTES; ++j) {
        // The terminating NUL is not a hex digit, so a too-short string is
        // rejected here before anything past its end is read. The second
        // character is only examined when the first one is valid.
        int hi = hex_digit_value(address[2 * j]);
        int lo = (hi < 0) ? -1 : hex_digit_value(address[2 * j + 1]);
        if (hi < 0 || lo < 0) {
            return 0; // Invalid hex
        }
        // First character is the high nibble, second the low nibble.
        bytes[j] = (unsigned char)((hi << 4) | lo);
    }
    // Exactly 2 * ADDRESS_BYTES digits: anything after them is an error.
    if (address[2 * ADDRESS_BYTES] != '\0') {
        return 0;
    }
    return ADDRESS_BYTES;
}

The last might be the fastest if that matters.

Built and tested with GCC 4.7.2 and clang 3.2

Cut string into parts of 2 charactered string and then to hex format (char * to unsigned char) in C

1 Answers1