6

I'm trying to implement my own strcmp function, my strcmp bahaves differently when I use special characters.

#include <string.h>    

int my_strcmp(const char *s1, const char *s2)
{
    const char  *str1;
    const char  *str2;

    str1 = s1;
    str2 = s2;
    while ((*str1 == *str2) && *str1)
    {
        str1++;
        str2++;
    }
    return (*str1 - *str2);
}

int main()
{
   char *src = "a§bcDef";
   char *des = "acbcDef";
   printf("%d %d\n", my_strcmp(des, src), strcmp(des, src));
   return(0);
}

OUTPUT

161 -95

fluter
  • 13,238
  • 8
  • 62
  • 100
Junius L
  • 15,881
  • 6
  • 52
  • 96
  • 3
    You need to return `*str2 - *str1` methinks. – fuz May 15 '16 at 10:26
  • @FUZxxl when I do that, I get `-161 and -95`, I'm not sure if, it is right because the sign are the same. – Junius L May 15 '16 at 10:30
  • 1
    If the signs match, your function works correctly. – fuz May 15 '16 at 11:58
  • As FUZxxl said, if signs match your function works correctly. Return values of `strcmp()` are specified as being negative, zero, or positive. Specific negative or positive values are neither required not guaranteed. – Peter May 15 '16 at 13:45

2 Answers2

3

char is signed in many implementations, and your strcmp implementation considers char values < 0 to be smaller than those greater than 0. Perhaps you want to compare the unsigned values instead.

const unsigned char *str1 = (unsigned char*) s1;
const unsigned char *str2 = (unsigned char*) s2;
Joni
  • 108,737
  • 14
  • 143
  • 193
1

Here's what the standard says about strcmp, with a relevant section highlighted in bold:

The sign of a non-zero return value shall be determined by the sign of the difference between the values of the first pair of bytes (both interpreted as type unsigned char) that differ in the strings being compared.

Your code is taking the difference of the bytes as char, which if signed differs from the spec.

Instead:

return (unsigned char)(*str1) - (unsigned char)(*str2);

Here's some test cases for the original code (my_strcmp), the currently accepted answer of dasblinkenlight (my_strcmp1), and this answer (my_strcmp2). Only my_strcmp2 passes the tests.

#include <string.h>
#include <stdio.h>

int my_strcmp(const char *s1, const char *s2) {
    const signed char *str1 = (const signed char*)(s1);
    const signed char *str2 = (const signed char*)(s2);

    while ((*str1 == *str2) && *str1)
    {
        str1++;
        str2++;
    }
    return (*str1 - *str2);
}

int my_strcmp1(const char *s1, const char *s2) {
    const signed char *str1 = (const signed char*)(s1);
    const signed char *str2 = (const signed char*)(s2);

    while ((*str1 == *str2) && *str1)
    {
        str1++;
        str2++;
    }
    return (signed char)(*str1 - *str2);
}

int my_strcmp2(const char *s1, const char *s2) {
    const signed char *str1 = (const signed char*)(s1);
    const signed char *str2 = (const signed char*)(s2);

    while ((*str1 == *str2) && *str1)
    {
        str1++;
        str2++;
    }
    return (unsigned char)(*str1) - (unsigned char)(*str2);
}


int sgn(int a) {
    return a > 0 ? 1 : a < 0 ? -1 : 0;
}

#define TEST(sc, a, b) do { \
    if (sgn(sc(a, b)) != sgn(strcmp(a, b))) { \
        printf("%s(%s, %s) = %d, want %d\n", #sc, a, b, sc(a, b), strcmp((const char*)a, (const char*)b)); \
        fail = 1; \
    } } while(0)

int main(int argc, char *argv[]) {
    struct {
        const char *a;
        const char *b;
    }cases[] = {
        {"abc", "abc"},
        {"\x01", "\xff"},
        {"\xff", "\x01"},
        {"abc", "abd"},
        {"", ""},
    };
    int fail = 0;
    for (int i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) {
        TEST(my_strcmp, cases[i].a, cases[i].b);
        TEST(my_strcmp1, cases[i].a, cases[i].b);
        TEST(my_strcmp2, cases[i].a, cases[i].b);
    }
    return fail;
}

(note: I put in some explicit signed in the implementations so that the code can be tested on compilers with unsigned char). Also, sorry about the macro -- this was a quick hack to test!

Paul Hankin
  • 54,811
  • 11
  • 92
  • 118