2

Code snippet:

/* The input is a writable array because strtok mutates the string it scans. */
char str[] = "String1::String2:String3:String4::String5";
/* strtok treats every character of deli as a delimiter, so "::" acts like ":". */
char *deli = "::";
/* The first call passes the string itself and yields the first token. */
char *token = strtok(str,deli);

/* Print tokens one per line until strtok reports that none are left. */
while(token != NULL)
{
  printf("Token= \"%s\"\n", token);
  /* Passing NULL asks strtok to continue with the string from the first call. */
  token=strtok(NULL,deli);
}

The above code snippet produces the output:

Token="String1"
Token="String2"
Token="String3"
Token="String4"
Token="String5"

but I want the output to be:

Token="String1"
Token="String2:String3:String4"
Token="String5"

I know that I am not getting the expected output because each character in the second argument of strtok is considered as a delimiter.

To get the expected output, I've written a program that uses strstr (and other things) to split the given string into tokens such that I get the expected output. Here is the program:

#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>

/*
 * Print every non-empty token of str, where tokens are separated by the
 * exact multi-character string deli (not by any one of its characters).
 *
 * Each token is written to stdout as  Token = "<text>"  and the listing is
 * followed by a blank line. str is only read, never modified.
 *
 * An empty deli cannot separate anything, so the whole string is reported
 * as a single token in that case.
 *
 * Returns the number of tokens printed, or -1 if str or deli is NULL.
 */
int myStrtok(char *str, const char *deli)
{
    if (str == NULL || deli == NULL)
        return -1;

    const size_t deliLen = strlen(deli);
    int tokens = 0;
    char *output = str;

    /* strstr() matches an empty needle at the current position without
     * consuming input, which would loop forever, so skip the search then. */
    while (deliLen > 0)
    {
        char *token = strstr(output, deli);
        if (token == NULL)
            break;

        /* Text between the cursor and the delimiter is a token; adjacent
         * delimiters leave nothing in between and are skipped silently. */
        if (token != output)
        {
            printf("Token = \"");
            fwrite(output, 1, (size_t)(token - output), stdout);
            printf("\"\n");
            tokens++;
        }

        output = token + deliLen;
    }

    /* Whatever follows the last delimiter is the final token. */
    if (*output != '\0')
    {
        printf("Token = \"%s\"", output);
        tokens++;
    }
    printf("\n\n");
    return tokens;
}

/*
 * Demo driver: shows the sample string, lets myStrtok() list its tokens
 * for the "::" separator, then reports either the token count or the
 * NULL-argument error.
 */
int main(void)
{
    char input[] = "One:1:Two::Three::::";
    char *separator = "::";

    printf("Original string=\"%s\"\n\n", input);

    const int count = myStrtok(input, separator);

    /* -1 is the only error value myStrtok() reports. */
    if (count != -1)
    {
        printf("Number of tokens=%d\n", count);
        return EXIT_SUCCESS;
    }

    printf("The string or the delimeter is NULL\n");
    return EXIT_SUCCESS;
}

The above program produces the expected output.

I'm wondering if there are any easier/simpler ways to do it. Are there any?

Spikatrix
  • 20,225
  • 7
  • 37
  • 83
  • Do you want to keep the same style as `strtok`, multiple calls subsequently given `NULL` instead of delimiter? Or would you go with something returning an array? – Eregrith Apr 24 '15 at 12:49
  • I'd change the function prototype to have `char const * deli`. Also, it's supposed to return a `char *`, not an `int`. – EOF Apr 24 '15 at 12:50
  • Duplicate to http://stackoverflow.com/questions/7079694/is-there-a-way-to-split-a-string-on-multiple-characters-in-c – Anshul Apr 24 '15 at 12:51
  • @Eregrith , I don't really care if the style has `NULL` as the first argument or not. The same goes with returning an array. – Spikatrix Apr 24 '15 at 12:51
  • @EOF , Good point. The declaration of `deli` in `main` can also be done the same way to avoid problems. – Spikatrix Apr 24 '15 at 12:52
  • @CoolGuy I'd go with `char **foo(const char* delim, char *str)`, looping through `str`, `strncmp`-ing on each char with `delim`, replacing it in `str` if found with `\0`es and storing pointers to after each of those delim blocks. You have to modify `str` though, so it might not be usable every time, depending on what you need to do. – Eregrith Apr 24 '15 at 12:59
  • @Eregrith , I see. Could you post an answer? – Spikatrix Apr 24 '15 at 13:01
  • possible duplicate of [How to extract the string if we have have more than one delimiters?](http://stackoverflow.com/questions/22827998/how-to-extract-the-string-if-we-have-have-more-than-one-delimiters) – Jongware Apr 24 '15 at 13:08

3 Answers

4

A string-delimiter function that uses strtok's prototype and mimics its usage:

/*
 * strtok-like tokenizer whose delimiter is one exact multi-character string.
 *
 * Pass the string on the first call and NULL on following calls to continue
 * with the same string. The string is modified: the first character of each
 * delimiter occurrence is overwritten with '\0'.
 *
 * Unlike strtok, empty tokens (leading, trailing, or between adjacent
 * delimiters) are returned. An empty delim matches nothing, so the rest of
 * the string is returned as the last token.
 *
 * Returns the next token, or NULL when delim is NULL or no input remains.
 * The resume position is kept in a static variable shared by all callers.
 */
char *strtokm(char *str, const char *delim)
{
    static char *next;  /* where the following call resumes; NULL when done */

    if (delim == NULL) return NULL;

    char *tok = (str != NULL) ? str : next;
    if (tok == NULL) return NULL;

    /* strstr() reports an empty needle as a match at tok itself; cutting
     * there would destroy a character and never advance, so skip it. */
    const size_t dlen = strlen(delim);
    char *m = (dlen > 0) ? strstr(tok, delim) : NULL;

    if (m != NULL) {
        *m = '\0';
        next = m + dlen;
    } else {
        next = NULL;
    }

    return tok;
}
M Oehm
  • 28,726
  • 3
  • 31
  • 42
  • This produces an extra token when I have `::` at the end or start of the string `str` – Spikatrix Apr 25 '15 at 12:13
  • Yes, that's by design. Empty tokens at the beginning, at the end or between delimiters are extracted. I admit that this isn't quite `strtok`'s usage, but unlike `strtok`, this function can't match an arbitrary stretch of delimiters; it must match the delimiter exactly. (This behaviour is in accordance with how Python implements its `split`.) – M Oehm Apr 25 '15 at 13:32
  • You can ignore empty tokens by adding this before the `return` statement in the last line: `if (m == tok || *tok == '\0') return strtokm(NULL, delim);` – M Oehm Apr 25 '15 at 13:33
1

If you don't care about the same usage as strtok I would go with this:

// "String1::String2:String3:String4::String5" with delimiter "::" will produce
// "String1\0\0String2:String3:String4\0\0String5"
// and the returned array will point at the first S, the second S and the last S.

// Count the non-empty tokens of str when it is split on the exact string
// delimiter (len is strlen(delimiter)). Walks the text the same way as the
// splitting loop in strToWordArray so the two always agree.
static size_t countTokens(const char *str, const char *delimiter, size_t len)
{
  size_t count = 0;

  while (*str != '\0')
  {
    // An empty delimiter can never separate anything.
    const char *hit = (len > 0) ? strstr(str, delimiter) : NULL;

    if (hit != str) // some text precedes the delimiter (or the end)
      count++;
    if (hit == NULL)
      break;
    str = hit + len;
  }
  return count;
}

// Split str in place on the exact multi-character string delimiter.
//
// Every byte of each delimiter occurrence in str is overwritten with '\0'.
// Empty tokens (leading, trailing or between adjacent delimiters) are skipped.
// With an empty delimiter the whole non-empty string is the only token.
//
// Returns a malloc'd, NULL-terminated array of pointers into str; the caller
// must free() the array (not the elements). Returns NULL if str or delimiter
// is NULL or if the allocation fails.
char **strToWordArray(char *str, const char *delimiter)
{
  if (str == NULL || delimiter == NULL)
    return NULL;

  size_t len = strlen(delimiter);
  // Counted before splitting, while the delimiters are still intact.
  size_t nwords = countTokens(str, delimiter, len);

  char **words = malloc(sizeof(*words) * (nwords + 1));
  if (words == NULL)
    return NULL;

  size_t w = 0;
  while (*str != '\0')
  {
    char *hit = (len > 0) ? strstr(str, delimiter) : NULL;

    if (hit != str) // a non-empty token starts at str
      words[w++] = str;
    if (hit == NULL)
      break;

    memset(hit, 0, len); // terminate the token and blank the delimiter
    str = hit + len;
  }
  words[w] = NULL;
  return words;
}
Eregrith
  • 4,263
  • 18
  • 39
  • I get `Warning: comparison with pointer and integer` here: `while (*str != NULL)`. Should I use `while (*str != '\0')` or `while (*str)`? And should `countWords` return the number of tokens or the number of words in `str`? What should it return in case of `str` being `"String1::String2:String3:String4::String5"`? – Spikatrix Apr 25 '15 at 12:40
  • Ah yes sorry it's indeed what you said. And countwords should return the number of "tokens", for your example it's 3 – Eregrith Apr 25 '15 at 12:57
  • 1
    @CoolGuy You tried with a string literal, which is read-only, and your printing loop misses an increment. – Eregrith Apr 26 '15 at 10:34
  • Oops. I changed `words[i]` to `words[i++]`in the `printf` in the `while` loop in `main`. Still doesn't work. I used `"::"` as a string literal as It won't be modified. Now the program runs, but doesn't split the string the way I want. It just prints `Token="String1::String2:String3:String4::String5" ` – Spikatrix Apr 26 '15 at 10:48
  • Also added `else str--` as in your edit. Didn't work. – Spikatrix Apr 26 '15 at 13:44
  • 1
    @CoolGuy Your string must not be read-only for this method to work, as I told you. A string literal won't do it. Try using `char *str = strdup("String1::String2:String3:String4::String5")` – Eregrith Apr 26 '15 at 15:52
  • But `str` isn't read only as it is an array. I've also tried with `strdup` and it didn't work. – Spikatrix Apr 27 '15 at 06:25
  • @CoolGuy omg DUH... Sorry, I forgot `strcmp` expected `\0`. I had a feeling I should use `strncmp` but did not remember why. I fixed my answer – Eregrith Apr 27 '15 at 07:32
  • Ok. [It worked now](http://rextester.com/RELQ24046). Thanks. I too had felt confused about why you had used `strcmp`. – Spikatrix Apr 27 '15 at 07:38
  • @CoolGuy Yes, sorry. Not enough coffee – Eregrith Apr 27 '15 at 07:50
  • Should there be a free somewhere to balance the malloc? – Jiminion Mar 06 '19 at 16:19
  • @Jiminion No sir, because what is malloc'd is returned the responsibility becomes that of the caller to free the allocated memory when done. For that reason make sure your function names reflect in some consistent way if they allocated their result and you need to free it. – Eregrith Mar 06 '19 at 16:22
0

code derived from strsep https://code.woboq.org/userspace/glibc/string/strsep.c.html

/*
 * strsep-like tokenizer whose delimiter is one exact multi-character string.
 *
 * *stringp is the text still to be scanned. The token starting there is
 * terminated in place (the first character of the delimiter becomes '\0')
 * and *stringp is advanced past the delimiter, or set to NULL when the
 * returned token is the last one. Empty tokens are returned, not skipped.
 *
 * An empty delim matches nothing, so the remaining text is returned as the
 * last token.
 *
 * Returns the token, or NULL if stringp, *stringp or delim is NULL.
 */
char *strsepm( char **stringp, const char *delim ) {

    char *begin, *end;
    size_t delim_len;

    if  ( stringp == NULL || delim == NULL ) return NULL;

    begin = *stringp;

    if  ( begin == NULL ) return NULL;

    /* Find the end of the token. strstr() reports an empty needle as a
       match at begin, which would never advance, so skip the search then. */
    delim_len = strlen( delim );
    end = ( delim_len > 0 ) ? strstr( begin , delim ) : NULL;

    if ( end != NULL ) {

        /* Terminate the token and set *STRINGP past the delimiter.  */
        *end = '\0';

        *stringp = end + delim_len;

    } else {

        /* No more delimiters; this is the last token.  */
        *stringp = NULL;
    }

    return begin;
}

/*
 * Demo driver: copies a sample sentence into a writable buffer and prints
 * each piece that strsepm() cuts off at "&&", one per line in single quotes.
 */
int main( int argc , char *argv [] ) {

    const char      *separator = "&&";
    char            text [ 256 ];
    char            *cursor;
    char            *piece;

    strcpy( text , " && Hello && Bernd && waht's && going && on &&");

    /* strsepm() advances cursor itself and returns NULL once it is exhausted. */
    for ( cursor = text ; ( piece = strsepm( &cursor , separator ) ) != NULL ; ) {

        printf( "\'%s\'\n" , piece );

    }
}

Result:

' '   
' Hello '    
' Bernd '    
' waht's '    
' going '    
' on '    
''
sg7
  • 6,108
  • 2
  • 32
  • 40