17

I need to use strtok to read in a first and last name and separate them. How can I store the names so that I can use them independently in two separate char arrays?

#include <stdio.h>
#include <string.h>

/* Splits a fixed sample string on spaces and prints one token per line. */
int main ()
{
  char text[] = "test string.";
  char *word;

  /* The first call scans text; passing NULL afterwards continues the same scan. */
  for (word = strtok(text, " "); word != NULL; word = strtok(NULL, " "))
    printf("%s\n", word);

  return 0;
}
Kerrek SB
  • 464,522
  • 92
  • 875
  • 1,084
shinjuo
  • 20,498
  • 23
  • 73
  • 104
  • can I use those to read a char array until a space? – shinjuo Nov 12 '11 at 19:00
  • No, I meant "use them in conjunction with `strtok`". I.e. copy the token (pointed to by `test`) into your target string. – Kerrek SB Nov 12 '11 at 19:01
  • @KerrekSB though using `strchr` and `strndup` would be faster and more flexible (no need to clobber the input) – sehe Nov 12 '11 at 19:27
  • @sehe: True. Many ways to skin this cat. The OP seems to have decided on `strtok` already, so I just went with it... – Kerrek SB Nov 12 '11 at 19:29
  • 1
    @KerrekSB: I added a strtok free way to skin this cat none-the-less. Removes all the problems associated with strtok. – sehe Nov 12 '11 at 19:46

7 Answers7

20

Here is my take at a reasonably simple tokenize helper that

  • stores results in a dynamically growing array
  • null-terminates the array
  • keeps the input string safe (strtok modifies its input, which is undefined behaviour when the input is a string literal; a char[] array initialised from a literal, as in the question, is fine to modify)

To make the code re-entrant, use strtok_r instead of strtok (strtok_r is POSIX, not part of ISO C).

#include <stdio.h>
#include <string.h>
#include <stdlib.h>

/* Returns a malloc'ed copy of the NUL-terminated string s, or NULL on failure. */
static char* copy_token(const char* s)
{
    size_t size = strlen(s) + 1;
    char* copy = malloc(size);

    if (copy)
        memcpy(copy, s, size);
    return copy;
}

/*
 * Splits input on spaces using strtok and returns a NULL-terminated,
 * malloc'ed array of malloc'ed token copies. Runs of spaces are treated
 * as one separator, so no empty tokens are produced.
 *
 * input is not modified; a NULL input yields an array holding only the
 * terminating NULL. Returns NULL if any allocation fails (nothing is
 * leaked in that case). The caller frees every token and then the array.
 *
 * Not re-entrant: strtok keeps its scan position in static state.
 */
char** tokenize(const char* input)
{
    char* str;
    char* tok;
    size_t count = 0;
    size_t capacity = 10;
    char** result = malloc(capacity*sizeof(*result));

    if (!result)
        return NULL;

    /* strtok overwrites delimiters, so tokenize a private copy */
    str = copy_token(input ? input : "");
    if (!str)
    {
        free(result);
        return NULL;
    }

    for (tok = strtok(str, " "); tok; tok = strtok(NULL, " "))
    {
        /* always keep one free slot for the terminating NULL */
        if (count + 1 >= capacity)
        {
            /* assign through a temporary: on failure result stays valid */
            char** grown = realloc(result, (capacity*2)*sizeof(*result));
            if (!grown)
                goto fail;
            result = grown;
            capacity *= 2;
        }

        result[count] = copy_token(tok);
        if (!result[count])
            goto fail;
        ++count;
    }

    result[count] = NULL;
    free(str);
    return result;

fail:
    while (count > 0)
        free(result[--count]);
    free(result);
    free(str);
    return NULL;
}

/* Tokenizes a sample string, prints each token on its own line and frees everything. */
int main ()
{
    char** tokens = tokenize("test string.");
    char** cursor = tokens;

    /* Stop at the first NULL entry; a NULL array is skipped entirely. */
    while (cursor && *cursor)
    {
        printf("%s\n", *cursor);
        free(*cursor);
        ++cursor;
    }

    free(tokens);
    return 0;
}

Here is a strtok-free reimplementation of that (uses strpbrk instead):

/* Returns a malloc'ed, NUL-terminated copy of the len bytes at s, or NULL on failure. */
static char* dup_range(const char* s, size_t len)
{
    char* copy = malloc(len + 1);

    if (copy)
    {
        memcpy(copy, s, len);
        copy[len] = '\0';
    }
    return copy;
}

/*
 * Splits str at every space without modifying it and returns a
 * NULL-terminated, malloc'ed array of malloc'ed token copies.
 *
 * Every single space is a separator, so leading or repeated spaces
 * produce empty-string tokens ("a  b" -> "a", "", "b"); a separator that
 * is the last character does not start another token ("a " -> "a").
 * An empty string yields one empty token; a NULL str yields an array
 * holding only the terminating NULL.
 *
 * Returns NULL if any allocation fails (nothing is leaked in that case).
 * The caller frees every token and then the array.
 */
char** tokenize(const char* str)
{
    size_t count = 0;
    size_t capacity = 10;
    char** result = malloc(capacity*sizeof(*result));
    const char* e = str;

    if (!result)
        return NULL;

    if (e) do
    {
        const char* s = e;
        e = strpbrk(s, " ");

        /* always keep one free slot for the terminating NULL */
        if (count + 1 >= capacity)
        {
            /* assign through a temporary: on failure result stays valid */
            char** grown = realloc(result, (capacity*2)*sizeof(*result));
            if (!grown)
                goto fail;
            result = grown;
            capacity *= 2;
        }

        /* token runs up to the separator, or to the end of the string */
        result[count] = dup_range(s, e ? (size_t)(e - s) : strlen(s));
        if (!result[count])
            goto fail;
        ++count;
    } while (e && *(++e));

    result[count] = NULL;
    return result;

fail:
    while (count > 0)
        free(result[--count]);
    free(result);
    return NULL;
}
sehe
  • 374,641
  • 47
  • 450
  • 633
  • 2
    I think the `realloc` line should have `sizeof(*result)`, not `sizeof(result)`, and the first argument should obviously be `result` and not `realloc`. – Arkku Nov 12 '11 at 19:21
  • Added a `strtok`-free version (it uses `strpbrk` and doesn't need to modify its input, so it is going to be more efficient). – sehe Nov 12 '11 at 19:51
  • strdup + strndup are not C standard, and strtok_r isn't either; it's only POSIX. – user411313 Nov 12 '11 at 20:11
  • @user411313 AFAICT both [strdup](http://pubs.opengroup.org/onlinepubs/009604499/functions/strdup.html) and [strtok_r](http://pubs.opengroup.org/onlinepubs/009604499/functions/strtok_r.html) are part of [IEEE Std 1003.1,2004 Edition](http://www.unix.org/version3/ieee_std.html). Obviously, good point on the `strndup` but, it is easily wrapped (or just use the `strtok` version) – sehe Nov 12 '11 at 20:32
  • `char str[] ="test string."` here `str` is a `char[13]` and perfectly safe to modify. (It was unclear to me what you meant by "char[] literal") – u0b34a0f6ae Nov 12 '11 at 22:00
  • @kaizer.se: it could be me confusing C99 and C++03 (I'm a C++ guy mostly) – sehe Nov 12 '11 at 22:03
7

Do you need to store them separately? Two pointers into a modified char array will yield two separate perfectly usable strings.

That is we transform this:

char str[] ="test string.";

Into this:

char str[] ="test\0string.";
             ^     ^
             |     |
char *s1 -----     |
char *s2 -----------

.

#include <stdio.h>
#include <string.h>

/* Splits "first last" in place and prints it as "last, first". */
int main ()
{
  char full_name[] = "test string.";
  char *given;
  char *family;

  given = strtok(full_name, " ");  /* first token of the buffer */
  family = strtok(NULL, " ");      /* second token, or NULL when there is none */

  /* a missing last name is printed as an empty string */
  printf("%s, %s\n", family != NULL ? family : "", given);
  return 0;
}
u0b34a0f6ae
  • 48,117
  • 14
  • 92
  • 101
4

What about using strcpy:

#include <stdlib.h>
#include <stdio.h>
#include <string.h>

#define MAX_NAMES 2

/*
 * Copies up to MAX_NAMES space-separated tokens of a sample string into
 * individually allocated buffers, prints them, and frees them again.
 * Returns EXIT_FAILURE if a buffer cannot be allocated.
 */
int main ()
{
  char str[] ="test string.";
  char *names[MAX_NAMES] = { 0 };
  char *test;
  int i = 0;
  int status = EXIT_SUCCESS;

  test = strtok (str," ");
  while (test != NULL && i < MAX_NAMES)
  {
    names[i] = malloc(strlen(test)+1);
    if (names[i] == NULL)
    {
      /* copying into a NULL pointer would be undefined behaviour */
      perror("malloc");
      status = EXIT_FAILURE;
      break;
    }
    strcpy(names[i++], test);
    test = strtok (NULL, " ");
  }

  /* unused slots are still NULL from the initializer and are skipped */
  for(i=0; i<MAX_NAMES; ++i)
  {
    if(names[i])
    {
      puts(names[i]);
      free(names[i]);
      names[i] = 0;
    }
  }
  return status;
}

Much of this is boilerplate needed to make it a complete program that cleans up after itself; the main point is to use strcpy to copy each token into its own string.

Christian Rau
  • 45,360
  • 10
  • 108
  • 185
2
#include <stdio.h>
#include <string.h>
#include <stdlib.h>


/*
 * Splits str into tokens separated by any character in delimiter.
 *
 * Returns a malloc'ed, NULL-terminated array of pointers into a single
 * malloc'ed copy of the text. The copy starts at the first token, so
 * element 0 is also the address of that buffer: release the result with
 * free(result[0]) followed by free(result).
 *
 * *len receives the number of tokens (len may be NULL). When str has no
 * tokens the result holds only the terminating NULL and no text buffer
 * is allocated. Returns NULL if str or delimiter is NULL or if an
 * allocation fails. str itself is never modified.
 */
char** split(const char *str, const char *delimiter, size_t *len){
    const char *begin, *scan;
    char *text, *tok, **ret, **slot;
    size_t count = 0, size;

    if(len!=NULL) *len = 0;
    if(str==NULL || delimiter==NULL) return NULL;

    /* skip leading delimiters so the first token starts the copied buffer */
    begin = str + strspn(str, delimiter);

    /* count tokens on the read-only input: step over a token, then its separators */
    for(scan=begin; *scan!='\0'; scan+=strspn(scan, delimiter)){
        scan += strcspn(scan, delimiter);
        ++count;
    }

    ret = malloc(sizeof(*ret)*(count+1));//+1 for NULL
    if(ret==NULL) return NULL;
    if(count==0){
        ret[0] = NULL;
        return ret;
    }

    size = strlen(begin)+1;
    text = malloc(size);
    if(text==NULL){
        free(ret);
        return NULL;
    }
    memcpy(text, begin, size);

    slot = ret;
    for(tok=strtok(text, delimiter); tok!=NULL; tok=strtok(NULL, delimiter)){
        *slot++ = tok;
    }
    *slot = NULL;
    if(len!=NULL) *len = count;
    return ret;
}

/* Releases an array produced by split(); a NULL argument is ignored. */
void free4split(char** sa){
    if(sa==NULL)
        return;

    free(sa[0]);   /* the text buffer, reachable through the first entry */
    free(sa);      /* the pointer array itself */
}

/* Splits a sample sentence on whitespace and punctuation, prints each word, then frees them. */
int main(void){
    char sentence[] = "test string.";
    size_t count = 0;
    size_t idx = 0;
    char **words = split(sentence, " \t\r\n,.", &count);

    /* count stays 0 unless split() reported tokens, so words is not read otherwise */
    while(idx < count){
        printf("%s\n", words[idx]);
        ++idx;
    }

    free4split(words);
    return 0;
}
/* result:
test
string
*/
BLUEPIXY
  • 39,699
  • 7
  • 33
  • 70
1

Copy the results from strtok to a new buffer using a function such as

/*
 * Duplicates the NUL-terminated string s into newly malloc'ed storage and
 * returns the duplicate. If the allocation fails, the error is reported
 * with perror and the process terminates with exit status 1.
 */
char *xstrdup(char const *s)
{
    size_t size = strlen(s) + 1;   /* include the terminating NUL */
    char *copy = malloc(size);

    if (!copy) {
        perror("memory allocation failed");
        exit(1);
    }
    return memcpy(copy, s, size);
}

Don't forget to free the return values when you're done with them.

Fred Foo
  • 355,277
  • 75
  • 744
  • 836
1

IMO, you don't need (and probably don't want) to use strtok at all (as in, "for this, or much of anything else"). I think I'd use code something like this:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Returns a malloc'ed, NUL-terminated copy of the characters in
 * [begin, end), or NULL if the allocation fails.
 */
static char *make_str(char const *begin, char const *end) { 
    size_t len = end-begin;
    char *ret = malloc(len+1);
    if (ret != NULL) {
        memcpy(ret, begin, len);
        ret[len]='\0';
    }
    return ret;
}

/*
 * Splits input into tokens separated by any character in delims and
 * stores malloc'ed copies of at most max tokens in tokens[].
 *
 * Runs of delimiters count as one separator, and leading or trailing
 * delimiters are ignored, so no empty tokens are produced. input is not
 * modified. Returns the number of tokens stored; the caller frees each
 * one. If a copy cannot be allocated, splitting stops and the count of
 * tokens stored so far is returned.
 */
size_t tokenize(char *tokens[], size_t max, char const *input, char const *delims) { 
    size_t count = 0;
    char const *start = input;

    while (count < max) {
        char const *end;

        /* strspn stops at the terminator, so the scan never leaves the string */
        start += strspn(start, delims);
        if (*start == '\0')
            break;
        end = start + strcspn(start, delims);

        tokens[count] = make_str(start, end);
        if (tokens[count] == NULL)
            break;
        ++count;

        /* resume at the separator (or terminator), not one past it */
        start = end;
    }
    return count;
}

#ifdef TEST

#define MAX_TOKENS 10

/* Test driver: tokenizes a sample sentence, prints each token between bars, then frees it. */
int main() {
    char *tokens[MAX_TOKENS];
    size_t remaining = tokenize(tokens, MAX_TOKENS, "This is a longer input string ", " ");
    char **cur = tokens;

    /* walk the filled part of the array front to back */
    while (remaining-- > 0) {
        printf("|%s|\n", *cur);
        free(*cur);
        ++cur;
    }
    return 0;
}

#endif
Jerry Coffin
  • 476,176
  • 80
  • 629
  • 1,111
0

You can do something like this too.

    /*
     * Splits a sample "first rest" string in place at its first space and
     * prints both halves. Needs <stdio.h> and <string.h>.
     */
    int main ()
    {
        char str[] ="test string.";

        char * temp1;
        char * temp2;

        /*
         * Find the space BEFORE cutting the string: once the space has been
         * overwritten with '\0', strchr(str, ' ') can no longer find it.
         */
        temp2 = strchr(str, ' ');
        if (temp2 != NULL)
            *temp2++ = '\0';   /* end the first part; step to the second */
        else
            temp2 = "";        /* no space: never pass NULL to %s */

        temp1 = str;

        printf ("Splitted string :%s, %s\n" , temp1 , temp2);
        return 0;
    }
Thulasi
  • 23
  • 6