67

I want to write a program in C that displays each word of a whole sentence (taken as input) at a seperate line. This is what I have done so far:


void manipulate(char *buffer);
int get_words(char *buffer);

int main(){
    char buff[100];

    printf("sizeof %d\nstrlen %d\n", sizeof(buff), strlen(buff));   // Debugging reasons

    bzero(buff, sizeof(buff));

    printf("Give me the text:\n");
    fgets(buff, sizeof(buff), stdin);

    manipulate(buff);
    return 0;
}

int get_words(char *buffer){                                        // Function that gets the word count, by counting the spaces.
    int count;
    int wordcount = 0;
    char ch;

    for (count = 0; count < strlen(buffer); count ++){
        ch = buffer[count];
        if((isblank(ch)) || (buffer[count] == '\0')){                   // if the character is blank, or null byte add 1 to the wordcounter
            wordcount += 1;
        }
    }
    printf("%d\n\n", wordcount);
    return wordcount;
}

void manipulate(char *buffer){
    int words = get_words(buffer);
    char *newbuff[words];
    char *ptr;
    int count = 0;
    int count2 = 0;
    char ch = '\n';
    
    ptr = buffer;
    bzero(newbuff, sizeof(newbuff));

    for (count = 0; count < 100; count ++){
        ch = buffer[count];
        if (isblank(ch) || buffer[count] == '\0'){
            buffer[count] = '\0';
            if((newbuff[count2] = (char *)malloc(strlen(buffer))) == NULL) {
                printf("MALLOC ERROR!\n");
                exit(-1);
            }
            strcpy(newbuff[count2], ptr);
            printf("\n%s\n",newbuff[count2]);
            ptr = &buffer[count + 1];
            count2 ++;
        }
    }
}

Although the output is what I want, I have really many black spaces after the final word displayed, and the malloc() returns NULL so the MALLOC ERROR! is displayed in the end. I can understand that there is a mistake at my malloc() implementation, but I do not know what it is.

Is there another more elegant or generally better way to do it?

starball
  • 20,030
  • 7
  • 43
  • 238
redsolja
  • 673
  • 1
  • 5
  • 4
  • 68
    Ouch. Every time I read a C string manipulation question, I thank god that I don't have to use C for this. –  Dec 22 '10 at 20:19
  • 5
    Are you aware of the standard library function `strtok` (or the safer extension `strtok_r`)? – ephemient Dec 22 '10 at 20:20
  • Thanks for the info, i was not aware of the function. The point is that i do not know if i do need to use it since i want to get input from files, print out the words on screen and other file and remove the duplicate words etc. The man page does not make me understand much about the specific function. Also i would like to do it manually first, in order to gain a better grasp from the C language. – redsolja Dec 22 '10 at 20:30
  • 1
    @delnan - It's not that bad if you do it right. And @redsolja is not doing it right. String manipulation can be rather elegant in C... – asveikau Dec 22 '10 at 20:33
  • 1
    @delnan: Given the relationship between spaghetti code and programming questions, it makes more sense to thank the Flying Spaghetti Monster (FSM). You will note that FSM also stands for Finite State Machine. – Dave Jarvis Dec 22 '10 at 20:38
  • @delnan and C is the language of Unix.. an "all is text" OS.. – xealits Jul 13 '15 at 14:18

9 Answers9

111

http://www.cplusplus.com/reference/clibrary/cstring/strtok/

Take a look at this, and use whitespace characters as the delimiter. If you need more hints let me know.

From the website:

char * strtok ( char * str, const char * delimiters );

On a first call, the function expects a C string as argument for str, whose first character is used as the starting location to scan for tokens. In subsequent calls, the function expects a null pointer and uses the position right after the end of last token as the new starting location for scanning.

Once the terminating null character of str is found in a call to strtok, all subsequent calls to this function (with a null pointer as the first argument) return a null pointer.

Parameters

  • str
    • C string to truncate.
    • Notice that this string is modified by being broken into smaller strings (tokens). Alternativelly [sic], a null pointer may be specified, in which case the function continues scanning where a previous successful call to the function ended.
  • delimiters
    • C string containing the delimiter characters.
    • These may vary from one call to another.

Return Value

A pointer to the last token found in string. A null pointer is returned if there are no tokens left to retrieve.

Example

/* strtok example */
#include <stdio.h>
#include <string.h>

int main ()
{
  char str[] ="- This, a sample string.";
  char * pch;
  printf ("Splitting string \"%s\" into tokens:\n",str);
  pch = strtok (str," ,.-");
  while (pch != NULL)
  {
    printf ("%s\n",pch);
    pch = strtok (NULL, " ,.-");
  }
  return 0;
}
elixenide
  • 44,308
  • 16
  • 74
  • 100
Hortinstein
  • 2,667
  • 1
  • 22
  • 22
  • 1
    This link is much more helpful than the strtok man page, thanks much and i will look at it – redsolja Dec 22 '10 at 20:46
  • 12
    Be aware that `strtok()` manipulates its input `str` directly. If you don't want its value to change, copy it to another c-string first before calling `strtok()`. – Kevin Mar 02 '14 at 18:41
  • 1
    This gives me a segmentation fault at line `pch = strtok (NULL, " ,.-");`. Running on Ubuntu 14.04 with GCC 4.8.4 (it's an online environment). – Noein Feb 28 '17 at 06:47
  • Do not use `strtok()` in most cases, it is not thread safe and you get problems when you use it somewhere and in a subroutine called from there. – 12431234123412341234123 Dec 21 '20 at 12:18
  • @Kevin I'm curious - how and why does `strtok` modify it's `str` argument ? – user426 Dec 27 '21 at 04:44
4

For the fun of it here's an implementation based on the callback approach:

const char* find(const char* s,
                 const char* e,
                 int (*pred)(char))
{
    while( s != e && !pred(*s) ) ++s;
    return s;
}

void split_on_ws(const char* s,
                 const char* e,
                 void (*callback)(const char*, const char*))
{
    const char* p = s;
    while( s != e ) {
        s = find(s, e, isspace);
        callback(p, s);
        p = s = find(s, e, isnotspace);
    }
}

void handle_word(const char* s, const char* e)
{
    // handle the word that starts at s and ends at e
}

int main()
{
    split_on_ws(some_str, some_str + strlen(some_str), handle_word);
}
wilhelmtell
  • 57,473
  • 20
  • 96
  • 131
1

malloc(0) may (optionally) return NULL, depending on the implementation. Do you realize why you may be calling malloc(0)? Or more precisely, do you see where you are reading and writing beyond the size of your arrays?

ephemient
  • 198,619
  • 38
  • 280
  • 391
  • I am calling malloc() because i need to arrange space for the array of pointers newbuff. I am really confused about the r and w beyond the size of my arrays. The only thing i notice is the " ptr = &buffer[count + 1]; " which makes the ptr pointer to point one byte further from the array. It is important, i know and i can change this, but... What else? Thank you very much for your response. – redsolja Dec 22 '10 at 20:36
  • @redsolja: You only have `words` places in `newbuff`, yet `count` goes from 0 to 99. Your `malloc` strings do not include space for the trailing NUL. You don't handle consecutive spaces. You never count the last word in a sentence. And so on... – ephemient Dec 22 '10 at 20:45
1

Consider using strtok_r, as others have suggested, or something like:

void printWords(const char *string) {
    // Make a local copy of the string that we can manipulate.
    char * const copy = strdup(string);
    char *space = copy;
    // Find the next space in the string, and replace it with a newline.
    while (space = strchr(space,' ')) *space = '\n';
    // There are no more spaces in the string; print out our modified copy.
    printf("%s\n", copy);
    // Free our local copy
    free(copy);
}
Dave Jarvis
  • 30,436
  • 41
  • 178
  • 315
Stephen Canon
  • 103,815
  • 19
  • 183
  • 269
0

Something going wrong is get_words() always returning one less than the actual word count, so eventually you attempt to:

char *newbuff[words]; /* Words is one less than the actual number,
so this is declared to be too small. */

newbuff[count2] = (char *)malloc(strlen(buffer))

count2, eventually, is always one more than the number of elements you've declared for newbuff[]. Why malloc() isn't returning a valid ptr, though, I don't know.

Doddy
  • 1,311
  • 1
  • 17
  • 31
  • About the get_words(), this is why i print the result, and it does work correctly (I think).: sizeof 100 strlen 0 Give me the text: wordone wordtwo 1 ... Where wordone is word number 0 and wordtwo is word number 1 – redsolja Dec 22 '10 at 20:42
  • It may work, but you're accessing memory that you haven't declared, which is dangerous. `count2` eventually becomes greater than (words - 1) so the value being returned by `malloc()` is being written to an invalid memory location. Also, at that point, you're getting the length of an empty string, so the `malloc(0)` fails. – Doddy Dec 22 '10 at 20:51
0

You should be malloc'ing strlen(ptr), not strlen(buf). Also, your count2 should be limited to the number of words. When you get to the end of your string, you continue going over the zeros in your buffer and adding zero size strings to your array.

Bartosz Milewski
  • 11,012
  • 5
  • 36
  • 45
0

Just as an idea of a different style of string manipulation in C, here's an example which does not modify the source string, and does not use malloc. To find spaces I use the libc function strpbrk.

int print_words(const char *string, FILE *f)
{
   static const char space_characters[] = " \t";
   const char *next_space;

   // Find the next space in the string
   //
   while ((next_space = strpbrk(string, space_characters)))
   {
      const char *p;

      // If there are non-space characters between what we found
      // and what we started from, print them.
      //
      if (next_space != string)
      {
         for (p=string; p<next_space; p++)
         {
            if(fputc(*p, f) == EOF)
            {
               return -1;
            }
         }

         // Print a newline
         //
         if (fputc('\n', f) == EOF)
         {
            return -1;
         }
      }

      // Advance next_space until we hit a non-space character
      //
      while (*next_space && strchr(space_characters, *next_space))
      {
         next_space++;
      }

      // Advance the string
      //
      string = next_space;
   }

   // Handle the case where there are no spaces left in the string
   //
   if (*string)
   {
      if (fprintf(f, "%s\n", string) < 0)
      {
         return -1;
      }
   }

   return 0;
}
asveikau
  • 39,039
  • 2
  • 53
  • 68
0

you can scan the char array looking for the token if you found it just print new line else print the char.

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    int main()
    {
        char *s;
        s = malloc(1024 * sizeof(char));
        scanf("%[^\n]", s);
        s = realloc(s, strlen(s) + 1);
        int len = strlen(s);
        char delim =' ';
        for(int i = 0; i < len; i++) {
            if(s[i] == delim) {
                printf("\n");
            }
            else {
                printf("%c", s[i]);
            }
        }
        free(s);
        return 0;
    }
Fahad Alotaibi
  • 416
  • 3
  • 9
-1
char arr[50];
gets(arr);
int c=0,i,l;
l=strlen(arr);

    for(i=0;i<l;i++){
        if(arr[i]==32){
            printf("\n");
        }
        else
        printf("%c",arr[i]);
    }