1

I am trying to tokenize a string to give an array of strings but it seems like my code is wrong.

Here is my code:

asmInstruction *tokenizeLine(char *charLine) {
    int words = countTokens(charLine);
    char *tokens = (char*) malloc(MAX_LINE_LENGTH);

    asmInstruction *instr = (asmInstruction*) malloc(sizeof(asmInstruction*));
    instr->args = (char**) malloc(MAX_LINE_LENGTH);

    int count = 1;
    tokens = strtok(charLine, " ,");
    while (count <= words) {
        tokens = strtok(NULL, " ,");
        instr->args[count - 1] = (char*)malloc(MAX_LINE_LENGTH);
        instr->args[count - 1] = tokens;
        ++count;
    }

    free(tokens);
    return instr;
}

 /* Reads a file and returns the number of lines in this file. */
    uint32_t countLines(FILE *file) {
    uint32_t lines = 0;
    int32_t c;
    while (EOF != (c = fgetc(file))) {
        if (c == '\n') {
            ++lines;
        }
    }
    /* Reset the file pointer to the start of the file */
    rewind(file);
    return lines;
}

And the structure:

typedef struct {
    char **args; /* An array of strings*/
} asmInstruction;

My main is here:

int main() {
    char s[] = "ldr r2,r1";
    asmInstruction *instr = tokenizeLine(s);
    printf("%s", instr->args[0]);
}

/* Counts the number of tokens in a line */
uint32_t countTokens(char line[]) {
    /* The correct way to do this! */
    uint32_t numberOfTokens = 0;
    /* Split at spaces and commas */
    char *tokens = strtok(line, " ,");
    while (tokens != NULL) {
        tokens = strtok(NULL, " ,");
        numberOfTokens++;
    }
    return numberOfTokens;
}

So, this should print ldr. However, it prints null. If I loop over the tokens it doesn't print them out but null. I'm expecting to print out the tokens

ldr r2 r1

But only the first one gets printed out.

It seems like instr->args[count-1] never gets assigned something because apparently tokens hasn't been assigned something either.

Why is that? Thanks.

Nubcake
  • 449
  • 1
  • 8
  • 21
  • 2
    sizeof(asmInstruction*) != sizeof(asmInstruction) – Adriano Repetti Jun 05 '15 at 16:10
  • ... or just `sizeof *instr`. The memory leak from `instr->args[count - 1] = tokens` that throws away the allocation from the prior line isn't helping your cause either, nor is `free(tokens);`, which invokes undefined behavior, as it no longer points to the original allocation address by the time your loop is done with it. – WhozCraig Jun 05 '15 at 16:11
  • I changed it back but it still gives me null , I think it is a problem with allocating memory for the individual strings? – Nubcake Jun 05 '15 at 16:14
  • Additionally, after `instr->args[count - 1] =(char*)malloc(MAX_LINE_LENGTH);` you have to use strcpy or the like to copy the result into the memory allocated, as just assigning to the temporary token leaks the memory allocated and then that memory you now point to with `token` is later trashed because it points to a static buffer in the strtok implementation instead of the memory you allocated, which will change the result after each call to strtok. – JohnH Jun 05 '15 at 16:15
  • I tried strcpy(instr->args[count - 1],tokens); but now it seg faults on this line. – Nubcake Jun 05 '15 at 16:23
  • in the function: countTokens(), in the while() loop, the order on the statements is incorrect. they should be: numberOfTokens++; tokens = strtok(NULL, " ,"); – user3629249 Jun 06 '15 at 17:32

2 Answers2

1

the following code:

handles errors
has many/ most of the logic errors corrected
properly defines the struct asmInstruction
performs the functionality indicated in the question.

suggest elimination of the struct asmInstruction as it is not needed, just use a char** args = NULL; in the tokenizeLine() function and return args.

it is not necessary, nor desirable to malloc memory for 'tokens'. Because that memory pointer will be overlayed each time 'tokens' is set from the returned value from strtok() If a malloc is done, then there will be a memory leak.

In the following code, there still needs to be some additional logic for freeing malloc'd memory and for closing the file before calling 'exit( EXIT_FAILURE );'

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>


struct asmInstruction
{
    char **args; /* An array of strings*/
};

#define MAX_LINE_LENGTH (100)

// prototypes
uint32_t countTokens(char line[]);
uint32_t countLines(FILE *file);
struct asmInstruction *tokenizeLine(char *charLine);

int main( void )
{
    char s[] = "ldr r2,r1";
    struct asmInstruction *instr = tokenizeLine(s);
    printf("%s", instr->args[0]);
    return( 0 );
} // end function: main


/* Counts the number of tokens in a line */
uint32_t countTokens(char line[])
{
    /* The correct way to do this! */
    uint32_t numberOfTokens = 0;
    /* Split at spaces and commas */
    char *tokens = strtok(line, " ,");

    while (tokens != NULL)
    {
        tokens = strtok(NULL, " ,");
        numberOfTokens++;
    }
    return numberOfTokens;
} // end function: countTokens


struct asmInstruction *tokenizeLine(char *charLine)
{
    int words = countTokens(charLine);

    char *tokens = NULL;

    struct asmInstruction *instr = NULL;
    if( NULL == (instr = malloc(sizeof( struct asmInstruction)) ) )
    { // then malloc failed
        perror( "malloc for struct asmInstruction failed" );
        exit( EXIT_FAILURE );
    }

    // implied else, malloc successful

    instr->args = NULL;
    if( NULL == (instr->args = malloc(words*sizeof(char*)) ) )
    { // then malloc failed
        perror( "malloc for array of char pointers failed:" );
        exit( EXIT_FAILURE );
    }

    // implied else, malloc successful

    memset( instr->args, '\0', words*sizeof(char*) );

    int count = 0;
    tokens = strtok(charLine, " ,");

    while ( tokens )
    {
        if( NULL == (instr->args[count] = malloc(strlen(tokens)+1) ) )
        { // then, malloc failed
            perror( "malloc for arg failed" );
            exit( EXIT_FAILURE );
        }

        // implied else, malloc successful

        strcpy(instr->args[count], tokens );
        ++count;
        tokens = strtok(NULL, " ,");
    } // end while

    return instr;
} // end function: tokenizeLine


 /* Reads a file and returns the number of lines in this file. */
    uint32_t countLines(FILE *file)
    {
    uint32_t lines = 0;
    int32_t c;

    while (EOF != (c = fgetc(file)))
    {
        if (c == '\n') {
            ++lines;
        }
    }

    /* Reset the file pointer to the start of the file */
    rewind(file);
    return lines;
} // end function: countLines
user3629249
  • 16,402
  • 1
  • 16
  • 17
0
asmInstruction *tokenizeLine(char *charLine) {
    int words = countTokens(charLine);
    char *tokens;//don't need malloc for this, because just pointer holder.

    asmInstruction *instr = (asmInstruction*) malloc(sizeof(asmInstruction));//allocate size isn't sizeof(asmInstruction*)
    instr->args = (char**) malloc((words+1) * sizeof(char*));//+1 for NULL, or add member E.g instr->numOfWords = words

    int count = 0;
    tokens = strtok(charLine, " ,");
    while (tokens) {
        instr->args[count] = malloc(strlen(tokens)+1);
        strcpy(instr->args[count++], tokens);
        //or  process for each line
        //instr->args[count++] = tokens;//no need allocate for word 
        tokens = strtok(NULL, " ,");//get next tokens
    }
    instr->args[count] = NULL;//set sentinel

    return instr;
}
BLUEPIXY
  • 39,699
  • 7
  • 33
  • 70
  • I incorporated the changes but it prints null for the second element , I'm expecting it to print out all the tokens. I can't seem to figure out why it doesn't print out the other tokens? – Nubcake Jun 05 '15 at 16:42
  • @Nubcake I get all elements. [test code](http://ideone.com/QNRQNB) You have something wrong to other. Does `countTokens` return `3` in case of `"ldr r2,r1"` ? – BLUEPIXY Jun 05 '15 at 16:48
  • My guess is `countTokens` changed the string. – BLUEPIXY Jun 05 '15 at 16:56
  • I posted the code to countTokens, yes I forgot I was also using strtok there too. – Nubcake Jun 05 '15 at 17:00
  • @Nubcake your `countTokens` change `"ldr r2,r1"` to `"ldr\0r2\0r1"`. then `asmInstruction` process `"ldr"`. `countTokens` don't change the string for processing after. – BLUEPIXY Jun 05 '15 at 17:04