0

I'm parsing char arrays from the serial bus and copying their contents into global arrays for handling into other functions. I'm noticing odd behavior when I use strcpy() and strtok() repeatedly. With the Arduino Mega, can repeated calls to strcpy corrupt the memory address?

This is for a low-level instrument, having the Arduino act as the local microcontroller taking-in commands thru serial. I've done a few different ways of initializing the global arrays, including;

//char testDate = "YYmmDD"; //failed
//char testDate[6] = "";
//char testDate[6] = "YYmmDD";
//char testDate[6] = {'Y', 'Y', 'm', 'm', 'D', 'D'};
//char testTime = "HHMMSS"; //failed
//char testTime[6] = "";
//char testTime[6] = "HHMMSS";
//char testTime[6] = {'H', 'H', 'M', 'M', 'S', 'S'};
//char logfile[24] = "BATCH_YYmmDD_HHMMSS$.txt"; // 20 + 4, exact size
//char logfile[24] = {' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' '};

//char testDate[6] = ""; // Null
//char testTime[6] = ""; // Null
char testDate[6] = {'Y', 'Y', 'm', 'm', 'D', 'D'};
char testTime[6] = {'H', 'H', 'M', 'M', 'S', 'S'}; // is appending?
char logfile[24] = ""; // Null

The demo code, in it's simplest form;

#include <String.h>

char gva_logfile[24] = {' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' '};
char gva_testDate[6] = {'Y', 'Y', 'm', 'm', 'D', 'D'}; // is appending?
char gva_testTime[6] = {'H', 'H', 'M', 'M', 'S', 'S'}; 

char lva_testDate[6];

//String y = "BATCH|190117;151442$";
//int lan = y.length(); // should be 20

char x[20] = "BATCH|190117;151442$";
int lan = strlen(x);
//y.toCharArray(x, lan);
//strcpy(x, y);
//y.toCharArray(x, 20);

//strcpy(x, "BATCH|190117;151442$");

void setup() {
  // put your setup code here, to run once:
  Serial.begin(9600);

  Serial.print("<");
  for(int i=0; i<6; i++){
    Serial.print(gva_testDate[i]); // works
    //Serial.print(gva_testTime[i]); // works
  }
  Serial.println(">"); 
}

void loop() {
  // put your main code here, to run repeatedly:
  char tele[6] = {' ', ' ', ' ', ' ', ' ', ' '};
  while(1){
    char flarb[lan];
    strcpy(flarb, x);
    //Serial.println(flarb);

    if(strstr(flarb, "BATCH|")){
      char * strtokIndx;
      strtokIndx = strtok(x, "|");
      //strcpy(tele, strtokIndx);     // did nothing?
      strtokIndx = strtok(NULL, ";");
      strcpy(gva_testDate, strtokIndx); // missing?

      Serial.println(gva_testDate); // Not missing
      for(int i=0; i<6; i++){
        lva_testDate[i] = gva_testDate[i];  
      }      

      strtokIndx = strtok(NULL, "$");
      strcpy(gva_testTime, strtokIndx); // is fine...

      Serial.println(gva_testDate); // MISSING
      Serial.println(lva_testDate);

      if(strstr(gva_testDate, "YYmmDD")!=NULL || strstr(gva_testTime, "HHMMSS")!=NULL){
          //if((gva_testDate == "YYmmDD") || (gva_testTime == "HHMMSS")){  
          char io[28]; // 16 + 2*6
          sprintf(io, "063 ERROR: %s,%s", gva_testDate, gva_testTime);
          //logArdData(io);
          Serial.print("<");
          Serial.print(io);
          Serial.println(">");
          Serial.flush();
        }
        //else if((strstr(gva_testDate, "YYmmDD") && strstr(gva_testTime, "HHMMSS"))==NULL){
        else if((strstr(gva_testDate, "YYmmDD")==NULL && strstr(gva_testTime, "HHMMSS")==NULL)){  
          //else if((gva_testDate != "YYmmDD") && (gva_testTime != "HHMMSS")){  
          char io[26]; // 14 + 2*6

          //sendArdData(gva_testDate); // is combinined?
          //sendArdData(gva_testTime); // still itself

          sprintf(io, "Assigned %s,%s", gva_testDate, gva_testTime); // is combining testDate and testTime, then printing testTime?
          Serial.print("<");
          Serial.print(io);
          Serial.println(">");
          //sendArdData(io);
          //logArdData(io);
          Serial.flush();        
        }
    }
  }
}

When I try to re-print the stored value of gva_testDate after assigning gva_testTime, somehow gva_testDate has become NULL instead of 190117. I expected that gva_testDate would retain its newly-assigned value, however this does not appear to be the case.

The C++ Reference pages for strcpy() http://www.cplusplus.com/reference/cstring/strcpy/?kw=strcpy and strtok() http://www.cplusplus.com/reference/cstring/strtok/ do not make mention of memory-corruption issues/warnings, so I would not expect repeated calls to be modifying the original string x or the parsed char arrays gva_testDate & gva_testTime.

gre_gor
  • 6,669
  • 9
  • 47
  • 52
  • I'm hoping to understand this behavior so I can understand why I'm getting similar issues on a more complex, `String` and `char` -heavy script... – Thomas Solley Jan 18 '19 at 15:08
  • 2
    The first instance of undefined behavior in your code is at `int lan = strlen(x);`. `x` is not a valid string. The second instance is `strcpy(flarb, x)`; the source is not a valid string and the destination is too small. – melpomene Jan 18 '19 at 16:22
  • Keep the nul-terminator in mind. `"YYmmDD"` for example is 7 bytes long. – alain Jan 18 '19 at 16:25
  • 2
    With `char testDate[6] = {'Y', 'Y', 'm', 'm', 'D', 'D'};` the size is correct, but there is no nul-terminator, so you can't use `strcpy` and `strtok` with it. – alain Jan 18 '19 at 16:30
  • @melpomene can you expand-upon what you mean by "`x` is not a valid string"? Am I not creating a 20-char array of name `x` and assigning it the values `BATCH|190117;151442$`? – Thomas Solley Jan 18 '19 at 16:35
  • 1
    A char array by itself is not a string. A string includes a terminating NUL byte. – melpomene Jan 18 '19 at 16:37
  • You are not dealing with C++ `String` that has built-in properties to store it's length. C strings are simply arrays of characters, and there must be a NULL byte(0x00) to signal it's end, without it strcpy and the likes of it won't know when to stop. So you must allocate 1 extra byte for the null byte. The guys that commented on the issue before should post an answer, no? – Arthur Moraes Do Lago Jan 18 '19 at 16:48
  • 1
    @ThomasSolley If you think that's a valid string, explain how `strlen` would work on it. How would it know when it had reached the end of the string? – David Schwartz Jan 18 '19 at 19:34

2 Answers2

1

I did a test and found out exactely what is happening here.

It is the issue with strtok, and also the fact that you are giving character arrays to all these string functions without space for a string dilimiter, ie '0'.

This is the test functio which I wrote on a computer to test stuff out

void printAllObjects(int location,char *x, char* strtokIndx, char *gva_testDate,char *gva_testTime)
{
    printf("At location %d\n x pointer %d, string %s\n"
            " strtokIndx pointer %d, string %s\n "
            "gva_testDate pointer %d, string %s\n "
            "gva_testTime pointer %d, string %s\n ",location,x,x,strtokIndx,strtokIndx,gva_testDate,gva_testDate,gva_testTime,gva_testTime);
}

Then I copied all of your arduino code over to my computer commented out all serial commands, added the stdio header, and used the above funcition at several locations.

#include <cstdlib>
#include <String.h>
#include <stdio.h>

using namespace std;

/*
 * 
 */

char gva_logfile[24] = {' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' '};
char gva_testDate[6] = {'Y', 'Y', 'm', 'm', 'D', 'D'}; // is appending?
char gva_testTime[6] = {'H', 'H', 'M', 'M', 'S', 'S'}; 

char lva_testDate[6];

//String y = "BATCH|190117;151442$";
//int lan = y.length(); // should be 20

char x[] = "BATCH|190117;151442$";
int lan = strlen(x);
//y.toCharArray(x, lan);
//strcpy(x, y);
//y.toCharArray(x, 20);

//strcpy(x, "BATCH|190117;151442$");


void printAllObjects(int location,char *x, char* strtokIndx, char *gva_testDate,char *gva_testTime)
{
    printf("At location %d\n x pointer %d, string %s\n"
            " strtokIndx pointer %d, string %s\n "
            "gva_testDate pointer %d, string %s\n "
            "gva_testTime pointer %d, string %s\n ",location,x,x,strtokIndx,strtokIndx,gva_testDate,gva_testDate,gva_testTime,gva_testTime);
}

void setup() {
  // put your setup code here, to run once:
//  Serial.begin(9600);
//
//  Serial.print("<");
//  for(int i=0; i<6; i++){
//    Serial.print(gva_testDate[i]); // works
//    //Serial.print(gva_testTime[i]); // works
//  }
//  Serial.println(">"); 
}



void loop() {
  // put your main code here, to run repeatedly:
  char tele[6] = {' ', ' ', ' ', ' ', ' ', ' '};
  while(1){
    char flarb[lan];
    strcpy(flarb, x);
    //Serial.println(flarb);

    if(strstr(flarb, "BATCH|")){
      char * strtokIndx;
      printAllObjects(1,x,strtokIndx,gva_testDate,gva_testTime);
      strtokIndx = strtok(x, "|");
      printAllObjects(2,x,strtokIndx,gva_testDate,gva_testTime);
      //strcpy(tele, strtokIndx);     // did nothing?
      strtokIndx = strtok(NULL, ";");
      printAllObjects(3,x,strtokIndx,gva_testDate,gva_testTime);
      strcpy(gva_testDate, strtokIndx); // missing?
      printAllObjects(4,x,strtokIndx,gva_testDate,gva_testTime);

//      Serial.println(gva_testDate); // Not missing
      for(int i=0; i<6; i++){
        lva_testDate[i] = gva_testDate[i];  
      }      

      strtokIndx = strtok(NULL, "$");
      printAllObjects(5,x,strtokIndx,gva_testDate,gva_testTime);
      strcpy(gva_testTime, strtokIndx); // is fine...
      printAllObjects(6,x,strtokIndx,gva_testDate,gva_testTime);

//      Serial.println(gva_testDate); // MISSING
//      Serial.println(lva_testDate);

      if(strstr(gva_testDate, "YYmmDD")!=NULL || strstr(gva_testTime, "HHMMSS")!=NULL){
          //if((gva_testDate == "YYmmDD") || (gva_testTime == "HHMMSS")){  
          char io[28]; // 16 + 2*6
          sprintf(io, "063 ERROR: %s,%s", gva_testDate, gva_testTime);
          //logArdData(io);
//          Serial.print("<");
//          Serial.print(io);
//          Serial.println(">");
//          Serial.flush();
        }
        //else if((strstr(gva_testDate, "YYmmDD") && strstr(gva_testTime, "HHMMSS"))==NULL){
        else if((strstr(gva_testDate, "YYmmDD")==NULL && strstr(gva_testTime, "HHMMSS")==NULL)){  
          //else if((gva_testDate != "YYmmDD") && (gva_testTime != "HHMMSS")){  
          char io[26]; // 14 + 2*6

          //sendArdData(gva_testDate); // is combinined?
          //sendArdData(gva_testTime); // still itself

          sprintf(io, "Assigned %s,%s", gva_testDate, gva_testTime); // is combining testDate and testTime, then printing testTime?
//          Serial.print("<");
//          Serial.print(io);
//          Serial.println(">");
          //sendArdData(io);
          //logArdData(io);
//          Serial.flush();        
        }
    }
  }
}
int main(int argc, char** argv) {
    setup();
    printf("Entering loop\n");
    loop();
    return 0;
}

This is the output I got

Entering loop
At location 1
 x pointer 199946368, string BATCH|190117;151442$
 strtokIndx pointer 0, string (null)
 gva_testDate pointer 199946344, string YYmmDDHHMMSS
 gva_testTime pointer 199946350, string HHMMSS
 At location 2
 x pointer 199946368, string BATCH
 strtokIndx pointer 199946368, string BATCH
 gva_testDate pointer 199946344, string YYmmDDHHMMSS
 gva_testTime pointer 199946350, string HHMMSS
 At location 3
 x pointer 199946368, string BATCH
 strtokIndx pointer 199946374, string 190117
 gva_testDate pointer 199946344, string YYmmDDHHMMSS
 gva_testTime pointer 199946350, string HHMMSS
 At location 4
 x pointer 199946368, string BATCH
 strtokIndx pointer 199946374, string 190117
 gva_testDate pointer 199946344, string 190117
 gva_testTime pointer 199946350, string 
 At location 5
 x pointer 199946368, string BATCH
 strtokIndx pointer 199946381, string 151442
 gva_testDate pointer 199946344, string 190117
 gva_testTime pointer 199946350, string 
 At location 6
 x pointer 199946368, string BATCH
 strtokIndx pointer 199946381, string 151442
 gva_testDate pointer 199946344, string 190117151442
 gva_testTime pointer 199946350, string 151442

Based on this data this is my conclusion.

The strtok funciton modifies the original string and replaces the search caracter with a null, then copies over the string to a new index and gives the index back.

strcpy is not doing anything to your code, you have written the buffer values in such a manner that it seems like strcpy is doing nasty things, but it is just doing what it is supposed to do. copying strings till it finds a null. In most cases it would screw things alot for you but not in this specific case.

As can be seen in the results, gva_testDate amd gva_testTime have a pointer difference of 6, as such when gva_testTime gets populated, if you use gva_testDate as a string, the null is seen only at the end of 12 characters.

Also lva_testDate is supposed to be a buffer for you, you need to give it a value to start with or it becomes a null buffer and might end up modifying differt things based on what the code is compiled to.

To solve the issue, I initialised all your variables as so, the output is printed below that.

char gva_testDate[] = "YYmmDD"; // is appending?
char gva_testTime[] = "HHMMSS";

char lva_testDate[20];

//String y = "BATCH|190117;151442$";
//int lan = y.length(); // should be 20

char x[] = "BATCH|190117;151442$";
int lan = strlen(x);

output

Entering loop
At location 1
 x pointer 107077760, string BATCH|190117;151442$
 strtokIndx pointer 0, string (null)
 gva_testDate pointer 107077736, string YYmmDD
 gva_testTime pointer 107077743, string HHMMSS
 At location 2
 x pointer 107077760, string BATCH
 strtokIndx pointer 107077760, string BATCH
 gva_testDate pointer 107077736, string YYmmDD
 gva_testTime pointer 107077743, string HHMMSS
 At location 3
 x pointer 107077760, string BATCH
 strtokIndx pointer 107077766, string 190117
 gva_testDate pointer 107077736, string YYmmDD
 gva_testTime pointer 107077743, string HHMMSS
 At location 4
 x pointer 107077760, string BATCH
 strtokIndx pointer 107077766, string 190117
 gva_testDate pointer 107077736, string 190117
 gva_testTime pointer 107077743, string HHMMSS
 At location 5
 x pointer 107077760, string BATCH
 strtokIndx pointer 107077773, string 151442
 gva_testDate pointer 107077736, string 190117
 gva_testTime pointer 107077743, string HHMMSS
 At location 6
 x pointer 107077760, string BATCH
 strtokIndx pointer 107077773, string 151442
 gva_testDate pointer 107077736, string 190117
 gva_testTime pointer 107077743, string 151442

I hope this helps.

Nitro
  • 1,063
  • 1
  • 7
  • 17
0

As melpomene and alain suggested in the comments, your problems stem from missing NULL-terminators in your strings. From your comment, I suppose you may be used with C++ String class, that keeps properties that tell you the size of the string right away. In C however, strings are simply arrays of characters in memory, and there is no immediate way of telling their length besides counting characters until we reach a NULL(0x00) character.

That's why strcpy is doing nasty things in your code, when copying strings, it starts copying bytes in the memory until it reaches a null character somewhere and returns. In your case, it won't find a null character right away and will keep copying until it finds one by luck in your memory, and subscribe other variables.

I see in your code that some of your strings are intended to be fixed size, and a null character can be redundant. In this case, when you know the string size(either because you hard-code it, or by keeping it in some variable), the function you want is memcpy. It will simply copy a set number of bytes from one address to another, without relying on null terminators. BUT Serial.print will still expect null terminators to print it, so you would need to change that too.

The easiest solution is simply to allow more space in your strings. For example, if you want to create a string "abc", you should use char myString[4] = "abc". The compiler will automatically populate the memory starting from myString with 0x61 0x62 0x63 0x00, because the compiler knows that in C, strings(marked by ") should be null-terminated, you must only provide one extra-byte for the null-terminator. If your string will have variable lengths, you must provide enough memory for the worst case scenario plus a null terminator, so you variable x may benefit from more than 21 bytes. You can't change this size later, but what determines your string length is the null terminator, not the amount of memory it has reserved.

Lines of this form however:

char gva_testTime[6] = {'H', 'H', 'M', 'M', 'S', 'S'};

will not include a null character, because instead of string, you declared an array of characters. if you want to use strcpy or strtok in this variable, you will need to include the null terminator yourself:

char gva_testTime[7] = {'H', 'H', 'M', 'M', 'S', 'S', 0};

Or declare it as a string:

char gva_testTime[7] = "HHMMSS";

So basically, review all your strings declaration to include one extra byte for null terminators and ensure the way you declare it will actually include a null terminator.