1

I want to read a CSV file and load it into an array of structs. I used code that I found on YouTube and GitHub (https://github.com/portfoliocourses/c-example-code/blob/main/csv_to_struct_array.c). Now I want to change all the members of the struct into arrays of characters (i.e. strings). This works if I change the members one at a time (at least for the members type and age), but if I change the member average into an array of characters, I get the error message File format incorrect. I suspect that it is because there is a problem with the EOF character in the file during the while (!feof(file)); loop. How can I solve the problem?

Here is the original code:


/*******************************************************************************
*
* Program: Read CSV File Data To An Array Of Structs
* 
* Description: Example of reading CSV file data into an array of structs in C.
*
* YouTube Lesson: https://www.youtube.com/watch?v=rbVt5v8NNe8 
*
* Author: Kevin Browne @ https://portfoliocourses.com
*
*******************************************************************************/

#include <stdio.h>

// Student holds one record of a CSV file formatted like this:
//
// U,Virat Kohli,23,95.6
// U,Serena Williams,22,83.2
// G,Wayne Gretzky,19,84.2
//
// Each line is a single-character "student type" (e.g. undergraduate,
// graduate), followed by the student's name, age and then average.
//
typedef struct 
{
  // student type: a single character such as U or G in the sample above
  char type;
  // student name: room for 49 characters plus the null terminator
  char name[50];
  // student age, stored as an integer
  int age;
  // student average, stored as a floating-point value
  double average;
} Student;

// Reads comma-separated student records from file.txt into an array of
// Student structs, then prints the number of records and each record.
//
// Returns 0 on success.  Returns 1 (after printing a message) if the file
// cannot be opened, a read error occurs, a record does not match the expected
// format, or the file holds more records than the array can store.
int main(void)
{
  // attempt to open file.txt in read mode to read the file contents
  FILE *file = fopen("file.txt", "r");

  // if the file failed to open, exit with an error message and status
  if (file == NULL)
  {
    printf("Error opening file.\n");
    return 1;
  }

  // array of structs for storing the Student data from the file
  Student students[100];

  // capacity is derived from the array itself so that the bounds check in
  // the loop stays correct if the array size is ever changed
  const int capacity = (int) (sizeof students / sizeof students[0]);

  // read will be used to ensure each line/record is read correctly
  int read = 0;

  // records will keep track of the number of Student records read from the file
  int records = 0;

  // read all records from the file and store them into the students array
  do
  {
    // We only get here when the end of the file has not been reached, so
    // another record is expected.  If the array is already full, stop with
    // an error rather than writing past the end of the array.
    if (records == capacity)
    {
      printf("Too many records (maximum is %d).\n", capacity);
      fclose(file);
      return 1;
    }

    // Read a line/record from the file with the above format, notice in
    // particular how we read in the student's name with %49[^,] which matches
    // up to 49 characters NOT including the comma (so it will stop matching
    // at the next comma).  The name member can store 50 characters, so
    // factoring in the null terminator this is the maximum amount of
    // characters we can read in for a name.  The trailing \n in the format
    // matches any run of white space, which consumes the line ending so that
    // the next %c starts on the first character of the next record.  fscanf()
    // returns the number of values it was able to read successfully, which
    // we expect to be 4, and we store that into read.
    read = fscanf(file,
                  "%c,%49[^,],%d,%lf\n",
                  &students[records].type,
                  students[records].name,
                  &students[records].age,
                  &students[records].average);

    // if fscanf read 4 values from the file then we've successfully read
    // in another record
    if (read == 4)
    {
      records++;
    }

    // Check for a read error BEFORE checking the format: a failed read also
    // makes fscanf return fewer than 4 values without setting end-of-file, so
    // testing the format first would misreport it as a format problem.
    if (ferror(file))
    {
      printf("Error reading file.\n");
      fclose(file);
      return 1;
    }

    // With read errors ruled out, the only time that fscanf should NOT read
    // 4 values is when we've reached the end of the file, so anything else
    // means the file format is incorrect.
    if (read != 4 && !feof(file))
    {
      printf("File format incorrect.\n");
      fclose(file);
      return 1;
    }

  } while (!feof(file));

  // close the file as we are done working with it
  fclose(file);

  // print out the number of records read
  printf("\n%d records read.\n\n", records);

  // print out each of the records that was read
  for (int i = 0; i < records; i++)
  {
    printf("%c %s %d %.2f\n",
           students[i].type,
           students[i].name,
           students[i].age,
           students[i].average);
  }
  printf("\n");

  return 0;
}


Here is the modified code:

#include <stdio.h>

// Student record in which every field is kept as text.
typedef struct 
{
  // In the original program these members were declared as
  // char type, char name[50], int age and double average.
  // Each buffer below holds up to 49 characters plus the null terminator.
  char type[50];
  char name[50];
  char age[50];
  char average[50];
} Student;

// Reads the records of file.txt into an array of Student structs, storing
// every field as a string, then prints the record count and the records.
// Returns 1 after printing a message if the file cannot be opened, if a call
// to fscanf does not yield four fields before end-of-file, or if a read
// error is flagged; returns 0 otherwise.
int main(void)
{
  FILE *file;
  file = fopen("file.txt", "r"); 
  if (file == NULL)
  {
    printf("Error opening file.\n");
    return 1;
  }
  // NOTE(review): nothing in the loop below stops records from reaching 100,
  // so a file with more than 100 records would write past this array.
  Student students[100];
  // number of fields converted by the most recent fscanf call
  int read = 0;
  // number of complete records stored so far
  int records = 0;

  do 
  {
    // NOTE(review): the fourth %49[^,] stops only at a comma, not at a
    // newline.  When a record is followed by another line, it therefore keeps
    // reading across the line ending up to the first comma of the next
    // record.  The next call then starts on that comma, converts nothing and
    // returns 0, which triggers the "File format incorrect." branch below.
    // Ending the format with a scanset that stops at the newline, and
    // starting it with a space to skip the leftover newline, avoids this.
    read = fscanf(file,
                  // the original format string was "%c,%49[^,],%d,%lf\n"
                  "%49[^,],%49[^,],%49[^,],%49[^,]\n",
                  students[records].type, 
                  students[records].name, 
                  students[records].age, 
                  students[records].average); 
    
    // four converted fields means one more complete record
    if (read == 4) records++;

    // fewer than four fields without reaching end-of-file is treated as a
    // malformed record
    if (read != 4 && !feof(file))
    {
      printf("File format incorrect.\n");
      return 1;
    }
    
    if (ferror(file))
    {
      printf("Error reading file.\n");
      return 1;
    }

  } while (!feof(file));

  fclose(file);
  
  printf("\n%d records read.\n\n", records);
  
  for (int i = 0; i < records; i++)
    // NOTE(review): in %.s the precision is given as a bare period, which
    // means a precision of zero, so no characters of average are printed;
    // a plain %s would print the whole string.
    // The original call used the format "%c %s %d %.2f\n".
    printf("%s %s %s %.s\n", 
           students[i].type, 
           students[i].name,
           students[i].age,
           students[i].average);
  printf("\n");

  return 0;
}

EDIT 1: I changed the format string to " %49[^,],%49[^,],%49[^,],%49[^\n]"

and got this output:


3 records read.

U Virat Kohli 23
U Serena Williams 22
G Wayne Gretzky 19

This is an improvement in the sense that the file can now be parsed, but the last column (the item after the last comma of each line) is missing from the output, which should read:

3 records read.

U Virat Kohli 23 95.60
U Serena Williams 22 83.20
G Wayne Gretzky 19 84.20

Here is the source code of the modified version

#include <stdio.h>

// Student record in which every field is kept as text.
typedef struct 
{
  // In the original program these members were declared as
  // char type, char name[50], int age and double average.
  // Each buffer below holds up to 49 characters plus the null terminator.
  char type[50];
  char name[50];
  char age[50];
  char average[50];
} Student;

// Reads the records of file.txt into an array of Student structs, storing
// every field as a string, then prints the record count and the records.
// Returns 1 after printing a message if the file cannot be opened, if a call
// to fscanf does not yield four fields before end-of-file, or if a read
// error is flagged; returns 0 otherwise.
int main(void)
{
  FILE *file;
  file = fopen("file.txt", "r"); 
  if (file == NULL)
  {
    printf("Error opening file.\n");
    return 1;
  }
  // NOTE(review): nothing in the loop below stops records from reaching 100,
  // so a file with more than 100 records would write past this array.
  Student students[100];
  // number of fields converted by the most recent fscanf call
  int read = 0;
  // number of complete records stored so far
  int records = 0;

  do 
  {
    // The leading space in the format skips any white space, including the
    // newline left unread after the previous record.  The first three fields
    // each stop at a comma and the last field stops at the end of the line.
    // Earlier attempts used "%c,%49[^,],%d,%lf\n" (the original program) and
    // "%49[^,],%49[^,],%49[^,],%49[^,\n]".
    read = fscanf(file,
                  " %49[^,],%49[^,],%49[^,],%49[^\n]",
                  students[records].type, 
                  students[records].name, 
                  students[records].age, 
                  students[records].average); 
    
    // four converted fields means one more complete record
    if (read == 4) records++;

    // fewer than four fields without reaching end-of-file is treated as a
    // malformed record
    if (read != 4 && !feof(file))
    {
      printf("File format incorrect.\n");
      return 1;
    }
    
    if (ferror(file))
    {
      printf("Error reading file.\n");
      return 1;
    }

  } while (!feof(file));

  fclose(file);
  
  printf("\n%d records read.\n\n", records);
  
  for (int i = 0; i < records; i++)
    // NOTE(review): in %.s the precision is given as a bare period, which
    // means a precision of zero, so no characters of average are printed.
    // This is why the last column is missing from the output; a plain %s
    // would print the whole string.
    // The original call used the format "%c %s %d %.2f\n".
    printf("%s %s %s %.s\n", 
           students[i].type, 
           students[i].name,
           students[i].age,
           students[i].average);
  printf("\n");

  return 0;
}

with the source record file.txt (copied from the original code):

U,Virat Kohli,23,95.6
U,Serena Williams,22,83.2
G,Wayne Gretzky,19,84.2

ecjb
  • 5,169
  • 12
  • 43
  • 79
  • To solve it, first figure out what is causing it. If you would print some info instead of just the error message, that could help figuring that out. For example, try printing `printf("File format incorrect: line %d.\n", records);` instead. Then you know what line the error occurs at. If it is the last, it might have something to with EOF. Otherwise, perhaps the line is formatted differently. – Emanuel P Feb 24 '23 at 20:03

1 Answer

1

Rewrite the format string like

" %49[^,],%49[^,],%49[^,],%49[^\n]",

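Pay attention to the leading space in the format string. It makes fscanf skip any white-space characters, including the newline left in the stream after the previous record, before the first field is read.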

I assume that whole records are not ended with a comma.

Another approach is to declare a character array large enough to store a record from the file and to use fgets instead of scanf. Then you can parse an obtained record using either strtok or sscanf.

Also in the call of printf

printf("%s %s %s %.s\n", 
       students[i].type, 
       students[i].name,
       students[i].age,
       students[i].average);

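there is an incorrect conversion specifier, %.s. A precision written as a bare period is taken as zero, so no characters of the string are printed. Write just %s instead.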

Vlad from Moscow
  • 301,070
  • 26
  • 186
  • 335
  • thank you for your answer @VladfromMoscow. I changed the code as you suggested. Now the file can be parsed and there is a printed output excepts for the last item of each line (after the last comma). I edited the question to present the problem – ecjb Feb 24 '23 at 20:47
  • @ecjb Show an example of source records. – Vlad from Moscow Feb 24 '23 at 20:55
  • I copied the source code of edited version at the end of the edit in the question. Did I miss something? – ecjb Feb 24 '23 at 21:05
  • @ecjb I do not see a source record. And I can not reproduce your output. Show a source record. – Vlad from Moscow Feb 24 '23 at 21:10
  • thank you for your comments: I added the `file.txt` (copied from the original code) at the end of the question (just after the edited version of the source code). Please tell me if it's still not clear – ecjb Feb 24 '23 at 21:14
  • The first line of `file.txt` is `U,Virat Kohli,23,95.6`. So to me there are still 4 fields (with a space inside the second field). No? – ecjb Feb 24 '23 at 21:18
  • @ecjb You are incorrectly outputting records. See my appended answer. – Vlad from Moscow Feb 24 '23 at 21:22