-5

I have a CSV file in the following format:

name,birthmonth,country,hobby
jack,jan,england,soccer
roben,july,germany,soccer
emma,dec,china,tennis
yannick,sep,france,music
alex,nov,england,cricket
thomas,apr,germany,tennis
mike,oct,netherlands,cycling
michelle,feb,france,poetry
yui,mar,japan,coding
feng,jun,china,reading

I want to parse this file using C and group the lines so that all lines with the same country name are consecutive, i.e. as shown below:

name,birthmonth,country,hobby
jack,jan,england,soccer
alex,nov,england,cricket
roben,july,germany,soccer
thomas,apr,germany,tennis
emma,dec,china,tennis
feng,jun,china,reading
yannick,sep,france,music
michelle,feb,france,poetry
mike,oct,netherlands,cycling
yui,mar,japan,coding

So far I have tried the code below; however, I am not able to split the fields properly and proceed further:

#include<stdio.h>
#include<stdlib.h>
#include<ctype.h>
#include<fcntl.h>
#include<string.h>

/*
 * Reads the CSV file named by argv[1] line by line, echoes every line, and
 * prints the fourth comma-separated field of each line that has four fields.
 *
 * Returns EXIT_SUCCESS on success, EXIT_FAILURE if no file name was given or
 * the file could not be opened.
 */
int main (int argc, char **argv) {
    /* FIELD_CAP - 1 must match the "%49" widths in the sscanf format below. */
    enum { LINE_CAP = 200, FIELD_CAP = 50 };
    char line[LINE_CAP];
    /* Each field needs its own buffer; a plain `char` cannot hold a string. */
    char a[FIELD_CAP], b[FIELD_CAP], c[FIELD_CAP], d[FIELD_CAP];
    FILE *input_csv_file;

    if (argc < 2) {
        fprintf(stderr, "Usage: %s <input.csv>\n", argc > 0 ? argv[0] : "program");
        return EXIT_FAILURE;
    }

    input_csv_file = fopen(argv[1], "r");
    if (input_csv_file == NULL) {
        printf("Can not open input file \n");
        return EXIT_FAILURE;
    }

    while (fgets(line, sizeof line, input_csv_file) != NULL) {
        printf ("line = %s\n", line);
        /*
         * "%s" would swallow the commas, so use scansets that stop at ','.
         * The last field also stops at the line terminator. sscanf returns
         * the number of fields converted, so require all four.
         */
        if (sscanf(line, "%49[^,],%49[^,],%49[^,],%49[^,\r\n]", a, b, c, d) == 4) {
            printf("d=%s\n", d);
        }
    }

    fclose(input_csv_file);
    return EXIT_SUCCESS;
}

I am a newbie in C/C++. Any help would be much appreciated. Thanks.

H.Burns
  • 419
  • 2
  • 9
  • 22
  • I think you are looking towards hash table. You need to store country in hash table, see if another entry has same hash function, save data accordingly. – user14063792468 Jun 15 '18 at 18:10
  • @ЯрославМашко a demonstration would be helpful. – H.Burns Jun 15 '18 at 18:14
  • Will you accept C++ ? – user14063792468 Jun 15 '18 at 18:17
  • Your `scanf` code wants 5 strings but each line of the CSV file has 4. Moreover, you can't read strings into the variables `char a,b,c,d,e;`. They should be `char a[50],b[50],c[50],d[50],e[50];` for example. – Weather Vane Jun 15 '18 at 18:19
  • 1
    @WeatherVane Hi, yes, that was a typo made while posting the code here. Even if I change it to 4 strings, that does not solve the problem; please provide the appropriate syntax. Thanks. – H.Burns Jun 15 '18 at 18:22
  • 2
    I added some to my previous comment while you were typing. Additionally the `sscanf` line should be `if(sscanf(line, "%s,%s,%s,%s", a,b,c,d) == 4)` . – Weather Vane Jun 15 '18 at 18:26
  • 2
    Please pay attention to compiler warnings. Your code generates about a dozen of them. Get the input correct, before you start to think about arranging the lines. – Weather Vane Jun 15 '18 at 18:31
  • @ЯрославМашко I am looking for a solution in C, however C++ is still okay as a last resort. Kindly provide if possible. Thanks in advance. – H.Burns Jun 15 '18 at 19:17
  • Your code is 100% C code. Why do you have the C++ tag? The two languages have diverged a great deal since C++ was first defined, and it is seldom useful to speak of them as interchangeable. – Tom Zych Jun 15 '18 at 19:23
  • @Tom Zych Well I have removed C++ tag, actually I was thinking if a C++ solution as an alternative helps me to convert it to C if nobody answers in C. – H.Burns Jun 15 '18 at 19:26
  • 1
    That probably would not be useful. In C, you have to do it at a low level — C has been described as portable assembly language, and that’s a fair statement. A good solution in C++ would involve a lot of high-level types and functions from the C++ standard library, and would not be easy to translate into C. – Tom Zych Jun 15 '18 at 19:30
  • @TomZych Thanks for the explanation, could u please help me get a solution in C as I am stuck. – H.Burns Jun 15 '18 at 19:33
  • 2
    Wait, do you expect us to just write the code for you? That's not how StackOverflow works. You're supposed to ask a specific question with a specific answer. – melpomene Jun 15 '18 at 19:33
  • Read this: https://ericlippert.com/2014/03/05/how-to-debug-small-programs/ – Tom Zych Jun 15 '18 at 19:35
  • You have 4 words per line. Use this: `if(sscanf(line, "%[^,],%[^,],%[^,],%[^,]\n", a, b, c, d) == 4) printf("%s|%s|%s|%s\n", a, b, c, d);` – Barmak Shemirani Jun 15 '18 at 23:59

1 Answer

0

I could write the code to get the required output. Below is the code:

#include<stdio.h>
#include<stdlib.h>
#include<ctype.h>
#include<fcntl.h>
#include<string.h>

/*
 * Groups the rows of a CSV file by country.
 *
 * Reads the CSV file named by argv[1], whose third column is the country,
 * and writes "nation_csv.txt" containing the header line (the line whose
 * third column is literally "country") followed by all data rows ordered
 * alphabetically by country. The sort is stable, so rows that share a
 * country keep their original relative order.
 *
 * Blank lines are ignored; non-blank lines with fewer than three columns are
 * reported on stderr and skipped. Lines of LINE_CAP - 1 characters or more
 * are rejected as an error rather than being silently split.
 *
 * Returns EXIT_SUCCESS on success and EXIT_FAILURE on any usage, I/O or
 * allocation error.
 */
int main(int argc, char ** argv) 
{
    enum { LINE_CAP = 256, COUNTRY_COLUMN = 3 };

    /* Buffers carry one spare byte so a missing final newline can be added. */
    struct filedata {
        char nation[LINE_CAP + 1];
        char content[LINE_CAP + 1];
    };

    static const char country[] = "country";
    char line[LINE_CAP + 1];
    char header_line[LINE_CAP + 1] = "";
    struct filedata **record = NULL;   /* growable array of owned rows */
    size_t count = 0;
    size_t capacity = 0;
    size_t i;
    int status = EXIT_FAILURE;
    FILE *input_csv_file = NULL;
    FILE *fptr = NULL;

    if (argc < 2) {
        fprintf(stderr, "Usage: %s <input.csv>\n", argc > 0 ? argv[0] : "program");
        return EXIT_FAILURE;
    }

    input_csv_file = fopen(argv[1], "r");
    if (input_csv_file == NULL) {
        fprintf(stderr, "Can not open input file\n");
        return EXIT_FAILURE;
    }

    while (fgets(line, LINE_CAP, input_csv_file) != NULL) {
        size_t line_len = strlen(line);
        const char *field = line;
        size_t field_len;
        struct filedata *entry;
        int col;

        if (line_len == 0 || line[line_len - 1] != '\n') {
            /*
             * No newline: either this is the last line of the file, or the
             * line did not fit in the buffer. Peek one character to tell.
             */
            int next = getc(input_csv_file);
            if (next != EOF) {
                fprintf(stderr, "Input line longer than %d characters\n", LINE_CAP - 2);
                goto cleanup;
            }
            /* Terminate the final line so it cannot fuse with the next row
             * once the rows are reordered. */
            line[line_len] = '\n';
            line[line_len + 1] = '\0';
        }

        /* Walk to the start of the country column by skipping commas. */
        for (col = 1; col < COUNTRY_COLUMN && field != NULL; col++) {
            field = strchr(field, ',');
            if (field != NULL)
                field++;
        }

        if (field == NULL) {
            /* Fewer than three columns: ignore blank lines, report others. */
            if (line[strspn(line, " \t\r\n")] != '\0')
                fprintf(stderr, "Skipping line without a country column: %.*s\n",
                        (int)strcspn(line, "\r\n"), line);
            continue;
        }

        field_len = strcspn(field, ",\r\n");

        /* The line whose third column is "country" is the column header. */
        if (field_len == sizeof country - 1 && memcmp(field, country, field_len) == 0) {
            memcpy(header_line, line, strlen(line) + 1);
            continue;
        }

        if (count == capacity) {
            size_t new_capacity = capacity ? capacity * 2 : 16;
            struct filedata **grown = realloc(record, new_capacity * sizeof *grown);
            if (grown == NULL) {
                fprintf(stderr, "Out of memory\n");
                goto cleanup;
            }
            record = grown;
            capacity = new_capacity;
        }

        entry = malloc(sizeof *entry);
        if (entry == NULL) {
            fprintf(stderr, "Out of memory\n");
            goto cleanup;
        }
        /* field lies inside line, so field_len always fits in nation. */
        memcpy(entry->nation, field, field_len);
        entry->nation[field_len] = '\0';
        memcpy(entry->content, line, strlen(line) + 1);
        record[count++] = entry;
    }

    if (ferror(input_csv_file)) {
        fprintf(stderr, "Error while reading input file\n");
        goto cleanup;
    }

    /* Stable insertion sort of the row pointers by country name. */
    for (i = 1; i < count; i++) {
        struct filedata *key = record[i];
        size_t j = i;
        while (j > 0 && strcmp(record[j - 1]->nation, key->nation) > 0) {
            record[j] = record[j - 1];
            j--;
        }
        record[j] = key;
    }

    fptr = fopen("nation_csv.txt", "w");
    if (fptr == NULL) {
        fprintf(stderr, "Error in opening the file to write\n");
        goto cleanup;
    }

    /* Header first, then every record (including the first one). */
    fputs(header_line, fptr);
    for (i = 0; i < count; i++)
        fputs(record[i]->content, fptr);

    if (ferror(fptr)) {
        fprintf(stderr, "Error while writing output file\n");
        goto cleanup;
    }
    /* fclose flushes buffered data, so its result matters for output files. */
    if (fclose(fptr) != 0) {
        fptr = NULL;
        fprintf(stderr, "Error while writing output file\n");
        goto cleanup;
    }
    fptr = NULL;
    status = EXIT_SUCCESS;

cleanup:
    if (fptr != NULL)
        fclose(fptr);
    for (i = 0; i < count; i++)
        free(record[i]);
    free(record);
    fclose(input_csv_file);
    return status;
}

/*
 * Tests two NUL-terminated strings for exact equality.
 *
 * Returns 0 when a and b hold the same characters, -1 otherwise.
 */
int compare_col(char a[], char b[] )
{
    const char *lhs = a;
    const char *rhs = b;

    /* Advance in lockstep until the strings differ or lhs ends. */
    for (; *lhs != '\0'; lhs++, rhs++) {
        if (*lhs != *rhs)
            return -1;
    }

    /* lhs is exhausted; the strings match only if rhs ended here too. */
    return (*rhs == '\0') ? 0 : -1;
}

Thanks for all your inputs. Any further inputs to make it better are much appreciated.

Thanks

H.Burns
  • 419
  • 2
  • 9
  • 22