0

I'm writing a C program to share files between two computers (running Windows) connected to a common network (just a lab project). When I read non-.txt files such as .docx or .gif, I can't get the entire file. Most of the time the server program sends some empty (or unknown) characters to the client, which the client never receives. This is my server program:

#include<io.h>
#include<stdio.h>
#include<stdlib.h>
#include<winsock2.h>
#include<string.h>

#pragma comment(lib,"ws2_32.lib") //Winsock Library

#define CHUNK_SIZE 1024                      /* bytes handed to send() per chunk */
#define END_MARKER "...END...CONNECTION..."  /* sentinel sent after the file data */

/*
 * Read the whole file at `path` into a freshly allocated buffer.
 *
 * The file is opened in binary mode and read with fread(), so zero bytes and
 * any other byte value are preserved. On success the buffer is returned and
 * its length is stored in *size_out; the caller must free() it.
 * Returns NULL (after printing a diagnostic) on any failure.
 */
static char *read_file(const char *path, long *size_out)
{
    FILE *f = fopen(path, "rb");
    char *data = NULL;
    long fsize;

    if (f == NULL)
    {
        perror("fopen");
        return NULL;
    }

    if (fseek(f, 0, SEEK_END) != 0 || (fsize = ftell(f)) < 0 || fseek(f, 0, SEEK_SET) != 0)
    {
        perror("fseek/ftell");
        fclose(f);
        return NULL;
    }

    /* +1 so that an empty file does not turn into malloc(0). */
    data = malloc((size_t)fsize + 1);
    if (data == NULL)
    {
        perror("malloc");
        fclose(f);
        return NULL;
    }

    if (fread(data, 1, (size_t)fsize, f) != (size_t)fsize)
    {
        perror("fread");
        free(data);
        fclose(f);
        return NULL;
    }

    fclose(f);
    *size_out = fsize;
    return data;
}

/*
 * Send exactly `len` bytes from `buf` on `sock`.
 *
 * send() reports how many bytes it accepted, which can be fewer than asked
 * for, so the call is repeated until the whole range has been handed over.
 * Returns 0 on success, -1 if send() fails.
 */
static int send_all(SOCKET sock, const char *buf, int len)
{
    int sent = 0;

    while (sent < len)
    {
        int n = send(sock, buf + sent, len - sent, 0);
        if (n == SOCKET_ERROR)
            return -1;
        sent += n;
    }
    return 0;
}

/*
 * File-sending server.
 *
 * Asks for a file name, loads that file into memory, waits for one client on
 * TCP port 8888 and then speaks this protocol:
 *   1. send the file name
 *   2. wait for the reply "OKAY"
 *   3. send the file contents in chunks of at most CHUNK_SIZE bytes
 *   4. send END_MARKER and wait for the client to echo it
 *
 * All lengths are tracked explicitly; strlen() is never applied to file
 * data, because binary files contain zero bytes.
 * Returns 0 on success and 1 on any failure.
 */
int main(int argc , char *argv[])
{
    WSADATA wsa;
    SOCKET s = INVALID_SOCKET , new_socket = INVALID_SOCKET;
    struct sockaddr_in server , client;
    int c;
    char filename[100];
    char response[50];
    char *string = NULL;
    long fsize = 0;
    long offset = 0;
    int len;
    int err;
    int k = 0;
    int status = 1;

    (void)argc;
    (void)argv;

    printf("Enter filename :");
    /* Width limit keeps the input inside filename[100]. */
    if (scanf("%99s", filename) != 1)
    {
        printf("No filename given\n");
        return 1;
    }

    string = read_file(filename, &fsize);
    if (string == NULL)
        return 1;

    printf("\nInitialising Winsock...");
    /* WSAStartup reports its error through the return value. */
    err = WSAStartup(MAKEWORD(2,2), &wsa);
    if (err != 0)
    {
        printf("Failed. Error Code : %d", err);
        free(string);
        return 1;
    }

    printf("Initialised.\n");

    //Create a socket
    if((s = socket(AF_INET , SOCK_STREAM , 0 )) == INVALID_SOCKET)
    {
        printf("Could not create socket : %d" , WSAGetLastError());
        goto cleanup;
    }

    printf("Socket created.\n");

    //Prepare the sockaddr_in structure
    memset(&server, 0, sizeof(server));
    server.sin_family = AF_INET;
    server.sin_addr.s_addr = INADDR_ANY;
    server.sin_port = htons( 8888 );

    //Bind
    if( bind(s ,(struct sockaddr *)&server , sizeof(server)) == SOCKET_ERROR)
    {
        printf("Bind failed with error code : %d" , WSAGetLastError());
        goto cleanup;
    }

    puts("Bind done");

    //Listen to incoming connections
    if (listen(s , 3) == SOCKET_ERROR)
    {
        printf("listen failed with error code : %d" , WSAGetLastError());
        goto cleanup;
    }

    //Accept and incoming connection
    puts("Waiting for incoming connections...");

    c = sizeof(struct sockaddr_in);
    new_socket = accept(s , (struct sockaddr *)&client, &c);
    if (new_socket == INVALID_SOCKET)
    {
        printf("accept failed with error code : %d" , WSAGetLastError());
        goto cleanup;
    }

    puts("Connection accepted");

    //Tell the client which file is coming
    if (send_all(new_socket , filename , (int)strlen(filename)) != 0)
    {
        printf("send failed with error code : %d" , WSAGetLastError());
        goto cleanup;
    }

    /* Leave one byte free for the terminator; recv may also return 0 or -1. */
    len = recv(new_socket, response, (int)sizeof(response) - 1, 0);
    if (len <= 0)
    {
        printf("recv failed with error code : %d" , WSAGetLastError());
        goto cleanup;
    }
    response[len] = '\0';

    if(strcmp(response, "OKAY") != 0)
    {
        printf("ERROR");
        goto cleanup;
    }
    printf("SUCCESS");

    /* The size comes from ftell(), not strlen(): the data is binary. */
    printf("FIle size is : %ld",fsize);

    /* Pause kept from the original: the first getchar() eats the newline
       left behind by scanf, the second waits for the user to press Enter. */
    getchar(); getchar();

    while(offset < fsize)
    {
        long remaining = fsize - offset;
        int chunk = remaining < CHUNK_SIZE ? (int)remaining : CHUNK_SIZE;

        if (send_all(new_socket, string + offset, chunk) != 0)
        {
            printf("send failed with error code : %d" , WSAGetLastError());
            goto cleanup;
        }
        /* Report the byte count; the chunk itself is not printable text. */
        printf("sent : %d bytes\n", chunk);
        printf("%d\n",k); k++;
        offset += chunk;
    }

    if (send_all(new_socket, END_MARKER, (int)strlen(END_MARKER)) != 0)
    {
        printf("send failed with error code : %d" , WSAGetLastError());
        goto cleanup;
    }
    printf("sent end connection");

    /* Wait for the client to echo the marker; resend it once if the reply
       is something else. A closed or failed connection is left alone. */
    len = recv(new_socket, response, (int)sizeof(response) - 1, 0);
    if (len > 0)
    {
        response[len] = '\0';
        if(strcmp(response, END_MARKER) != 0)
            send_all(new_socket, END_MARKER, (int)strlen(END_MARKER));
    }

    printf("SUCCESS");
    status = 0;

cleanup:
    if (new_socket != INVALID_SOCKET)
        closesocket(new_socket);
    if (s != INVALID_SOCKET)
        closesocket(s);
    WSACleanup();
    free(string);

    return status;
}

And this is my client program:

#include<stdio.h>
#include<string.h>
#include<winsock2.h>

#pragma comment(lib,"ws2_32.lib") //Winsock Library

#define RECV_SIZE 1024                        /* largest single recv() request */
#define END_MARKER "...END...CONNECTION..."   /* sentinel that follows the file data */
#define END_MARKER_LEN ((int)(sizeof(END_MARKER) - 1))
#define HOLD_BACK (END_MARKER_LEN - 1)        /* bytes that could be the start of the marker */

/*
 * Write `len` raw bytes to `out`.
 * Returns 0 on success, -1 if fwrite() stored fewer bytes than requested.
 */
static int write_bytes(FILE *out, const char *buf, int len)
{
    if (len <= 0)
        return 0;
    return fwrite(buf, 1, (size_t)len, out) == (size_t)len ? 0 : -1;
}

/*
 * File-receiving client.
 *
 * Connects to 127.0.0.1:8888 and speaks this protocol:
 *   1. receive the file name and open that file for binary writing
 *   2. reply "OKAY"
 *   3. receive file data until the stream ends with END_MARKER
 *   4. echo END_MARKER back
 *
 * Received bytes are written with fwrite() and an explicit length, never
 * with string functions, because the data may contain zero bytes. TCP is a
 * byte stream, so the marker may arrive glued to file data or split across
 * two recv() calls; the last HOLD_BACK bytes are therefore kept in memory
 * until it is known whether they belong to the marker.
 *
 * Limitation of the protocol itself: a data chunk that happens to end with
 * the marker text is taken as the end of the transfer.
 *
 * Returns 0 when the file was received completely, 1 otherwise.
 */
int main(int argc , char *argv[])
{
    WSADATA wsa;
    SOCKET s;
    struct sockaddr_in server;
    char server_reply[RECV_SIZE + 1];          /* +1 for the terminator */
    char pending[RECV_SIZE + END_MARKER_LEN];  /* held-back tail + one recv */
    int pending_len = 0;
    int recv_size;
    int err;
    int i = 0;
    int complete = 0;   /* set once the end marker has been seen */
    int write_ok = 1;
    FILE *fptr;

    (void)argc;
    (void)argv;

    printf("\nInitialising Winsock...");
    /* WSAStartup reports its error through the return value. */
    err = WSAStartup(MAKEWORD(2,2), &wsa);
    if (err != 0)
    {
        printf("Failed. Error Code : %d", err);
        return 1;
    }

    printf("Initialised.\n");

    //Create a socket
    if((s = socket(AF_INET , SOCK_STREAM , 0 )) == INVALID_SOCKET)
    {
        printf("Could not create socket : %d" , WSAGetLastError());
        WSACleanup();
        return 1;
    }

    printf("Socket created.\n");

    memset(&server, 0, sizeof(server));
    server.sin_addr.s_addr = inet_addr("127.0.0.1");
    server.sin_family = AF_INET;
    server.sin_port = htons( 8888 );

    //Connect to remote server
    if (connect(s , (struct sockaddr *)&server , sizeof(server)) == SOCKET_ERROR)
    {
        puts("connect error");
        closesocket(s);
        WSACleanup();
        return 1;
    }

    puts("Connected");

    /* First message is the file name. recv may return 0 or SOCKET_ERROR,
       neither of which is usable as an index for the terminator. */
    recv_size = recv(s , server_reply , RECV_SIZE , 0);
    if (recv_size <= 0)
    {
        puts("recv failed");
        closesocket(s);
        WSACleanup();
        return 1;
    }

    puts("filename : ");
    server_reply[recv_size] = '\0';
    puts(server_reply);

    /* NOTE(review): the name comes from the peer and is used as-is;
       sanitise it before running this outside a trusted lab network. */
    FILE *out = fopen(server_reply, "wb");
    fptr = out;
    if (fptr == NULL)
    {
        perror("fopen");
        closesocket(s);
        WSACleanup();
        return 1;
    }

    send(s , "OKAY" , (int)strlen("OKAY") , 0);

    while(1){
        /* Append after the bytes held back from the previous round. */
        recv_size = recv(s , pending + pending_len , RECV_SIZE , 0);
        if (recv_size == SOCKET_ERROR)
        {
            puts("recv failed"); break;
        }
        if (recv_size == 0)
        {
            /* Peer closed without a marker: the held-back bytes are data. */
            puts("connection closed before end marker");
            if (write_bytes(fptr, pending, pending_len) != 0) write_ok = 0;
            pending_len = 0;
            break;
        }
        pending_len += recv_size;

        if (pending_len >= END_MARKER_LEN &&
            memcmp(pending + pending_len - END_MARKER_LEN, END_MARKER, END_MARKER_LEN) == 0)
        {
            /* Everything in front of the marker is file data. */
            if (write_bytes(fptr, pending, pending_len - END_MARKER_LEN) != 0) write_ok = 0;
            complete = 1;
            printf("recieved end connection"); break;
        }

        if (pending_len > HOLD_BACK)
        {
            /* Flush what cannot be part of a marker still in flight. */
            int flush = pending_len - HOLD_BACK;
            if (write_bytes(fptr, pending, flush) != 0)
            {
                write_ok = 0;
                puts("write failed"); break;
            }
            memmove(pending, pending + flush, HOLD_BACK);
            pending_len = HOLD_BACK;
        }

        /* Report the byte count; the data itself is not printable text. */
        printf("received %d bytes\n", recv_size);
        printf("%d,\n",i++);
    }

    send(s, END_MARKER, END_MARKER_LEN, 0);
    printf("sent : end connection");
    if (fclose(fptr) != 0) write_ok = 0;

    if (complete && write_ok)
        puts("File received\n");
    else
        puts("File transfer incomplete\n");

    closesocket(s);
    WSACleanup();

    return (complete && write_ok) ? 0 : 1;
}

Then I wanted to know how many bytes my program actually reads. So I wrote a simple program that finds the end of the file with fseek and prints the size, reads the file, and then prints the length of the buffer it read. This is the code I used:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Prints a file's size twice: once from fseek/ftell and once from strlen()
 * on the bytes that were read, then prints the buffer with puts().
 *
 * The second number is NOT the number of bytes read. strlen() counts up to
 * the first zero byte, so for a binary file it reports the offset of that
 * byte. It is kept here on purpose, to show the difference; use the value
 * returned by fread() when the real byte count is needed.
 *
 * Returns EXIT_SUCCESS, or EXIT_FAILURE if the file cannot be read.
 */
int main(void){
    printf("Enter file : ");
    char filename[50];
    /* Width limit keeps the input inside filename[50]. */
    if (scanf("%49s", filename) != 1) {
        fprintf(stderr, "no file name given\n");
        return EXIT_FAILURE;
    }

    FILE *f = fopen(filename, "rb");
    if (f == NULL) {
        perror("fopen");
        return EXIT_FAILURE;
    }

    long fsize = -1;
    if (fseek(f, 0, SEEK_END) != 0 || (fsize = ftell(f)) < 0 || fseek(f, 0, SEEK_SET) != 0) {
        perror("fseek/ftell");
        fclose(f);
        return EXIT_FAILURE;
    }

    printf("File size is : %ld bytes (according to SEEK_END)\n", fsize);

    char *string = malloc((size_t)fsize + 1);
    if (string == NULL) {
        perror("malloc");
        fclose(f);
        return EXIT_FAILURE;
    }

    size_t nread = fread(string, 1, (size_t)fsize, f);
    fclose(f);

    /* Terminate BEFORE calling strlen(), so it cannot run past the buffer
       when the file contains no zero byte. */
    string[nread] = 0;
    long len = (long)strlen(string);

    printf("File size is : %ld bytes (according to freed)\n", len);

    /* Like strlen(), puts() stops at the first zero byte. */
    puts(string);

    free(string);
    return EXIT_SUCCESS;
}

And the output I got is this :

Enter file : 1.docx
File size is : 12920 bytes (according to SEEK_END)
File size is : 5 bytes (according to freed)
PK

Can someone help me with this :( I really don't know why these two values are different. They are supposed to be equal, right?

San
  • 453
  • 3
  • 14
  • Why are you asking *us* if an error occurred. You should be asking your computer! Add some error checking! – ikegami May 26 '20 at 15:37
  • 1
    Re "*according to freed*", Not quite. `fread` returns the amount read, which could be more than what `strlen` returns. – ikegami May 26 '20 at 15:40
  • How can the value returned by fread and the length of the string read be different? – San May 26 '20 at 18:04
  • 1
    in C, strings are terminated with 0 (also called a NUL terminator). `strlen` counts characters until it encounters a 0, in which case it stops and returns its count. `fread` does no such thing, it reads data regardless of what it is. I haven't really looked at your code, but you should not be using C string functions like `strlen` on binary data files like .docx and .gif. The C string functions are expecting printable character strings, using them for general-purpose memory applications will lead to trouble. – yano May 26 '20 at 18:22
  • *YES* just now I checked. fread() returns the actual size of the data (12920 bytes). Do you know in which data type I could store the binary values? I think the char data type is the actual problem because these files contain UTF encoded characters too.. – San May 26 '20 at 18:28
  • You're reading the raw bytes of a .docx file into memory, then calling `strlen` on that data. That's a meaningless operation. There's going to be tons of extra data in a .docx file format, whatever that is. Probably a header, formatting information, who knows. Even the actual printable content is surely stored in multi-byte unicode characters or something. So it looks like `strlen` made it to byte 6 before it encountered a 0. If you're trying to extract the text from a Word document, that's going to be a lot of work. – yano May 26 '20 at 18:33
  • storing the raw data as `char` should be fine, although I'd use `unsigned char`. It's just raw binary data, You can open any file type in the world and read its raw data into memory. Until you understand the file format you're dealing with, and know how to parse that data, you won't be able to do anything particularly interesting with it, other than copy it somewhere else or hash it. – yano May 26 '20 at 18:37
  • 1
    thank you so much for your help, that works – San May 26 '20 at 19:00

1 Answers1

0

So I tried to read a .docx file using fscanf, fgets and fread. What I understood is that fscanf and fgets are text-oriented (they look for whitespace or newlines), so they are not suitable for reading binary files. fread reads the raw bytes; these can be stored in a char array (unsigned char or uint8_t states the intent more clearly), but the number of bytes must be taken from fread's return value, not from strlen(). After making this change, it now works well.

So the conclusion is: fread does read the whole file, but strlen() stops counting at the first zero byte, and binary files contain many zero bytes. That's why I got different values when I used strlen().

This is the output I got

Enter file : 1.docx
File size is : 12920 bytes (according to SEEK_END)
File size is : 12920 bytes (according to freed)
PK

when I changed the code to this

#include <stdio.h>
#include <stdlib.h>

/*
 * Prints a file's size twice: once from fseek/ftell and once from the value
 * returned by fread(), then prints the buffer with puts().
 *
 * fread() reports how many bytes were actually stored, whatever their
 * values, so the two numbers agree for binary files too. puts() still stops
 * at the first zero byte, so only the leading text-like part of a binary
 * file is shown.
 *
 * Returns EXIT_SUCCESS, or EXIT_FAILURE if the file cannot be read.
 */
int main(void){
    printf("Enter file : ");
    char filename[50];
    /* Width limit keeps the input inside filename[50]. */
    if (scanf("%49s", filename) != 1) {
        fprintf(stderr, "no file name given\n");
        return EXIT_FAILURE;
    }

    FILE *f = fopen(filename, "rb");
    if (f == NULL) {
        perror("fopen");
        return EXIT_FAILURE;
    }

    long fsize = -1;
    if (fseek(f, 0, SEEK_END) != 0 || (fsize = ftell(f)) < 0 || fseek(f, 0, SEEK_SET) != 0) {
        perror("fseek/ftell");
        fclose(f);
        return EXIT_FAILURE;
    }

    printf("File size is : %ld bytes (according to SEEK_END)\n", fsize);

    char *string = malloc((size_t)fsize + 1);
    if (string == NULL) {
        perror("malloc");
        fclose(f);
        return EXIT_FAILURE;
    }

    size_t nread = fread(string, 1, (size_t)fsize, f);
    long len = (long)nread;

    printf("File size is : %ld bytes (according to freed)\n", len);

    fclose(f);

    /* Terminate after the bytes actually read, which may be fewer than
       fsize if the read came up short. */
    string[nread] = 0;
    puts(string);

    free(string);
    return EXIT_SUCCESS;
}

which makes more sense now.

San
  • 453
  • 3
  • 14
  • regarding: `scanf("%s",&filename);` This enables the user to overflow the input buffer. The result is undefined behavior and can lead to a seg fault event. To correct this problem, use a MAX CHARACTERS modifier that is 1 less than the length of the input buffer. 1 less because that `input format conversion` specifier always appends a NUL byte to the end of the input. Also, the `scanf()` family of functions returns the number of successful specifiers (or EOF) . For the current scenario, any value returned, other than 1, indicates an error occurred – user3629249 May 26 '20 at 23:22
  • regarding: `void main(){` there are two valid signatures for `main()` both return `int`, not `void` – user3629249 May 26 '20 at 23:24
  • regarding: `FILE *f = fopen(filename, "rb");` always check (!=NULL) the returned value to assure the operation was successful. If not successful (==NULL) call `perror( "fopen failed" )` probably followed by `exit( EXIT_FAILURE );` – user3629249 May 26 '20 at 23:26
  • the functions: `fseek()` and `ftell()` can fail, so always check the returned value to assure they are successful. – user3629249 May 26 '20 at 23:28
  • regarding: `char *string = malloc(fsize + 1);` The function: `malloc()` can fail. Therefore, always check (!=NULL) the returned value to assure the operation was successful. – user3629249 May 26 '20 at 23:29
  • when you're reading a binary file, then: `string[fsize] = 0; puts(string);` will not work, especially if the file contains NUL characters. Suggest using: `#include <unistd.h>` and `write( 1, string, fsize );` and the code should be checking: returned value == fsize to assure the operation was successful – user3629249 May 26 '20 at 23:46