0

I just found a *.srt file that mpv fails to load. So, I thought let's make my own subtitle parser that takes subtitle's path as command-line argument. Here's what I tried :

/* Intended to be a program for parsing *.srt subtitles as an alternative to video players' */

#include <ncurses.h>
#include <unistd.h>

#define SEC_IN_MIN 60
#define MIN_IN_HR 60

long get_duration(FILE *fp); // to get the duration of a dialogue in seconds
long turn_to_sec(int hours, int minutes, int seconds); // returns sum of hours and minutes, all in seconds

int main(int argc, char **argv)
{
    FILE *fp;
    long sec;
    char ch;

    if(argv[1] == NULL)
    {
        printf("Please enter a filename!\n");
        return 1;
    }
    printf("Trying to open specified file %s\n",argv[1]);

    fp = fopen(argv[1],"r");
    if(fp == NULL)
    {
        printf("Error while opening file %s\n",argv[1]);
        return 1;
    }

    initscr(); // initialise nCurses window

    ch = getc(fp);
    while(ch != EOF)
    {
        clear();
        sec = get_duration(fp);
        while(1)
        {
            if((ch = getc(fp)) == '\n')
            {
                if((ch = getc(fp)) == '\n' || ch == EOF)
                    break;
                else
                    addch(ch);
            }
            addch(ch);
        }
        refresh();
        sleep(sec);
    }

    endwin(); // close nCurses
    fclose(fp); // close the file
    return 0;
}

long get_duration(FILE *fp)
{
    long duration = 0;
    char ch;
    short hour_start = 0, hour_end = 0, minute_start = 0, minute_end = 0, second_start = 0, second_end = 0;
    short count=0;

    /* just to get to the point where time-specs of the dialogue start */
    while((ch = getc(fp)) != '\n');

    /* extract characters until ':' to get hour_start */
    while((ch = getc(fp)) != 58)
    {
        hour_start += ch;
        count++;
    }
    hour_start -= (hour_start/(49*count));

    /* extract characters until ':' to get minute_start */
    count = 0;
    while((ch = getc(fp)) != 58)
    {
        minute_start += ch;
        count++;
    }
    minute_start -= (minute_start/(49*count));

    /* extract characters until ',' to get second_start */
    count = 0;
    while((ch = getc(fp)) != 44)
    {
        second_start += ch;
        count++;
    }
    second_start -= (second_start/(49*count));

    /* now, see if you can find a '>' */
    while((ch = getc(fp)) != 62);
    ch = getc(fp); // to get rid of that space after "-->"

    /* extract characters until ':' to get hour_end */
    while((ch = getc(fp)) != 58)
    {
        hour_end += ch;
        count++;
    }
    hour_end -= (hour_end/(49*count));

    /* extract characters until ':' to get minute_end */
    count = 0;
    while((ch = getc(fp)) != 58)
    {
        minute_end += ch;
        count++;
    }
    minute_end -= (minute_end/(49*count));

    /* extract characters until ',' to get second_end */;
    count = 0;
    while((ch = getc(fp)) != 44)
    {
        second_end += ch;
        count++;
    }
    second_end -= (second_end/(49*count));

    /* finally, gonna get those values */
    second_end -= second_start;
    minute_end -= minute_start;
    hour_end -= hour_start;

    duration += (turn_to_sec(hour_end, minute_end, second_end));

    /* positioning the fp to the right position just to keep the 'main()' :) */
    while((ch = getc(fp)) != '\n' || ch != EOF);

    return duration;
}

long turn_to_sec(int hours, int minutes, int seconds)
{
    long temp;

    /* manipulating hours */
    temp = hours;
    temp *= MIN_IN_HR;
    temp *= SEC_IN_MIN;

    seconds += temp;

    /* manipulating minutes */
    temp = minutes;
    temp *= SEC_IN_MIN;

    seconds += temp;

    return seconds;
}

On first attempt, I was using just the dialogue's start time as dialogue's duration, i.e end_time - start_time and that's why, this part was missing :

/* extract characters until ':' to get hour_end */
    while((ch = getc(fp)) != 58)
    {
        hour_end += ch;
        count++;
    }
    hour_end = (hour_end/(49*count));

    /* extract characters until ':' to get minute_end */
    count = 0;
    while((ch = getc(fp)) != 58)
    {
        minute_end += ch;
        count++;
    }
    minute_end = (minute_end/(49*count));

    /* extract characters until ',' to get second_end */
    count = 0;
    while((ch = getc(fp)) != 44)
    {
        second_end += ch;
        count++;
    }
    second_end = (second_end/(49*count));

and variables' names were a bit different and then I realised I was wrong but this is all irrelevant . I am just saying this because till then, code was working just fine (results were unexpected though there was some garbage) but now it just stucks and does nothing. Why is that? Much thanks for your time!

Here's the file I am trying : https://gist.github.com/gaurav712/6646ad7dfd3c487536dce9b0712471e7

  • It's not really clear what problem you are having. This question also presupposes knowledge of what an `.srt` file format looks like. – Sean Bright Mar 27 '19 at 20:47
  • Thanks for suggestion. Gonna add the file I was trying. –  Mar 27 '19 at 20:49
  • You read two characters - say "2" and "5", i.e. 0x32 (50d) and 0x35 (53d) - and sum them, so you have 103. And divide them by 49 by 2? This looks wrong to me. If you're trying to implement Horner's algorithm, you should have `while((ch=getch()) != ':') { min_count *= 60; min_count += (ch - '0'); }` or something like that. Better yet read a full line with `fgets()` and parse it with `strtok()` and `atoi()`, or maybe `sscanf()`. – LSerni Mar 27 '19 at 20:52
  • I am reading the return values from `getc` and as that's the corresponding `ASCII` value for those numbers, I had to make em integers so, it's kind of a hack but it wasn't supposed to be what it is. I missed a subtraction. Lemme edit it –  Mar 27 '19 at 20:59
  • @LSerni, it's fine now. –  Mar 27 '19 at 21:07
  • An an aside, why use the decimal ASCII values instead of just `':'` and `','`, etc. in your comparisons? The latter would be more readable. – Sean Bright Mar 27 '19 at 21:10
  • @Gaurav are you sure? The code looks identical to me. More to the point, I have tested it with all sixty numbers from "00" to "59" - the correct result is never returned. For example when minutes are "00", you add 48 and 48, obtaining 96. Then you subtract "minute_end/(49*count)" with a count of 2, so you divide minute_end, which is 96, by 49*2 which is 98. Being integers, 96/98 is zero; thus you obtain a minute value of 96. I would suggest writing a function that read digit bytes up to a non-digit, and returned their decimal values. – LSerni Mar 27 '19 at 21:50

2 Answers2

0

One of your problems, possibly the one your are experiencing now, is that getc() does not return the ASCII value of the character read. It may also return EOF if there's nothing more to read.

And since your cycles end when a certain character is found (e.g. ":"), and EOF is most definitely not that character, your program will, in those cases, loop forever.

I would suggest encapsulating that logic into a digit-reading function:

/**
 * Reads a positive number (hopefully less than INT_MAX) from a stream
 * Returns -1 if the stream is at end-of-file
 * 
 * @param FILE *fp
 * @return int            the number read, or -1 if EOF
 */

int readDigits(FILE *fp) {
    int value = 0, c;
    if (feof(fp)) {
        return -1;
    }
    for (;;) {
        c = fgetc(fp);
        // EOF is not a digit so we catch it in the digit check
        // if (EOF == c) {
        //    break;
        // }
        if ((c < '0') || (c > '9')) {
            break;
        }
        value *= 10;
        value += (c - '0');
    }
    return value;
}

int seconds(int h, int m, int s) {
    return (h*60 + m) * 60 + s;
}

Now you can do:

hour_start = readDigits(fp);
if (hour_start < 0) {
    // Error, do something
}
min_start = readDigits(fp);
// check if -1
sec_start = readDigits(fp);
// check if -1

sec_start = seconds(hour_start, min_start, sec_start);

...
LSerni
  • 55,617
  • 10
  • 65
  • 107
  • It helps a little bit, i.e it starts execution but now when I use you function to get input, it just reaches to the EOF directly and I con't figure out that issue. –  Mar 28 '19 at 09:43
0

I solved it :

I rewrote it from scratch with a shitload of comments

/* Third attempt to create a subtitle parser
 * 29 March, 2019
 * 12:55
 */

#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <ncurses.h>

/* to convert strings of format hh:mm:ss,m_s to seconds (ignoring milliseconds for now) */
int strToTime(char start[13], char end[13]);

int main(int argc, char **args){

    short count, shouldExit = 0;
    int duration, timeBwDialogues;
    char ch, startTimeStr[13], endTimeStr[13], prevTimeStr[13];
    FILE *fp;

    endTimeStr[0] = 0;

    /* Check if argument is present */
    if(args[1] == NULL){
        printf("No file specified!\n");
        exit(1);
    }

    /* Opening file for reading */
    printf("Trying to open file \"%s\"\n", args[1]);
    fp = fopen(args[1], "r");
    /* Checking if file was opened properly */
    if(fp == NULL){
        printf("Failed to open file \"%s\"!\n", args[1]);
        exit(2);
    }

    /* Checking if file has contents or not */
    if((ch = getc(fp)) == EOF){
        printf("File has no contents!\n");
        exit(3);
    }
    ungetc(ch, fp); /* putting ch back as file isn't empty */

    /* Section for setting up dialogue-number details
     * If I ever want to do that
     */

    /* initialising screen for nCurses */
    initscr();

    /* Here comes the actual stuff */
    while(1){
        /* As we aren't considering dialogue number, let's just skip that*/
        while((getc(fp)) != '\n');

        /* Reading dialogue's starting time :
         * It says keep reading until you find a character other that 0-9, ','
         * or ':' and store it to startTimeStr[]. Finally pass startTimeStr[]
         * to strToTime() to convert it to seconds(for now)
         */
        count = 0; /* Setting-up counter for the loop */
        while(1){
            ch = getc(fp);
            if(ch == ' ' || ch == '-')
                break;

            startTimeStr[count] = ch;
            count++;
        }
        startTimeStr[count] = '\0'; /* to terminate the string */

        /* Making a copy of endTimeStr[] in prevTimeStr[] to get timeBwDialogues */
        if(endTimeStr[0]){
            strcpy(prevTimeStr, endTimeStr);

            /* Calculating timeBwDialogues */
            timeBwDialogues = strToTime(prevTimeStr, startTimeStr);
        } else
            timeBwDialogues = strToTime("00:00:00,000", startTimeStr);

        /* For better readability */
        clear();
        refresh();
        /* Sleeping when there's no voice for synchronisation */
        sleep(timeBwDialogues);

        /* Placing the pointer to right position for reading ending time.
         * Using do-while to make sure at least one character is read before checking the condition
         */
        while((getc(fp)) != '>');
        if((ch = getc(fp)) == ' ');
        else
            ungetc(ch, fp);

        /* Just similar to what was done above, reading ending time */
        count = 0; /* Setting-up counter for the loop */
        while(1){
            ch = getc(fp);
            if(ch == '\n' || ch == ' ')
                break;

            endTimeStr[count] = ch;
            count++;
        }
        endTimeStr[count] = '\0'; /* to terminate the string */

        /* Calculating duration for individual dialogues */
        duration = strToTime(startTimeStr, endTimeStr); /* passing startTimeStr[] to strToTime */

        /* displaying the dialogue */
        while(1){
            ch = getc(fp);

            /* If ch is newline, the next character maybe EOF. So let's check */
            if(ch == EOF){
                shouldExit = 1;
                break;
            } else if(ch == '<'){
                while((ch = getc(fp)) != '>');
                continue;
            } else if(ch == '\n'){
                if((ch = getc(fp)) == EOF){
                    shouldExit  = 1;
                    break;
                } else if(ch == '\n') /* if the next character is newline, it's the end of the dialogue */
                    break;
                else{
                    ungetc(ch, fp);
                    addch('\n');
                    continue;
                }
            }

            /* As the next character to ch is not EOF, dialogue still exists(a dialogue might take multiple lines)
             * and it should be put on the screen
             */
            addch(ch);
        }
        refresh();
        sleep(duration);
        if(shouldExit)
            break;
    }
    /* Closing nCurses' window */
    endwin();

    /* Closing the file */
    fclose(fp);

    return 0;
}

/* Defining the function */
int strToTime(char start[13], char end[13]){
    int hour_start, hour_end, minute_start, minute_end, second_start, second_end;

    /* Separating hh, mm and ss for starting time. As I said above, I'll ignore milliseconds */

    /* getting hour_start */
    hour_start = ((start[0] - '0')*10)+(start[1] - '0');
    /* getting minute_start */
    minute_start = ((start[3] - '0')*10)+(start[4] - '0');
    /* getting second_start */
    second_start = ((start[6] - '0')*10)+(start[7] - '0');

    /* Separating hh, mm and ss for ending time. As I said above, I'll ignore milliseconds */

    /* getting hour_end */
    hour_end = ((end[0] - '0')*10)+(end[1] - '0');
    /* getting minute_end */
    minute_end = ((end[3] - '0')*10)+(end[4] - '0');
    /* getting second_end */
    second_end = ((end[6] - '0')*10)+(end[7] - '0');

    return ( ( ( ( (hour_end - hour_start) * 60) + (minute_end - minute_start) ) * 60) + (second_end - second_start) );
}