0

This is an edited, corrected, and more specific version of a previous question of mine. I'm doing a homework assignment in which we have to use a sliding window to print the comments and strings from an input file read from stdin. I am very close, but something is missing. Below are my code, an input, my current output, and the correct output. The // comments and the ignoring of characters work. I'm not sure what the problem is with strings, and in /* */ comments I can't get rid of the star from the initial /* without breaking the detection of the closing */. Thank you to anyone who can offer assistance.

Code:

#include <stdio.h>
// Scanner states used by the sliding-window loop in main().
// The entry/exit conditions noted on each state are the ones main() tests
// on its two-byte window (previous byte, current byte).
// NOTE(review): as main() is written, `Char` is the state entered on "//"
// and `CPPComment` is the state entered on a single quote, so the two names
// look swapped relative to what they recognise -- confirm and rename.
typedef enum  
{  
    Initial,      // not inside any comment or literal
    Comment,      // entered on '/' then '*'; left on '*' then '/'
    String,       // entered on a double quote; left on a double quote not preceded by a backslash
    Char,         // entered on '/' then '/'; left when the current byte is a newline
    CPPComment    // entered on a single quote; left on a single quote not preceded by a backslash
} extrema; 
// Reads stdin one byte at a time through a two-byte window (c1 = previous
// byte, c2 = current byte) and echoes selected bytes to stdout depending on
// the current scanner state. Returns 0 once getchar() reports end of input.
//
// Each pass of the loop does four things in this order:
//   1. choose `next` from the current state and the window,
//   2. emit output for the current state and the window,
//   3. read one more byte and slide the window,
//   4. commit `next` as the new state.
// Because the window slides before the new state is committed, the first
// pass spent in a new state sees the last byte of the opening delimiter in
// c1 and the first byte after it in c2.
int main()
{
    int c, c1 = 0, c2 = 0;   // both window slots start as 0, so the first pass matches nothing
    extrema state = Initial;
    extrema next = Initial;
    while(1)
    {
        // Step 1: decide the state for the NEXT pass from the current window.
        switch(state)
        {
           // Tests are tried in order: "/*", then a double quote, then "//", then a single quote.
           case Initial: next = ((c2 == '*' && c1 == '/') ? Comment : (c2 == '\"') ? String : (c2 == '/' && c1 == '/') ? Char : (c2 == '\'') ? CPPComment : Initial); break; 
           // NOTE(review): on the first pass after "/*" the window is ('*', next byte),
           // so the input "/*/" already satisfies this test and ends the comment.
           case Comment: next = ((c2 == '/' && c1 == '*') ? Initial : Comment); break; 
           // Only the single preceding byte is checked for a backslash.
           case String: next = ((c2 == '\"' && c1 != '\\') ? Initial : String); break;
           // Entered on "//" (see Initial); runs until the current byte is a newline.
           case Char: next = ((c2 == '\n') ? Initial : Char); break;
           // Entered on a single quote (see Initial); ends on an unescaped single quote.
           case CPPComment: next = ((c2 == '\'' && c1 != '\\') ? Initial : CPPComment); break;
           default: next = state; 
        }
        // Step 2: output for the CURRENT state.
        if(state == Comment)
        {
            // Prints the PREVIOUS byte (c1). When the window is "*/" a newline is
            // printed instead of the '*'.
            // NOTE(review): on the first pass in this state c1 is the '*' of the
            // opening delimiter, so that '*' is echoed unless c2 is '/'.
            if(c1 == '*')
            {
                if(c2 != '/')
                    putchar(c1);
                else
                    putchar('\n');  
            }
            else
                putchar(c1);
        }
        else if(state == String)
        {
            // Prints the CURRENT byte (c2) unless it is a double quote that directly
            // follows a backslash.
            // NOTE(review): the effect is that an escaped quote is dropped while the
            // closing quote is printed, and no newline is written after the string.
            if(c2 != '\"' || (c2 == '\"' && c1 != '\\'))
                putchar(c2);
        }
        else if(state == CPPComment)
        {
                // Echoes every current byte seen in this state, including the
                // single quote that ends it.
                putchar(c2);
        }
        // There is no output branch for state Char, so nothing is echoed while in it.

        // Step 3: fetch the next byte; a negative getchar() result ends the loop.
        c = getchar(); if( c < 0) break;
        c1 = c2; c2 = c; // slide window
//printf("%i",state);
        // Step 4: the state chosen in step 1 takes effect for the next pass.
        state = next;
        // c2 is the current input byte and c1 is the previous input byte
    }
    return 0;
}

Input:

 /* recognize '...' otherwise see " as start of string: */

     int c='\"', d='\'', e = '\012'; // comment line 3

 /* recognize "..." otherwise see comments here: */

     char s[] = "abc/*not a comment*/efg\"ZZ\'";

     char t[] = "ABC//not a comment//EFG\x012\/\/";

char *p = ""; //

int d = '/*'; // comment line 13

/*/*/
/**/
/*Z*/
/***/
/****/
/**A**/

My Output:

* recognize '...' otherwise see " as start of string: 
 comment line 3
* recognize "..." otherwise see comments here: 
abc/*not a comment*/efg\ZZ\'"ABC//not a comment//EFG\x012\/\/""
 comment line 13


*
*Z
**
***
**A*

Correct Output:

 recognize '...' otherwise see " as start of string: 
 comment line 3
 recognize "..." otherwise see comments here: 
abc/*not a comment*/efg\"ZZ\'
ABC//not a comment//EFG\x012\/\/


 comment line 13
/

Z
*
**
*A*
Mike Weber
  • 179
  • 1
  • 1
  • 10
  • 1
    Note: your code will get more compact if you remove the `next` state and the two-token lookahead (that may require adding some more states). You could also handle *all* logic inside the `switch(state){}`, including output. This may require a few continues, or even gotos, but the code would become clearer, IMHO. – wildplasser Apr 20 '13 at 18:16
  • WRT the title: do you want to *remove* the strings and comments, or do you want to *extract* them? – wildplasser Apr 20 '13 at 18:27
  • You are referring to your states inconsistently, both by names and by numbers. This is not helpful. Why on earth should anyone be forced to figure out what is "state 3"? Perhaps if you get rid of these numbers it will be slightly easier for others to reason about your program. – n. m. could be an AI Apr 20 '13 at 19:01
  • I corrected my question with wildplasser and n.m.'s suggestions. – Mike Weber Apr 21 '13 at 20:24

1 Answer

2

Here's how you could do it:

 /* recognize '...' otherwise see " as start of string: */

     int c='\"', d='\'', e = '\012'; // comment line 3

 /* recognize "..." otherwise see comments here: */

     char s[] = "abc/*not a comment*/efg\"ZZ\'";

     char t[] = "ABC//not a comment//EFG\x012\/\/";

char *p = ""; //

int dd = '/*'; // comment line 13

/*/*/
/**/
/*Z*/
/***/
/****/
/**A**/

#include <stdio.h>
#include <stdlib.h>

// Scans the C source file named by argv[1] and prints the text of every
// comment, string literal and character literal it finds. Each item is
// preceded by an opening marker line such as "<C comment>" and followed by
// the matching closing marker line.
//
// The scanner keeps a two-byte window: c[0] is the byte being classified and
// c[1] is one byte of lookahead (EOF once the input is exhausted). Delimiter
// bytes are consumed by dedicated "skip" states so that they are neither
// printed nor mistaken for the start of the closing delimiter.
//
// Returns EXIT_SUCCESS when the whole file was scanned, EXIT_FAILURE when the
// argument count is wrong, the file cannot be opened, or a read error occurs.
// A construct that is still open at end of file gets no closing marker.
int main(int argc, char** argv)
{
  FILE* f;
  int c[2];
  int status = EXIT_SUCCESS;

  enum {
    INITIAL,      // outside any comment or literal
    CCOMMENT1,    // saw "/*": skip the '*'
    CCOMMENT2,    // inside a block comment
    CCOMMENT3,    // saw the closing '*' followed by '/': skip the '/'
    CPPCOMMENT1,  // saw "//": skip the second '/'
    CPPCOMMENT2,  // inside a line comment
    STRING1,      // inside a string literal
    STRING2,      // inside a string literal, right after a backslash
    CHAR1,        // inside a character literal
    CHAR2,        // inside a character literal, right after a backslash
  } state = INITIAL;

  if (argc != 2)
  {
    fprintf(stderr, "usage: %s <source-file>\n",
            (argc > 0 && argv[0] != NULL) ? argv[0] : "program");
    return EXIT_FAILURE;
  }

  // Plain "r" is used because a 't' suffix is not one of the ISO C mode strings.
  f = fopen(argv[1], "r");
  if (f == NULL)
  {
    perror(argv[1]);
    return EXIT_FAILURE;
  }

  // The loop runs once per input byte, including the last one; on that final
  // pass the lookahead c[1] is EOF, which matches none of the tests below.
  for (c[0] = fgetc(f); c[0] != EOF; c[0] = c[1])
  {
    c[1] = fgetc(f);

    switch (state)
    {
    case INITIAL:
      if (c[0] == '/' && c[1] == '*')
      {
        state = CCOMMENT1;
        printf("<C comment>\n");
      }
      else if (c[0] == '/' && c[1] == '/')
      {
        state = CPPCOMMENT1;
        printf("<C++ comment>\n");
      }
      else if (c[0] == '"')
      {
        state = STRING1;
        printf("<String literal>\n");
      }
      else if (c[0] == '\'')
      {
        state = CHAR1;
        printf("<Char literal>\n");
      }
      break;

    case CCOMMENT1:
      // c[0] is the '*' of the opening delimiter: consume it without printing
      state = CCOMMENT2;
      break;

    case CPPCOMMENT1:
      // c[0] is the second '/' of the opening delimiter: consume it
      state = CPPCOMMENT2;
      break;

    case CCOMMENT2:
      if (c[0] == '*' && c[1] == '/')
      {
        state = CCOMMENT3;
        printf("\n</C comment>\n");
      }
      else
      {
        putchar(c[0]);
      }
      break;

    case CCOMMENT3:
      // c[0] is the '/' of the closing delimiter: consume it
      state = INITIAL;
      break;

    case CPPCOMMENT2:
      if (c[0] == '\n')
      {
        state = INITIAL;
        printf("\n</C++ comment>\n");
      }
      else
      {
        putchar(c[0]);
      }
      break;

    case STRING1:
      if (c[0] == '"')
      {
        state = INITIAL;
        printf("\n</String literal>\n");
      }
      else
      {
        // a backslash is printed, and the byte after it is taken literally
        if (c[0] == '\\')
          state = STRING2;
        putchar(c[0]);
      }
      break;

    case STRING2:
      // escaped byte: print it without testing it for a closing quote
      state = STRING1;
      putchar(c[0]);
      break;

    case CHAR1:
      if (c[0] == '\'')
      {
        state = INITIAL;
        printf("\n</Char literal>\n");
      }
      else
      {
        if (c[0] == '\\')
          state = CHAR2;
        putchar(c[0]);
      }
      break;

    case CHAR2:
      // escaped byte: print it without testing it for a closing quote
      state = CHAR1;
      putchar(c[0]);
      break;

    default:
      break;
    }
  }

  // fgetc() returns EOF for read errors as well as for end of file
  if (ferror(f))
  {
    perror(argv[1]);
    status = EXIT_FAILURE;
  }

  fclose(f);
  return status;
}

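Output of this program, as originally posted, when run on its own source file (the listing above, including the test declarations at the top):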

<C comment>
 recognize '...' otherwise see " as start of string: 
</C comment>
<Char literal>
\"
</Char literal>
<Char literal>
\'
</Char literal>
<Char literal>
\012
</Char literal>
<C++ comment>
 comment line 3
</C++ comment>
<C comment>
 recognize "..." otherwise see comments here: 
</C comment>
<String literal>
abc/*not a comment*/efg\"ZZ\'
</String literal>
<String literal>
ABC//not a comment//EFG\x012\/\/
</String literal>
<String literal>

</String literal>
<C++ comment>

</C++ comment>
<Char literal>
/*
</Char literal>
<C++ comment>
 comment line 13
</C++ comment>
<C comment>
/
</C comment>
<C comment>

</C comment>
<C comment>
Z
</C comment>
<C comment>
*
</C comment>
<C comment>
**
</C comment>
<C comment>
*A*
</C comment>
<String literal>
rt
</String literal>
<Char literal>
/
</Char literal>
<Char literal>
*
</Char literal>
<String literal>
<C comment>\n
</String literal>
<Char literal>
/
</Char literal>
<Char literal>
/
</Char literal>
<String literal>
<C++ comment>\n
</String literal>
<Char literal>
"
</Char literal>
<String literal>
<String literal>\n
</String literal>
<Char literal>
\'
</Char literal>
<String literal>
<Char literal>\n
</String literal>
<C comment>
 skip * in /* and 2nd / in // 
</C comment>
<Char literal>
*
</Char literal>
<Char literal>
/
</Char literal>
<String literal>
\n</C comment>\n
</String literal>
<String literal>
%c
</String literal>
<C++ comment>
 skip / in */
</C++ comment>
<Char literal>
\n
</Char literal>
<String literal>
\n</C++ comment>\n
</String literal>
<String literal>
%c
</String literal>
<Char literal>
"
</Char literal>
<String literal>
\n</String literal>\n
</String literal>
<Char literal>
\\
</Char literal>
<String literal>
%c
</String literal>
<String literal>
%c
</String literal>
<C++ comment>
 skip escaped character
</C++ comment>
<String literal>
%c
</String literal>
<Char literal>
\'
</Char literal>
<String literal>
\n</Char literal>\n
</String literal>
<Char literal>
\\
</Char literal>
<String literal>
%c
</String literal>
<String literal>
%c
</String literal>
<C++ comment>
 skip escaped character
</C++ comment>
<String literal>
%c
</String literal>
Alexey Frunze
  • 61,140
  • 12
  • 83
  • 180