I need to write a simple tokenizer for the C language. It doesn't have to classify things or use any grammar. All it needs to do is print the words, characters, parentheses and other things separately. I'm using lex for it, but I need to read a C source file and then tokenize it. Below you can find my current code. I have three questions.
1) How can I correct the error message I get when I compile:
parser.l:47:1: warning: unknown conversion type character ‘=’ in format [-Wformat]
2) How can I make the lexer run on the source file that is passed as an argument?
3) How can I make the tokenizer print the names of variables and other unspecified things just as they are? That is, if I have "int test", it prints int because that is specified in the lexer, and I also want it to print test separately even though it is not specified in the lexer.
Here is my code:
%{
#include <stdio.h>
#include <stdlib.h>
%}
%%
    /*
     * Rules section: every recognised token is written to stdout followed
     * by a tab.  The scanner always takes the longest match and, on a tie,
     * the rule listed first, so the keyword rules must stay above the
     * generic identifier rule.  Input matched by no rule (whitespace,
     * digits, other punctuation) is copied to stdout unchanged by the
     * scanner's default action.
     */
"auto" { printf("auto\t"); }
"break" { printf("break\t"); }
"case" { printf("case\t"); }
"char" { printf("char\t"); }
"const" { printf("const\t"); }
"continue" { printf("continue\t"); }
"default" { printf("default\t"); }
"do" { printf("do\t"); }
"double" { printf("double\t"); }
"else" { printf("else\t"); }
"enum" { printf("enum\t"); }
"extern" { printf("extern\t"); }
"float" { printf("float\t"); }
"for" { printf("for\t"); }
"goto" { printf("goto\t"); }
"if" { printf("if\t"); }
"inline" { printf("inline\t"); }
"int" { printf("int\t"); }
"long" { printf("long\t"); }
"printf" { printf("printf\t"); }
"register" { printf("register\t"); }
"restrict" { printf("restrict\t"); }
"return" { printf("return\t"); }
"short" { printf("short\t"); }
"signed" { printf("signed\t"); }
"sizeof" { printf("sizeof\t"); }
"static" { printf("static\t"); }
"struct" { printf("struct\t"); }
"switch" { printf("switch\t"); }
"typedef" { printf("typedef\t"); }
"union" { printf("union\t"); }
"unsigned" { printf("unsigned\t"); }
"void" { printf("void\t"); }
"volatile" { printf("volatile\t"); }
"while" { printf("while\t"); }
    /*
     * Any other identifier is printed exactly as written.  Because the
     * longest match wins, this also stops keyword patterns from firing
     * inside longer names (for example the "int" inside "print").
     */
[A-Za-z_][A-Za-z0-9_]* { printf("%s\t", yytext); }
"+=" { printf("+=\t"); }
"-=" { printf("-=\t"); }
"*=" { printf("*=\t"); }
"/=" { printf("/=\t"); }
    /* A literal '%' in a printf format string must be written as "%%". */
"%=" { printf("%%=\t"); }
"&=" { printf("&=\t"); }
"^=" { printf("^=\t"); }
"|=" { printf("|=\t"); }
"++" { printf("++\t"); }
"--" { printf("--\t"); }
"->" { printf("->\t"); }
"&&" { printf("&&\t"); }
"||" { printf("||\t"); }
"<=" { printf("<=\t"); }
">=" { printf(">=\t"); }
"==" { printf("==\t"); }
"!=" { printf("!=\t"); }
"{" { printf("{\t"); }
"}" { printf("}\t"); }
"=" { printf("=\t"); }
"(" { printf("(\t"); }
")" { printf(")\t"); }
"[" { printf("[\t"); }
"]" { printf("]\t"); }
"<" { printf("<\t"); }
">" { printf(">\t"); }
%%
void main(int argc, char** argv)
{
if(argc != 2)
{
printf("Usage: %s filename\n", argv[0]);
exit(1);
}
char *filename = argv[1];
FILE *f = fopen(filename, "r");
if(f == NULL)
{
fprintf(stderr, "Unable to open %s\n", filename);
}
else
{
yylex();
}
}