I'm trying to make a compiler for a small little language I designed. I'm trying to use Flex as the lexical analyzer generator and Bison as the parser generator. I've read the wikipedia page on Bison, and several posts on stackoverflow on getting Flex and Bison to work together nicely. For some reason, I'm still getting errors. Here's the Makefile:
CC = gcc
CFLAGS = -O2 -Wall -Wextra -Wpedantic -lfl
INFILES = main.c Parser.c Lexer.c
OUTFILE = language
default: Lexer.c Parser.c
$(CC) $(CFLAGS) $(INFILES) -o $(OUTFILE)
Lexer.c: funlang.l
flex funlang.l
Parser.c: funlang.y Lexer.c
bison -d -Wcounterexamples funlang.y
clean:
rm Lexer.* Parser.* language
main.c:
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include "eval.h"
int main(int argc, char *argv[]){
if(argc < 2){
printf("%s error: No input file supplied\n", argv[0]);
return 0;
}
if(argc > 3){
printf("%s error: Too many arguments supplied\n", argv[0]);
return 0;
}
FILE *file = fopen(argv[1], "r");
if(file == NULL){
printf("Error opening file %s\n", argv[1]);
return 0;
}
fseek(file, 0, SEEK_END);
size_t input_length = (size_t) ftell(file);
rewind(file);
size_t bytes_read = 0;
char *input = (char *) malloc(sizeof(char) * input_length);
bytes_read = fread(input, 1, input_length, file);
if(bytes_read != input_length){
puts("Error reading file");
return 0;
}
fclose(file);
/* I'm still not doing anything with the parser or lexer yet, because I've not gotten them to work */
return 0;
}
eval.h:
#include "Parser.h"
#include "Lexer.h"
funlang.l:
%option outfile="Lexer.c" header-file="Lexer.h"
%option bison-bridge bison-locations never-interactive reentrant
%option warn nodefault nounistd yylineno noinput nounput
%option noyywrap batch
%{
#include <stdint.h>
#include <string.h>
#include "Parser.h"
#include "Lexer.h"
size_t line_count = 1;
%}
int_literal ((-)?((0x)[0-9A-Fa-f]+|[0-9]+))
str_literal ("[A-Za-z0-9 \t!#-&(-/:-@[-_{-~]*")
whitespace [ \t\r]
identifier ([A-Za-z])([A-Za-z0-9]+)
%%
"func" {return FUNCTION_KEYWORD; }
"if" {return IF; }
"else" {return ELSE; }
"int" {return INT_KEYWORD }
"str" {return STR_KEYWORD; }
"bool" {return BOOL_KEYWORD; }
"true" {return BOOL_LITERAL; }
"false" {return BOOL_LITERAL; }
{int_literal} {yylval.val = (intmax_t) strtol(yytext, (char **) NULL, 0);
return INT_LITERAL; }
{str_literal} {return STR_LITERAL; }
"->" {return GIVES_TYPE; }
{identifier} {yylval.name = strdup(yytext);
return IDENTIFIER; }
\n {line_count++; }
{whitespace}
. {printf("Error, unrecognized char at line #%li", line_count);
return OTHER; }
%%
and funlang.y:
%define api.pure full
%locations
%param {yyscan_t scanner}
%code top{
#include <stdio.h>
#include <stdint.h>
}
%code requires{
typedef void* yyscan_t;
}
%code{
int yylex(YYSTYPE* yylvalp, YYLTYPE* yyllocp, yyscan_t scanner);
}
%{
#include "Parser.h"
%}
%token FUNCTION_KEYWORD IF ELSE INT_KEYWORD STR_KEYWORD BOOL_KEYWORD
%token BOOL_LITERAL STR_LITERAL GIVES_TYPE OTHER
%token <val> INT_LITERAL
%token <name> IDENTIFIER
%output "Parser.c"
%defines "Parser.h"
%union{
char name[16];
intmax_t val;
};
%%
function: FUNCTION_KEYWORD IDENTIFIER '(' parameters ')' GIVES_TYPE INT_KEYWORD '{' statement '}'
| FUNCTION_KEYWORD IDENTIFIER '(' parameters ')' GIVES_TYPE STR_KEYWORD '{' statement '}'
| FUNCTION_KEYWORD IDENTIFIER '(' parameters ')' GIVES_TYPE BOOL_KEYWORD '{' statement '}';
parameters: parameter
| %empty;
parameter: INT_KEYWORD IDENTIFIER
| parameter ',' INT_KEYWORD IDENTIFIER
| STR_KEYWORD IDENTIFIER
| parameter ',' STR_KEYWORD IDENTIFIER
| BOOL_KEYWORD IDENTIFIER
| parameter ',' BOOL_KEYWORD IDENTIFIER;
call_parameters: parameter
| %empty;
call_parameter: IDENTIFIER
| parameter ',' IDENTIFIER;
array_list: INT_LITERAL
| IDENTIFIER
| function_call
| indexing_expression
| STR_LITERAL
| BOOL_LITERAL
| array_list ',' IDENTIFIER
| array_list ',' function_call
| array_list ',' indexing_expression
| array_list ',' INT_LITERAL
| array_list ',' STR_LITERAL
| array_list ',' BOOL_LITERAL;
var_definition: INT_KEYWORD IDENTIFIER '=' INT_LITERAL ';'
| INT_KEYWORD IDENTIFIER '=' math_expression ';'
| INT_KEYWORD IDENTIFIER '=' function_call ';'
| INT_KEYWORD IDENTIFIER ';'
| INT_KEYWORD indexing_expression '=' array_list ';'
| INT_KEYWORD indexing_expression ';'
| STR_KEYWORD IDENTIFIER '=' STR_LITERAL ';'
| STR_KEYWORD IDENTIFIER '=' function_call ';'
| STR_KEYWORD IDENTIFIER ';'
| STR_KEYWORD indexing_expression '=' array_list ';'
| STR_KEYWORD indexing_expression ';'
| BOOL_KEYWORD IDENTIFIER '=' BOOL_LITERAL ';'
| BOOL_KEYWORD IDENTIFIER '=' function_call ';'
| BOOL_KEYWORD IDENTIFIER ';'
| BOOL_KEYWORD indexing_expression '=' array_list ';'
| BOOL_KEYWORD indexing_expression ';';
function_call: IDENTIFIER '(' call_parameters ')';
boolean_statement: '(' IDENTIFIER "==" IDENTIFIER ')'
| '(' IDENTIFIER ')'
| '(' indexing_expression ')'
| '(' boolean_statement ')'
| '(' boolean_statement "&&" boolean_statement ')'
| '(' boolean_statement "||" boolean_statement ')'
| '(' '!' boolean_statement ')'
| '(' function_call ')';
BINARY_OPERATOR: '+'| '-' | '*' | '/' | '&' | '|';
math_expression: '(' BINARY_OPERATOR INT_LITERAL INT_LITERAL ')'
| '(' BINARY_OPERATOR INT_LITERAL IDENTIFIER ')'
| '(' BINARY_OPERATOR IDENTIFIER INT_LITERAL ')'
| '(' BINARY_OPERATOR INT_LITERAL function_call ')'
| '(' BINARY_OPERATOR function_call INT_LITERAL ')'
| '(' BINARY_OPERATOR function_call function_call ')'
| '(' BINARY_OPERATOR INT_LITERAL math_expression ')'
| '(' BINARY_OPERATOR math_expression INT_LITERAL ')'
| '(' BINARY_OPERATOR math_expression IDENTIFIER ')'
| '(' BINARY_OPERATOR IDENTIFIER math_expression ')'
| '(' BINARY_OPERATOR math_expression function_call ')'
| '(' BINARY_OPERATOR function_call math_expression ')'
| '(' BINARY_OPERATOR math_expression math_expression ')';
indexing_expression: IDENTIFIER '[' INT_LITERAL ']'
| IDENTIFIER '[' IDENTIFIER ']';
statement: IF boolean_statement '{' statement '}'
| ELSE '{' statement '}'
| "return" function_call ';'
| "return" IDENTIFIER ';'
| "return" INT_LITERAL ';'
| "return" STR_LITERAL ';'
| "return" BOOL_LITERAL ';'
| var_definition;
%%
The big issue I'm having is that when compiling everything, there are several stages in which the C compiler says that 'yyin', 'yyout', 'yyleng' and several others are undeclared. E.g., Lexer.c:800:10: error: ‘yyin’ undeclared (first use in this function)
I'm sorry if I left out any important information, or if this is otherwise unclear. I will do my best to clarify any ambiguities. Thank you in advance