I am trying to make a transcompiler with Bison and Flex. This is my first time using these tools. My goal is to convert a very basic code from a simple custom language called "facile" to CIL. The problem I am facing is that when I run my transcompiler for the following test file, it throws me the error: Line 1: syntax error, unexpected end of file, expecting identifier
.
Here is the test file in "facile" language:
read a;
read b;
c := a+b;
print c;
Also, my transcompiler shows a segfault when I run it for an empty file.
Here's the language description:
Flex file facile.lex :
%{
#include <glib.h>
#include <assert.h>
#include "facile.y.h"
%}
%option yylineno
%%
if {
assert(printf("'if' found"));
return TOK_IF;
}
then {
assert(printf("'then' found"));
return TOK_THEN;
}
elsif {
assert(printf("'elsif' found"));
return TOK_ELSIF;
}
else {
assert(printf("'else' found"));
return TOK_ELSE;
}
read {
assert(printf("'read' found"));
return TOK_READ;
}
print {
assert(printf("'print' found"));
return TOK_PRINT;
}
while {
assert(printf("'while' found"));
return TOK_WHILE;
}
do {
assert(printf("'do' found"));
return TOK_DO;
}
end {
assert(printf("'end' found"));
return TOK_END;
}
endwhile {
assert(printf("'endwhile' found"));
return TOK_ENDWHILE;
}
continue {
assert(printf("'continue' found"));
return TOK_CONTINUE;
}
break {
assert(printf("'break' found"));
return TOK_BREAK;
}
";" {
assert(printf("';' found"));
return TOK_SEMI_COLON;
}
":=" {
assert(printf("':=' found"));
return TOK_AFFECTATION;
}
"+" {
assert(printf("'+' found"));
return TOK_ADD;
}
"-" {
assert(printf("'-' found"));
return TOK_SUB;
}
"*" {
assert(printf("'*' found"));
return TOK_MUL;
}
"/" {
assert(printf("'/' found"));
return TOK_DIV;
}
"(" {
assert(printf("'(' found"));
return TOK_OPEN_PAR;
}
")" {
assert(printf("')' found"));
return TOK_CLOSE_PAR;
}
true {
assert(printf("'true' found"));
return TOK_TRUE;
}
false {
assert(printf("'false' found"));
return TOK_FALSE;
}
">=" {
assert(printf("'>=' found"));
return TOK_SUP_EQ;
}
"<=" {
assert(printf("'<=' found"));
return TOK_INF_EQ;
}
">" {
assert(printf("'>' found"));
return TOK_SUP;
}
"<" {
assert(printf("'<' found"));
return TOK_INF;
}
"=" {
assert(printf("'=' found"));
return TOK_EQ;
}
"#" {
assert(printf("'#' found"));
return TOK_DIFF;
}
not {
assert(printf("'not' found"));
return TOK_NOT;
}
and {
assert(printf("'and' found"));
return TOK_AND;
}
or {
assert(printf("'or' found"));
return TOK_OR;
}
[0-9]+ {
assert(printf("number '%s(%d)' found", yytext, yyleng));
sscanf(yytext, "%lu", &yylval.number);
return TOK_NUMBER;
}
[a-zA-Z][a-zA-Z0-9_]* {
assert(printf("indentifier '%s(%d)' found", yytext, yyleng));
yylval.string = strdup(yytext);
return TOK_IDENTIFIER;
}
[ \t\r\n]*;
. {
return 0;
}
%%
/*
* file: facile.lex
* version: 0.8.0
*/
Bison file facile.y :
%{
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <glib.h>
extern int yylex(void);
extern int yyerror(const char *msg);
extern int yylineno;
extern FILE *stdin;
GHashTable *table;
void begin_code();
void produce_code(GNode *node);
void end_code();
%}
%union {
gulong number;
gchar *string;
GNode *node;
}
%define parse.error verbose
%token<number> TOK_NUMBER "number"
%token<string> TOK_IDENTIFIER "identifier"
%token TOK_IF "if"
%token TOK_THEN "then"
%token TOK_SEMI_COLON ";"
%token TOK_AFFECTATION ":="
%left TOK_ADD "+"
%left TOK_SUB "-"
%left TOK_MUL "*"
%left TOK_DIV "/"
%left TOK_SUP_EQ ">="
%left TOK_INF_EQ "<="
%left TOK_SUP ">"
%left TOK_INF "<"
%left TOK_EQ "="
%left TOK_DIFF "#"
%left TOK_NOT "not"
%left TOK_AND "and"
%left TOK_OR "or"
%token TOK_ELSIF "elsif"
%token TOK_ELSE "else"
%token TOK_READ "read"
%token TOK_PRINT "print"
%token TOK_WHILE "while"
%token TOK_DO "do"
%token TOK_END "end"
%token TOK_ENDWHILE "endwhile"
%token TOK_CONTINUE "continue"
%token TOK_BREAK "break"
%token TOK_OPEN_PAR "("
%token TOK_CLOSE_PAR ")"
%token TOK_TRUE "true"
%token TOK_FALSE "false"
%type<node> code
%type<node> expression
%type<node> instruction
%type<node> identifier
%type<node> print
%type<node> read
%type<node> affectation
%type<node> number
%type<node> boolean
%type<node> elsif
%type<node> else
%type<node> if
%type<node> code_while
%type<node> while
%type<node> program
%%
program:
code {
begin_code();
produce_code($1);
end_code();
g_node_destroy($1);
};
code:
code instruction{
$$ = g_node_new("code");
g_node_append($$, $1);
g_node_append($$, $2);
}
|
{
$$ = g_node_new("");
} ;
instruction:
read |
print |
affectation |
if |
while ;
read:
TOK_READ identifier TOK_SEMI_COLON
{
$$ = g_node_new("read");
g_node_append($$, $2);
};
print:
TOK_PRINT expression TOK_SEMI_COLON
{
$$ = g_node_new("print");
g_node_append($$, $2);
};
affectation:
identifier TOK_AFFECTATION expression TOK_SEMI_COLON
{
$$ = g_node_new("affectation");
g_node_append($$, $1);
g_node_append($$, $3);
};
boolean:
TOK_TRUE |
TOK_FALSE |
expression TOK_SUP_EQ expression |
expression TOK_INF_EQ expression |
expression TOK_SUP expression |
expression TOK_INF expression |
expression TOK_EQ expression |
expression TOK_DIFF expression |
TOK_NOT boolean |
boolean TOK_AND boolean |
boolean TOK_OR boolean |
TOK_OPEN_PAR boolean TOK_CLOSE_PAR;
elsif: TOK_ELSIF boolean TOK_THEN code |
TOK_ELSIF boolean TOK_THEN code elsif;
else:
TOK_ELSE code;
if:
TOK_IF boolean TOK_THEN code TOK_END |
TOK_IF boolean TOK_THEN code else TOK_END |
TOK_IF boolean TOK_THEN code elsif TOK_END |
TOK_IF boolean TOK_THEN code elsif else TOK_END;
code_while:
instruction |
TOK_CONTINUE |
TOK_BREAK;
while: TOK_WHILE boolean TOK_DO code_while TOK_ENDWHILE |
TOK_WHILE boolean TOK_DO code_while TOK_END;
expression :
identifier |
number |
expression TOK_ADD expression
{
$$ = g_node_new("add");
g_node_append($$, $1);
g_node_append($$, $3);
}
|
expression TOK_SUB expression
{
$$ = g_node_new("sub");
g_node_append($$, $1);
g_node_append($$, $3);
}
|
expression TOK_MUL expression
{
$$ = g_node_new("mul");
g_node_append($$, $1);
g_node_append($$, $3);
}
|
expression TOK_DIV expression
{
$$ = g_node_new("div");
g_node_append($$, $1);
g_node_append($$, $3);
}
|
TOK_OPEN_PAR expression TOK_CLOSE_PAR{
$$ = $2;
};
identifier:
TOK_IDENTIFIER
{
$$ = g_node_new("identifier");
gulong value = (gulong) g_hash_table_lookup(table, $1);
if (!value) {
value = g_hash_table_size(table) + 1;
g_hash_table_insert(table, strdup($1), (gpointer) value);
}
g_node_append_data($$, (gpointer)value);
};
number:
TOK_NUMBER
{
$$ = g_node_new("number");
g_node_append_data($$, (gpointer)$1);
};
%%
/*
* file: facile.y
* version: 0.8.0
*/
int yyerror(const char *msg) {
fprintf(stderr, "Line %d: %s\n", yylineno, msg);
}
void begin_code()
{
FILE *stream;
char *module_name;
int max_stack;
fprintf(stream,
".assembly %s {}\n"
".method public static void Main() cil managed\n"
"{\n"
" .entrypoint\n"
" .maxstack %u\n"
" .locals init (",
module_name,
max_stack
);
guint size = g_hash_table_size(table);
guint i;
for (i = 0; i < size; i++) {
if (i) {
fprintf(stream, ", ");
}
fprintf(stream, "int32");
}
fprintf(stream, ")\n");
}
int main(int argc, char *argv[]) {
if(argc == 2){
char *file_name_input = argv[1];
char *extension;
char *directory_limiter;
char *basename;
FILE *stream;
char *module_name;
extension = rindex(file_name_input, '.');
if(!extension || strcmp(extension, ".facile") != 0) {
fprintf(stderr, "Input filename extension must be '.facile'\n");
return EXIT_FAILURE;
}
directory_limiter = rindex(file_name_input, '/');
if (!directory_limiter) {
directory_limiter = rindex(file_name_input, '\\');
}
if (directory_limiter) {
basename = strdup(directory_limiter + 1);
} else {
basename = strdup(file_name_input);
}
module_name = strdup(basename);
*rindex(module_name, '.') = '\0';
strcpy(rindex(basename, '.'), ".il");
char *onechar = module_name;
if (!isalpha(*onechar) && *onechar != '_'){
free(basename);
fprintf(stderr, "Base input filename must start with a letter or an underscore");
return EXIT_FAILURE;
}
onechar++;
while (*onechar) {
if (!isalnum(*onechar) && *onechar != '_') {
free(basename);
fprintf(stderr, "Base input filename cannot contain special characters");
return EXIT_FAILURE;
}
onechar++;
}
if (stdin = fopen(file_name_input, "r")) {
if(stream = fopen(basename, "w")){
table = g_hash_table_new_full(g_str_hash, g_str_equal, free, NULL);
yyparse();
g_hash_table_destroy(table);
fclose(stream);
fclose(stdin);
} else {
free(basename);
fclose(stdin);
fprintf(stderr, "Output filename cannot be opened\n");
return EXIT_FAILURE;
}
} else {
free(basename);
fprintf(stderr, "Input filename cannot be opened\n");
return EXIT_FAILURE;
}
free(basename);
} else {
fprintf(stderr, "No input filename given\n");
return EXIT_FAILURE;
}
return EXIT_SUCCESS;
}
void produce_code(GNode *node) {
FILE *stream;
char *module_name;
if (node->data == "code"){
produce_code(g_node_nth_child(node, 0));
produce_code(g_node_nth_child(node, 1));
} else if (node->data == "affectation") {
produce_code(g_node_nth_child(node, 1));
fprintf(stream, "stloc\t%ld\n", (long)g_node_nth_child(g_node_nth_child(node, 0), 0)->data - 1);
} else if (node->data == "add") {
produce_code(g_node_nth_child(node, 0));
produce_code(g_node_nth_child(node, 1));
fprintf(stream, "add\n");
} else if (node->data == "sub"){
produce_code(g_node_nth_child(node, 0));
produce_code(g_node_nth_child(node, 1));
fprintf(stream, "sub\n");
} else if (node->data == "mul"){
produce_code(g_node_nth_child(node, 0));
produce_code(g_node_nth_child(node, 1));
fprintf(stream, "mul\n");
} else if (node->data == "div"){
produce_code(g_node_nth_child(node, 0));
produce_code(g_node_nth_child(node, 1));
fprintf(stream, "div\n");
} else if (node->data == "number") {
fprintf(stream, "ldc.i4\t%ld\n", (long)g_node_nth_child(node, 0)->data);
} else if (node->data == "identifier"){
fprintf(stream, "ldloc\t%ld\n", (long)g_node_nth_child(node, 0)->data - 1);
} else if (node->data == "print"){
produce_code(g_node_nth_child(node, 0));
fprintf(stream, "call void class [mscorlib]System.Console::WriteLine(int32)\n");
} else if (node->data == "read") {
fprintf(stream, "call string class [mscorlib]System.Console::ReadLine()\n");
fprintf(stream, "call int32 class [mscorlib]System.Int32::Parse(string)\n");
fprintf(stream, "stloc\t%ld\n", (long)g_node_nth_child(g_node_nth_child(node, 0), 0)->data - 1);
}
}
void end_code()
{
FILE *stream;
fprintf(stream, " ret\n}\n");
}
I don't know what to do, I tried simplifying the language just to test but I couldn't manage to make it work.
Your help would be very appreciated!
Thanks!