0

I'm new to flex and bison. I want to write a compiler that reads a C program and translates it into commands for my processor, which are similar to assembly. I downloaded a pre-written compiler that uses flex and bison. I need to change scanner.l and parser.y so that it can process asm commands embedded in my C code, like asm [asm command1 \n asm command2 \n asm command3 \n ...]. Which definitions and rules should I add to these two files?

scanner.l:

%{
#include "scanner.h"
#include "y.tab.h"
#include <stdio.h>
#include <stdlib.h>
/* Size, in bytes, of the buffer used to assemble a string literal. */
#define MAX_STR_CONST 1000
/* Holds the characters of the string literal currently being scanned.
 * NOTE(review): the <str> rules append through string_buf_ptr without
 * comparing against MAX_STR_CONST, so an over-long literal overruns it. */
char string_buf[MAX_STR_CONST];
/* Write cursor into string_buf; reset to its start when an opening quote is matched. */
char *string_buf_ptr;
/* Current line and column of the scanner, both 1-based; advanced by updatePosition(). */
int line_num = 1;
int line_pos = 1;

void updatePosition();
/* Hook that makes the scanner call updatePosition() for each matched rule. */
#define YY_USER_ACTION updatePosition();

%}

NUMBER  (0)|([1-9][0-9]*)
HEXNUM  ((0x)|(0X))([a-fA-F0-9]+)
IDENT   [a-zA-Z_][a-zA-Z0-9_]*

%x comment
%x str


%option noyywrap
%option yylineno
%option nounput

%%

\"      string_buf_ptr = string_buf; BEGIN(str);
<str>{
\"      { /* saw closing quote - all done */
            BEGIN(INITIAL);
            *string_buf_ptr = '\0';
            /* return string constant token type and
            * value to parser
            */
            yylval.strConst = new std::string(string_buf);
            return T_STR_CONST;
        }

\n      {
            /* error - unterminated string constant */
            /* generate error message */
            yyerror("Unterminated string constant.");
        }

<<EOF>> { return T_UNTERM_STRING; }

\\[0-7]{1,3} {
        /* Octal escape sequence: a backslash followed by one to three octal digits. */
        unsigned int result = 0;

        /* yytext + 1 skips the backslash; %o stores into an unsigned int. */
        (void) sscanf( yytext + 1, "%o", &result );

        if ( result > 0xff ) {
                /* Three octal digits reach 0777, which does not fit in one byte. */
                yyerror("Bad string escape sequence.");
        } else {
                /* In range: append the decoded byte to the string being built. */
                *string_buf_ptr++ = static_cast<char>(result);
        }
        }

\\[0-9]+ {
        /* generate error - bad escape sequence; something
        * like '\48' or '\0777777'
        */
        yyerror("Bad string escape sequence.");
        }

\\n         *string_buf_ptr++ = '\n';
\\t         *string_buf_ptr++ = '\t';
\\r         *string_buf_ptr++ = '\r';
\\b         *string_buf_ptr++ = '\b';
\\f         *string_buf_ptr++ = '\f';

\\(.|\n)    *string_buf_ptr++ = yytext[1];

[^\\\n\"]+  {
            char *yptr = yytext;
            
            while ( *yptr )
                    *string_buf_ptr++ = *yptr++;
            }
}

"/*"            BEGIN(comment);
<comment>{
[^*\n]*        /* eat anything that's not a '*' */
"*"+[^*/\n]*   /* eat up '*'s not followed by '/'s */
\n             
<<EOF>>         { return T_UNTERM_COMMENT; }
"*"+"/"        BEGIN(INITIAL);
}




"do"            { return T_DO; }
"while"         { return T_WHILE; }
"for"           { return T_FOR; }
"if"            { return T_IF; }
"else"          { return T_ELSE; }
"int"           { return T_INT_TYPE; }
"string"        { return T_STRING_TYPE; }
"void"          { return T_VOID_TYPE; }
"struct"        { return T_STRUCT; }
"return"        { return T_RETURN; }
"switch"        { return T_SWITCH; }
"case"          { return T_CASE; }
"default"       { return T_DEFAULT; }
"break"         { return T_BREAK; }
"continue"      { return T_CONTINUE; }
"sizeof"        { return T_SIZEOF; }

"{"             { return '{'; }
"}"             { return '}'; }
"("             { return '('; }
")"             { return ')'; }
"["             { return '['; }
"]"             { return ']'; }
"+"             { return '+'; }
"-"             { return '-'; }
"*"             { return '*'; }
"/"             { return '/'; }
"%"             { return '%'; }
"="             { return '='; }
">"             { return '>'; }
"<"             { return '<'; }
"!"             { return '!'; }
"|"             { return '|'; }
"&"             { return '&'; }
"^"             { return '^'; }
"~"             { return '~'; }
"."             { return '.'; }
":"             { return ':'; }
";"             { return ';'; }
","             { return ','; }

"<<"            { return T_LEFT_SHIFT; }
">>"            { return T_RIGHT_SHIFT; }
"&&"            { return T_BOOL_AND; }
"||"            { return T_BOOL_OR; }
"+="            { return T_PLUS_EQUALS; }
"-="            { return T_MINUS_EQUALS; }
"*="            { return T_STAR_EQUALS; }
"/="            { return T_DIV_EQUALS; }
"%="            { return T_MOD_EQUALS; }
"=="            { return T_EQUAL; }
"<="            { return T_LESS_OR_EQUAL; }
">="            { return T_GREATER_OR_EQUAL; }
"!="            { return T_NOT_EQUAL; }
"|="            { return T_BIT_OR_EQUALS; }
"&="            { return T_BIT_AND_EQUALS; }
"^="            { return T_BIT_XOR_EQUALS; }
"~="            { return T_BIT_NOT_EQUALS; }
"->"            { return T_ARROW; }
"<<="           { return T_LEFT_SHIFT_EQUALS; }
">>="           { return T_RIGHT_SHIFT_EQUALS; }
"++"            { return T_PLUS_PLUS; }
"--"            { return T_MINUS_MINUS; }

" "|"\t"|"\r"|"\n"|"const"  {}
{HEXNUM}        { yylval.intConst = std::strtoul(yytext, NULL, 0); return T_INT_CONST; }
{NUMBER}        { yylval.intConst = atoi(yytext); return T_INT_CONST; }
{IDENT}         { yylval.ident = new std::string(yytext); return T_IDENT; }
.               {{ char err[] = "Unknown Character: a"; err[strlen(err)-1] = *yytext; yyerror(err); }}

%%

/**
 * This function is called on every token, and updates the yylloc global variable, which stores the
 * location/position of the current token.
 */
void updatePosition() {
    yylloc.first_line = line_num;
    yylloc.first_column = line_pos;
    char* text = yytext;
    while(*text != '\0') {
        if(*text == '\n') {
            line_num++;
            line_pos = 1;
        } else {
            line_pos++;
        }
        text++;
    }
    yylloc.last_line = line_num;
    yylloc.last_column = line_pos;
}

parser.y:

%code requires {

#include "Declaration.h"
#include "Expression.h"
#include "Statement.h"
#include "Type.h"
#include "Parser.h"
#include "Util.h"

extern Program* program_out;

}

%locations
%define parse.lac full
%error-verbose

%{
#include "Parser.h"
#include "scanner.h"
#include <string>
#include <iostream>
#include "Type.h"

%}

//%parse-param {Program*& out}

%union {
    char* cstr;
    std::string* ident;
    std::string* strConst;
    unsigned int intConst;
    Type* type;
    std::vector<Declaration*>* declareList;
    Declaration* declare;
    ConstantExpression* constant;
    std::vector<FunctionParameter*>* paramList;
    FunctionParameter* param;
    std::vector<StructMember*>* structMemberList;
    StructMember* structMember;
    StatementBlock* statementBlock;
    Statement* statement;
    std::vector<Statement*>* statementList;
    Expression* expression;
    std::vector<Expression*>* expressionList;
}

%type <type> type
%type <cstr> root
%type <declareList> root_declare_list
%type <declare> root_declare
%type <constant> constant
%type <paramList> param_list non_empty_param_list
%type <param> param
%type <structMemberList> struct_list
%type <structMember> struct_member;
%type <statementBlock> statement_block
%type <statementList> statement_list
%type <statement> statement
%type <expression> expression
%type <expressionList> argument_list non_empty_argument_list

%token <ident> T_IDENT
%token <strConst> T_STR_CONST
%token <intConst> T_INT_CONST
%token T_IF T_ELSE T_FOR T_WHILE T_DO T_SIZEOF 
%token T_INT_TYPE T_STRING_TYPE T_VOID_TYPE T_STRUCT
%token T_RETURN T_SWITCH T_CASE T_DEFAULT T_BREAK T_CONTINUE
%token T_BOOL_OR T_BOOL_AND
%token T_LEFT_SHIFT T_RIGHT_SHIFT T_PLUS_EQUALS T_MINUS_EQUALS 
%token T_STAR_EQUALS T_DIV_EQUALS T_MOD_EQUALS T_EQUAL 
%token T_LESS_OR_EQUAL T_GREATER_OR_EQUAL T_NOT_EQUAL
%token T_BIT_OR_EQUALS T_BIT_AND_EQUALS T_BIT_XOR_EQUALS
%token T_BIT_NOT_EQUALS T_ARROW T_LEFT_SHIFT_EQUALS
%token T_RIGHT_SHIFT_EQUALS T_PLUS_PLUS T_MINUS_MINUS
%token T_UNTERM_STRING T_UNTERM_COMMENT

/* tokens for precedence */
%token PREC_ADDRESS PREC_DEREFERENCE PREC_UNARY_MINUS PREC_UNARY_PLUS
%token PREC_SUFFIX_PLUS_PLUS PREC_SUFFIX_MINUS_MINUS
%token PREC_PREFIX_PLUS_PLUS PREC_PREFIX_MINUS_MINUS
%token PREC_APPLICATION

/* lowest precedence */
%left ','
%right T_BIT_AND_EQUALS T_BIT_XOR_EQUALS T_BIT_OR_EQUALS
%right T_LEFT_SHIFT_EQUALS T_RIGHT_SHIFT_EQUALS
%right T_STAR_EQUALS T_DIV_EQUALS T_MOD_EQUALS
%right T_PLUS_EQUALS T_MINUS_EQUALS
%right '='
%left T_BOOL_OR
%left T_BOOL_AND
%left '|'
%left '^'
%left '&'
%left T_EQUAL T_NOT_EQUAL
/* C gives <, >, <= and >= one shared, left-associative precedence level. */
%left '<' '>' T_LESS_OR_EQUAL T_GREATER_OR_EQUAL
%left T_LEFT_SHIFT T_RIGHT_SHIFT
%left '+' '-'
%left '*' '/' '%'
%right PREC_ADDRESS
%right PREC_DEREFERENCE
%right '!' '~'
%right PREC_UNARY_PLUS PREC_UNARY_MINUS
%right PREC_PREFIX_PLUS_PLUS PREC_PREFIX_MINUS_MINUS
%right T_PLUS_PLUS T_MINUS_MINUS
%left T_ARROW
%left '.'
%left '['
%left PREC_APPLICATION
%left PREC_SUFFIX_PLUS_PLUS PREC_SUFFIX_MINUS_MINUS
%nonassoc T_IF
%nonassoc T_ELSE
/* highest precedence */



%%

root:
        root_declare_list                                       { $$ = NULL; program_out = new Program(@$, *$1); delete $1; }
    ;

root_declare_list:
        root_declare_list root_declare                          { $$ = $1; $1->push_back($2); }
    |                                                           { $$ = new std::vector<Declaration*>(); }
    ;

root_declare:
        type T_IDENT '(' param_list ')' ';'                     { $$ = new FunctionPrototype(@$, $1, *$2, *$4); delete $2; delete $4; }
    |   type T_IDENT '(' param_list ')' statement_block         { $$ = new FunctionDeclaration(@$, $1, *$2, *$4, $6); delete $2; delete $4; }
    |   type T_IDENT ';'                                        { $$ = new GlobalVarDeclaration(@$, $1, *$2); delete $2; }
    |   type T_IDENT '[' T_INT_CONST ']' ';'                    { $$ = new GlobalArrayDeclaration(@$, $1, *$2, $4); delete $2; }
    |   type T_IDENT '=' constant ';'                           { $$ = new GlobalVarDeclarationInit(@$, $1, *$2, $4); delete $2; }
    |   T_STRUCT T_IDENT '{' struct_list '}' ';'                { $$ = new StructDeclaration(@$, *$2, *$4); delete $2; delete $4; }
    |   T_STRUCT T_IDENT ';'                                    { $$ = new StructPredeclaration(@$, *$2); delete $2; }
    ;

constant:
        T_INT_CONST                                             { $$ = new IntConstantExpression(@$, $1); }
    |   T_STR_CONST                                             { $$ = new StringConstantExpression(@$, *$1); delete $1; }
    ;

param_list:
        non_empty_param_list                                    { $$ = $1; }
    |                                                           { $$ = new std::vector<FunctionParameter*>(); }
    ;

non_empty_param_list:
        non_empty_param_list ',' param                          { $$ = $1; $1->push_back($3); }
    |   param                                                   { $$ = new std::vector<FunctionParameter*>({$1}); }
    ;

param:
        type T_IDENT                                            { $$ = new FunctionParameter(@$, $1, *$2); delete $2; }
    ;

struct_list:
        struct_list struct_member ';'                           { $$ = $1; $1->push_back($2); }
    |                                                           { $$ = new std::vector<StructMember*>(); }
    ;

struct_member:
        type T_IDENT                                            { $$ = new StructMember(@$, $1, *$2); delete $2; }
    ;

type:
        type '*'                                                { $$ = new PointerType($1); }
    |   T_STRUCT T_IDENT                                        { $$ = new StructType(*$2); delete $2; }
    |   T_INT_TYPE                                              { $$ = new IntType(); }
    |   T_VOID_TYPE                                             { $$ = new VoidType(); }
    |   T_STRING_TYPE                                           { $$ = new StringType(); }
    ;

statement_block:
        '{' statement_list '}'                                  { $$ = new StatementBlock(@$, *$2); delete $2; }
    ;

statement_list:
        statement_list statement                                { $$ = $1; $1->push_back($2); }
    |                                                           { $$ = new std::vector<Statement*>(); }
    ;

statement:
        expression ';'                                          { $$ = $1; }
    |   type T_IDENT ';'                                        { $$ = new VarDeclaration(@$, $1, *$2); delete $2; }
    |   type T_IDENT '=' expression ';'                         { $$ = new VarDeclarationInit(@$, $1, *$2, $4); delete $2; }
    |   type T_IDENT '[' T_INT_CONST ']' ';'                    { $$ = new ArrayDeclaration(@$, $1, *$2, $4); delete $2; }
    |   T_WHILE '(' expression ')' statement                    { $$ = new WhileStatement(@$, $3, $5); }
    |   T_DO statement T_WHILE '(' expression ')' ';'           { $$ = new DoWhileStatement(@$, $2, $5); }
    |   T_FOR '(' expression ';' expression ';' expression ')' statement { $$ = new ForStatement(@$, $3, $5, $7, $9); }
    |   statement_block                                         { $$ = $1; }
    |   T_IF '(' expression ')' statement %prec T_IF            { $$ = new IfStatement(@$, $3, $5); }
    |   T_IF '(' expression ')' statement T_ELSE statement      { $$ = new IfElseStatement(@$, $3, $5, $7); }
    |   T_BREAK ';'                                             { $$ = new BreakStatement(@$); }
    |   T_CONTINUE ';'                                          { $$ = new ContinueStatement(@$); }
    |   T_SWITCH '(' expression ')' '{' statement_list '}'      { $$ = new SwitchStatement(@$, $3, *$6); delete $6; }
    |   T_CASE T_INT_CONST ':'                                  { $$ = new CaseStatement(@$, $2); }
    |   T_DEFAULT ':'                                           { $$ = new DefaultStatement(@$); }
    |   T_RETURN expression ';'                                 { $$ = new ReturnStatement(@$, $2); }
    ;

expression:
        expression '=' expression                               { $$ = new AssignExpression(@$, $1, $3); }
    |   expression T_PLUS_EQUALS expression                     { $$ = new AssignExpression(@$, $1, new BinaryOperatorExpression(@3, $1->clone(), "+", $3)); }
    |   expression T_MINUS_EQUALS expression                    { $$ = new AssignExpression(@$, $1, new BinaryOperatorExpression(@3, $1->clone(), "-", $3)); }
    |   expression T_STAR_EQUALS expression                     { $$ = new AssignExpression(@$, $1, new BinaryOperatorExpression(@3, $1->clone(), "*", $3)); }
    |   expression T_DIV_EQUALS expression                      { $$ = new AssignExpression(@$, $1, new BinaryOperatorExpression(@3, $1->clone(), "/", $3)); }
    |   expression T_MOD_EQUALS expression                      { $$ = new AssignExpression(@$, $1, new BinaryOperatorExpression(@3, $1->clone(), "%", $3)); }
    |   expression T_BIT_AND_EQUALS expression                  { $$ = new AssignExpression(@$, $1, new BinaryOperatorExpression(@3, $1->clone(), "&", $3)); }
    |   expression T_BIT_OR_EQUALS expression                   { $$ = new AssignExpression(@$, $1, new BinaryOperatorExpression(@3, $1->clone(), "|", $3)); }
    |   expression T_BIT_XOR_EQUALS expression                  { $$ = new AssignExpression(@$, $1, new BinaryOperatorExpression(@3, $1->clone(), "^", $3)); }
    |   expression T_LEFT_SHIFT_EQUALS expression               { $$ = new AssignExpression(@$, $1, new BinaryOperatorExpression(@3, $1->clone(), "<<", $3)); }
    |   expression T_RIGHT_SHIFT_EQUALS expression              { $$ = new AssignExpression(@$, $1, new BinaryOperatorExpression(@3, $1->clone(), ">>", $3)); }
    |   expression T_PLUS_PLUS %prec PREC_SUFFIX_PLUS_PLUS      { $$ = new UnaryAssignExpression(@$, $1, "++"); }
    |   T_PLUS_PLUS expression %prec PREC_PREFIX_PLUS_PLUS      { $$ = new UnaryAssignExpression(@$, "++", $2); }
    |   expression T_MINUS_MINUS %prec PREC_SUFFIX_MINUS_MINUS  { $$ = new UnaryAssignExpression(@$, $1, "--"); }
    |   T_MINUS_MINUS expression %prec PREC_PREFIX_MINUS_MINUS  { $$ = new UnaryAssignExpression(@$, "--", $2); }
    |   constant                                                { $$ = $1; }
    |   '(' expression ')'                                      { $$ = $2; }
    |   T_IDENT '(' argument_list ')' %prec PREC_APPLICATION    { $$ = new FunctionCallExpression(@$, *$1, *$3); delete $1; delete $3; }
    |   T_SIZEOF '(' type ')'                                   { $$ = new SizeofExpression(@$, $3); }
    |   '!' expression                                          { $$ = new UnaryOperatorExpression(@$, "!", $2); }
    |   '~' expression                                          { $$ = new UnaryOperatorExpression(@$, "~", $2); }
    |   '+' expression %prec PREC_UNARY_PLUS                    { $$ = new UnaryOperatorExpression(@$, "+", $2); }
    |   '-' expression %prec PREC_UNARY_MINUS                   { $$ = new UnaryOperatorExpression(@$, "-", $2); }
    |   '*' expression %prec PREC_DEREFERENCE                   { $$ = new ArraySubscriptExpression(@$, $2, new IntConstantExpression(@2, 0)); }
    |   '&' expression %prec PREC_ADDRESS                       { $$ = new UnaryOperatorExpression(@$, "&", $2); }
    |   expression '+' expression                               { $$ = new BinaryOperatorExpression(@$, $1, "+", $3); }
    |   expression '-' expression                               { $$ = new BinaryOperatorExpression(@$, $1, "-", $3); }
    |   expression '*' expression                               { $$ = new BinaryOperatorExpression(@$, $1, "*", $3); }
    |   expression '/' expression                               { $$ = new BinaryOperatorExpression(@$, $1, "/", $3); }
    |   expression '%' expression                               { $$ = new BinaryOperatorExpression(@$, $1, "%", $3); }
    |   expression '&' expression                               { $$ = new BinaryOperatorExpression(@$, $1, "&", $3); }
    |   expression '|' expression                               { $$ = new BinaryOperatorExpression(@$, $1, "|", $3); }
    |   expression '^' expression                               { $$ = new BinaryOperatorExpression(@$, $1, "^", $3); }
    |   expression T_BOOL_AND expression                        { $$ = new BinaryOperatorExpression(@$, $1, "&&", $3); }
    |   expression T_BOOL_OR expression                         { $$ = new BinaryOperatorExpression(@$, $1, "||", $3); }
    |   expression T_LEFT_SHIFT expression                      { $$ = new BinaryOperatorExpression(@$, $1, "<<", $3); }
    |   expression T_RIGHT_SHIFT expression                     { $$ = new BinaryOperatorExpression(@$, $1, ">>", $3); }
    |   expression T_EQUAL expression                           { $$ = new BinaryOperatorConditionExpression(@$, $1, "==", $3); }
    |   expression T_NOT_EQUAL expression                       { $$ = new BinaryOperatorConditionExpression(@$, $1, "!=", $3); }
    |   expression '<' expression                               { $$ = new BinaryOperatorConditionExpression(@$, $1, "<", $3); }
    |   expression '>' expression                               { $$ = new BinaryOperatorConditionExpression(@$, $1, ">", $3); }
    |   expression T_LESS_OR_EQUAL expression                   { $$ = new BinaryOperatorConditionExpression(@$, $1, "<=", $3); }
    |   expression T_GREATER_OR_EQUAL expression                { $$ = new BinaryOperatorConditionExpression(@$, $1, ">=", $3); }
    |   T_IDENT                                                 { $$ = new VarExpression(@$, *$1); delete $1; }
    |   expression '.' T_IDENT                                  { $$ = new StructMemberExpression(@$, $1, *$3); delete $3; }
    |   expression T_ARROW T_IDENT                              { $$ = new StructMemberExpression(@$, $1, *$3); delete $3; }
    |   expression '[' expression ']'                           { $$ = new ArraySubscriptExpression(@$, $1, $3); }
    ;

argument_list:
        non_empty_argument_list                                 { $$ = $1; }
    |                                                           { $$ = new std::vector<Expression*>(); }
    ;

non_empty_argument_list:
        non_empty_argument_list ',' expression                  { $$ = $1; $1->push_back($3); }
    |   expression                                              { $$ = new std::vector<Expression*>({$1}); }
    ;
zahrak
  • 1
  • 2
  • It cannot be said in general. Please provide the relevant code. – fuz Feb 05 '21 at 18:04
  • Thanks @fuz for your attention. I added the contents of scanner.l and parser.y to the question. – zahrak Feb 05 '21 at 18:34
  • 1
    You probably want to add a new token for the `asm` keyword as well as a new rule for the `statement` production for `asm` statements. – fuz Feb 05 '21 at 18:36
  • Yes exactly. As there is \n between assembly commands inside the asm block, how should I define these rules in scanner.l and parser.y? – zahrak Feb 05 '21 at 18:38
  • You have to be a bit more exact to what you want. I see one token `asm` but the `commandn` thing is a bit vague. What is a command? – Martin York Feb 05 '21 at 18:53
  • @zahrak I'm not sure. Can you give an example for your inline asm syntax? – fuz Feb 05 '21 at 19:20
  • I mean that I have a block of assembly commands; for example, inside my C program I have something like: asm[ INC count ADD A,B JMP label ] – zahrak Feb 06 '21 at 08:23
  • There is a \n between the INC, ADD and JMP commands, and I want the scanner and parser to read them line by line and pass them to another process. – zahrak Feb 06 '21 at 08:31

0 Answers0