0

So i'm using a flex/bison parser but the variable names arent printing correctly. It understands the number values. I've tried messing with everything but I'm lost. heres a link to the output. its where it prints "Data: 0" that i'm trying to get the variable name [https://imgur.com/vJDpgpR][1]

invocation is: ./frontEnd data.txt

//main.c
#define BUF_SIZE 1024
#include <stdio.h>
#include <stdlib.h>
#include <string.h>


extern FILE* yyin;
extern yyparse();

int main(int argc, char* argv[]){

    if(argc < 2){
        FILE* fp = fopen("temp.txt", "a");

        printf("Entering data: \n");


        void *content = malloc(BUF_SIZE);

        if (fp == 0)
            printf("error opening file");

        int read;
        while ((read = fread(content, BUF_SIZE, 1, stdin))){
            fwrite(content, read, 1, fp);
        }
        if (ferror(stdin))
            printf("There was an error reading from stdin");
        fclose(fp);

        yyparse(fp);
    }

    if(argc == 2){

        yyin = fopen(argv[2], "r");

        if(!yyin)
        {

            perror(argv[2]);
            printf("ERROR: file does not exist.\n");
            return 0;

        }

        yyparse (yyin);
    }
    return 0;
}
void yyerror(char *s){
    fprintf(stderr, "error: exiting %s \n", s);
}




//lex.l
%{

#include <stdio.h>
#include <stdlib.h>
#include "parser.tab.h"


extern SYMTABNODEPTR symtable[SYMBOLTABLESIZE];
extern int curSymSize;

%}
%option noyywrap
%option nounput yylineno

%%

"stop" return STOP;
"iter" return ITER;



"scanf"     return SCANF;
"printf"    return PRINTF;
"main"      return MAIN;
"if"        return IF;
"then"      return THEN;
"let"       return LET;

"func"      return FUNC;
"//"        return COMMENT; printf("\n");
"start"     return START;
"="         return ASSIGN;
"=<"        return LE;
"=>"        return GE;
":"         return COLON;
"+"         return PLUS;
"-"         return MINUS;
"*"         return MULT;
"/"         return DIV;
"%"         return MOD;
"."         return DOT;
"("         return RPAREN;
")"         return LPAREN;
","         return COMMA;
"{"         return RBRACE;
"}"         return LBRACE;
";"         return SEMICOLON;
"["         return LBRACK;
"]"         return RBRACK;
"=="        return EQUAL;


[A-Z][a-z]*             { printf("SYNTAX ERROR: Identifiers must start with lower case. "); }
[a-zA-Z][_a-zA-Z0-9]*   {
                            printf("string: %s \n", yytext);
                            yylval.iVal = strdup(yytext);
                            yylval.iVal = addSymbol(yytext);
                            return ID;
                        }
[0-9]+                  {
                            yylval.iVal = atoi(yytext);
                            printf("num: %s \n", yytext);
                            return NUMBER; }
[ _\t\r\s\n]            ;
^"#".+$                 return COMMENT;

.                       {printf("ERROR: Invalid Character "); yyterminate();}
<<EOF>>                 { printf("EOF: line %d\n", yylineno); yyterminate(); }
%%

// stores all variable id is in an array

SYMTABNODEPTR newSymTabNode()
{
    return ((SYMTABNODEPTR)malloc(sizeof(SYMTABNODE)));
}

int addSymbol(char *s)
{
    extern SYMTABNODEPTR symtable[SYMBOLTABLESIZE];
    extern int curSymSize;
    int i;
    i = lookup(s);
    if(i >= 0){
        return(i);
    }
    else if(curSymSize >= SYMBOLTABLESIZE)
    {
        return (NOTHING);
    }
    else{
        symtable[curSymSize] = newSymTabNode();
        strncpy(symtable[curSymSize]->id,s,IDLENGTH);
        symtable[curSymSize]->id[IDLENGTH-1] = '\0';
        return(curSymSize++);
    }
}
int lookup(char *s)
{
    extern SYMTABNODEPTR symtable[SYMBOLTABLESIZE];
    extern int curSymSize;
    int i;
    for(i=0;i<curSymSize;i++)
    {
        if(strncmp(s,symtable[i]->id,IDLENGTH) == 0){
            return (i);
        }
    }
    return(-1);
}

// parser.y

%{
#define YYERROR_VERBOSE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>


extern char *yytext;
extern int yylex();
extern void yyerror(char *);
extern int yyparse();
extern FILE *yyin;
/* ------------- some constants --------------------------------------------- */

#define SYMBOLTABLESIZE     50
#define IDLENGTH       15
#define NOTHING        -1
#define INDENTOFFSET    2

#ifdef DEBUG
char *NodeName[] =
{
    "PROGRAM", "BLOCK", "VARS", "EXPR", "N", "A", "R", "STATS", "MSTAT", "STAT",
    "IN", "OUT", "IF_STAT", "LOOP", "ASSIGN", "RO", "IDVAL", "NUMVAL"
};
#endif

enum ParseTreeNodeType
{
    PROGRAM, BLOCK, VARS, EXPR, N, A, R, STATS, MSTAT, STAT,
    IN, OUT,IF_STAT, LOOP, ASSIGN, RO, IDVAL, NUMVAL
};


#define TYPE_CHARACTER "char"
#define TYPE_INTEGER "int"
#define TYPE_REAL "double"

#ifndef TRUE
#define TRUE 1
#endif

#ifndef FALSE
#define FALSE 0
#endif

#ifndef NULL
#define NULL 0
#endif

// definitions for parse tree

struct treeNode {
    int item;
    int nodeID;
    struct treeNode *first;
    struct treeNode *second;
};

typedef struct treeNode TREE_NODE;
typedef TREE_NODE *TREE;

TREE makeNode(int, int, TREE, TREE);

#ifdef DEBUG
void printTree(TREE, int);
#endif

// symbol table definitions.
struct symbolTableNode{
    char id[IDLENGTH];
};
typedef struct symbolTableNode SYMTABNODE;
typedef SYMTABNODE  *SYMTABNODEPTR;
SYMTABNODEPTR symtable[SYMBOLTABLESIZE];
int curSymSize = 0;


%}

%start program

%union {
    char *sVal;
    int iVal;
    TREE tVal;
}


// list of all tokens

%token SEMICOLON GE LE EQUAL COLON RBRACK LBRACK ASSIGNS LPAREN RPAREN COMMENT
%token DOT MOD PLUS MINUS DIV MULT RBRACE LBRACE START MAIN STOP LET COMMA
%token SCANF PRINTF IF ITER THEN FUNC

%left MULT DIV MOD ADD SUB

// tokens defined with values and rule names
%token<iVal> NUMBER ID
//%token<sVal> ID
%type<tVal> program type block vars expr N A R stats mStat stat in out if_stat loop assign RO


%%
program   :     START  vars  MAIN  block  STOP
                {
                    TREE tree;
                    tree = makeNode(NOTHING, PROGRAM, $2,$4);
                    #ifdef DEBUG
                    printTree(tree, 0);
                    #endif
                }
;

block   :       RBRACE vars stats LBRACE
                {
                    $$ = makeNode(NOTHING, BLOCK, $2, $3);
                }
 ;
vars    :       /*empty*/
                {
                $$ = makeNode(NOTHING, VARS,NULL,NULL);
                }
                | LET ID COLON NUMBER vars
                {
                    $$ = makeNode($2, VARS, $5,NULL);
                    printf("id: %d", $2);
                }
 ;
//variable:
//                type  ID{$$ = newNode($2,VARIABLE,$1,NULL,NULL);};
//type:
//                INT {$$ = newNode(INT,TYPE,NULL,NULL,NULL);}
//                | BOOL {$$ = newNode(BOOL,TYPE,NULL,NULL,NULL);}
//                | CHAR {$$ = newNode(CHAR,TYPE,NULL,NULL,NULL);}
//                | STRING{$$ = newNode(STRING,TYPE,NULL,NULL,NULL);};
expr         :       N  DIV  expr
                {
                $$ = makeNode(DIV, EXPR, $1, $3);
                }
                |  N  MULT  expr
                {
                $$ = makeNode(MULT, EXPR, $1, $3);
                }
                |  N
                {
                $$ = makeNode(NOTHING, EXPR, $1,NULL);
                }
;
N              :        A  PLUS  N
                {
                $$ = makeNode(PLUS, N, $1, $3);
                }
                |  A MINUS  N
                {
                $$ = makeNode(MINUS, N, $1, $3);
                }
                |  A
                {
                $$ = makeNode(NOTHING, N, $1,NULL);
                        }
 ;
A               :     MOD  A
                {
                        $$ = makeNode(NOTHING, A, $2,NULL);
                }
                |   R
                {
                $$ = makeNode(NOTHING, A, $1,NULL);
                }
;
R               :      LBRACK  expr RBRACK
                {
                $$ = makeNode(NOTHING, R, $2,NULL);
                }
                | ID
                {
                $$ = makeNode($1, IDVAL, NULL,NULL);
                }
                | NUMBER
                {
                $$ = makeNode($1, NUMVAL, NULL,NULL);
                }
 ;
stats          :       stat    mStat
                {
                        $$ = makeNode(NOTHING, STATS, $1, $2);
                }
 ;
mStat           :  /* empty */
                {
                $$ = makeNode(NOTHING, MSTAT, NULL,NULL);
                }
                |   stat    mStat
                {
                        $$ = makeNode(NOTHING, MSTAT, $1, $2);
                }
 ;
stat:       in  DOT
                {
                        $$ = makeNode(NOTHING, STAT, $1,NULL);
                }
                |  out  DOT
                {
                        $$ = makeNode(NOTHING, STAT, $1,NULL);
                }
                |  block
                {
                $$ = makeNode(NOTHING, STAT, $1,NULL);
                }
                |  if_stat  DOT
                {
                        $$ = makeNode(NOTHING, STAT, $1,NULL);
                }
                |  loop  DOT
                {
                        $$ = makeNode(NOTHING, STAT, $1,NULL);
                }
                |  assign  DOT
                {
                        $$ = makeNode(NOTHING, STAT, $1,NULL);
                }
;
in               :      SCANF LBRACK ID RBRACK
                {
                        $$ = makeNode($3, IN,NULL,NULL);
                }
;
out             :        PRINTF LBRACK  expr  RBRACK
                {
                        $$ = makeNode(NOTHING, OUT,$3,NULL);
                }
;
if_stat         :      IF LBRACK  expr   RO   expr  RBRACK THEN  block
                {
                        $$ = makeNode(NOTHING, IF_STAT, $4, $8);
                }
;
loop           :      ITER LBRACK  expr   RO   expr  RBRACK   block
                {
                $$ = makeNode(NOTHING, LOOP, $4, $7);
                }
;
assign          :      ID  ASSIGNS  expr
                {
                $$ = makeNode($1, ASSIGN, $3,NULL);
                }
;
RO              :      LE
                {
                $$ = makeNode(LE, RO, NULL,NULL);
                }
                | GE
                {
                $$ = makeNode(GE, RO, NULL,NULL);
                }
                |  EQUAL
                {
                $$ = makeNode(EQUAL, RO, NULL,NULL);
                }
                |   COLON COLON
                {
                        $$ = makeNode(EQUAL, RO, NULL,NULL);
                }
 ;

 %%

// node generator
TREE makeNode(int iVal, int nodeID, TREE p1, TREE p2)
{
    TREE t;
    t = (TREE)malloc(sizeof(TREE_NODE));

    t->item = iVal;
    t->nodeID = nodeID;
    t->first = p1;
    t->second = p2;

    //printf("NODE CREATED");
    return(t);
}


// prints the tree with indentation for depth
void printTree(TREE tree, int depth){
    int i;
    if(tree == NULL) return;
    for(i=depth;i;i--)
        printf(" ");
    if(tree->nodeID == NUMBER)
        printf("INT: %d ",tree->item);
    else if(tree->nodeID == IDVAL){
        if(tree->item > 0 && tree->item < SYMBOLTABLESIZE )
            printf("id: %s ",symtable[tree->item]->id);
        else
            printf("unknown id: %d ", tree->item);
    }
    if(tree->item != NOTHING){

        printf("Data: %d ",tree->item);
    }
    // If out of range of the table
    if (tree->nodeID < 0 || tree->nodeID > sizeof(NodeName))
        printf("Unknown ID: %d\n",tree->nodeID);
    else
        printf("%s\n",NodeName[tree->nodeID]);
    printTree(tree->first,depth+2);
    printTree(tree->second,depth+2);

 }




#include "lex.yy.c"


// heres the makefile I use for compilation


frontEnd: lex.yy.c parser.tab.c
    gcc parser.tab.c main.c -o frontEnd -lfl -DDEBUG

parser.tab.c parser.tab.h: parser.y
    bison -d parser.y

lex.yy.c: lex.l
    flex lex.l

clean:
    rm lex.yy.c y.tab.c frontEnd


'''


// data.txt

start
let x : 13
main {
  scanf [ x ] .
  printf [ 34 ] .

} stop[enter image description here][2]


  [1]: https://i.stack.imgur.com/xlNnh.png
  [2]: https://i.stack.imgur.com/HKRtX.png
rici
  • 234,347
  • 28
  • 237
  • 341
  • heres a link to the github repository https://github.com/ShawnTheHuman-zz/new_p2.git – shawn brown Dec 02 '20 at 23:52
  • 1
    My guess is that using a debugger will provide an answer in less time than asking here: Why not inspect the data structures whether they have the values you are expecting? Or at least you could ask a more specific question. – U. Windl Dec 03 '20 at 00:06

1 Answers1

0

I think this has a lot more to do with your AST and symbol table functions than with your parser, and practically nothing to do with bison itself.

For example, your function to print trees won't attempt to print an identifier's name if its symbol table index is 0.

if(tree->item > 0 && tree->item < SYMBOLTABLESIZE)

But the first symbol entered in the table will have index 0. (Perhaps you fixed this between pasting your code and generating the results. You should always check that the code you paste in a question corresponds precisely to the output which you show. But this isn't the only bug in your code; it's just an example.)

As another example, the immediate problem which causes Data: 0 to be printed instead of the symbol name is that your tree printer only prints symbol names for AST nodes of type IDVAL, but you create an AST IN node whose data field contains the variable's symbol table index. So either you need to fix your tree printer so it knows about IN nodes, or you need to change the IN node so that it has a child which is the IDVAL node. (That's probably the best solution in the long run.)

It's always a temptation to blame bison (or whatever unfamiliar tool you're using at the moment) for bugs, instead of considering the possibility that you've introduced bugs in your own support code. To avoid falling into this trap, it's always a good idea to test your library functions separately before using them in a more complicated project. For example, you could write a small test driver that builds a fixed AST tree, prints it, and deletes it. Once that works, and only when that works, you can check to see if your parser can build and print the same tree by parsing an input.

You will find that some simple good software design practices will make this whole process much smoother:

  1. Organise your code into separate component files, each with its own header file. Document the library interfaces (and, if necessary, data structures) using comments in the header file. Briefly describe what each function's purpose is. If you can't find a brief description, it nay be that the function is trying to do too many different things.

    In your parser, the functions and declarations needed to build and use ASTs are scattered between different parts of your lexer and parser files. This makes them much harder to read, debug, maintain and even use.

    No matter what your teacher might tell you, if you find it necessary to #include the generated lexical scanner directly into the parser, then you probably have not found a good way to organise your support functions. You should always aim to make it possible to separately compile the parser and the scanner.

  2. For data structures like your AST node, which use different member variables in different ways depending on an enumerated node type -- which is a model you'll find in other C projects as well, but is particularly common in parsers -- document the precise use of each field for every enumeration value. And make sure that every time you change the way you use the data or add new enumeration values, you fix the documentation accordingly.

    This documentation will make it much easier to verify that your AST is being built correctly. As an additional benefit, you (or others using your code) will have an accurate description of how to interpret the contents of AST nodes, which makes it much easier to write code which analyses the tree.

In short, the way to write, debug and maintain any non-trivial project is not by "messing around" but by being systematic and modular. While it might seem like all of this takes precious time, particularly the documentation, it will almost always save you a lot of time in the long run.

rici
  • 234,347
  • 28
  • 237
  • 341
  • Holy shit that was literally the problem, the printer not reading the first index. Appreciate your response so much. Thank you. – shawn brown Dec 07 '20 at 23:41