6

I'm trying to build a lexical analyser with FLEX on Windows. I always get this error:

"undefined reference to `yylval'"

I declared yylval as extern at the top, where all the definitions are made, as follows:

  %option noyywrap
    %{
        /* C prologue: everything between %{ and %} is copied verbatim into
           the generated scanner source. */
        #include<stdio.h>
        #include<stdlib.h>
        #include "tokens.h"
        /* Line counter, starts at 1; bumped once per match of the {nl} rule. */
        int nline = 1;
        /* Element count passed to calloc() for token_store in main(). */
        int size_token_array = 100;
        /* Index used for the next store into token_store. */
        int number_of_tokens_in_array = 0;
        /* Step used when the token store is resized. */
        int inc_token_array = 50;
        /* Heap array of tokens filled by create_and_store_token(). */
        token *token_store ;
        /* Declaration only: no type is written (pre-C99 implicit int) and no
           storage is defined in this file, so another object file has to
           provide the definition of yylval at link time. */
        extern yylval;

    %}
    /* Named patterns. A name is expanded only when written as {name}. */
    delim [ \t]
    delim_nl [\n]
    ws {delim}+
    nl {delim_nl}+
    letter [a-z]
    digit [0-9]
    /* NOTE(review): inside the parentheses below, letter and digit are not
       wrapped in braces, so flex reads them as literal characters rather
       than as references to the definitions above. */
    id {letter}(letter.digit)*
    int_num (0|([+-]?([1-9]{digit}*)))
    real_num [+-]?{digit}+(\.{digit}+)
    rel_op ">"|"<"|"<="|">="|"=="|"!="
    binary_ar_op "+"|"-"|"*"|"/"|"="
    /* NOTE(review): same unbraced letter/digit issue as in id. */
    task_id {letter}(letter+digit)*
    signal_id {letter}(letter+digit)*

    %%
        /* Rules section. Each token rule records the token through
           create_and_store_token(kind, yytext, nline) and then returns the
           constant 1; the {ws} rule discards its match and the {nl} rule
           only increments nline. Rule order matters: for matches of equal
           length flex picks the rule listed first.
           NOTE(review): the patterns id, int_num, real_num and binary_ar_op
           near the end are written without braces, so flex matches them as
           literal text, not as the named definitions. "task_id" and
           "signal_id" are quoted and therefore literal strings as well.
           NOTE(review): rel_op and binary_ar_op assigned to yylval are flex
           definition names; no C identifier with those names is visible in
           this file -- confirm whether tokens.h provides them. */
    "parbegin" {create_and_store_token(TOKEN_PARBEGIN,yytext,nline); return 1;}
    "parend" {create_and_store_token(TOKEN_PAREND,yytext,nline); return 1;}
    "task" {create_and_store_token(TOKEN_TASK,yytext,nline); return 1;} 
    "{" {create_and_store_token('{',yytext,nline); return 1;} 
    "}" {create_and_store_token('}',yytext,nline); return 1;}
    "begin" {create_and_store_token(TOKEN_BEGIN,yytext,nline); return 1;}  
    "end" {create_and_store_token(TOKEN_END,yytext,nline); return 1;} 
    "integer" {create_and_store_token(TOKEN_INTEGER,yytext,nline); return 1;} 
    "real" {create_and_store_token(TOKEN_REAL,yytext,nline); return 1;}
    "||"  {create_and_store_token(TOKEN_PARALLEL,yytext,nline); return 1;}
    ";" {create_and_store_token(';',yytext,nline); return 1;} 
    "," {create_and_store_token(',',yytext,nline); return 1;} 
    "do" {create_and_store_token(TOKEN_DO,yytext,nline); return 1;} 
    "until" {create_and_store_token(TOKEN_UNTIL,yytext,nline); return 1;} 
    "od" {create_and_store_token(TOKEN_OD,yytext,nline); return 1;}  
    "send" {create_and_store_token(TOKEN_SEND,yytext,nline); return 1;} 
    "accept" {create_and_store_token(TOKEN_ACCEPT,yytext,nline); return 1;}  
    "(" {create_and_store_token('(',yytext,nline); return 1;} 
    ")" {create_and_store_token(')',yytext,nline); return 1;} 
    "<" {create_and_store_token(LT,yytext,nline); yylval=rel_op; return 1;} 
    ">" {create_and_store_token(GT,yytext,nline); yylval=rel_op; return 1;}  
    "<=" {create_and_store_token(LE,yytext,nline); yylval=rel_op; return 1;}  
    ">=" {create_and_store_token(GE,yytext,nline); yylval=rel_op; return 1;}  
    "==" {create_and_store_token(EQ,yytext,nline); yylval=rel_op; return 1;}  
    "!=" {create_and_store_token(NE,yytext,nline); yylval=rel_op; return 1;} 
    "*" {create_and_store_token('*',yytext,nline); yylval=binary_ar_op; return 1;}  
    "/" {create_and_store_token('/',yytext,nline); yylval=binary_ar_op; return 1;}  
    "+" {create_and_store_token('+',yytext,nline); yylval=binary_ar_op; return 1;}  
    "-" {create_and_store_token('-',yytext,nline); yylval=binary_ar_op; return 1;} 
    "=" {create_and_store_token('=',yytext,nline); yylval=binary_ar_op; return 1;} 
    {ws} ;
    {nl} nline++;
    id {create_and_store_token(TOKEN_ID,yytext,nline); return 1;} 
    int_num {create_and_store_token(TOKEN_INT_NUM,yytext,nline); return 1;}  
    real_num {create_and_store_token(TOKEN_REAL_NUM,yytext,nline); return 1;}  
    binary_ar_op {create_and_store_token(TOKEN_AR_OP,yytext,nline); return 1;}  
    "task_id" {create_and_store_token(TOKEN_TASK_ID,yytext,nline); return 1;}  
    "signal_id" {create_and_store_token(TOKEN_SIGNAL_ID,yytext,nline); return 1;}  

    %%
    int main()
    {
        token_store = (token*)calloc(size_token_array,sizeof(token));
        free(token_store);
        return 0;

    }

    /*
     * Appends one token to the global token_store.
     *
     *   token_type    value stored in the token's ivalue field
     *   token_lexeme  pointer stored in the token's lexema field; the text is
     *                 NOT copied
     *   line_number   value stored in the token's line_number field
     *
     * When the store is full it is enlarged by inc_token_array elements before
     * the token is written, and size_token_array is updated to the new
     * capacity. If the enlargement fails, a message is printed to stderr and
     * the program exits, because the token could not be recorded.
     *
     * NOTE(review): the scanner rules pass yytext here. flex only guarantees
     * that buffer until the next call to yylex(), so stored lexema pointers go
     * stale unless the caller hands in a persistent copy.
     */
    void create_and_store_token(int token_type,char* token_lexeme,int line_number){

        token new_token;
        new_token.ivalue = token_type;
        new_token.lexema = token_lexeme;
        new_token.line_number = line_number;

        /* Grow before writing: the next free slot is index
           number_of_tokens_in_array, which is valid only while it is
           below the current capacity. */
        if(number_of_tokens_in_array >= size_token_array){
          int grown_size = size_token_array + inc_token_array;
          /* Ask for the TOTAL new capacity, and keep the old pointer until
             realloc is known to have succeeded. */
          token *grown_store = realloc(token_store,(size_t)grown_size*sizeof(token));
          if(grown_store == NULL){
            fprintf(stderr,"create_and_store_token: out of memory growing token store\n");
            exit(EXIT_FAILURE);
          }
          token_store = grown_store;
          size_token_array = grown_size;
        }

        token_store[number_of_tokens_in_array]= new_token;
        number_of_tokens_in_array++;
    }

    /* Runs the scanner once by calling yylex() and hands its return value
       back to the caller unchanged. */
    int nextToken(){
       int scanned = yylex();
       return scanned;
    }

    /*
     * Removes the most recently stored token from token_store.
     *
     * Tokens are appended at index number_of_tokens_in_array and the counter
     * is incremented afterwards, so the last stored token lives at
     * number_of_tokens_in_array - 1. The counter is therefore decremented
     * first and that slot is then reset (ivalue 0, empty lexema, line 0).
     * Calling this with an empty store is a no-op.
     *
     * Only the token store is affected; no input is pushed back to the
     * scanner.
     */
    void backToken(){
        if(number_of_tokens_in_array <= 0){
            return;
        }
        number_of_tokens_in_array--;
        token_store[number_of_tokens_in_array].ivalue = 0;
        token_store[number_of_tokens_in_array].lexema = "";
        token_store[number_of_tokens_in_array].line_number = 0;
    }

Does anybody have an idea how I should solve this?

Brian Tompsett - 汤莱恩
  • 5,753
  • 72
  • 57
  • 129
ofer gertz
  • 89
  • 1
  • 6

1 Answer

10

extern yylval; means that yylval is defined somewhere else. So you have to do that.

Usually it is defined in the yacc/bison generated parser, so the name can be resolved when you link the scanner and the parser. If you aren't using bison/yacc, you will have to define yylval yourself. (If you actually need it. Your code does not give much of a hint what you need it for.)

By the way, your code has many other problems. One particularly glaring one is that you cannot use the value of the pointer yytext after the scanner moves on to the next token. If you need a persistent copy of the string pointed to by yytext, you need to make your own copy (and free the memory allocated for the copy when it is no longer needed.)

Also, many of your regular expressions are incorrect. Macro uses ("definitions") must be surrounded by braces, so

id {create_and_store_token(TOKEN_ID,yytext,nline); return 1;} 

won't match what you expect; it will only match the two-character sequence id. Changing that to {id} is a start, but the definition of id is also incorrect.

Personally, I avoid macros since they add no value to the code, IMO; they often create confusion. For example, your definition of letter only includes lower-case letters, something which would not be at all obvious to someone reading your code. It is much better to use POSIX character classes, which don't require definitions and whose meanings are unambiguous: [[:alpha:]] for letters, [[:lower:]] for lower-case letters, [[:alnum:]] for letters or digits, etc.

rici
  • 234,347
  • 28
  • 237
  • 341