0

The last token of each line from the "example.y" file is written with an extra "\n". We also found out that, if we get the string length of the buffer, when it contains the element of the last token of each line, that length is the size of that token from the array tokensa plus 1.

    #include <stdio.h>
    #include <string.h>
    #include <stdlib.h>
    #include <string.h>
    #include <unistd.h>
    #include <fcntl.h>

    char *tokensa[100];
    int i = 0;

    char *termsa[100];
    int j = 0;
%}

%option stack
%x tokens termos termo

Tokens    ^\%token
Token     [a-zA-Z]+
Termos    [ \t]*[a-zA-Z]+[ \t]+\:
Lixo      [ \t]*[a-zA-Z]*[ \t]*['’]
Termo     [^'’]+
Barras    ^[ \t]+\|

%%

{Tokens}                            { BEGIN tokens ; }
<tokens>[ \t]*\n                    { BEGIN INITIAL ; }
<tokens>[^ \n\t]+                   { tokensa[i] = strdup(yytext); i++; }

{Termos}                            { yy_push_state(termos) ; }

<termos>{
    {Lixo}                          { yy_push_state(termo) ; }
    ([ \t]+\{.*\n)|\n               { yy_pop_state() ; }
}  

<termo>{
    ['’][ \t]?                      { yy_pop_state() ; }
    {Termo}                         { termsa[j] = strdup(yytext); j++; }
}

{Barras}                            { yy_push_state(termos) ; }

<*>(.|\n)                           { ; }

%%

int yywrap()
{ return(1); }

int main()
{   
    char buffer[1024];
    size_t nbytes;
    ssize_t bytes_written;
    int fd;

    yylex();

    fd = open("lexgen.txt", O_WRONLY | O_CREAT | O_TRUNC, 0640);

    write(fd, "%%\n", 3);
    for(int k = 0; k < i; k++){
        strcpy(buffer, tokensa[k]);
        nbytes = strlen(buffer);
        write(fd, "FIXME {return ", 14);
        bytes_written = write(fd, buffer, nbytes);
        write(fd, ";}\n", 3);
        nbytes = 0;
    }
    buffer[0] = '\0';
    write(fd, "[", 1);
    for(int l = 0; l < j; l++){
        strcpy(buffer, termsa[l]);
        nbytes = strlen(buffer);
        write(fd, buffer, nbytes);
    }
    write(fd, "] {return yytext[0];}\n", 22);
    write(fd, "%%", 2);
    close(fd);
    return 0;
}

Expected output:

%%
FIXME {return LISTA;}
FIXME {return NUMERO;}
FIXME {return PALAVRA;}
FIXME {return TESTE;}
[.,] {return yytext[0];}
%%

Obtained output:

%%
FIXME {return LISTA;}
FIXME {return NUMERO;}
FIXME {return PALAVRA
;}
FIXME {return TESTE
;}
[.,] {return yytext[0];}
%%

example.y:

%{
#include <stdio.h>
#include <string.h>

int conta;
%}

%token LISTA NUMERO     PALAVRA
%token TESTE
%%

Lista : LISTA elementos '.'     { printf("comprimento = %d\n", conta) ; }
      ;

elementos : elemento            { conta = 1 ; }
      | elementos ',' elemento  { conta++ ; }
      ;

elemento : NUMERO   | PALAVRA
     ;

%%

#include "lex.yy.c"

int yyerror(char *s){
    printf("Frase inválida: %s\n", s);
}

int main(){
        printf("inicio do parsing\n");
    yyparse();
    return 0;
        printf("fim do parsing\n");

}
  • Works as expected when I run it. Best guess -- your input "example.y" has CR (\r) characters in it before the newlines, which are ending up in your tokens. Try adding \r to all your whitespace patterns. – Chris Dodd Nov 30 '20 at 19:20
  • We tested what you suggested and it works!! Thank you so much. – Fernando Lobo Nov 30 '20 at 19:28

0 Answers0