0

My grammar needs to have custom types defined by the user, but the problem is that when I add tvariable ::= ENTERO | ID it generates the following:

[cup] Warning : *** Shift/Reduce conflict found in state #21
      [cup]   between epsilon ::= (*) 
      [cup]   and     tvariable ::= (*) ID 
      [cup]   under symbol ID
      [cup]   Resolved in favor of shifting.
      [cup] Warning : *** Shift/Reduce conflict found in state #24
      [cup]   between epsilon ::= (*) 
      [cup]   and     tvariable ::= (*) ID 
      [cup]   under symbol ID
      [cup]   Resolved in favor of shifting.
      [cup]   Checking for non-reduced productions...
      [cup] Error : *** More conflicts encountered than expected -- parser generation aborted
package compiler.syntax;

// Declaracion de importaciones 
//(No modificar las proporcionadas. Se pueden agregar mas)

import java_cup.runtime.Symbol;
import java.util.*;

import es.uned.lsi.compiler.lexical.*;
import es.uned.lsi.compiler.code.*;
import es.uned.lsi.compiler.intermediate.*;
import es.uned.lsi.compiler.semantic.*;
import es.uned.lsi.compiler.semantic.symbol.*;
import es.uned.lsi.compiler.semantic.type.*;
import es.uned.lsi.compiler.syntax.*;

import compiler.CompilerContext;
import compiler.lexical.*;
import compiler.syntax.nonTerminal.*;

import compiler.semantic.symbol.*;
import compiler.semantic.type.*;
import compiler.intermediate.*;
import compiler.code.*;

// Declaracion del codigo de usuario

// Members declared here are visible to the semantic actions ({: ... :})
// embedded in the production rules further down (e.g. the `program` rule
// calls syntaxErrorManager.syntaxInfo). Each one is fetched from CompilerContext.
action code {:
    
    SyntaxErrorManager   syntaxErrorManager   = CompilerContext.getSyntaxErrorManager();
    SemanticErrorManager semanticErrorManager = CompilerContext.getSemanticErrorManager ();
    ScopeManagerIF       scopeManager         = CompilerContext.getScopeManager ();
    FinalCodeFactoryIF   finalCodeFactory     = CompilerContext.getFinalCodeFactory ();

:}  

// Code placed in the generated parser class: error-reporting hooks that
// forward the offending token to the shared SyntaxErrorManager.
parser code {:
    SyntaxErrorManager syntaxErrorManager = CompilerContext.getSyntaxErrorManager();
    
    // Reports a syntax error for the given symbol via syntaxError().
    // NOTE(review): the cast assumes the scanner always stores a Token in
    // Symbol.value -- confirm against the lexer.
    public void syntax_error(Symbol symbol)
    { 
        Token token = (Token) symbol.value;
        syntaxErrorManager.syntaxError ("Error sintactico", token);     
    }
        
    // Reports a fatal syntax error for the given symbol via syntaxFatalError().
    // Same Token-cast assumption as syntax_error above.
    public void unrecovered_syntax_error(java_cup.runtime.Symbol symbol)
    {   
        Token token = (Token) symbol.value;
        syntaxErrorManager.syntaxFatalError ("Error fatal", token);
    }
:}

// Terminal declarations (example).
// Every terminal carries a Token as its semantic value.

terminal Token PLUS;
terminal Token MULT;
terminal Token MENOR;
terminal Token IGUAL;
terminal Token AND;
terminal Token NOT;
terminal Token AUTOINCREMENTO;
terminal Token ASIGNACION;
terminal Token ASIGNACION_CON_SUMA;
terminal Token ABRIR_PARENTESIS;
terminal Token CERRAR_PARENTESIS;
terminal Token ABRIR_BRACKET;
terminal Token CERRAR_BRACKET;
terminal Token COMA;
terminal Token PUNTO_COMA;
terminal Token DOS_PUNTOS;
terminal Token ABRIR_LLAVE;
terminal Token CERRAR_LLAVE;
terminal Token CASO;
terminal Token CONSTANTE;
terminal Token CORTE;
terminal Token ENTERO;
terminal Token ESCRIBE;
terminal Token ESCRIBE_ENT;
terminal Token ALTERNATIVAS;
terminal Token MIENTRAS;
terminal Token PORDEFECTO;
terminal Token PRINCIPAL;
terminal Token DEVUELVE;
terminal Token SI;
terminal Token SINO;
terminal Token TIPO;
terminal Token VACIO;
terminal Token DIGITOS;
terminal Token LIT_INTEGER;
terminal Token ID;
// CONST is currently disabled:
//terminal Token CONST;
terminal Token CADENA_CARACTERES;
terminal Token CADENA;
// ...


// Non-terminal declarations.
// Do not modify the ones provided with the template.
// Only `axiom` declares a Java result type (Axiom); the rest are untyped.

non terminal            program;
non terminal Axiom      axiom;
non terminal            epsilon;
non terminal            declaraciones;


// Constant declarations
non terminal            declaracionConstantes;
non terminal            constantes;
non terminal            constante;
non terminal            fconstante;

// Variable declarations
non terminal            declaracionVariables;
non terminal            variables;
non terminal            tdvariable;
non terminal            ftdvariable;
non terminal            tvariable;
non terminal            dvariable;
non terminal            Fid;
non terminal            asigvariable;
non terminal            fasigvariable;

non terminal            vector;

// Expressions
non terminal            expresion;
non terminal            expresion2;
non terminal            expresion3;
non terminal            expresion4;
non terminal            expresion5;
non terminal            expresion6;
non terminal            expAutoincremento;

// Statements
non terminal            sentencias;
non terminal            sentencia;
non terminal            sentenciaDevuelve;
non terminal            sentenciaSalida;
non terminal            sentenciaAsignacion;
non terminal            sentenciaSuma;
non terminal            sentenciaAutoincremento;
non terminal            cadenaSalida;
non terminal            sentenciaSalidaEnt;
non terminal            cadenaSalidaEnt;
non terminal            tipoReferencia;

non terminal            funcionPrincipal;
// ...


// Precedence declarations.
// All listed terminals are placed in a single left-associative declaration,
// i.e. they all share one precedence level.
// NOTE(review): operators such as PLUS and MULT therefore do not get distinct
// priorities from this declaration -- confirm that is intended.
precedence left         PLUS, MULT, MENOR, AND, NOT, AUTOINCREMENTO, IGUAL, COMA, ABRIR_BRACKET, 
                        CERRAR_BRACKET, ABRIR_PARENTESIS, CERRAR_PARENTESIS;


// Production rule declarations


// The start symbol of the grammar is `program`.
start with program;

// Wraps the axiom with two informational log messages: one emitted by the
// action placed before `axiom`, and one emitted after it has been parsed.
program ::= 
  {: 
        syntaxErrorManager.syntaxInfo ("Starting parsing..."); 
   :}
  axiom:ax
  {:        
        
        syntaxErrorManager.syntaxInfo ("Parsing process ended.");
   :};

// A program consists solely of the main function.
axiom ::= funcionPrincipal;
   
// Empty production; used throughout the grammar to make constructs optional.
epsilon ::= ;

// Global declarations: currently only the (optional) constant section.
declaraciones ::= declaracionConstantes;

// CONSTANT DECLARATIONS
// NOTE(review): the action below is written after `epsilon`, so it belongs to
// the second alternative only; as written, the message is emitted when the
// constant section is empty, not when `constantes` is matched -- confirm intent.
declaracionConstantes ::= constantes | epsilon  {: syntaxErrorManager.syntaxInfo ("Reconocida una declaración de CONSTANTE"); :};
// One or more constants, expressed right-recursively through fconstante.
constantes ::= constante fconstante;
fconstante ::= constantes | epsilon;
constante ::= CONSTANTE ID DIGITOS PUNTO_COMA;

// VARIABLE DECLARATIONS
// Optional section: zero or more typed declarations (right-recursive via ftdvariable).
declaracionVariables ::= variables | epsilon;
variables ::= tdvariable ftdvariable;
ftdvariable ::= variables | epsilon;
// A type followed by one or more declarators.
tdvariable ::= tvariable dvariable;
// Built-in integer type or a user-defined type name. The ID alternative is the
// item CUP reports in its shift/reduce conflict against `epsilon ::= (*)`.
tvariable ::= ENTERO | ID;
// Declarator: a name, an optional initializer, then either `;` (end of the
// declaration) or `,` followed by another declarator.
dvariable ::= ID Fid;
Fid ::= asigvariable fasigvariable;
fasigvariable ::= PUNTO_COMA | COMA dvariable;
asigvariable ::= ASIGNACION DIGITOS | epsilon;

// FUNCTION DECLARATIONS
// Main function
// The body is `declaracionVariables sentencias`; both can derive epsilon, and
// both a variable declaration (tvariable ::= ID) and a statement
// (tipoReferencia ::= ID) can begin with the ID token.
funcionPrincipal ::= declaraciones VACIO PRINCIPAL ABRIR_PARENTESIS CERRAR_PARENTESIS ABRIR_LLAVE declaracionVariables sentencias CERRAR_LLAVE;

// EXPRESSIONS
// Each level offers one binary form written directly over `expresion`, or
// falls through to the next level; the rule structure itself does not fix how
// operators group.
// NOTE(review): presumably this relies on the precedence declaration -- confirm.
expresion ::= DIGITOS | ID | expresion2;
expresion2 ::= expresion PLUS expresion | expresion3;
expresion3 ::= expresion IGUAL expresion | expresion4;
expresion4 ::= expresion MULT expresion | expresion5;
expresion5 ::= expAutoincremento | expresion6;
expresion6 ::= ABRIR_PARENTESIS expresion CERRAR_PARENTESIS;

// Vector expression: an identifier indexed by a bracketed expression.
vector ::= ID ABRIR_BRACKET expresion CERRAR_BRACKET;

// Auto-increment expression: a reference followed by the AUTOINCREMENTO token.
expAutoincremento ::= tipoReferencia AUTOINCREMENTO;


// STATEMENTS
// Zero or more statements (right-recursive, terminated by epsilon).
sentencias ::= sentencia sentencias | epsilon;
sentencia ::= sentenciaDevuelve
            | sentenciaSalidaEnt
            | sentenciaSalida
            | sentenciaAsignacion
            | sentenciaSuma
            | sentenciaAutoincremento;

// RETURN STATEMENT
sentenciaDevuelve ::= DEVUELVE PUNTO_COMA;

// OUTPUT STATEMENTS
// ESCRIBE takes an optional string; ESCRIBE_ENT takes an optional expression.
sentenciaSalida ::= ESCRIBE ABRIR_PARENTESIS cadenaSalida CERRAR_PARENTESIS PUNTO_COMA {: syntaxErrorManager.syntaxInfo ("Reconocida una sentencia de SALIDA"); :};
cadenaSalida ::= CADENA | epsilon;
sentenciaSalidaEnt ::= ESCRIBE_ENT ABRIR_PARENTESIS cadenaSalidaEnt CERRAR_PARENTESIS PUNTO_COMA;
cadenaSalidaEnt ::= expresion | epsilon;

// ASSIGNMENT STATEMENT
sentenciaAsignacion ::= tipoReferencia ASIGNACION expresion PUNTO_COMA;
// Assignable reference: an indexed vector element or a plain identifier.
tipoReferencia ::= vector | ID;

// ADD-AND-ASSIGN STATEMENT
sentenciaSuma ::= tipoReferencia ASIGNACION_CON_SUMA expresion PUNTO_COMA;

// AUTO-INCREMENT STATEMENT
sentenciaAutoincremento ::= expAutoincremento PUNTO_COMA;

I've tried everything that came to mind, but nothing seems to work. If I remove the ID from tvariable ::= ENTERO | ID and just define tvariable ::= ENTERO it works just fine, but I need the user-defined types.

Aarón
  • 1

1 Answer

0

You need to get rid of those uses of epsilon. All of them. They are not contributing anything, and they are a huge source of conflicts.

You seem to be basing your grammar style on something vaguely similar to a top-down grammar, for no particular reason since Java CUP generates bottom-up parsers; trying to eliminate left-recursion is counter-productive. If you just wrote your grammar in a natural style, it would be easier to read, easier to maintain, more efficient and less prone to parsing conflicts. In addition, since you have not attempted to left-factor, your grammar cannot be used for top-down parsing either.

The usual way of writing a grammar for a list is as follows:

x_list ::= x
         | x_list x;

That assumes that x is not nullable --cannot derive ε-- and that the list is required. If the list is not required in some context, you should write two rules for that context, one with the list and one without. So you might end up with something like this:

functionBody ::= declarationList statementList
               | statementList
               | declarationList;

rather than making the two lists nullable.

That's not always necessary, but it's safer. You can get away with nullable lists when the repeated elements are distinguished by their first token, but that is often not the case. In your grammar, for example, both sentencia and declaracionVariables can start with ID, which is precisely the problem that leads to your shift-reduce conflict.

To see that, let's rewrite the above using optional lists:

declaracionListaOpcional ::= declaracionLista | epsilon;
sentenciaListaOpcional ::= sentenciaLista | epsilon;
cuerpoDeFuncion ::= declaracionListaOpcional sentenciaListaOpcional;

Now, what happens when the parser sees ID? It cannot know whether that's the start of a declaration or a statement. But it has to make that decision immediately, in order to know whether to create an empty declaration list. If it doesn't create the empty list immediately, it won't be able to do so later, when it sees the token after the ID, because the rule is that reductions must be made exactly at the end of the rule, not retroactively.

A top-down parser wouldn't be able to handle this conflict. You'd have to left-factor in order to create something which starts with ID and encompasses both possibilities. Fortunately, that's not necessary because bottom-up parsers can defer the decision until the end of the rule. So when we write (as above):

cuerpoDeFuncion ::= declaracionLista sentenciaLista
               | sentenciaLista
               | declaracionLista;

the bottom-up parser reads the ID and keeps all of the rules active. It doesn't have to decide right away, because no rule has ended.

rici
  • 234,347
  • 28
  • 237
  • 341