1
lexer grammar TransformLexer;

@header { package com.abc.g4.gen; }

channels { DPCOMMENT, ERRORCHANNEL }


@members {
  /**
   * Verify whether current token is a valid decimal token (which contains dot).
   * Returns true if the character that follows the token is not a digit or letter or underscore.
   *
   * For example:
   * For char stream "2.3", "2." is not a valid decimal token, because it is followed by digit '3'.
   * For char stream "2.3_", "2.3" is not a valid decimal token, because it is followed by '_'.
   * For char stream "2.3W", "2.3" is not a valid decimal token, because it is followed by 'W'.
   * For char stream "12.0D 34.E2+0.12 "  12.0D is a valid decimal token because it is folllowed
   * by a space. 34.E2 is a valid decimal token because it is followed by symbol '+'
   * which is not a digit or letter or underscore.
   */
  public boolean isValidDecimal() {
    int nextChar = _input.LA(1);
    if (nextChar >= 'A' && nextChar <= 'Z' || nextChar >= '0' && nextChar <= '9' ||
      nextChar == '_') {
      return false;
    } else {
      return true;
    }
  }
}

// SKIP

SPACE:                               [ \t\r\n]+     -> channel(HIDDEN);
SPEC_MYSQL_COMMENT:                  '/*!' .+? '*/' -> channel(DPCOMMENT);
COMMENT_INPUT:                       '/*' .*? '*/'  -> channel(HIDDEN);
LINE_COMMENT:                        (
                                       ('--' [ \t] | '#') ~[\r\n]* ('\r'? '\n' | EOF)
                                       | '--' ('\r'? '\n' | EOF)
                                     ) -> channel(HIDDEN);


STRING
    : DQUOTA_STRING
    ;

EQ  : '==';
NEQ : '<>';
NEQJ: '!=';
LT  : '<';
LTE : '<=';
GT  : '>';
GTE : '>=';

PLUS: '+';
MINUS: '-';
ASTERISK: '*';
SLASH: '/' ;
PERCENT: '%';
RSHIFT: '>>';
LSHIFT: '<<';

IS:    'IS'    | 'is';
NULL:  'NULL'  | 'null';
TRUE:  'TRUE'  | 'true';
FALSE: 'FALSE' | 'false';
LIKE:  'LIKE'  | 'like';

OR:  'OR'  | 'or' | '|';
AND: 'AND' | '&&' | 'and' | '&';
IN:  'IN'  | 'in';
NOT: 'NOT' | '!'  | 'not';

CASE: 'CASE' | 'case';
WHEN: 'WHEN' | 'when';
THEN: 'THEN' | 'then';
ELSE: 'ELSE' | 'else';
END:  'END'  | 'end';

JOIN: '||';

ID:                                  [@]ID_LITERAL+;
// DOUBLE_QUOTE_ID:                  '"' ~'"'+ '"';
REVERSE_QUOTE_ID:                    '`' ~'`'+ '`';
NAME:                                ID_LITERAL+;

fragment ID_LITERAL:                 [a-zA-Z_0-9\u0080-\uFFFF]*?[a-zA-Z_$\u0080-\uFFFF]+?[a-zA-Z_$0-9\u0080-\uFFFF]*;
fragment DQUOTA_STRING:              '"' ( '\\'. | '""' | ~('"'| '\\') )* '"' | '\'' ( ~('\''|'\\') | ('\\' .) )* '\'';
fragment DEC_DIGIT:                  '0' .. '9'+;

// Last tokens must generate Errors

ERROR_RECONGNIGION:                  .    -> channel(ERRORCHANNEL);

NEWLINE:'\r'? '\n' ;


BYTELENGTH_LITERAL
    : DEC_DIGIT+ ('B' | 'K' | 'M' | 'G')
    ;

INTEGER_VALUE
    : [-]*DEC_DIGIT+
    ;

DECIMAL_VALUE
    : DEC_DIGIT+ EXPONENT
    | DECIMAL_DIGITS EXPONENT? {isValidDecimal()}?
    ;

IDENTIFIER
    : (LETTER | DEC_DIGIT | '_')+
    ;

BACKQUOTED_IDENTIFIER
    : '`' ( ~'`' | '``' )* '`'
    ;

COMMA: ',' ;

LEFT_BRACKET
    : '(('
    ;

RGIHT_BRACKET
    : '))'
    ;

LEFT_BRACKET1
    : '{{'
    ;

RGIHT_BRACKET1
    : '}}'
    ;

START
    : '$'
    ;

fragment DECIMAL_DIGITS
    : DEC_DIGIT+ '.' DEC_DIGIT+
    | '.' DEC_DIGIT+
    ;

fragment EXPONENT
    : 'E' [+-]? DEC_DIGIT+
    ;

fragment LETTER
    : [A-Z]
    ;

SIMPLE_COMMENT
    : '--' ~[\r\n]* '\r'? '\n'? -> channel(HIDDEN)
    ;

BRACKETED_COMMENT
    : '/*' .*? '*/' -> channel(HIDDEN)
    ;

WS
    : [ \r\n\t]+ -> channel(HIDDEN)
    ;



parser grammar TransformParser;

options { tokenVocab=TransformLexer; }
@header { package com.abc.g4.gen; }

finalExpression:
    (booleanExpression | caseExpression | resultExpression | function) EOF
    ;

caseExpression
    : CASE whenClause+ (ELSE (elseExpression=resultExpression | caseExpression))? END       #whenExpression
    | constant                                                                              #constantDefault
    ;

values:
 constant               #constantValue
 | ID                   #idValue
 ;

valueCalc:
     LEFT_BRACKET valueCalc RGIHT_BRACKET
    | valueCalc ('*'|'/'|'%')    valueCalc
    | valueCalc ('+'|'-')        valueCalc
    | valueCalc ('<<'|'>>')      valueCalc
    | values
    ;

booleanExpression
    : left=booleanExpression operator=AND                   right=booleanExpression         #logicalBinary1
    | left=booleanExpression operator=OR                    right=booleanExpression         #logicalBinary
    | NOT booleanExpression                                                                 #logicalNot
    | predicated                                                                            #predicatedExpression
    | left=valueCalc         operator=comparisonOperator    right=valueCalc                 #comparison4
    | booleanValue                                                                          #booleanValueTag
    ;

predicated
    : (values | valueCalc) IN  values (values)*
    ;

whenClause:
    WHEN condition=booleanExpression THEN (result=resultExpression | caseExpression);

resultExpression:
    predicated | values | valueCalc;

constant
    : NULL              #nullLiteral
    | STRING            #typeConstructor
    | number            #numericLiteral
    | booleanValue      #booleanLiteral
    | STRING+           #stringLiteral
    ;

comparisonOperator
    : EQ | NEQ | NEQJ | LT | LTE | GT | GTE | IS
    ;

booleanValue
    : TRUE | FALSE
    ;

number
    : MINUS? DECIMAL_VALUE            #decimalLiteral
    | MINUS? INTEGER_VALUE            #integerLiteral
    ;

qualifiedName
    : NAME
    ;

function
    : qualifiedName (params) #functionCall
    ;

param:
    valueCalc | values | function | booleanExpression
    ;

params:
   param (param)*
   ;

I can recognize numbers of multiple characters, but I cannot recognize numbers of single characters

enter image description here

enter image description here

And parentheses cannot change the priority of expression calculation. What's wrong with my code enter image description here

I try to replace '(', ')' with '((', '))' or '{{', '}}'. It can be done

enter image description here

Michael
  • 41,989
  • 11
  • 82
  • 128
hanhe
  • 11
  • 2

1 Answers1

0

Resolved: delete 'ERROR_ RECONGNATION 'Then it's OK

hanhe
  • 11
  • 2