0

all I have develop ANTR4 grammar. During parse the string

Time;25 10 * * *;'faccalc_minus1_cron.out.'yyyyMMdd.HHmm;America/New_York

I have following errors

Invalid chars in expression! Expression: ;' Invalid chars: ;' extraneous input ';' expecting {'', INTEGER, '-', '/', ','} missing ';' at '_' Incorrect timezone format :faccalc_minus1

I don't undestand why, as regex rule contain '_'.

How to fix it?

Regards, Vladimir

lexer grammar FileTriggerLexer;

CRON
:
    'cron'
;

MARKET_CRON
:
    'marketCron'
;

COMBINED
:
    'combined'
;

FILE_FEED
:
    'FileFeed'
;

MANUAL_NOTICE
:
    'ManualNotice'
;

TIME
:
    'Time'
;

MARKET_TIME
:
    'MarketTime'
;


SCHEDULE
:
    'Schedule'
;

PRODUCT
:
    'Product'
;

UCA_CLIENT
:
    'UCAClient'
;

APEX_GSM
:
    'ApexGSM'
;

DELAY
:
    'Delay'
;

CATEGORY
:
    'Category'
;

EXCHANGE
:
    'Exchange'
;

CALENDAR_EXCHANGE
:
    'CalendarExchange'
;

FEED
:
    'Feed'
;

RANGE
:
    'Range'
;

SYNTH
:
    'Synth'
;

TRIGGER
:
    'Trigger'
;

DELAYED_TRIGGER
:
    'DelayedTrigger'
;

INTRA_TRIGGER
:
    'IntraTrigger'
;

CURRENT_TRIGGER
:
    'CurrentTrigger'
;

CALENDAR_FILE_FEED
:
    'CalendarFileFeed'
;

PREVIOUS
:
    'Previous'
;

LATE_DELAY
:
    'LateDelay'
;

BUILD_ARCHIVE
:
    'BuildArchive'
;

COMPRESS
:
    'Compress'
;


LATE_TIME
:
    'LateTime'
;

CALENDAR_CATEGORY
:
    'CalendarCategory'
;

APEX_GPM
:
    'ApexGPM'
;

PORTFOLIO_NOTICE
:
    'PortfolioNotice'
;

FixedTimeOfDay: 'FixedTimeOfDay';

SEMICOLON
:
    ';'
;

ASTERISK
:
    '*'
;

LBRACKET
:
    '('
;

RBRACKET
:
    ')'
;

PERCENT
:
    '%'
;

INTEGER
:
    [0-9]+
;

DASH
:
    '-'
;

DOUBLE_QUOTE
:
    '"'
;

QUOTE
:
    '\''
;

SLASH
:
    '/'
;

DOT
:
    '.'
;

COMMA
:
    ','
;

UNDERSCORE
:
    '_'
;

EQUAL
:
    '='
;

MORE_THAN
:
    '>'
;

LESS
:
    '<'
;

ID
:
    [a-zA-Z] [a-zA-Z0-9]*
;

WS
:
    [ \t\r\n]+ -> skip
; 


/**
 * Define Fied Trigger valdiator grammar
 */
grammar FileTriggerValidator;

options
   {
    tokenVocab = FileTriggerLexer;
}

r
:
    (
        schedule
        | file_feed
        | time_feed
        | market_time_feed
        | manual_notice
        | portfolio_notice
        | not_checked
    )+
;

not_checked
:
    (
        PRODUCT
        | UCA_CLIENT
        | APEX_GSM
        | APEX_GPM
        | DELAY
        | CATEGORY
        | CALENDAR_CATEGORY
        | EXCHANGE
        | CALENDAR_EXCHANGE
        | FEED
        | RANGE
        | SYNTH
        | TRIGGER
        | DELAYED_TRIGGER
        | INTRA_TRIGGER
        | CURRENT_TRIGGER
        | CALENDAR_FILE_FEED
        | PREVIOUS
        | LATE_DELAY
        | LATE_TIME
        | COMPRESS
        | BUILD_ARCHIVE
    )
    (
        SEMICOLON anyList
    )?
;

anyList
:
    anyElement
    (
        SEMICOLON anyElement
    )*
;

anyElement
:
    cron
    | file_name
    | with_step_value
    | source_file
    | timezone
    | regEx
;

portfolio_notice
:
    PORTFOLIO_NOTICE SEMICOLON regEx
;

manual_notice
:
    MANUAL_NOTICE SEMICOLON file_name SEMICOLON timezone
;

time_feed
:
    TIME SEMICOLON cron_part
    (
        timezone?
    ) SEMICOLON file_name SEMICOLON timezone
;

market_time_feed
:
    MARKET_TIME SEMICOLON cron_part timezone SEMICOLON file_name SEMICOLON
    timezone
    (
        SEMICOLON UNDERSCORE? INTEGER
    )*
;

file_feed
:
    file_feed_name SEMICOLON source_file SEMICOLON source_host SEMICOLON
    source_host SEMICOLON regEx SEMICOLON regEx
    (
        SEMICOLON source_host
    )*
;

regEx
:
    (
        ID
        | DOT
        | ASTERISK
        | INTEGER
        | PERCENT
        | UNDERSCORE
        | DASH
        | LESS
        | MORE_THAN
        | EQUAL
        | SLASH
        | LBRACKET
        | RBRACKET
        | DOUBLE_QUOTE
        | QUOTE
        | COMMA
    )+
;

source_host
:
    ID
    (
        DASH ID
    )*
;

file_feed_name
:
    FILE_FEED
;

source_file
:
    (
        ID
        | DASH
        | UNDERSCORE
    )+
;

schedule
:
    SCHEDULE SEMICOLON schedule_defining SEMICOLON file_name SEMICOLON timezone
    (
        SEMICOLON DASH? INTEGER
    )*
;

schedule_defining
:
    cron
    | market_cron
    | combined_cron
;

cron
:
    CRON LBRACKET DOUBLE_QUOTE cron_part timezone DOUBLE_QUOTE RBRACKET
;

market_cron
:
    MARKET_CRON LBRACKET DOUBLE_QUOTE cron_part timezone DOUBLE_QUOTE COMMA
    DOUBLE_QUOTE ID DOUBLE_QUOTE RBRACKET
;

combined_cron
:
    COMBINED LBRACKET cron_list_element
    (
        COMMA cron_list_element
    )* RBRACKET
;

mic_defining
:
    ID
;

file_name
:
    regEx
;

cron_list_element
:
    cron
    | market_cron
;
//

schedule_defined_string
:
    cron
;
// 

cron_part
:
    minutes hours days_of_month month week_days
;
//

minutes
:
    with_step_value
;

hours
:
    with_step_value
;
//

int_list
:
    INTEGER
    | interval
    (
        COMMA INTEGER
        | interval
    )*
;

interval
:
    INTEGER DASH INTEGER
;
//

days_of_month
:
    with_step_value
;
//

month
:
    with_step_value
;
//

week_days
:
    with_step_value
;
//

timezone
:
    timezone_part
    (
        SLASH timezone_part
    )?
;
//

timezone_part
:
    ID
    (
        UNDERSCORE ID
    )?
;
//

with_step_value
:
    (
        INTEGER
        | COMMA
        | SLASH
        | ASTERISK
        | DASH
    )+
;

step
:
    SLASH int_list
;

1 Answers1

1

To analyze this kind of problem, dump the token stream to see what the lexer is actually doing. To directly dump the tokens, see this answer. AntlrDT, for example, also provides a graphical analysis of the corresponding parse-tree (I am the author of AntlrDT).

FileTriggerParser - ParseTree

From this, easy to see that the first error occurs in the with_step_value rule: does not allow for a trailing semicolon.

Second error is in the timezone_part rule: does not allow for repeated ID UNDERSCORE occurrences.

Community
  • 1
  • 1
GRosenberg
  • 5,843
  • 2
  • 19
  • 23