0

The Problem

I have the below parser that uses Pidgin. I want to add support for raw string literals. So I could have a string with 3 double quotes like this Title == """lamb is great on a "gee-ro" not a "gy-ro" sandwich""" or use more than 3 if i want like Title == """"lamb is great on a "gee-ro" not a "gy-ro" sandwich"""". it should still support regular double quoted strings like Title == "special" as it does now. The raw string literals are to help escape double quotes if needed.

this is a similar parser i have in another project that uses sprache for reference if it helps:

    private static Parser<string> DoubleQuoteParser
        => Parse.Char('"').Then(_ => Parse.AnyChar.Except(Parse.Char('"')).Many().Text().Then(innerValue => Parse.Char('"').Return(innerValue)));

What I've tried

I've tried lots of iterations on something like this:

    private static readonly Parser<char, int> CountLeadingQuotes =
        Char('"').AtLeastOnce().Select(x => x.Count());
    
    private static readonly Parser<char, string> QuotedStringParser =
        CountLeadingQuotes.Then(numQuotes =>
            Char('"').Repeat(numQuotes)
                .Then(AnyCharExcept(Char('"').Repeat(numQuotes)).ManyString())
                .Before(Char('"').Repeat(numQuotes)));

Where i'm essentially trying to say.

  1. Count the quotes
  2. Then you can have any chars you want except for that same amount of quotes
  3. Once you hit that same count of quotes, stop

I think the quote counter works, but i can't figure out the syntax for the between. Something like what I have originally works fine, but that's because i only need to check for one char. Now that I have a string, i can't figure out the syntax.

The Code

here is the full parser

using System.Globalization;
using System.Linq.Expressions;
using System.Reflection;
using System.Reflection.Metadata;
using Pidgin;
using QueryKit.Operators;
using static Pidgin.Parser;

public static class AstFilterParser
{
    internal abstract class Node { }
    internal class Identifier : Node { public string Name { get; set; } }
    internal class NumericConstant : Node { public string Value { get; set; } } // convert to proper number type later
    internal class StringConstant : Node { public string Value { get; set; } }
    internal class GuidConstant : Node { public Guid Value { get; set; } }
    internal class BooleanConstant : Node { public bool Value { get; set; } }
    internal class DateWithTimeConstant : Node { public string Value { get; set; } } // could be datetime or datetimeoffset
    internal class DateOnlyConstant : Node { public DateOnly Value { get; set; } }
    internal class TimeOnlyConstant : Node { public TimeOnly Value { get; set; } }

    
    internal class NullConstant : Node { }
    internal class BinaryExpression : Node
    {
        public Node Left { get; set; }
        public ComparisonOperator Operation { get; set; }
        public Node Right { get; set; }
    }
    internal class LogicalExpression : Node
    {
        public Node Left { get; set; }
        public LogicalOperator Operation { get; set; }
        public Node Right { get; set; }
    }

    private static readonly Parser<char, GuidConstant> GuidConstantParser =
        Char('"').Then(AnyCharExcept('"').ManyString()).Before(Char('"'))
            .Select(str => Guid.TryParse(str, out var guid) 
                ? new GuidConstant { Value = guid } 
                : throw new Exception($"Invalid Guid format: {str}"));

    private static readonly Parser<char, NullConstant> NullConstantParser =
        String("null").ThenReturn(new NullConstant());

    private static readonly Parser<char, BooleanConstant> BooleanConstantParser =
        String("true").Select(_ => new BooleanConstant { Value = true })
            .Or(String("false").Select(_ => new BooleanConstant { Value = false }));

    // Identifier Parser: parses strings of letters
    private static readonly Parser<char, Identifier> IdentifierParser =
        Letter.ManyString().Select(x => new Identifier { Name = x });

    // NumericConstant Parser: parses integer constants
    private static readonly Parser<char, NumericConstant> NumericConstantParser =
        Num.Before(Char('.').Optional()).Then(Num.Optional(), (integerPart, fractionPart) =>
            $"{integerPart}{(fractionPart.HasValue ? "." + fractionPart.Value : "")}").Select(str => new NumericConstant { Value = str });

    private static string ParseAndFormat(string str, string[] formats, CultureInfo culture, DateTimeStyles styles)
    {
        foreach (var format in formats)
        {
            if (DateTime.TryParseExact(str, format, culture, styles, out var date))
                return date.ToString(format, culture);
        }
        throw new Exception($"Invalid DateTime format: {str}");
    }

    // Parses any string enclosed in double quotes
    private static readonly Parser<char, string> QuotedStringParser =
        Char('"').Then(AnyCharExcept('"').ManyString()).Before(Char('"'));

    // Constant Parser: parses null, integer, guid, string, or date constants
    private static readonly Parser<char, Node> ConstantParser =
        NullConstantParser.Cast<Node>()
            .Or(NumericConstantParser.Cast<Node>())
            .Or(BooleanConstantParser.Cast<Node>())
            .Or(QuotedStringParser.Select(str => 
            {
                if (Guid.TryParse(str, out var guid))
                    return (Node)new GuidConstant { Value = guid };
                if (DateTime.TryParseExact(str, "yyyy-MM-ddTHH:mm:ssZ", CultureInfo.InvariantCulture, DateTimeStyles.AssumeUniversal | DateTimeStyles.AdjustToUniversal, out var dateOne))
                    return (Node)new DateWithTimeConstant { Value = dateOne.ToString("yyyy-MM-ddTHH:mm:ssZ") };
                if (DateTime.TryParseExact(str, "yyyy-MM-ddTHH:mm:ss", CultureInfo.InvariantCulture, DateTimeStyles.AssumeUniversal | DateTimeStyles.AdjustToUniversal, out var dateTwo))
                    return (Node)new DateWithTimeConstant { Value = dateTwo.ToString("yyyy-MM-ddTHH:mm:ss") };
                if (DateTimeOffset.TryParseExact(str, "yyyy-MM-ddTHH:mm:ssZ", CultureInfo.InvariantCulture, DateTimeStyles.None, out var dateOffsetOne))
                    return (Node)new DateWithTimeConstant { Value = dateOffsetOne.ToString("yyyy-MM-ddTHH:mm:ssZ") };
                if (DateTimeOffset.TryParseExact(str, "yyyy-MM-ddTHH:mm:sszzz", CultureInfo.InvariantCulture, DateTimeStyles.None, out var dateOffsetTwo))
                    return (Node)new DateWithTimeConstant { Value = dateOffsetTwo.ToString("yyyy-MM-ddTHH:mm:sszzz") };
                if (DateOnly.TryParseExact(str, "yyyy-MM-dd", CultureInfo.InvariantCulture, DateTimeStyles.None, out var dateOnly))
                    return (Node)new DateOnlyConstant { Value = dateOnly };
                if (TimeOnly.TryParseExact(str, "HH:mm:ss", CultureInfo.InvariantCulture, DateTimeStyles.None, out var timeOnly))
                    return (Node)new TimeOnlyConstant { Value = timeOnly };
                return new StringConstant { Value = str };
            }));

    // ComparisonOperator Parser: parses the three binary operations
    private static readonly Parser<char, ComparisonOperator> ComparisonOperatorParser =
    OneOf(
        Try(String(ComparisonOperator.LessThanOperator().Operator())
            .Then(Char(ComparisonOperator.CaseSensitiveAppendix)
            .Optional(), (op, asterisk) => ComparisonOperator.LessThanOperator(asterisk.HasValue))),
        Try(String(ComparisonOperator.GreaterThanOperator().Operator())
            .Then(Char(ComparisonOperator.CaseSensitiveAppendix)
            .Optional(), (op, asterisk) => ComparisonOperator.GreaterThanOperator(asterisk.HasValue))),
        Try(String(ComparisonOperator.NotEqualsOperator().Operator())
            .Then(Char(ComparisonOperator.CaseSensitiveAppendix)
            .Optional(), (op, asterisk) => ComparisonOperator.NotEqualsOperator(asterisk.HasValue))),
        Try(String(ComparisonOperator.EqualsOperator().Operator())
            .Then(Char(ComparisonOperator.CaseSensitiveAppendix)
            .Optional(), (op, asterisk) => ComparisonOperator.EqualsOperator(asterisk.HasValue))),
        Try(String(ComparisonOperator.GreaterThanOrEqualOperator().Operator())
            .Then(Char(ComparisonOperator.CaseSensitiveAppendix)
            .Optional(), (op, asterisk) => ComparisonOperator.GreaterThanOrEqualOperator(asterisk.HasValue))),
        Try(String(ComparisonOperator.LessThanOrEqualOperator().Operator())
            .Then(Char(ComparisonOperator.CaseSensitiveAppendix)
            .Optional(), (op, asterisk) => ComparisonOperator.LessThanOrEqualOperator(asterisk.HasValue))),
        Try(String(ComparisonOperator.ContainsOperator().Operator())
            .Then(Char(ComparisonOperator.CaseSensitiveAppendix)
            .Optional(), (op, asterisk) => ComparisonOperator.ContainsOperator(asterisk.HasValue))),
        Try(String(ComparisonOperator.StartsWithOperator().Operator())
            .Then(Char(ComparisonOperator.CaseSensitiveAppendix)
            .Optional(), (op, asterisk) => ComparisonOperator.StartsWithOperator(asterisk.HasValue))),
        Try(String(ComparisonOperator.EndsWithOperator().Operator())
            .Then(Char(ComparisonOperator.CaseSensitiveAppendix)
            .Optional(), (op, asterisk) => ComparisonOperator.EndsWithOperator(asterisk.HasValue))),
        Try(String(ComparisonOperator.NotContainsOperator().Operator())
            .Then(Char(ComparisonOperator.CaseSensitiveAppendix)
            .Optional(), (op, asterisk) => ComparisonOperator.NotContainsOperator(asterisk.HasValue))),
        Try(String(ComparisonOperator.NotStartsWithOperator().Operator())
            .Then(Char(ComparisonOperator.CaseSensitiveAppendix)
            .Optional(), (op, asterisk) => ComparisonOperator.NotStartsWithOperator(asterisk.HasValue))),
        Try(String(ComparisonOperator.NotEndsWithOperator().Operator())
            .Then(Char(ComparisonOperator.CaseSensitiveAppendix)
            .Optional(), (op, asterisk) => ComparisonOperator.NotEndsWithOperator(asterisk.HasValue))),
        Try(String(ComparisonOperator.InOperator().Operator())
            .Then(Char(ComparisonOperator.CaseSensitiveAppendix)
            .Optional(), (op, asterisk) => ComparisonOperator.InOperator(asterisk.HasValue)))
    );


    // Parser for the entire expression
    private static readonly Parser<char, BinaryExpression> ExpressionParser =
        IdentifierParser.Before(SkipWhitespaces)
            .Then(ComparisonOperatorParser.Before(SkipWhitespaces), (id, op) => (id, op))
            .Then(ConstantParser, (tuple, constant) => new BinaryExpression
            {
                Left = tuple.id,
                Operation = tuple.op,
                Right = constant
            });

    private static readonly Parser<char, LogicalOperator> LogicalOperatorParser =
        String(LogicalOperator.AndOperator.Operator()).ThenReturn(LogicalOperator.AndOperator)
            .Or(String(LogicalOperator.OrOperator.Operator()).ThenReturn(LogicalOperator.OrOperator));

    private static readonly Parser<char, Node> ParseExpressionWithParentheses =
        Char('(').Then(Rec(() => LogicalExpressionParser).Before(Char(')'))).Or(ExpressionParser.Cast<Node>());

    private static readonly Parser<char, Node> LogicalExpressionParser =
        ParseExpressionWithParentheses.Before(SkipWhitespaces)
            .Then(LogicalOperatorParser.Before(SkipWhitespaces).Optional(), (expr, op) => (expr, op))
            .Then(Rec(() => LogicalExpressionParser).Optional(), (tuple, right) =>
            {
                if (tuple.op.HasValue && right.HasValue)
                {
                    return new LogicalExpression
                    {
                        Left = tuple.expr,
                        Operation = tuple.op.Value,
                        Right = right.Value
                    } as Node;
                }

                return tuple.expr;
            });

    internal static Node Parse(string input) => LogicalExpressionParser.ParseOrThrow(input);
}

these tests should pass after the update


    
    [Fact]
    public void escaped_double_quote_with_more_than_3_double_quotes()
    { 
        var input = """""""""Title == """"lamb is great on a "gee-ro" not a "gy-ro" sandwich"""" """"""""";
        var node = AstFilterParser.Parse(input);
        
        node.Should().BeOfType<AstFilterParser.BinaryExpression>()
            .Which.Left.Should().BeOfType<AstFilterParser.Identifier>()
            .Which.Name.Should().Be("Title");

        node.Should().BeOfType<AstFilterParser.BinaryExpression>()
            .Which.Right.Should().BeOfType<AstFilterParser.StringConstant>()
            .Which.Value.Should().Be("lamb is great on a \"gee-ro\" not a \"gy-ro\" sandwich");
    }
    
    [Fact]
    public void escaped_double_quote_with_3_double_quotes()
    { 
        var input = """""""""Title == """lamb is great on a "gee-ro" not a "gy-ro" sandwich""" """"""""";
        var node = AstFilterParser.Parse(input);
        
        node.Should().BeOfType<AstFilterParser.BinaryExpression>()
            .Which.Left.Should().BeOfType<AstFilterParser.Identifier>()
            .Which.Name.Should().Be("Title");

        node.Should().BeOfType<AstFilterParser.BinaryExpression>()
            .Which.Right.Should().BeOfType<AstFilterParser.StringConstant>()
            .Which.Value.Should().Be("lamb is great on a \"gee-ro\" not a \"gy-ro\" sandwich");
    }
Paul DeVito
  • 1,542
  • 3
  • 15
  • 38

0 Answers0