The Problem
I have the below parser that uses Pidgin. I want to add support for raw string literals. So I could have a string with 3 double quotes like this Title == """lamb is great on a "gee-ro" not a "gy-ro" sandwich"""
or use more than 3 if i want like Title == """"lamb is great on a "gee-ro" not a "gy-ro" sandwich""""
. it should still support regular double quoted strings like Title == "special"
as it does now. The raw string literals are to help escape double quotes if needed.
this is a similar parser i have in another project that uses sprache for reference if it helps:
private static Parser<string> DoubleQuoteParser
=> Parse.Char('"').Then(_ => Parse.AnyChar.Except(Parse.Char('"')).Many().Text().Then(innerValue => Parse.Char('"').Return(innerValue)));
What I've tried
I've tried lots of iterations on something like this:
private static readonly Parser<char, int> CountLeadingQuotes =
Char('"').AtLeastOnce().Select(x => x.Count());
private static readonly Parser<char, string> QuotedStringParser =
CountLeadingQuotes.Then(numQuotes =>
Char('"').Repeat(numQuotes)
.Then(AnyCharExcept(Char('"').Repeat(numQuotes)).ManyString())
.Before(Char('"').Repeat(numQuotes)));
Where i'm essentially trying to say.
- Count the quotes
- Then you can have any chars you want except for that same amount of quotes
- Once you hit that same count of quotes, stop
I think the quote counter works, but i can't figure out the syntax for the between. Something like what I have originally works fine, but that's because i only need to check for one char. Now that I have a string, i can't figure out the syntax.
The Code
here is the full parser
using System.Globalization;
using System.Linq.Expressions;
using System.Reflection;
using System.Reflection.Metadata;
using Pidgin;
using QueryKit.Operators;
using static Pidgin.Parser;
public static class AstFilterParser
{
internal abstract class Node { }
internal class Identifier : Node { public string Name { get; set; } }
internal class NumericConstant : Node { public string Value { get; set; } } // convert to proper number type later
internal class StringConstant : Node { public string Value { get; set; } }
internal class GuidConstant : Node { public Guid Value { get; set; } }
internal class BooleanConstant : Node { public bool Value { get; set; } }
internal class DateWithTimeConstant : Node { public string Value { get; set; } } // could be datetime or datetimeoffset
internal class DateOnlyConstant : Node { public DateOnly Value { get; set; } }
internal class TimeOnlyConstant : Node { public TimeOnly Value { get; set; } }
internal class NullConstant : Node { }
internal class BinaryExpression : Node
{
public Node Left { get; set; }
public ComparisonOperator Operation { get; set; }
public Node Right { get; set; }
}
internal class LogicalExpression : Node
{
public Node Left { get; set; }
public LogicalOperator Operation { get; set; }
public Node Right { get; set; }
}
private static readonly Parser<char, GuidConstant> GuidConstantParser =
Char('"').Then(AnyCharExcept('"').ManyString()).Before(Char('"'))
.Select(str => Guid.TryParse(str, out var guid)
? new GuidConstant { Value = guid }
: throw new Exception($"Invalid Guid format: {str}"));
private static readonly Parser<char, NullConstant> NullConstantParser =
String("null").ThenReturn(new NullConstant());
private static readonly Parser<char, BooleanConstant> BooleanConstantParser =
String("true").Select(_ => new BooleanConstant { Value = true })
.Or(String("false").Select(_ => new BooleanConstant { Value = false }));
// Identifier Parser: parses strings of letters
private static readonly Parser<char, Identifier> IdentifierParser =
Letter.ManyString().Select(x => new Identifier { Name = x });
// NumericConstant Parser: parses integer constants
private static readonly Parser<char, NumericConstant> NumericConstantParser =
Num.Before(Char('.').Optional()).Then(Num.Optional(), (integerPart, fractionPart) =>
$"{integerPart}{(fractionPart.HasValue ? "." + fractionPart.Value : "")}").Select(str => new NumericConstant { Value = str });
private static string ParseAndFormat(string str, string[] formats, CultureInfo culture, DateTimeStyles styles)
{
foreach (var format in formats)
{
if (DateTime.TryParseExact(str, format, culture, styles, out var date))
return date.ToString(format, culture);
}
throw new Exception($"Invalid DateTime format: {str}");
}
// Parses any string enclosed in double quotes
private static readonly Parser<char, string> QuotedStringParser =
Char('"').Then(AnyCharExcept('"').ManyString()).Before(Char('"'));
// Constant Parser: parses null, integer, guid, string, or date constants
private static readonly Parser<char, Node> ConstantParser =
NullConstantParser.Cast<Node>()
.Or(NumericConstantParser.Cast<Node>())
.Or(BooleanConstantParser.Cast<Node>())
.Or(QuotedStringParser.Select(str =>
{
if (Guid.TryParse(str, out var guid))
return (Node)new GuidConstant { Value = guid };
if (DateTime.TryParseExact(str, "yyyy-MM-ddTHH:mm:ssZ", CultureInfo.InvariantCulture, DateTimeStyles.AssumeUniversal | DateTimeStyles.AdjustToUniversal, out var dateOne))
return (Node)new DateWithTimeConstant { Value = dateOne.ToString("yyyy-MM-ddTHH:mm:ssZ") };
if (DateTime.TryParseExact(str, "yyyy-MM-ddTHH:mm:ss", CultureInfo.InvariantCulture, DateTimeStyles.AssumeUniversal | DateTimeStyles.AdjustToUniversal, out var dateTwo))
return (Node)new DateWithTimeConstant { Value = dateTwo.ToString("yyyy-MM-ddTHH:mm:ss") };
if (DateTimeOffset.TryParseExact(str, "yyyy-MM-ddTHH:mm:ssZ", CultureInfo.InvariantCulture, DateTimeStyles.None, out var dateOffsetOne))
return (Node)new DateWithTimeConstant { Value = dateOffsetOne.ToString("yyyy-MM-ddTHH:mm:ssZ") };
if (DateTimeOffset.TryParseExact(str, "yyyy-MM-ddTHH:mm:sszzz", CultureInfo.InvariantCulture, DateTimeStyles.None, out var dateOffsetTwo))
return (Node)new DateWithTimeConstant { Value = dateOffsetTwo.ToString("yyyy-MM-ddTHH:mm:sszzz") };
if (DateOnly.TryParseExact(str, "yyyy-MM-dd", CultureInfo.InvariantCulture, DateTimeStyles.None, out var dateOnly))
return (Node)new DateOnlyConstant { Value = dateOnly };
if (TimeOnly.TryParseExact(str, "HH:mm:ss", CultureInfo.InvariantCulture, DateTimeStyles.None, out var timeOnly))
return (Node)new TimeOnlyConstant { Value = timeOnly };
return new StringConstant { Value = str };
}));
// ComparisonOperator Parser: parses the three binary operations
private static readonly Parser<char, ComparisonOperator> ComparisonOperatorParser =
OneOf(
Try(String(ComparisonOperator.LessThanOperator().Operator())
.Then(Char(ComparisonOperator.CaseSensitiveAppendix)
.Optional(), (op, asterisk) => ComparisonOperator.LessThanOperator(asterisk.HasValue))),
Try(String(ComparisonOperator.GreaterThanOperator().Operator())
.Then(Char(ComparisonOperator.CaseSensitiveAppendix)
.Optional(), (op, asterisk) => ComparisonOperator.GreaterThanOperator(asterisk.HasValue))),
Try(String(ComparisonOperator.NotEqualsOperator().Operator())
.Then(Char(ComparisonOperator.CaseSensitiveAppendix)
.Optional(), (op, asterisk) => ComparisonOperator.NotEqualsOperator(asterisk.HasValue))),
Try(String(ComparisonOperator.EqualsOperator().Operator())
.Then(Char(ComparisonOperator.CaseSensitiveAppendix)
.Optional(), (op, asterisk) => ComparisonOperator.EqualsOperator(asterisk.HasValue))),
Try(String(ComparisonOperator.GreaterThanOrEqualOperator().Operator())
.Then(Char(ComparisonOperator.CaseSensitiveAppendix)
.Optional(), (op, asterisk) => ComparisonOperator.GreaterThanOrEqualOperator(asterisk.HasValue))),
Try(String(ComparisonOperator.LessThanOrEqualOperator().Operator())
.Then(Char(ComparisonOperator.CaseSensitiveAppendix)
.Optional(), (op, asterisk) => ComparisonOperator.LessThanOrEqualOperator(asterisk.HasValue))),
Try(String(ComparisonOperator.ContainsOperator().Operator())
.Then(Char(ComparisonOperator.CaseSensitiveAppendix)
.Optional(), (op, asterisk) => ComparisonOperator.ContainsOperator(asterisk.HasValue))),
Try(String(ComparisonOperator.StartsWithOperator().Operator())
.Then(Char(ComparisonOperator.CaseSensitiveAppendix)
.Optional(), (op, asterisk) => ComparisonOperator.StartsWithOperator(asterisk.HasValue))),
Try(String(ComparisonOperator.EndsWithOperator().Operator())
.Then(Char(ComparisonOperator.CaseSensitiveAppendix)
.Optional(), (op, asterisk) => ComparisonOperator.EndsWithOperator(asterisk.HasValue))),
Try(String(ComparisonOperator.NotContainsOperator().Operator())
.Then(Char(ComparisonOperator.CaseSensitiveAppendix)
.Optional(), (op, asterisk) => ComparisonOperator.NotContainsOperator(asterisk.HasValue))),
Try(String(ComparisonOperator.NotStartsWithOperator().Operator())
.Then(Char(ComparisonOperator.CaseSensitiveAppendix)
.Optional(), (op, asterisk) => ComparisonOperator.NotStartsWithOperator(asterisk.HasValue))),
Try(String(ComparisonOperator.NotEndsWithOperator().Operator())
.Then(Char(ComparisonOperator.CaseSensitiveAppendix)
.Optional(), (op, asterisk) => ComparisonOperator.NotEndsWithOperator(asterisk.HasValue))),
Try(String(ComparisonOperator.InOperator().Operator())
.Then(Char(ComparisonOperator.CaseSensitiveAppendix)
.Optional(), (op, asterisk) => ComparisonOperator.InOperator(asterisk.HasValue)))
);
// Parser for the entire expression
private static readonly Parser<char, BinaryExpression> ExpressionParser =
IdentifierParser.Before(SkipWhitespaces)
.Then(ComparisonOperatorParser.Before(SkipWhitespaces), (id, op) => (id, op))
.Then(ConstantParser, (tuple, constant) => new BinaryExpression
{
Left = tuple.id,
Operation = tuple.op,
Right = constant
});
private static readonly Parser<char, LogicalOperator> LogicalOperatorParser =
String(LogicalOperator.AndOperator.Operator()).ThenReturn(LogicalOperator.AndOperator)
.Or(String(LogicalOperator.OrOperator.Operator()).ThenReturn(LogicalOperator.OrOperator));
private static readonly Parser<char, Node> ParseExpressionWithParentheses =
Char('(').Then(Rec(() => LogicalExpressionParser).Before(Char(')'))).Or(ExpressionParser.Cast<Node>());
private static readonly Parser<char, Node> LogicalExpressionParser =
ParseExpressionWithParentheses.Before(SkipWhitespaces)
.Then(LogicalOperatorParser.Before(SkipWhitespaces).Optional(), (expr, op) => (expr, op))
.Then(Rec(() => LogicalExpressionParser).Optional(), (tuple, right) =>
{
if (tuple.op.HasValue && right.HasValue)
{
return new LogicalExpression
{
Left = tuple.expr,
Operation = tuple.op.Value,
Right = right.Value
} as Node;
}
return tuple.expr;
});
internal static Node Parse(string input) => LogicalExpressionParser.ParseOrThrow(input);
}
these tests should pass after the update
[Fact]
public void escaped_double_quote_with_more_than_3_double_quotes()
{
var input = """""""""Title == """"lamb is great on a "gee-ro" not a "gy-ro" sandwich"""" """"""""";
var node = AstFilterParser.Parse(input);
node.Should().BeOfType<AstFilterParser.BinaryExpression>()
.Which.Left.Should().BeOfType<AstFilterParser.Identifier>()
.Which.Name.Should().Be("Title");
node.Should().BeOfType<AstFilterParser.BinaryExpression>()
.Which.Right.Should().BeOfType<AstFilterParser.StringConstant>()
.Which.Value.Should().Be("lamb is great on a \"gee-ro\" not a \"gy-ro\" sandwich");
}
[Fact]
public void escaped_double_quote_with_3_double_quotes()
{
var input = """""""""Title == """lamb is great on a "gee-ro" not a "gy-ro" sandwich""" """"""""";
var node = AstFilterParser.Parse(input);
node.Should().BeOfType<AstFilterParser.BinaryExpression>()
.Which.Left.Should().BeOfType<AstFilterParser.Identifier>()
.Which.Name.Should().Be("Title");
node.Should().BeOfType<AstFilterParser.BinaryExpression>()
.Which.Right.Should().BeOfType<AstFilterParser.StringConstant>()
.Which.Value.Should().Be("lamb is great on a \"gee-ro\" not a \"gy-ro\" sandwich");
}