0

This is a variation of Parsing single-quoted string with escaped quotes with Nom 5 and Parse string with escaped single quotes. I want to parse strings like '1 \' 2 \ 3 \\ 4' (a raw sequence of characters) as "1 \\' 2 \\ 3 \\\\ 4" (a Rust string), so I'm not concerned with any escaping other than the possibility of having \' inside the strings. Attempts using code from the linked questions:

use nom::{
  branch::alt,
  bytes::complete::{escaped, tag},
  character::complete::none_of,
  combinator::recognize,
  multi::{many0, separated_list0},
  sequence::delimited,
  IResult,
};

// Attempt 1: after the opening `'`, the body is parsed with nom's `escaped`,
// using `none_of("\\\'")` for ordinary characters, `\` as the control
// character and `'` as the only escapable sequence. `tag("")` is the fallback
// alternative (presumably so that an empty body is accepted).
// Per the recorded output further down, this returns a `Tag` error as soon as
// the body contains a backslash that is not followed by `'`.
fn parse_quoted_1(input: &str) -> IResult<&str, &str> {
  delimited(
    tag("'"),
    alt((escaped(none_of("\\\'"), '\\', tag("'")), tag(""))),
    tag("'"),
  )(input)
}

// Attempt 2: after the opening `'`, the body is `recognize`d as runs of
// `many0(none_of("'"))` separated by the two-character sequence `\'`.
// Per the recorded output further down, for bodies containing `\'` the result
// ends right after the backslash and the escaped quote is taken as the closing
// one — presumably because `none_of("'")` also consumes the backslash, so the
// `\'` separator never gets a chance to match.
fn parse_quoted_2(input: &str) -> IResult<&str, &str> {
  delimited(
    tag("'"),
    recognize(separated_list0(tag("\\'"), many0(none_of("'")))),
    tag("'"),
  )(input)
}

// Runs both attempts on each sample input, printing the result of
// `parse_quoted_1` followed by that of `parse_quoted_2` for every sample.
fn main() {
  let samples = [
    r#"'1'"#,
    r#"'1 \' 2'"#,
    r#"'1 \' 2 \ 3'"#,
    r#"'1 \' 2 \ 3 \\ 4'"#,
  ];
  for &sample in &samples {
    println!("{:?}", parse_quoted_1(sample));
    println!("{:?}", parse_quoted_2(sample));
  }
}

/*
Ok(("", "1"))
Ok(("", "1"))
Ok(("", "1 \\' 2"))
Ok((" 2'", "1 \\"))
Err(Error(Error { input: "1 \\' 2 \\ 3'", code: Tag }))
Ok((" 2 \\ 3'", "1 \\"))
Err(Error(Error { input: "1 \\' 2 \\ 3 \\\\ 4'", code: Tag }))
Ok((" 2 \\ 3 \\\\ 4'", "1 \\"))
*/

Only the first three cases work as intended.

user5365198
  • 103
  • 7

2 Answers2

0

A not-so-nice, imperative solution:

use nom::{bytes::complete::take, character::complete::char, sequence::delimited, IResult};

/// Parses a single-quoted string and returns its raw body (escapes are kept
/// verbatim). A `'` directly preceded by a backslash does not end the string.
fn parse_quoted(input: &str) -> IResult<&str, &str> {
  // Takes every character up to (not including) the first `'` that is not
  // immediately preceded by a backslash, or the whole input if there is none.
  fn escaped(input: &str) -> IResult<&str, &str> {
    // Previous character seen; NUL stands for "nothing seen yet".
    let mut prev = '\0';
    let len = input
      .chars()
      .take_while(|&ch| {
        let closing = ch == '\'' && prev != '\\';
        prev = ch;
        !closing
      })
      .count();
    // `len` is a character count, which is what `take` expects for `&str`.
    take(len)(input)
  }
  delimited(char('\''), escaped, char('\''))(input)
}

fn main() {
  println!("{:?}", parse_quoted(r#"'' ..."#));
  println!("{:?}", parse_quoted(r#"'1' ..."#));
  println!("{:?}", parse_quoted(r#"'1 \' 2' ..."#));
  println!("{:?}", parse_quoted(r#"'1 \' 2 \ 3' ..."#));
  println!("{:?}", parse_quoted(r#"'1 \' 2 \ 3 \\ 4' ..."#));
}

/*
Ok((" ...", ""))
Ok((" ...", "1"))
Ok((" ...", "1 \\' 2"))
Ok((" ...", "1 \\' 2 \\ 3"))
Ok((" ...", "1 \\' 2 \\ 3 \\\\ 4"))
*/

To also allow a string that ends in a double backslash, such as '...\\', we can similarly keep track of more of the preceding characters:

    // The three characters preceding the current one, nearest first
    // (`pc`, then `ppc`, then `pppc`); NUL stands for "nothing seen yet".
    let mut pc = 0 as char;
    let mut ppc = 0 as char;
    let mut pppc = 0 as char;
    // Number of characters accepted so far.
    let mut n = 0;
    for (i, c) in input.chars().enumerate() {
      // Stop at a quote that is either not preceded by a backslash, or is
      // preceded by exactly two backslashes (the third character back is not one).
      // NOTE(review): with four or more backslashes before the quote, `pppc` is a
      // backslash too, so the quote is treated as escaped — confirm this is acceptable.
      if (c == '\'' && pc != '\\') || (c == '\'' && pc == '\\' && ppc == '\\' && pppc != '\\') {
        break;
      }
      // Shift the history window by one character.
      pppc = ppc;
      ppc = pc;
      pc = c;
      n = i + 1;
    }
user5365198
  • 103
  • 7
0

Here is my way to parse quoted string.

It returns a Cow: a reference to the original string when nothing in it needs unescaping, or an owned copy of the string with the escaping backslashes removed.

You might need to adjust is_qdtext and is_quoted_char to your needs.

/// Returns `true` if `chr` may appear unescaped inside a single-quoted string.
///
/// Accepted characters are horizontal tab, printable ASCII (space through `~`)
/// except the delimiter `'` and the escape character `\`, and every non-ASCII
/// character. Other ASCII control characters (including DEL) are rejected.
fn is_qdtext(chr: char) -> bool {
    match chr {
        '\t' => true,
        // Printable ASCII, minus the closing delimiter and the escape character;
        // accepting `'` here would make the content parser swallow the closing quote.
        ' '..='~' => chr != '\'' && chr != '\\',
        // Test the full code point: `chr as u8` keeps only the low byte and would
        // wrongly reject characters such as U+0141 (low byte 0x41).
        _ => !chr.is_ascii(),
    }
}

/// Returns `true` if `chr` is allowed to follow a backslash in an escape pair.
///
/// Accepted characters are horizontal tab, printable ASCII (space through `~`)
/// and every non-ASCII character. Other ASCII control characters (including
/// DEL) are rejected.
fn is_quoted_char(chr: char) -> bool {
    // `is_ascii` inspects the full code point; the former `chr as u8` cast kept
    // only the low byte and wrongly rejected characters such as U+0141.
    matches!(chr, '\t' | ' '..='~') || !chr.is_ascii()
}

/// Parses one escape pair: a backslash followed by a single character accepted
/// by `is_quoted_char`. Returns that character without the backslash.
fn parse_quoted_pair(data: &str) -> IResult<&str, char> {
    // Consume the backslash first, then let `satisfy` produce the final result.
    let (after_backslash, _) = tag("\\")(data)?;
    satisfy(is_quoted_char)(after_backslash)
}

/// Parses the body of a quoted string (everything between the delimiters).
///
/// Returns `Cow::Borrowed` pointing into `data` when the body contains no
/// escape pairs, otherwise `Cow::Owned` holding the body with each escape pair
/// replaced by the character it escapes.
fn parse_quoted_content(data: &str) -> IResult<&str, Cow<'_, str>> {
    // Leading run of characters that need no escaping.
    let (mut rest, plain) = data.split_at_position_complete(|c| !is_qdtext(c))?;

    // Fast path: nothing to unescape, so hand back a slice of the input.
    if !rest.starts_with('\\') {
        return Ok((rest, Cow::Borrowed(plain)));
    }

    // Slow path: alternate between one escape pair and the plain run after it.
    let mut unescaped = String::from(plain);
    while rest.starts_with('\\') {
        let (after_pair, chr) = parse_quoted_pair(rest)?;
        unescaped.push(chr);

        let (after_chunk, chunk) =
            after_pair.split_at_position_complete(|c| !is_qdtext(c))?;
        unescaped.push_str(chunk);
        rest = after_chunk;
    }
    Ok((rest, Cow::Owned(unescaped)))
}

fn parse_quoted_string(data: &str) -> IResult<&str, Cow<'_, str>> {
    let (data, (_, content, _)) = tuple((tag("'"), parse_quoted_content, tag("'")))(data)?;

    Ok((data, content))
}
Szpadel
  • 61
  • 3