Is this kind of what you were going for?
use nom::{bytes::complete::tag, IResult};
/// The kinds of token the lexer can produce.
///
/// The variants carry no data, so the type is `Copy` and can be compared
/// directly with `==`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TokenType {
    Foo,
    Bar,
}
/// A lexed token: its kind plus the offsets it spans in the source.
#[derive(Debug)]
pub struct Token {
    /// Which kind of token this is.
    pub token_type: TokenType,
    /// Offset at which the token starts.
    pub start_offset: usize,
    /// Offset at which the token ends.
    pub end_offset: usize,
}
/// Lexer input: the source text still to be lexed, paired with an offset.
#[derive(Debug)]
pub struct LexInput<'a> {
    /// The text that has not been consumed yet.
    source: &'a str,
    /// Offset associated with `source`.
    location: usize,
}

impl<'a> LexInput<'a> {
    /// Pairs `source` with `location`; both are stored unchanged.
    fn new(source: &'a str, location: usize) -> Self {
        LexInput { source, location }
    }
}
impl Token {
fn new(token_type: TokenType, start_offset: usize, end_offset: usize) -> Self {
Self {
token_type,
start_offset,
end_offset,
}
}
}
/// Lifts a `&str` parser into a lexing step over `LexInput` that yields a `Token`.
///
/// The returned closure runs `parser` on `input.source`.
///
/// * On success it returns a `Token` of kind `token_type` spanning
///   `input.location .. input.location + output.len()`, together with a
///   `LexInput` holding the unconsumed source. The new input's `location` is
///   advanced by the number of bytes the parser actually consumed.
/// * On failure the parser's error is re-targeted at the original `input`
///   with `map_input`, because the error carries its input and the parser's
///   input type (`&str`) differs from this function's (`LexInput`).
///
/// `token_type` is moved into the produced token, hence the `FnOnce` return type.
fn token<'a>(
    parser: impl Fn(&'a str) -> IResult<&'a str, &str>,
    token_type: TokenType,
) -> impl FnOnce(LexInput<'a>) -> IResult<LexInput<'a>, Token> {
    move |input: LexInput| {
        let start_offset = input.location;
        // Read the length now: `input` is moved into the error-mapping closure below.
        let source_len = input.source.len();
        let (remaining_source, output) =
            parser(input.source).map_err(|e| e.map_input(|_| input))?;
        let end_offset = start_offset + output.len();
        // Advance by what was really consumed rather than by `output.len()`.
        // The two differ for parsers whose output is not the whole consumed
        // prefix (e.g. ones that skip surrounding whitespace), and using the
        // output length would leave `location` out of step with
        // `remaining_source`.
        let consumed = source_len.saturating_sub(remaining_source.len());
        let token = Token::new(token_type, start_offset, end_offset);
        let remaining = LexInput::new(remaining_source, start_offset + consumed);
        Ok((remaining, token))
    }
}
/// Demo: lexes a leading `|` from `"|foo"` as a `Bar` token, then prints the
/// leftover input and the token.
fn main() {
    let source = "|foo".to_string();
    let lex_bar = token(tag("|"), TokenType::Bar);
    let start = LexInput::new(&source, 0);
    let (rest, bar) = lex_bar(start).unwrap();
    println!("remaining: {:?}", rest);
    println!("token: {:?}", bar);
}
remaining: LexInput { source: "foo", location: 1 }
token: Token { token_type: Bar, start_offset: 0, end_offset: 1 }
Your main mistakes were lifetime-related. Wherever you leave a lifetime unannotated, the compiler picks an anonymous lifetime of its own, and that lifetime is not guaranteed to satisfy `'a`.
// Annotated copy of the original `token`; each comment marks a spot that has
// to change.
fn token<'a>(
// The returned `&'a str` borrows from the input, so the input `&str` has to
// be `&'a str` as well; left unannotated, it gets its own anonymous lifetime.
parser: impl Fn(&str) -> IResult<&'a str, &str>,
token_type: TokenType,
// Same here: `&LexInput` needs to be `'a`. Since `LexInput` already carries a
// lifetime parameter, just use that one instead: `LexInput<'a>`.
// NOTE(review): the working version also returns `FnOnce` rather than `Fn`,
// presumably because `token_type` is moved into the token -- confirm.
) -> impl Fn(&LexInput) -> IResult<LexInput<'a>, Token> {
// Same here, although for the closure parameter the anonymous lifetime is
// sufficient for the compiler to figure it out.
move |input: &LexInput| {
let start_offset = input.location;
// An error conversion is missing here: the error carries the parser's
// input, so it can't be propagated directly with `?` -- `parser` has
// `&str` as input, while `token` has `LexInput` as input. Luckily, the
// `map_input` method exists to convert it.
let (remaining_source, output) = parser(input.source)?;
let end_offset = start_offset + output.len();
let token = Token::new(token_type, start_offset, end_offset);
let remaining = LexInput::new(remaining_source, end_offset);
Ok((remaining, token))
}
}
Further remarks
There is already the `nom_locate` crate, which does exactly what you are attempting to do here.
The big advantage of `nom_locate` is that its `LocatedSpan` type can be used directly by `nom`'s parsers, so there is no need to convert back and forth between your own type and `&str`. This makes the code a lot simpler.
use nom::{bytes::complete::tag, IResult};
use nom_locate::LocatedSpan;
type Span<'a> = LocatedSpan<&'a str>;
/// The kinds of token the lexer can produce.
///
/// The variants carry no data, so the type is `Copy` and can be compared
/// directly with `==`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TokenType {
    Foo,
    Bar,
}
/// A lexed token: its kind plus the offsets it spans in the source.
#[derive(Debug)]
pub struct Token {
    /// Which kind of token this is.
    pub token_type: TokenType,
    /// Offset at which the token starts.
    pub start_offset: usize,
    /// Offset at which the token ends.
    pub end_offset: usize,
}

impl Token {
    /// Assembles a `Token` from its kind and its two offsets; the values are
    /// stored as given, without validation.
    fn new(token_type: TokenType, start_offset: usize, end_offset: usize) -> Self {
        Token {
            token_type,
            start_offset,
            end_offset,
        }
    }
}
/// Turns a `Span` parser into one that yields a `Token` of kind `token_type`.
///
/// The token runs from the input's `location_offset()` to the
/// `location_offset()` of whatever the parser leaves unconsumed; the parser's
/// own output is discarded. Parser errors are passed through unchanged.
///
/// `token_type` is moved into the produced token, hence the `FnOnce` return type.
fn token<'a>(
    parser: impl Fn(Span<'a>) -> IResult<Span<'a>, Span<'a>>,
    token_type: TokenType,
) -> impl FnOnce(Span<'a>) -> IResult<Span<'a>, Token> {
    move |input: Span| {
        let begin = input.location_offset();
        parser(input).map(|(rest, _matched)| {
            // Read the offset before `rest` is moved into the result tuple.
            let end = rest.location_offset();
            (rest, Token::new(token_type, begin, end))
        })
    }
}
/// Demo: lexes a leading `|` from `"|foo"` as a `Bar` token, then prints the
/// leftover span and the token.
fn main() {
    let source = "|foo".to_string();
    let lex_bar = token(tag("|"), TokenType::Bar);
    let start = Span::new(&source);
    let (rest, bar) = lex_bar(start).unwrap();
    println!("remaining: {:?}", rest);
    println!("token: {:?}", bar);
}
remaining: LocatedSpan { offset: 1, line: 1, fragment: "foo", extra: () }
token: Token { token_type: Bar, start_offset: 0, end_offset: 1 }
With the help of `nom::combinator::map` and a little bit of restructuring, you can reduce it even further:
use nom::{bytes::complete::tag, combinator::map, IResult};
use nom_locate::LocatedSpan;
type Span<'a> = LocatedSpan<&'a str>;
/// The kinds of token the lexer can produce.
///
/// The variants carry no data, so values can be cloned cheaply and compared
/// directly with `==`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum TokenType {
    Foo,
    Bar,
}
/// A lexed token: its kind plus the offsets it spans in the source.
#[derive(Debug)]
pub struct Token {
    /// Which kind of token this is.
    pub token_type: TokenType,
    /// Offset at which the token starts.
    pub start_offset: usize,
    /// Offset at which the token ends.
    pub end_offset: usize,
}

impl Token {
    /// Assembles a `Token` from its kind and its two offsets; the values are
    /// stored as given, without validation.
    fn new(token_type: TokenType, start_offset: usize, end_offset: usize) -> Self {
        Token {
            token_type,
            start_offset,
            end_offset,
        }
    }
}
/// Turns a `Span` parser into one that yields a `Token` of kind `token_type`.
///
/// Built on `nom::combinator::map`: the span matched by `parser` is converted
/// into a token that starts at the span's `location_offset()` and ends
/// `len()` bytes later. `token_type` is cloned for every produced token, so
/// the returned parser can be called repeatedly.
fn token<'a>(
    parser: impl Fn(Span<'a>) -> IResult<Span<'a>, Span<'a>>,
    token_type: TokenType,
) -> impl FnMut(Span<'a>) -> IResult<Span<'a>, Token> {
    map(parser, move |matched: Span<'a>| {
        let start_offset = matched.location_offset();
        let end_offset = start_offset + matched.len();
        Token::new(token_type.clone(), start_offset, end_offset)
    })
}
/// Demo: lexes a leading `|` from `"|foo"` as a `Bar` token, then prints the
/// leftover span and the token.
fn main() {
    let source = "|foo".to_string();
    // The parser is `FnMut`, so it needs a mutable binding to be called.
    let mut lex_bar = token(tag("|"), TokenType::Bar);
    let start = Span::new(&source);
    let (rest, bar) = lex_bar(start).unwrap();
    println!("remaining: {:?}", rest);
    println!("token: {:?}", bar);
}
remaining: LocatedSpan { offset: 1, line: 1, fragment: "foo", extra: () }
token: Token { token_type: Bar, start_offset: 0, end_offset: 1 }