I would do it with an action that runs when the string token is finished. In that action you can iterate from ts to te, apply your string-interpolation logic, and emit the tokens from the action instead.
Something like this, which probably covers only the most basic forms, might help you get started:
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
/* Kinds of token printed by emit(): STR is literal string text, STR_I is the
 * text found between "#{" and "}". */
enum token { STR, STR_I };

/*
 * Print one token to stdout, followed by a single space.
 *
 *   tok   - which kind of token the range represents
 *   start - first character of the token text
 *   end   - one past the last character of the token text
 *
 * STR is printed as STR("text") and STR_I as STR_I(text). Nothing is printed
 * for an empty range, an inverted range (end before start), or an unknown
 * token kind.
 */
void emit(enum token tok, const char *start, const char *end) {
    /* An inverted range would give "%.*s" a negative precision, which printf
     * treats as "no precision" and then prints up to the terminating NUL.
     * The equality test comes first so that two equal pointers never reach
     * the relational comparison. */
    if (start == end || end < start) {
        return;
    }

    const int len = (int)(end - start);

    switch (tok) {
    case STR:
        printf("STR(\"%.*s\") ", len, start);
        break;
    case STR_I:
        printf("STR_I(%.*s) ", len, start);
        break;
    default:
        /* Unknown token kind: print nothing. */
        break;
    }
}
%%{
machine interpolation;
write data;
action interpolate {
    // Split the string token [ts, te) into STR and STR_I pieces.
    // The surrounding double quotes are excluded: body is the first
    // character after the opening quote and body_end is the closing quote,
    // used as an exclusive end.
    const char *const body = ts + 1;
    const char *const body_end = te - 1;
    // Start of the literal text that has not been emitted yet.
    const char *pending = body;
    // Current scan position.
    const char *cur = body;

    while (cur <= body_end) {
        // Keep walking until an interpolation opener is found.
        if (cur[0] != '#' || cur[1] != '{') {
            cur++;
            continue;
        }

        // Flush the literal text that precedes the opener.
        emit(STR, pending, cur);

        // Look for the closing brace before the end of the string body.
        const char *close = memchr(cur, '}', (size_t)(body_end - cur));
        if (close == NULL) {
            // No closing brace: the rest, opener included, is literal text.
            emit(STR, cur, body_end);
            pending = body_end;
            break;
        }

        // Emit what sits between the opener and the closing brace.
        emit(STR_I, cur + 2, close);
        pending = close + 1;
        cur = close + 1;
    }

    // Whatever literal text is left after the last interpolation.
    if (pending != body_end) {
        emit(STR, pending, body_end);
    }
}
not_dquote_or_escape = [^"\\];
escaped_something = /\\./;
string_constant = '"' ( not_dquote_or_escape | escaped_something )* '"';
main := |*
string_constant => interpolate;
*|;
}%%
/*
 * Demo driver: runs the Ragel scanner over one hard-coded string literal and
 * prints the tokens produced by the interpolate action on a single line.
 *
 * The command-line arguments are not used. Always returns 0.
 */
int main(int argc, char **argv) {
    (void)argc;
    (void)argv;

    /* Active sample input. Exactly one definition of `text` must be enabled;
     * the commented-out alternatives exercise other shapes of input. */
    char text[] = "\"hello #{first_name} #{last_name}, how are you?\"";
    //char text[] = "\"#{first_name} is my name.\"";
    //char text[] = "\"#{first_name}#{last_name}\"";
    //char text[] = "\"#{ without finishing brace.\"";
    //char text[] = "\" no starting brace }.\"";

    /* Scanner state read by the generated code. The input is only read, so
     * the cursor pointers are const-qualified like ts and te. */
    const char *p = text;
    const char *pe = text + strlen(text);
    const char *eof = pe;
    int act, cs;
    const char *ts;
    const char *te;

    %% write init;
    %% write exec;

    /* emit() separates tokens with spaces only; finish the output line. */
    putchar('\n');
    return 0;
}
I am pretty sure you can also do it by jumping between states using fgoto and the like, but I've only ever used Ragel for simple scanners, so I can't really help you there.