Partial preprocessing is a nifty idea and exactly what you are looking for. The `cppp` utility by Brian Raiter only handles `#ifdef` and `#ifndef` lines; it does not perform macro substitution as you require.
Here is a utility I just wrote for this purpose: you can define any number of identifiers on the command line with `-Didentifier` (expands to `1`), `-Didentifier=` (expands to nothing), `-Didentifier=str`, or simply `identifier=str`.
It will substitute identifiers only, preserving comments and strings. Some corner cases are not handled, although they should not be a problem in practice:
- no support for non-ASCII identifiers.
- `stdio` in `#include <stdio.h>` will be seen as an identifier that can be substituted.
- some numbers will be parsed as 3 tokens, for example `1.0E+1`.
- identifiers will not be substituted if they are split on multiple lines with escaped newlines
- defining `include`, `ifdef` and other preprocessing directive names will cause them to be substituted, unlike the C preprocessor.
- macro argument names may be substituted whereas the C preprocessor would preserve them.
pcpp.c:
/* Partial preprocessing by chqrlie */
#include <ctype.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* One macro definition; definitions are chained into a singly linked list. */
struct define_t {
    struct define_t *next;  /* next definition in the list, NULL at the end */
    size_t len;             /* number of leading bytes of tok that form the name */
    const char *tok;        /* "name" or "name=value" string as given by the user */
    const char *def;        /* replacement text written instead of the name */
};
typedef struct define_t define_t;
/*
 * Allocate size bytes with malloc.
 * If malloc returns NULL, print a message on stderr and terminate the
 * program with exit status 1.
 */
static void *xmalloc(size_t size) {
    void *mem = malloc(size);

    if (mem != NULL)
        return mem;

    fprintf(stderr, "pcpp: cannot allocate memory\n");
    exit(1);
}
/*
 * Push a new definition parsed from str onto the front of the list *defsp.
 *
 * str is either "name" (replacement text is "1") or "name=value"
 * (replacement text is everything after the first '=', possibly empty).
 * The string is not copied: the list entry keeps pointers into str.
 */
static void add_define(define_t **defsp, const char *str) {
    size_t name_len = strcspn(str, "=");
    define_t *entry = xmalloc(sizeof(*entry));

    entry->next = *defsp;
    entry->tok = str;
    entry->len = name_len;
    if (str[name_len] == '\0')
        entry->def = "1";                   /* no '=' present */
    else
        entry->def = str + name_len + 1;    /* text following the '=' */
    *defsp = entry;
}
/* Reader state: the input stream plus the bytes read since the last flush. */
struct context {
    FILE *fp;       /* input stream */
    int lineno;     /* line counter, incremented by getcpp for each newline read */
    size_t size;    /* allocated size of buf */
    size_t pos;     /* number of pending bytes stored in buf */
    char *buf;      /* pending bytes (not NUL-terminated) */
};
/*
 * Append the byte ch to the pending buffer of ctx, growing the buffer
 * when it is full. Returns ch.
 * Allocation failure terminates the program (see xmalloc).
 */
static int append_char(struct context *ctx, int ch) {
    if (ctx->pos == ctx->size) {
        /* grow by half, plus a constant so that an empty buffer grows too */
        size_t new_size = ctx->size + ctx->size / 2 + 32;
        char *new_buf = xmalloc(new_size);

        /* buf may still be NULL (nothing allocated yet): passing a null
           pointer to memcpy is undefined behavior even for a zero length */
        if (ctx->buf != NULL)
            memcpy(new_buf, ctx->buf, ctx->pos);
        free(ctx->buf);
        ctx->buf = new_buf;
        ctx->size = new_size;
    }
    ctx->buf[ctx->pos++] = (char)ch;
    return ch;
}
/*
 * Write the pending bytes of ctx to ft and empty the buffer.
 * Does nothing when no bytes are pending.
 */
static void flush_context(struct context *ctx, FILE *ft) {
    if (ctx->pos == 0)
        return;

    fwrite(ctx->buf, ctx->pos, 1, ft);
    ctx->pos = 0;
}
/*
 * Read the next byte from the C source file, handling escaped newlines.
 *
 * Every byte consumed, including skipped backslash-newline pairs, is
 * appended to the pending buffer, and lineno is incremented for every
 * newline consumed. A backslash that is not followed by a newline is
 * returned as '\\' and the byte after it is left in the stream.
 * Returns the byte read, or EOF at end of input.
 */
static int getcpp(struct context *ctx) {
    for (;;) {
        int ch = getc(ctx->fp);

        if (ch == '\\') {
            int next;

            append_char(ctx, ch);
            next = getc(ctx->fp);
            if (next == '\n') {
                /* escaped newline: keep it in the buffer and read on */
                append_char(ctx, next);
                ctx->lineno += 1;
                continue;
            }
            /* plain backslash: leave the following byte for the next call */
            ungetc(next, ctx->fp);
            return '\\';
        }
        if (ch == EOF)
            return EOF;

        append_char(ctx, ch);
        if (ch == '\n')
            ctx->lineno += 1;
        return ch;
    }
}
/*
 * Undo the last getcpp call that returned ch: push ch back onto the input
 * stream and drop it from the pending buffer. Does nothing for EOF or
 * when the buffer is empty.
 *
 * NOTE: when ch is a backslash returned by getcpp, the stream already
 * holds one pushed-back byte; ISO C only guarantees a single byte of
 * pushback, so that case relies on the C library accepting a second one.
 */
static void ungetcpp(struct context *ctx, int ch) {
    if (ch != EOF && ctx->pos > 0) {
        ungetc(ch, ctx->fp);
        ctx->pos--;
        /* getcpp counted this newline and will count it again when it is
           re-read: undo the first count to keep line numbers accurate */
        if (ch == '\n')
            ctx->lineno -= 1;
    }
}
static int preprocess(const char *filename, FILE *fp, const char *outname, define_t *defs) {
FILE *ft = stdout;
int ch;
struct context ctx[1] = {{ fp, 1, 0, 0, NULL }};
if (outname) {
if ((ft = fopen(outname, "w")) == NULL) {
fprintf(stderr, "pcpp: cannot open output file %s: %s\n",
outname, strerror(errno));
return 1;
}
}
while ((ch = getcpp(ctx)) != EOF) {
int startline = ctx->lineno;
if (ch == '/') {
if ((ch = getcpp(ctx)) == '/') {
/* single-line comment */
while ((ch = getcpp(ctx)) != EOF && ch != '\n')
continue;
if (ch == EOF) {
fprintf(stderr, "%s:%d: unterminated single line comment\n",
filename, startline);
//break;
}
//putc('\n', ft); /* replace comment with newline */
flush_context(ctx, ft);
continue;
}
if (ch == '*') {
/* multi-line comment */
int lastc = 0;
while ((ch = getcpp(ctx)) != EOF) {
if (ch == '/' && lastc == '*') {
break;
}
lastc = ch;
}
if (ch == EOF) {
fprintf(stderr, "%s:%d: unterminated comment\n",
filename, startline);
//break;
}
//putc(' ', ft); /* replace comment with single space */
flush_context(ctx, ft);
continue;
}
if (ch != '=') {
ungetcpp(ctx, ch);
}
flush_context(ctx, ft);
continue;
}
if (ch == '\'' || ch == '"') {
int sep = ch;
const char *const_type = (ch == '"') ? "string" : "character";
while ((ch = getcpp(ctx)) != EOF) {
if (ch == sep)
break;;
if (ch == '\\') {
if ((ch = getcpp(ctx)) == EOF)
break;
}
if (ch == '\n') {
fprintf(stderr, "%s:%d: unescaped newline in %s constant\n",
filename, ctx->lineno - 1, const_type);
/* This is a syntax error but keep going as if constant was terminated */
break;
}
}
if (ch == EOF) {
fprintf(stderr, "%s:%d: unterminated %s constant\n",
filename, startline, const_type);
}
flush_context(ctx, ft);
continue;
}
if (ch == '_' || isalpha(ch)) {
/* identifier or keyword */
define_t *dp;
while (isalnum(ch = getcpp(ctx)) || ch == '_')
continue;
ungetcpp(ctx, ch);
for (dp = defs; dp; dp = dp->next) {
if (dp->len == ctx->pos && !memcmp(dp->tok, ctx->buf, ctx->pos)) {
/* matching symbol */
fputs(dp->def, ft);
ctx->pos = 0;
break;
}
}
flush_context(ctx, ft);
continue;
}
if (ch == '.' || isdigit(ch)) {
/* preprocessing number: should parse precise syntax */
while (isalnum(ch = getcpp(ctx)) || ch == '.')
continue;
ungetcpp(ctx, ch);
flush_context(ctx, ft);
continue;
}
flush_context(ctx, ft);
}
if (outname) {
fclose(ft);
}
free(ctx->buf);
return 0;
}
/*
 * Command line driver.
 *
 * Arguments are handled strictly left to right:
 *   -h, -?, --help       print usage and exit with status 2
 *   -o FILE, -oFILE      set the output file for the input files that follow
 *   -D DEF, -DDEF        add a definition (name or name=value)
 *   name=value           same as -Dname=value
 *   anything else        an input file, processed immediately with the
 *                        output name and definitions seen so far
 * When no input file is named, standard input is processed.
 *
 * Returns 0 on success, 1 on a usage error or when an input file cannot be
 * opened or preprocess reports a failure.
 */
int main(int argc, char *argv[]) {
    char *filename = NULL;
    char *outname = NULL;
    define_t *defs = NULL;
    int status = 0;
    int i;

    for (i = 1; i < argc; i++) {
        char *arg = argv[i];

        if (*arg == '-') {
            if (arg[1] == 'h' || arg[1] == '?' || !strcmp(arg, "--help")) {
                printf("usage: pcpp [-o FILENAME] [-Dname[=value]] ... [FILE] ...\n");
                return 2;
            } else if (arg[1] == 'o') {
                if (arg[2]) {
                    outname = arg + 2;
                } else if (i + 1 < argc) {
                    outname = argv[++i];
                } else {
                    fprintf(stderr, "pcpp: missing filename for -o\n");
                    return 1;
                }
            } else if (arg[1] == 'D') {
                if (arg[2]) {
                    add_define(&defs, arg + 2);
                } else if (i + 1 < argc) {
                    add_define(&defs, argv[++i]);
                } else {
                    fprintf(stderr, "pcpp: missing definition for -D\n");
                    return 1;
                }
            } else {
                fprintf(stderr, "pcpp: bad option: %s\n", arg);
                return 1;
            }
        } else if (strchr(arg, '=')) {
            add_define(&defs, arg);
        } else {
            FILE *fp;

            filename = arg;
            if ((fp = fopen(filename, "r")) == NULL) {
                fprintf(stderr, "pcpp: cannot open input file %s: %s\n",
                        filename, strerror(errno));
                return 1;
            }
            /* keep processing the remaining files, but remember the failure */
            if (preprocess(filename, fp, outname, defs) != 0)
                status = 1;
            fclose(fp);
        }
    }
    if (!filename) {
        if (preprocess("<stdin>", stdin, outname, defs) != 0)
            status = 1;
    }
    return status;
}