Imagine that you have the following structs:
/*
 * Full codec object as seen by the implementation.
 *
 * The first six members are repeated, in the same order but const-qualified,
 * by the anonymous struct behind the `nix_codec` typedef. Keep both
 * declarations in step whenever a member is added, removed or reordered.
 */
struct nix_codec {
    nix_uint8 state;    /* NOTE(review): meaning not visible in this excerpt */
    nix_uint8 mode;     /* NIX_CODEC_STRICT/ESCAPE/REPLACE/IGNORE, set by the creator */
    nix_uint8 flags;    /* bitwise OR of NIX_CODEC_* flag values */
    nix_size offset;    /* bytes/runes processed by the last encode()/decode() call */
    nix_uint32 codepage;
    nix_utf8 const *const *aliases; /* NULL-terminated name list (see the UTF-8 table) */

    /* Per-codec callbacks; each receives the codec itself as first argument. */
    void (*delete)(struct nix_codec *codec, struct nix_error *error);

    struct nix_codec* (*clone)(struct nix_codec const *codec,
                               nix_int8 mode,
                               struct nix_error *error);

    /* presumably bytes (bdata/bsize) in, runes (udata/usize) out -- confirm */
    nix_size (*decode)(struct nix_codec *codec,
                       nix_byte const *bdata, nix_size bsize,
                       nix_rune *udata, nix_size usize,
                       struct nix_error *error);

    /* presumably runes (udata/usize) in, bytes (bdata/bsize) out -- confirm */
    nix_size (*encode)(struct nix_codec *codec,
                       nix_rune const *udata, nix_size usize,
                       nix_byte *bdata, nix_size bsize,
                       struct nix_error *error);
};
/*
 * Read-only view of a codec: the leading data members of struct nix_codec,
 * each const-qualified, without the callback pointers.
 *
 * NOTE(review): `T` and `T const` are not compatible types (C11 6.7.3), so
 * this struct and struct nix_codec do not formally share a "common initial
 * sequence" (C11 6.5.2.3), and that guarantee only covers members of a union
 * anyway. Reading a struct nix_codec object through this type relies on both
 * structs being laid out identically, which is the case on common ABIs but is
 * not promised by the C Standard, and is arguably at odds with the effective
 * type rule (C11 6.5p7).
 */
typedef struct {
    nix_uint8 const state;
    nix_uint8 const mode;
    nix_uint8 const flags;
    nix_size const offset;
    nix_uint32 const codepage;
    nix_utf8 const *const *const aliases;
} nix_codec;
There are also several functions that create nix_codec* instances;
for example, the one for the UTF-8 codec looks like this:
/*
 * UTF-8 implementation of the decode() callback; nix_codec_utf8() stores its
 * address in codec->decode. The parameter list matches the decode member of
 * struct nix_codec.
 *
 * The body is omitted from this excerpt, so its exact contract is not visible.
 * NOTE(review): presumably consumes up to bsize bytes from bdata and produces
 * up to usize runes in udata -- confirm against the real implementation.
 */
static nix_size self_decode
(
struct nix_codec *codec,
nix_byte const *bdata,
nix_size bsize,
nix_rune *udata,
nix_size usize,
struct nix_error *error
)
{ /* UTF-8 decode function, too long to post here */}
/* Names of the UTF-8 codec; the list is terminated by a NULL entry. */
static nix_utf8 const *const aliases[] = {
    "UTF-8", "UTF8", "CP65001", NULL
};
nix_codec *nix_codec_utf8
(
nix_int8 mode,
struct nix_error *error
)
{
struct nix_codec *codec = NULL;
if ((mode != NIX_CODEC_STRICT) && (mode != NIX_CODEC_ESCAPE)
&& (mode != NIX_CODEC_REPLACE) && (mode != NIX_CODEC_IGNORE)) {
return NULL;
}
codec = calloc(1, sizeof(struct nix_codec));
if (codec == NULL) {
return NULL;
}
codec->mode = mode;
codec->codepage = 65001;
codec->aliases = aliases;
codec->decode = &self_decode;
codec->encode = &self_encode;
codec->flags = (NIX_CODEC_COMPATIBLE | NIX_CODEC_MULTIBYTE | NIX_CODEC_ABSOLUTE);
return (nix_codec*)codec;
}
The functions for legacy single-byte encodings are based on the following structures:
/* One table entry of a single-byte encoding: a byte value paired with a rune. */
struct nix_sbmap {
    nix_uint8 byte;
    nix_rune rune;
};
/*
 * Codec object for single-byte encodings.
 *
 * `base` has to remain the first member: a pointer to a structure, suitably
 * converted, points to its first member (C11 6.7.2.1), which is what lets a
 * struct nix_sbcodec * be used wherever a struct nix_codec * is expected.
 */
struct nix_sbcodec {
    struct nix_codec base;
    struct nix_sbmap const *entries;    /* mapping table of this encoding */
    nix_size count;                     /* presumably the number of entries -- confirm */
};
Note that struct nix_sbcodec and struct nix_sbmap are declared in the source files, not in headers, so there is no need to use the variant pattern for them. The corresponding creator function, e.g. nix_codec_koi8r(), allocates a struct nix_sbcodec, sets its base, entries and count members, then casts the pointer to nix_codec* and returns it. Every actual encode() and decode() call is performed through a public function like this one:
/*
 * Public decoding entry point: forwards the call to the codec's own decode()
 * callback and returns whatever that callback returns.
 *
 * codec         codec obtained from one of the creator functions
 *               (e.g. nix_codec_utf8()); it is not checked for NULL.
 * bdata, bsize  passed through unchanged to the callback.
 * udata, usize  passed through unchanged to the callback.
 * error         passed through unchanged to the callback.
 */
nix_size nix_codec_decode
(
    nix_codec *codec,
    nix_byte const *bdata,
    nix_size bsize,
    nix_rune *udata,
    nix_size usize,
    struct nix_error *error
)
{
    /*
     * The creator functions allocate a struct nix_codec and hand it out typed
     * as nix_codec; convert back to the full type to reach the callbacks.
     */
    struct nix_codec *self = (struct nix_codec *)codec;

    return self->decode(self, bdata, bsize, udata, usize, error);
}
Note that the state, mode, flags and offset members may be of interest to anyone using a codec. Most of them are set in the codec creator functions; offset is changed by each call to the encode() and decode() functions and holds the count of bytes/Unicode characters that were successfully processed before the function exited. Each codec has its own encode() and decode() functions, as you can see.
Now the question: is this trick correct and guaranteed to work by the C Standard?
Thanks in advance!