We are a few people learning Coq and we are trying to define an Inductive predicate for the denotation of regular expressions, which represents a set of sequences.
This seems to run into the `strictly positive` limitation, since we allow `not` as an operator.
`not` is not usually included in regular expressions, but it is included in Brzozowski's regular expressions, which are the regular expressions we are looking at.
When we try to redefine regular expressions using a `Fixpoint`, we run into the `ill-formed` limitation for the `zero or more` operator.
We can overcome these problems by defining our regular expressions as a mix of Inductive predicates and a Fixpoint,
but this feels wrong.
Is there any other way to define our regular expressions purely as an Inductive predicate?
Is there any problem with how we use a mix of Fixpoint and Inductive Predicate, or are we just being overly pure?
Here is the example code, with the explanations and expected errors in the comments:
Require Import List.
Import ListNotations.
(* The input alphabet of our regular expressions: exactly two symbols, a1 and a0. *)
Inductive alphabet : Set :=
  | a1 : alphabet
  | a0 : alphabet.
(* Syntax of regular expressions over alphabet, with nor as the only boolean connective. *)
Inductive regex : Set :=
  (* emptyset: matches no string at all *)
  | emptyset : regex
  (* lambda: matches the empty string and nothing else *)
  | lambda : regex
  (* symbol a: matches only the one-symbol string made of exactly a *)
  | symbol : alphabet -> regex
  (* concat r s: builds regular expressions that can match longer strings,
     a match of r followed by a match of s *)
  | concat : regex -> regex -> regex
  (* star r: zero or more repetitions of r, as usual for regular expressions *)
  | star : regex -> regex
  (* nor r s: boolean NOR of the two operands. Truth table:
       P | Q | P `nor` Q
       -----------------
       T | T | F
       T | F | F
       F | T | F
       F | F | T
  *)
  | nor : regex -> regex -> regex
  .
(* We chose to include `nor`, since it can represent any possible boolean expression,
which is one of the selling points of Brzozowski's derivatives for regular expressions.
*)
(* complement r is boolean negation expressed with nor: nor r r. *)
Definition complement : regex -> regex :=
  fun r => nor r r.
(* and r s is boolean conjunction expressed with nor: the nor of the two negations
   (nor r r) and (nor s s).
   NOTE: this constant has the same short name as the standard library's logical
   conjunction. *)
Definition and : regex -> regex -> regex :=
  fun r s => nor (nor r r) (nor s s).
(* or r s is boolean disjunction expressed with nor: the negation of (nor r s),
   written as nor applied to two copies of it.
   NOTE: this constant has the same short name as the standard library's logical
   disjunction. *)
Definition or : regex -> regex -> regex :=
  fun r s => nor (nor r s) (nor r s).
(* xor r s is exclusive or: (r and not s) or (not r and s), built from the
   regex-level and, or and complement. *)
Definition xor : regex -> regex -> regex :=
  fun r s =>
    or (and r (complement s))
       (and (complement r) s).
(* I matches every string: it is the complement of the expression that matches none. *)
Definition I : regex :=
  complement emptyset.
(* A regular expression denotes a set of sequences. *)
(* seq: a sequence (string) of alphabet symbols. *)
Definition seq : Set := list alphabet.
(* seqs: a set of sequences, represented by its membership predicate. *)
Definition seqs := seq -> Prop.
(* in_set_of_sequences ss s holds when applying the predicate ss to s holds,
   i.e. when s is a member of the set ss. *)
Definition in_set_of_sequences : seqs -> seq -> Prop :=
  fun ss s => ss s.
(* p \in P stands for in_set_of_sequences P p; note that the set comes first in
   the underlying application. *)
Notation "p \in P" := (in_set_of_sequences P p) (at level 80).
(* *Concatenation*. $(P.Q) = \{ s | s = p.q; p \in P, q \in Q \}$.

   concat_seqs P Q s holds when s can be split as p ++ q with p in P and q in Q.

   The split equation is a conjunct of the existential, not a premise of an
   implication. With an implication, any witnesses for which p ++ q differs
   from s (for example p = a1 :: s and q = []) satisfy the body vacuously, so
   every sequence would belong to the concatenation of any two sets.

   The constructor keeps the shape
     forall s, (exists p q, ...) -> concat_seqs P Q s.
   The membership conjuncts are parenthesised so that parsing does not depend
   on how the \in notation and /\ interact at level 80. *)
Inductive concat_seqs (P Q: seqs): seqs :=
  | mk_concat: forall (s: seq),
    (exists p q,
      p ++ q = s /\
      (p \in P) /\
      (q \in Q)
    ) ->
    concat_seqs P Q s
  .
(*
*Star*. $P^{*} = \cup_{0}^{\infty} P^n$ , where $P^2 = P.P$, etc.
and $P^0 = \lambda$, the set consisting of the sequence of zero length.
*)
(* star_seqs R is the set of sequences obtained by concatenating zero or more
   members of R. *)
Inductive star_seqs (R: seqs): seqs :=
  (* zero repetitions: the empty sequence is always a member *)
  | mk_star_zero (s: seq) :
      s = [] -> star_seqs R s
  (* at least one repetition: s belongs to R concatenated with star_seqs R *)
  | mk_star_more (s: seq) :
      s \in (concat_seqs R (star_seqs R)) -> star_seqs R s
  .
(*
*Boolean function*. We shall denote any Boolean function of $P$ and $Q$ by $f(P, Q)$.
Of course, all the laws of Boolean algebra apply.
`nor` is used to emulate `f`, since nor can be used to emulate all boolean functions.
*)
(* nor_seqs P Q is the set of sequences that belong neither to P nor to Q. *)
Inductive nor_seqs (P Q: seqs): seqs :=
  | mk_nor (s: seq) :
      ~(s \in P) /\ ~(s \in Q) -> nor_seqs P Q s
  .
(* Here we use a mix of Fixpoint and Inductive predicates to define the denotation of regular expressions.
This works, but it would be nicer to define it purely as an Inductive predicate.
*)
(* denote_regex r is the set of sequences denoted by r, computed by structural
   recursion on r. The base cases are given directly as predicates; concat, star
   and nor delegate to the inductive set operators concat_seqs, star_seqs and
   nor_seqs applied to the denotations of the sub-expressions. *)
Fixpoint denote_regex (r: regex): seqs :=
  match r with
  | emptyset   => fun _ : seq => False
  | lambda     => fun w : seq => w = []
  | symbol c   => fun w : seq => w = [c]
  | concat p q => concat_seqs (denote_regex p) (denote_regex q)
  | star p     => star_seqs (denote_regex p)
  | nor p q    => nor_seqs (denote_regex p) (denote_regex q)
  end.
(* Here we try to rewrite the denotation of a regex using a pure inductive predicate, but we get an error:
Non strictly positive occurrence of "ind_regex" in
"forall (s : seq) (P Q : regex),
s \in nor_seqs (ind_regex P) (ind_regex Q) -> ind_regex (nor P Q) s".
*)
(* Attempted purely inductive denotation: ind_regex r s is meant to hold when the
   sequence s belongs to the set denoted by r.
   This declaration is kept exactly as written because it is the one that is
   rejected: in ind_nor, ind_regex is passed as an argument to nor_seqs, whose
   constructor puts its parameters under a negation, so that occurrence is not
   strictly positive. *)
Inductive ind_regex: regex -> seqs :=
(* emptyset has no members: the premise False can never be supplied *)
| ind_emptyset (s: seq):
False ->
ind_regex emptyset s
(* lambda contains only the empty sequence *)
| ind_lambda (s: seq):
s = [] ->
ind_regex lambda s
(* symbol a contains only the one-element sequence [a] *)
| ind_symbol (s: seq) (a: alphabet):
s = [a] ->
ind_regex (symbol a) s
(* concat P Q: membership in the concatenation of the two denotations *)
| ind_concat (s: seq) (P Q: regex):
s \in (concat_seqs (ind_regex P) (ind_regex Q)) ->
ind_regex (concat P Q) s
(* star R: membership in the star of the denotation of R *)
| ind_star (s: seq) (R: regex):
s \in (star_seqs (ind_regex R)) ->
ind_regex (star R) s
(* nor P Q: this is the constructor named in the positivity error *)
| ind_nor (s: seq) (P Q: regex):
s \in (nor_seqs (ind_regex P) (ind_regex Q)) ->
ind_regex (nor P Q) s
.
(*
Here we try to define the denotation of a regex purely as a fixpoint, but we get an error:
Recursive definition of fix_regex is ill-formed.
In environment
fix_regex : regex -> seqs
r : regex
s : regex
xs : seq
x : alphabet
xs' : list alphabet
ys : list alphabet
zs : list alphabet
Recursive call to fix_regex has principal argument equal to "star s" instead of "s".
Recursive definition is:
"fun r : regex =>
match r with
| emptyset => fun _ : seq => False
| lambda => fun xs : seq => xs = []
| symbol y => fun xs : seq => xs = [y]
| concat s t => fun xs : seq => exists ys zs : list alphabet, xs = ys ++ zs /\ fix_regex s ys /\ fix_regex t zs
| star s =>
fun xs : seq =>
match xs with
| [] => True
| x :: xs' => exists ys zs : list alphabet, xs' = ys ++ zs /\ fix_regex s (x :: ys) /\ fix_regex (star s) zs
end
| nor _ _ => fun _ : seq => True
end".
*)
(* Attempted denotation written purely as a Fixpoint on r.
   This definition is kept exactly as written because it is the one that is
   rejected: in the star case the recursive call fix_regex (star s) zs is made
   on star s itself, which is the matched expression and not a structural
   subterm of it. *)
Fixpoint fix_regex (r: regex): seqs :=
match r with
(* no sequence belongs to emptyset *)
| emptyset => fun _ => False
(* only the empty sequence belongs to lambda *)
| lambda => fun xs => xs = []
(* only the one-element sequence [y] belongs to symbol y *)
| symbol y => fun xs => xs = [y]
(* xs splits into ys ++ zs with ys matched by s and zs matched by t *)
| concat s t => fun xs => exists ys zs, xs = ys ++ zs /\ fix_regex s ys /\ fix_regex t zs
| star s => fun xs =>
match xs with
(* the empty sequence always belongs to a star *)
| [] => True
(* a non-empty sequence: a non-empty prefix x::ys matched by s, then the rest zs
   matched by star s again; this second call is the one Coq rejects *)
| (x::xs') => exists ys zs, xs' = ys ++ zs /\ fix_regex s (x::ys) /\ fix_regex (star s) zs
end
(* remaining case (nor): placeholder that accepts every sequence *)
| _ => fun _ => True
end.