CoqでPrim言語のサブセットのパーサを実装した。

CSNagoyaコンパイラを作ろう部門での教科書「コンパイラ入門 C#で学ぶ理論と実践(冨沢 高明)」が自宅に届いたので、8章までの部分を復習するために実装してみた。
この本ではC#という言語で実装することを念頭に置かれていたが、Coqが好きなのでCoqで実装してみた。

作ったファイルは2つ

Syntax.v
Parse.v

Prim言語の例

MODULE HelloWorld;
  BEGIN
    WriteStr("Hello, world!")
  END HelloWorld.

パース例

$ coqc Syntax.v Parse.v
     = inl string
         (PG "HelloWorld" (Apply "WriteStr" (Str "Hello, world!"))
            "HelloWorld")
     : program + string

実装したPrim言語のサブセットのBNF定義

<program> ::= "MODULE" <ident> ";" "BEGIN" <statlist> "END" <ident> "."
<statlist> ::= ε | <statement> ";" <statlist>
<statement> ::= <ident>  "(" <literal> ")"
<literal> ::= <ident> | <number> | <string>
<ident> ::= <letter>  (<letter> | <digit>)*
<letter> ::= "a" | "b" | ... | "z" | "A" | "B" | ... | "Z"
<digit> ::= "0" | "1" | ... | "9"

ソースコード全体は以下

(* Syntax.v
 for PrimLanguage compiler
 Y. Imai wrote 2008 09 12 (CSNagoya study meeting)
*)
Require Import String.

Inductive reserved : Set :=
 | Module : reserved
 | Semicolon : reserved
 | Begin : reserved
 | End : reserved.

Definition ident := string.

Inductive literal : Set :=
 | Id : ident -> literal
 | Num : nat -> literal
 | Str : string -> literal.

Inductive statement : Set :=
 | Apply : ident -> literal -> statement.

Inductive program : Set :=
  PG : ident -> statement -> ident -> program.
(* Parse.v
 for PrimLanguage compiler
 Y. Imai wrote 2008 09 13 (CSNagoya study meeting)
*)

Require Import String.
Require Import Ascii.
Require Import Syntax.
Require Import List.

Open Scope char_scope.

Infix "::" := String (at level 60, right associativity).
Notation "[ c ]" := (String c EmptyString) (at level 60).
Notation "x || y" := (if x then true else if y then true else false)
  (at level 60, right associativity).

Fixpoint rev(s:string) := match s with
  | EmptyString => EmptyString
  | hd :: tl => append (rev tl) (String hd EmptyString)
  end.


Definition is_letter c := match c with
  |"a"|"b"|"c"|"d"|"e"|"f"|"g"|"h"|"i"|"j"|"k"|"l"|"m"
  |"n"|"o"|"p"|"q"|"r"|"s"|"t"|"u"|"v"|"w"|"x"|"y"|"z"
  |"A"|"B"|"C"|"D"|"E"|"F"|"G"|"H"|"I"|"J"|"K"|"L"|"M"
  |"N"|"O"|"P"|"Q"|"R"|"S"|"T"|"U"|"V"|"W"|"X"|"Y"|"Z" => true
  | _ => false end.

Definition is_digit c := match c with
  |"1"|"2"|"3"|"4"|"5"|"6"|"7"|"8"|"9"|"0" => true
  | _ => false end.

Definition is_space c := match c with
  | " " | "009" | "010" => true | _ => false end.

Fixpoint eat_space code := match code with
  | hd :: rest =>
     if is_space hd then eat_space rest
     else code
  | _ => code end.

Open Scope string_scope.
Fixpoint prefixm (r:reserved) pref s {struct pref} := match (pref, s) with
  | ("", _) => inl _ (r, s)
  | (p::ps, "") => inr _ ""
  | (p::ps, c::cs) => if ascii_dec p c then prefixm r ps cs
      else inr _ ""
  end.
Definition parse_reserved token s := match token with
  | Module => prefixm Module "MODULE" s
  | Semicolon => prefixm Semicolon ";" s
  | Begin => prefixm Begin "BEGIN" s
  | End => prefixm End "END" s
  end.

Fixpoint parse_ident s := match s with
  | hd :: rest => if is_letter hd then parse_ident_aux ([hd]) rest
      else inr _ ("not first letter: "++([hd]))
  | _ => inr _ ("Empty")
end
with parse_ident_aux store s {struct s} := match s with
  | hd :: rest =>
     if is_letter hd || is_digit hd then parse_ident_aux (hd::store) rest
     else inl _ (rev store, (hd::rest))
  | _ => inl _ (rev store, s)
  end.


Fixpoint parse_string s := match s with
  | """" :: rest => parse_string_aux "" rest
  | _ => inr _ "" end
with parse_string_aux store s {struct s} := match s with
  | """" :: rest => inl _ (rev store, rest)
  | c :: rest => parse_string_aux (c::store) rest
  | _ => inr _ ""
  end.

Definition parse_literal s :=
  match parse_ident s with
  | inl (id, rest) => inl _ (Id id, rest)
  | inr _ => match parse_string s with
     | inl (str, rest) => inl _ (Str str, rest)
     | _ => inr _ "" end
  end.

Definition parse_statement s :=
  match parse_ident s with
  | inl (id, rest) => match eat_space rest with
     | "("::rest => match parse_literal rest with
        | inl (lit, ")"::rest) => inl _ (Apply id lit, rest)
        | _ => inr _ "" end
     | _ => inr _ "" end
  | _ => inr _ ""
  end.

Fixpoint parse_program code :=
  match (parse_reserved Module) code with
  | inl (Module, rest) => match parse_ident (eat_space rest) with
     | inl (id, rest) => match (parse_reserved Semicolon) rest with
        | inl (Semicolon, rest) => match (parse_reserved Begin) (eat_space rest) with
           | inl (Begin, rest) => match parse_statement (eat_space rest) with
              | inl (stat, rest) => match (parse_reserved End) (eat_space rest) with
                 | inl (End, rest) => match parse_ident (eat_space rest) with
                    | inl (id', rest) =>
               inl _ (PG id stat id')
                    | _ => inr _ "ident end" end
                 | _ => inr _ ("end" ++ rest) end
              | _ => inr _ "statement" end
           | _ => inr _ ("begin" ++ rest) end
        | _ => inr _ "semicolon" end
     | _ => inr _ ("ident: """++rest++"""") end
  | _ => inr _ "module"
  end.

(** TEST **)
Eval compute in parse_program
"MODULE HelloWorld;
  BEGIN
    WriteStr(""Hello, world!"")
  END HelloWorld."
.