読者です 読者をやめる 読者になる 読者になる

にわとり小屋でのプログラミング ブログ

名古屋のCoqエンジニアです。日記は勘で書いてるところがあるので細かいとこが違うことがあります。

CoqでPrim言語のサブセットのパーサを実装した。

CSNagoyaコンパイラを作ろう部門での教科書「コンパイラ入門 C#で学ぶ理論と実践(冨沢 高明)」が自宅に届いたので、8章までの部分を復習するために実装してみた。
この本ではC#という言語で実装することを念頭に置かれていたが、Coqが好きなのでCoqで実装してみた。

作ったファイルは2つ

Syntax.v
Parse.v

Prim言語の例

MODULE HelloWorld;
  BEGIN
    WriteStr("Hello, world!")
  END HelloWorld.

パース例

$ coqc Syntax.v Parse.v
     = inl string
         (PG "HelloWorld" (Apply "WriteStr" (Str "Hello, world!"))
            "HelloWorld")
     : program + string

実装したPrim言語のサブセットのBNF定義

<program> ::= "MODULE" <ident> ";" "BEGIN" <statlist> "END" <ident> "."
<statlist> ::= ε | <statement> ";" <statlist>
<statement> ::= <ident>  "(" <literal> ")"
<literal> ::= <ident> | <number> | <string>
<ident> ::= <letter>  (<letter> | <digit>)*
<letter> ::= "a" | "b" | ... | "z" | "A" | "B" | ... | "Z"
<digit> ::= "0" | "1" | ... | "9"

ソースコード全体は以下

(* Syntax.v
 for PrimLanguage compiler
 Y. Imai wrote 2008 09 12 (CSNagoya study meeting)
*)
Require Import String.

Inductive reserved : Set :=
 | Module : reserved
 | Semicolon : reserved
 | Begin : reserved
 | End : reserved.

Definition ident := string.

Inductive literal : Set :=
 | Id : ident -> literal
 | Num : nat -> literal
 | Str : string -> literal.

Inductive statement : Set :=
 | Apply : ident -> literal -> statement.

Inductive program : Set :=
  PG : ident -> statement -> ident -> program.
(* Parse.v
 for PrimLanguage compiler
 Y. Imai wrote 2008 09 13 (CSNagoya study meeting)
*)

Require Import String.
Require Import Ascii.
Require Import Syntax.
Require Import List.

Open Scope char_scope.

Infix "::" := String (at level 60, right associativity).
Notation "[ c ]" := (String c EmptyString) (at level 60).
Notation "x || y" := (if x then true else if y then true else false)
  (at level 60, right associativity).

Fixpoint rev(s:string) := match s with
  | EmptyString => EmptyString
  | hd :: tl => append (rev tl) (String hd EmptyString)
  end.


Definition is_letter c := match c with
  |"a"|"b"|"c"|"d"|"e"|"f"|"g"|"h"|"i"|"j"|"k"|"l"|"m"
  |"n"|"o"|"p"|"q"|"r"|"s"|"t"|"u"|"v"|"w"|"x"|"y"|"z"
  |"A"|"B"|"C"|"D"|"E"|"F"|"G"|"H"|"I"|"J"|"K"|"L"|"M"
  |"N"|"O"|"P"|"Q"|"R"|"S"|"T"|"U"|"V"|"W"|"X"|"Y"|"Z" => true
  | _ => false end.

Definition is_digit c := match c with
  |"1"|"2"|"3"|"4"|"5"|"6"|"7"|"8"|"9"|"0" => true
  | _ => false end.

Definition is_space c := match c with
  | " " | "009" | "010" => true | _ => false end.

Fixpoint eat_space code := match code with
  | hd :: rest =>
     if is_space hd then eat_space rest
     else code
  | _ => code end.

Open Scope string_scope.
Fixpoint prefixm (r:reserved) pref s {struct pref} := match (pref, s) with
  | ("", _) => inl _ (r, s)
  | (p::ps, "") => inr _ ""
  | (p::ps, c::cs) => if ascii_dec p c then prefixm r ps cs
      else inr _ ""
  end.
Definition parse_reserved token s := match token with
  | Module => prefixm Module "MODULE" s
  | Semicolon => prefixm Semicolon ";" s
  | Begin => prefixm Begin "BEGIN" s
  | End => prefixm End "END" s
  end.

Fixpoint parse_ident s := match s with
  | hd :: rest => if is_letter hd then parse_ident_aux ([hd]) rest
      else inr _ ("not first letter: "++([hd]))
  | _ => inr _ ("Empty")
end
with parse_ident_aux store s {struct s} := match s with
  | hd :: rest =>
     if is_letter hd || is_digit hd then parse_ident_aux (hd::store) rest
     else inl _ (rev store, (hd::rest))
  | _ => inl _ (rev store, s)
  end.


Fixpoint parse_string s := match s with
  | """" :: rest => parse_string_aux "" rest
  | _ => inr _ "" end
with parse_string_aux store s {struct s} := match s with
  | """" :: rest => inl _ (rev store, rest)
  | c :: rest => parse_string_aux (c::store) rest
  | _ => inr _ ""
  end.

Definition parse_literal s :=
  match parse_ident s with
  | inl (id, rest) => inl _ (Id id, rest)
  | inr _ => match parse_string s with
     | inl (str, rest) => inl _ (Str str, rest)
     | _ => inr _ "" end
  end.

Definition parse_statement s :=
  match parse_ident s with
  | inl (id, rest) => match eat_space rest with
     | "("::rest => match parse_literal rest with
        | inl (lit, ")"::rest) => inl _ (Apply id lit, rest)
        | _ => inr _ "" end
     | _ => inr _ "" end
  | _ => inr _ ""
  end.

Fixpoint parse_program code :=
  match (parse_reserved Module) code with
  | inl (Module, rest) => match parse_ident (eat_space rest) with
     | inl (id, rest) => match (parse_reserved Semicolon) rest with
        | inl (Semicolon, rest) => match (parse_reserved Begin) (eat_space rest) with
           | inl (Begin, rest) => match parse_statement (eat_space rest) with
              | inl (stat, rest) => match (parse_reserved End) (eat_space rest) with
                 | inl (End, rest) => match parse_ident (eat_space rest) with
                    | inl (id', rest) =>
               inl _ (PG id stat id')
                    | _ => inr _ "ident end" end
                 | _ => inr _ ("end" ++ rest) end
              | _ => inr _ "statement" end
           | _ => inr _ ("begin" ++ rest) end
        | _ => inr _ "semicolon" end
     | _ => inr _ ("ident: """++rest++"""") end
  | _ => inr _ "module"
  end.

(** TEST **)
Eval compute in parse_program
"MODULE HelloWorld;
  BEGIN
    WriteStr(""Hello, world!"")
  END HelloWorld."
.