CoqでPrim言語のサブセットのパーサを実装した。
CSNagoyaのコンパイラを作ろう部門での教科書「コンパイラ入門 C#で学ぶ理論と実践(冨沢 高明)」が自宅に届いたので、8章までの部分を復習するために実装してみた。
この本ではC#という言語で実装することを念頭に置かれていたが、Coqが好きなのでCoqで実装してみた。
作ったファイルは2つ
Syntax.v Parse.v
Prim言語の例
MODULE HelloWorld; BEGIN WriteStr("Hello, world!") END HelloWorld.
パース例
$ coqc Syntax.v Parse.v = inl string (PG "HelloWorld" (Apply "WriteStr" (Str "Hello, world!")) "HelloWorld") : program + string
実装したPrim言語のサブセットのBNF定義
<program> ::= "MODULE" <ident> ";" "BEGIN" <statlist> "END" <ident> "." <statlist> ::= ε | <statement> ";" <statlist> <statement> ::= <ident> "(" <literal> ")" <literal> ::= <ident> | <number> | <string> <ident> ::= <letter> (<letter> | <digit>)* <letter> ::= "a" | "b" | ... | "z" | "A" | "B" | ... | "Z" <digit> ::= "0" | "1" | ... | "9"
ソースコード全体は以下
(* Syntax.v for PrimLanguage compiler Y. Imai wrote 2008 09 12 (CSNagoya study meeting) *) Require Import String. Inductive reserved : Set := | Module : reserved | Semicolon : reserved | Begin : reserved | End : reserved. Definition ident := string. Inductive literal : Set := | Id : ident -> literal | Num : nat -> literal | Str : string -> literal. Inductive statement : Set := | Apply : ident -> literal -> statement. Inductive program : Set := PG : ident -> statement -> ident -> program.
(* Parse.v for PrimLanguage compiler Y. Imai wrote 2008 09 13 (CSNagoya study meeting) *) Require Import String. Require Import Ascii. Require Import Syntax. Require Import List. Open Scope char_scope. Infix "::" := String (at level 60, right associativity). Notation "[ c ]" := (String c EmptyString) (at level 60). Notation "x || y" := (if x then true else if y then true else false) (at level 60, right associativity). Fixpoint rev(s:string) := match s with | EmptyString => EmptyString | hd :: tl => append (rev tl) (String hd EmptyString) end. Definition is_letter c := match c with |"a"|"b"|"c"|"d"|"e"|"f"|"g"|"h"|"i"|"j"|"k"|"l"|"m" |"n"|"o"|"p"|"q"|"r"|"s"|"t"|"u"|"v"|"w"|"x"|"y"|"z" |"A"|"B"|"C"|"D"|"E"|"F"|"G"|"H"|"I"|"J"|"K"|"L"|"M" |"N"|"O"|"P"|"Q"|"R"|"S"|"T"|"U"|"V"|"W"|"X"|"Y"|"Z" => true | _ => false end. Definition is_digit c := match c with |"1"|"2"|"3"|"4"|"5"|"6"|"7"|"8"|"9"|"0" => true | _ => false end. Definition is_space c := match c with | " " | "009" | "010" => true | _ => false end. Fixpoint eat_space code := match code with | hd :: rest => if is_space hd then eat_space rest else code | _ => code end. Open Scope string_scope. Fixpoint prefixm (r:reserved) pref s {struct pref} := match (pref, s) with | ("", _) => inl _ (r, s) | (p::ps, "") => inr _ "" | (p::ps, c::cs) => if ascii_dec p c then prefixm r ps cs else inr _ "" end. Definition parse_reserved token s := match token with | Module => prefixm Module "MODULE" s | Semicolon => prefixm Semicolon ";" s | Begin => prefixm Begin "BEGIN" s | End => prefixm End "END" s end. Fixpoint parse_ident s := match s with | hd :: rest => if is_letter hd then parse_ident_aux ([hd]) rest else inr _ ("not first letter: "++([hd])) | _ => inr _ ("Empty") end with parse_ident_aux store s {struct s} := match s with | hd :: rest => if is_letter hd || is_digit hd then parse_ident_aux (hd::store) rest else inl _ (rev store, (hd::rest)) | _ => inl _ (rev store, s) end. Fixpoint parse_string s := match s with | """" :: rest => parse_string_aux "" rest | _ => inr _ "" end with parse_string_aux store s {struct s} := match s with | """" :: rest => inl _ (rev store, rest) | c :: rest => parse_string_aux (c::store) rest | _ => inr _ "" end. Definition parse_literal s := match parse_ident s with | inl (id, rest) => inl _ (Id id, rest) | inr _ => match parse_string s with | inl (str, rest) => inl _ (Str str, rest) | _ => inr _ "" end end. Definition parse_statement s := match parse_ident s with | inl (id, rest) => match eat_space rest with | "("::rest => match parse_literal rest with | inl (lit, ")"::rest) => inl _ (Apply id lit, rest) | _ => inr _ "" end | _ => inr _ "" end | _ => inr _ "" end. Fixpoint parse_program code := match (parse_reserved Module) code with | inl (Module, rest) => match parse_ident (eat_space rest) with | inl (id, rest) => match (parse_reserved Semicolon) rest with | inl (Semicolon, rest) => match (parse_reserved Begin) (eat_space rest) with | inl (Begin, rest) => match parse_statement (eat_space rest) with | inl (stat, rest) => match (parse_reserved End) (eat_space rest) with | inl (End, rest) => match parse_ident (eat_space rest) with | inl (id', rest) => inl _ (PG id stat id') | _ => inr _ "ident end" end | _ => inr _ ("end" ++ rest) end | _ => inr _ "statement" end | _ => inr _ ("begin" ++ rest) end | _ => inr _ "semicolon" end | _ => inr _ ("ident: """++rest++"""") end | _ => inr _ "module" end. (** TEST **) Eval compute in parse_program "MODULE HelloWorld; BEGIN WriteStr(""Hello, world!"") END HelloWorld." .