---------------------------------------------------------------------- -- Metalua samples: $Id$ -- -- Summary: Structural pattern matching for metalua ADT. -- ---------------------------------------------------------------------- -- -- Copyright (c) 2006-2007, Fabien Fleutot <metalua@gmail.com>. -- -- This software is released under the MIT Licence, see licence.txt -- for details. -- ------------------------------------------------------------------------------- -- -- This extension, borrowed from ML dialects, allows in a single operation to -- analyze the structure of nested ADT, and bind local variables to subtrees -- of the analyzed ADT before executing a block of statements chosen depending -- on the tested term's structure. -- -- The general form of a pattern matching statement is: -- -- match <tested_term> with -- | <pattern_1_1> | <pattern_1_2> | <pattern_1_3> -> <block_1> -- | <pattern_2> -> <block_2> -- | <pattern_3_1> | <pattern_3_2> if <some_condition> -> <block_3> -- end -- -- If one of the patterns <pattern_1_x> accurately describes the -- structure of <tested_term>, then <block_1> is executed (and no -- other block of the match statement is tested). If none of -- <pattern_1_x> patterns mathc <tested_term>, but <pattern_2> does, -- then <block_2> is evaluated before exiting. If no pattern matches, -- the whole <match> statemetn does nothing. -- -- When an additional condition, introduced by [if], is put after -- the patterns, this condition is evaluated if one of the patterns matches, -- and the case is considered successful only if the condition returns neither -- [nil] nor [false]. -- -- Terminology -- =========== -- -- The whole compound statement is called a match; -- Each schema is called a pattern; -- Each sequence (list of patterns, optional guard, statements block) -- is called a case. -- -- Patterns -- ======== -- Patterns can consist of: -- - numbers, booleans, strings: they only match terms equal to them -- - variables: they match everything, and bind it, i.e. the variable -- will be set to the corresponding tested value when the block will -- be executed (if the whole pattern and the guard match) -- - tables: a table matches if: -- * the tested term is a table; -- * all of the pattern's keys are strings or integer implicit indexes; -- * all of the pattern's values are valid patterns, except maybe the -- last value with implicit integer key, which can also be [...]; -- * every value in the tested term is matched by the corresponding -- sub-pattern; -- * There are as many integer-indexed values in the tested term as in -- the pattern, or there is a [...] at the end of the table pattern. -- -- Pattern examples -- ================ -- -- Pattern { 1, a } matches term { 1, 2 }, and binds [a] to [2]. -- It doesn't match term { 1, 2, 3 } (wrong number of parameters). -- -- Pattern { 1, a, ... } matches term { 1, 2 } as well as { 1, 2, 3 } -- (the trailing [...] suppresses the same-length condition) -- -- `Foo{ a, { bar = 2, b } } matches `Foo{ 1, { bar = 2, "THREE" } }, -- and binds [a] to [1], [b] to ["THREE"] (the syntax sugar for [tag] fields -- is available in patterns as well as in regular terms). -- -- Implementation hints -- ==================== -- -- The patterns are compiled into a series of [if not ... then goto ... end]. -- -- The [goto] statement isn't supported by Metalua syntax, essentially -- to avoid supporting sloppy coding and upsetting Mr. Dijkstra. However, -- it is extremely useful for metaprogramming, when hairy control paths have -- to be handled, as is the case for pattern matching. -- -- Some varying [on_success] and [on_failure] labels are used to remember -- where to jump when it is established that a patttern did (resp. didn't) match -- the tested_term. -- -- Realistic usages -- ================ -- -- Some samples use pattern matching: -- -- - An extremely basic one is in "match_test.lua"; -- - A non-trivial one, with real bits of CS inside, is in "lambda.lua"; -- - "alpha.lua" implements hygienic macros, and uses pattern matching -- to do this; -- - "srcdump.lua" converts AST back to source code, with some local -- optimizations, and does so by pattern matching -- - ------------------------------------------------------------------------------- -- TODO: -- -- - Optimize local vars: instead of declaring them at the beginning of -- the match statement, declare them at the beginning of each case. Will -- save some local var slots in the stack. -- -- - Optimize jumps: the bytecode generated often contains several -- [OP_JMP 1] in a row, which is quite silly. That might be due to the -- implementation of [goto], but something can also probably be done -- in pattern matching implementation. -- ---------------------------------------------------------------------- -- BUG: match x with x -> foo(x) end va planter, parce que x est capture -- par le 'local v1, x' initial. ---------------------------------------------------------------------- -- Convert a tested term and a list of (pattern, statement) pairs -- into a pattern-matching AST. ---------------------------------------------------------------------- local function match_builder (tested_term, cases) local local_vars = { } local var = |n| `Id{ "v" .. n } local on_failure -- current target upon pattern mismatch local literal_tag = { String=1, Number=1, Boolean=1 } ------------------------------------------------------------------- -- Accumulate statements in [code] ------------------------------------------------------------------- local code = `Do{ } local function acc (x) --printf ("%s", disp.ast (x)) table.insert (code, x) end local function acc_test (it) -- the test must fail for match to succeeed. acc (+{stat: if -{it} then -{`Goto{ on_failure }} end }) end local function acc_assign (lhs, rhs) local_vars[lhs[1]] = true acc (+{stat: (-{lhs}) = -{rhs} }) end ------------------------------------------------------------------- -- Set of variables bound in the current pattern, to find -- non-linear patterns. ------------------------------------------------------------------- local pattern_boundvars = { } local function handle_id (id, val) assert (id.tag=="Id") if id[1] == "_" then -- "_" is used as a dummy var -> no assignment, no == checking local_vars["_"] = true elseif pattern_boundvars[id[1]] then -- This var is already bound --> test for equality acc_test (+{ -{val} ~= -{id} }) else -- Free var --> bind it acc_assign (id, val) pattern_boundvars[id[1]] = true end end ------------------------------------------------------------------- -- Turn a pattern into a list of tests and assignments stored into -- [code]. [n] is the depth of the subpattern into the toplevel -- pattern; [pattern] is the AST of a pattern, or a subtree of that -- pattern when [n>0]. ------------------------------------------------------------------- local function pattern_builder (n, pattern) if n<=1 then pattern_boundvars = { } end local v = var(n) if literal_tag[pattern.tag] then acc_test (+{ -{v} ~= -{pattern} }) elseif "Id" == pattern.tag then handle_id (pattern, v) elseif "True" == pattern.tag then acc_test (+{ -{v} ~= true }) elseif "False" == pattern.tag then acc_test (+{ -{v} ~= false }) elseif "Table" == pattern.tag then local seen_dots, len = false, 0 acc_test (+{ type( -{v} ) ~= "table" } ) for i = 1, #pattern do local key, sub_pattern if pattern[i].tag=="Key" then -- Explicit key key, sub_pattern = unpack (pattern[i]) assert (literal_tag[key.tag], "Invalid key") else -- Implicit key len, key, sub_pattern = len+1, `Number{ len+1 }, pattern[i] end assert (not seen_dots, "Wrongly placed `...' ") if sub_pattern.tag == "Id" then -- Optimization: save a useless [ v(n+1)=v(n).key ] handle_id (sub_pattern, `Index{ v, key }) if sub_pattern[1] ~= "_" then acc_test (+{ -{sub_pattern} == nil }) end elseif sub_pattern.tag == "Dots" then -- Remember to suppress arity checking seen_dots = true else -- Business as usual: local w = var (n+1) acc_assign (w, `Index{ v, key }) --acc_test (+{ -{w} == nil }) -- it's not an Id--> non-nil-ness will be checked below pattern_builder (n+1, sub_pattern) end end if not seen_dots then -- Check arity acc_test (+{ #-{v} ~= -{`Number{len}} }) end else error ("Invalid pattern: "..table.tostring(pattern, "nohash")) end end acc_assign (var(1), tested_term) local end_of_match = mlp.gensym "_end_of_match" -- Foreach [{patterns, guard, block}]: for i = 1, #cases do local patterns, guard, block = unpack (cases[i]) local on_success = mlp.gensym "_on_success" -- 1 success target per case ----------------------------------------------------------- -- Foreach [pattern] in [patterns]: -- on failure go to next pattern if any, -- next case if no more pattern in current case. -- on success (i.e. no [goto on_failure]), go to after last pattern test, -- or just stay there if this is already the last pattern. -- if there ia a guard, test it before the block, [goto on_failure] -- if it fails ----------------------------------------------------------- for j = 1, #patterns do on_failure = mlp.gensym "_on_failure" -- 1 failure target per pattern pattern_builder (1, patterns[j]) if j<#patterns then acc (`Goto{on_success}) acc (`Label{on_failure}) end end acc (`Label{on_success}) if guard then acc_test (`Op{ `Not, guard}) end acc (block) if i<#cases then acc (`Goto{end_of_match}) end acc (`Label{on_failure}) end acc (`Label{end_of_match}) -- Insert local variables declaration local x = { } for k, _ in pairs (local_vars) do table.insert (x, `Id{ k }) end table.insert (code, 1, `Local{ x, { } }) return code end ---------------------------------------------------------------------- -- Sugar: add the syntactic extension that makes pattern matching -- pleasant to read and write. ---------------------------------------------------------------------- mlp.lexer:add{ "match", "with", "->" } mlp.block.terminators:add "|" mlp.stat:add{ name = "match statement", "match", mlp.expr, "with", gg.optkeyword "|", gg.list{ name = "match cases list", primary = gg.sequence{ name = "match case", gg.list{ name = "patterns", primary = mlp.expr, separators = "|", terminators = { "->", "if" } }, gg.onkeyword{ "if", mlp.expr, consume = true }, "->", mlp.block }, separators = "|", terminators = "end" }, "end", builder = |x| match_builder (x[1], x[3]) }