----------------------------------------------------------------------
-- Metalua:  $Id$
--
-- Summary: parser generator. Collection of higher order functors,
--   which allow to build and combine parsers. Relies on a lexer
--   that supports the same API as the one exposed in mll.lua.
--
----------------------------------------------------------------------
--
-- Copyright (c) 2006-2007, Fabien Fleutot <metalua@gmail.com>.
--
-- This software is released under the MIT Licence, see licence.txt
-- for details.
--
----------------------------------------------------------------------

--------------------------------------------------------------------------------
--
-- Exported API:
--
-- Parser generators:
-- * [gg.sequence()]
-- * [gg.multisequence()]
-- * [gg.expr()]
-- * [gg.list()]
-- * [gg.onkeyword()]
-- * [gg.optkeyword()]
--
-- Other functions: 
-- * [gg.parse_error()]
-- * [gg.make_parser()]
-- * [gg.is_parser()]
--
--------------------------------------------------------------------------------

module("gg", package.seeall)

-------------------------------------------------------------------------------
-- parser metatable, which maps __call to method parse, and adds some
-- error tracing boilerplate.
-------------------------------------------------------------------------------
local parser_metatable = { }
function parser_metatable.__call (parser, lx, ...)
   --printf ("Call parser %s/%s", parser.kind, parser.name or "?")
   if mlc.SHOW_METABUGS then 
      --return parser:parse (lx, ...) 
      local x = parser:parse (lx, ...) 
      --printf ("Result: %s", _G.table.tostring(x, "nohash", 60))
      return x
   else
      local char = lx:peek().char
      local status, ast = pcall (parser.parse, parser, lx, ...)      
      if status then return ast else
         local msg = ast
         if msg then print(msg) end
         printf(" - (%i) in parser %s", char, parser.name or parser.kind)
         error()
      end
   end
end

-------------------------------------------------------------------------------
-- Turn a table into a parser, mainly by setting the metatable.
-------------------------------------------------------------------------------
function make_parser(kind, p)
   p.kind = kind
   if not p.transformers then p.transformers = { } end
   function p.transformers:add (x)
      table.insert (self, x)
   end
   setmetatable (p, parser_metatable)
   return p
end

-------------------------------------------------------------------------------
-- Return true iff [x] is a parser.
-- If it's a gg-generated parser, reutrn the name of its kind.
-------------------------------------------------------------------------------
function is_parser (x)
   return type(x)=="function" or getmetatable(x)==parser_metatable and x.kind
end

-------------------------------------------------------------------------------
-- Parse a sequence, without applying builder nor transformers
-------------------------------------------------------------------------------
local function raw_parse_sequence (lx, p)
   local r = { }
   for i=1, #p do
      e=p[i]
      if type(e) == "string" then 
         if not lx:is_keyword (lx:next(), e) then
            parse_error (lx, "Keyword '%s' expected", e) end
      elseif is_parser (e) then
         table.insert (r, e (lx)) 
      else 
         gg.parse_error (lx,"Sequence `%s': element #%i is not a string "..
                         "nor a parser: %s", 
                         p.name, i, table.tostring(e))
      end
   end
   return r
end

-------------------------------------------------------------------------------
-- Parse a multisequence, without applying multisequence transformers.
-- The sequences are completely parsed.
-------------------------------------------------------------------------------
local function raw_parse_multisequence (lx, sequence_table, default)
   local seq_parser = sequence_table[lx:is_keyword(lx:peek())]
   if seq_parser  then return seq_parser (lx)
   elseif default then return default (lx)
   else return false end
end

-------------------------------------------------------------------------------
-- Applies all transformers listed in parser on ast.
-------------------------------------------------------------------------------
local function transform (ast, parser)
   if parser.transformers then
      for _, t in ipairs (parser.transformers) do ast = t(ast) or ast end
   end
   return ast
end

-------------------------------------------------------------------------------
-- Generate a tracable parsing error (not implemented yet)
-------------------------------------------------------------------------------
function parse_error(lx, fmt, ...)   
   local line = lx:peek().line or -1
   local char = lx:peek().char or -1
   local msg  = string.format("line %i, char %i: "..fmt, line, char, ...)   
   local src = lx.src
   if char>0 and src then
      local i, j = char, char
      while src:sub(i,i) ~= '\n' and i>=0    do i=i-1 end
      while src:sub(j,j) ~= '\n' and j<=#src do j=j+1 end      
      local srcline = src:sub (i+1, j-1)
      local idx  = string.rep (" ", char-i-1).."^"
      msg = printf("%s\n>>> %s\n>>> %s", msg, srcline, idx)
   end
   error(msg)
end
   
-------------------------------------------------------------------------------
--
-- Sequence parser generator
--
-------------------------------------------------------------------------------
-- Input fields:
--
-- * [builder]: how to build an AST out of sequence parts. let [x] be the list
--   of subparser results (keywords are simply omitted). [builder] can be:
--    - [nil], in which case the result of parsing is simply [x]
--    - a string, which is then put as a tag on [x]
--    - a function, which takes [x] as a parameter and returns an AST.
--
-- * [name]: the name of the parser. Used for debug messages
--
-- * [transformers]: a list of AST->AST functions, applied in order on ASTs
--   returned by the parser.
--
-- * Table-part entries corresponds to keywords (strings) and subparsers 
--   (function and callable objects).
--
-- After creation, the following fields are added:
-- * [parse] the parsing function lexer->AST
-- * [kind] == "sequence"
-- * [name] is set, if it wasn't in the input.
--
-------------------------------------------------------------------------------
function sequence (p)
   make_parser ("sequence", p)

   -------------------------------------------------------------------
   -- Parsing method
   -------------------------------------------------------------------
   function p:parse (lx)
      -- Raw parsing:
      local x = raw_parse_sequence (lx, self)
      
      -- Builder application:
      local builder, tb = self.builder, type (self.builder)
      if tb == "string" then x.tag = builder
      elseif tb == "function" or builder and builder.__call then x = builder(x)
      elseif builder == nil then -- nothing
      else error("Invalid builder of type "..tb.." in sequence") end
      return transform (x, self)
   end

   -------------------------------------------------------------------
   -- Construction
   -------------------------------------------------------------------
   -- Try to build a proper name
   if not p.name and type(p[1])=="string" then 
      p.name = p[1].." ..." 
      if type(p[#p])=="string" then p.name = p.name .. " " .. p[#p] end
   else
      p.name = ""
   end

   return p
end --</sequence>


-------------------------------------------------------------------------------
--
-- Multiple, keyword-driven, sequence parser generator
--
-------------------------------------------------------------------------------
-- in [p], useful fields are:
--
-- * [transformers]: as usual
--
-- * [name]: as usual
--
-- * Table-part entries must be sequence parsers, or tables which can
--   be turned into a sequence parser by [gg.sequence]. These
--   sequences must start with a keyword, and this initial keyword
--   must be different for each sequence.  The table-part entries will
--   be removed after [gg.multisequence] returns.
--
-- * [default]: the parser to run if the next keyword in the lexer is
--   none of the registered initial keywords. If there's no default
--   parser and no suitable initial keyword, the multisequence parser
--   simply returns [false].
--
-- After creation, the following fields are added:
--
-- * [parse] the parsing function lexer->AST
--
-- * [sequences] the table of sequences, indexed by initial keywords.
--
-- * [add] method takes a sequence parser or a config table for
--   [gg.sequence], and adds/replaces the corresponding sequence
--   parser. If the keyword was already used, the former sequence is
--   removed and a warning is issued.
--
-- * [get] method returns a sequence by its initial keyword
--
-- * [kind] == "multisequence"
--
-------------------------------------------------------------------------------
function multisequence (p)   
   make_parser ("multisequence", p)

   -------------------------------------------------------------------
   -- Add a sequence (might be just a config table for [gg.sequence])
   -------------------------------------------------------------------
   function p:add (s)
      -- compile if necessary:
      if not is_parser(s) then sequence(s) end
      if type(s[1]) ~= "string" then 
         error "Invalid sequence for multiseq"
      elseif self.sequences[s[1]] then 
         printf (" *** Warning: keyword %q overloaded in multisequence ***", s[1])
      end
      self.sequences[s[1]] = s
   end -- </multisequence.add>

   -------------------------------------------------------------------
   -- Get the sequence starting with this keyword. [kw :: string]
   -------------------------------------------------------------------
   function p:get (kw) return self.sequences[kw] end

   -------------------------------------------------------------------
   -- Get the sequence starting with this keyword. [kw :: string]
   -------------------------------------------------------------------
   function p:remove (kw) 
      local x = self.sequences[kw]
      self.sequences[kw] = nil 
      return x
   end

   -------------------------------------------------------------------
   -- Parsing method
   -------------------------------------------------------------------
   function p:parse (lx)
      local x = raw_parse_multisequence (lx, self.sequences, self.default)
      return transform (x, self) end

   -------------------------------------------------------------------
   -- Construction
   -------------------------------------------------------------------
   -- Register the sequences passed to the constructor. They're going
   -- from the array part of the parser to the hash part of field
   -- [sequence]
   p.sequences = { }
   for i=1, #p do p:add (p[i]); p[i] = nil end
   return p
end --</multisequence>


-------------------------------------------------------------------------------
--
-- Expression parser generator
--
-------------------------------------------------------------------------------
--
-- Expression configuration relies on three tables: [prefix], [infix]
-- and [suffix]. Moreover, the primary parser can be replaced by a
-- table: in this case the [primary] table will be passed to
-- [gg.multisequence] to create a parser.
--
-- Each of these tables is a modified multisequence parser: the
-- differences with respect to regular multisequence config tables are:
--
-- * the builder takes specific parameters:
--   - for [prefix], it takes the result of the prefix sequence parser,
--     and the prefixed expression
--   - for [infix], it takes the left-hand-side expression, the results 
--     of the infix sequence parser, and the right-hand-side expression.
--   - for [suffix], it takes the suffixed expression, and theresult 
--     of the suffix sequence parser.
--
-- * the default field is a list, with parameters:
--   - [parser] the raw parsing function
--   - [transformers], as usual
--   - [prec], the operator's precedence
--   - [assoc] for [infix] table, the operator's associativity, which
--     can be "left", "right" or "flat" (default to left)
--
-- In [p], useful fields are:
-- * [transformers]: as usual
-- * [name]: as usual
-- * [primary]: the atomic expression parser, or a multisequence config 
--   table (mandatory)
-- * [prefix]:  prefix  operators config table, see above.
-- * [infix]:   infix   operators config table, see above.
-- * [suffix]: suffix operators config table, see above.
--
-- After creation, these fields are added:
-- * [kind] == "expr"
-- * [parse] as usual
-- * each table is turned into a multisequence, and therefore has an 
--   [add] method
--
-------------------------------------------------------------------------------
function expr (p)
   make_parser ("expr", p)

   -------------------------------------------------------------------
   -- parser method.
   -- In addition to the lexer, it takes an optional precedence:
   -- it won't read expressions whose precedence is lower or equal
   -- to [prec].
   -------------------------------------------------------------------
   function p:parse (lx, prec)
      if not prec then prec = 0 end

      ------------------------------------------------------
      -- Extract the right parser and the corresponding
      -- options table, for (pre|in|post)fix operators.
      -- Options include prec, assoc, transformers.
      ------------------------------------------------------
      local function get_parser_info (tab)
         local p2 = tab:get (lx:is_keyword (lx:peek()))
         if p2 then -- keyword-based sequence found
            local function parser(lx) return raw_parse_sequence(lx, p2) end
            return parser, p2
         else -- Got to use the default parser
            local d = tab.default
            if d then return d.parse or d.parser, d
            else return false, false end
         end
      end

      ------------------------------------------------------
      -- Look for a prefix sequence. Multiple prefixes are
      -- handled through the recursive [p.parse] call.
      -- Notice the double-transform: one for the primary
      -- expr, and one for the one with the prefix op.
      ------------------------------------------------------
      local function handle_prefix ()
         local p2_func, p2 = get_parser_info (self.prefix)
         local op = p2_func and p2_func(lx)
         if op then -- Keyword-based sequence found
            local e = p2.builder (op, self:parse (lx, p2.prec))
            return transform (transform (e, p2), self)
         else -- No prefix found, get a primary expression
            return transform (self.primary (lx), self)
         end
      end --</expr.parse.handle_prefix>

      ------------------------------------------------------
      -- Look for an infix sequence+right-hand-side operand.
      -- Return the whole binary expression result,
      -- or false if no operator was found.
      ------------------------------------------------------
      local function handle_infix (e)
         local p2_func, p2 = get_parser_info (self.infix)
         if not p2 then return false end
   
         -----------------------------------------
         -- Handle flattening operators: gather all operands
         -- of the series in [list]; when a different operator 
         -- is found, stop, build from [list], [transform] and
         -- return.
         -----------------------------------------
         if (not p2.prec or p2.prec>prec) and p2.assoc=="flat" then
            local pflat, list = p2, { }
            repeat
               local op = p2_func(lx)
               if not op then break end
               table.insert (list, self:parse (lx, p2.prec))
               p2 = get_parser_info (self.infix)
            until p2 ~= pflat
            return transform (transform (p2.builder (list), p2), self)
 
         -----------------------------------------
         -- Handle regular infix operators: [e] the LHS is known,
         -- just gather the operator and [e2] the RHS.
         -----------------------------------------
         elseif not p2.prec or p2.prec>prec or 
                p2.prec==prec and p2.assoc=="right" then
            local op = p2_func(lx)
            if not op then return false end
            local e2 = self:parse (lx, p2.prec)
            return transform (transform (p2.builder (e, op, e2), p2), self)

         -----------------------------------------
         -- Check for non-associative operators, and complain if applicable. 
         -----------------------------------------
         elseif p2.assoc=="none" and p2.prec==prec then
            parser_error (lx, "non-associative operator!")

         -----------------------------------------
         -- No infix operator suitable at that precedence
         -----------------------------------------
         else return false end

      end --</expr.parse.handle_infix>

      ------------------------------------------------------
      -- Look for a suffix sequence.
      -- Return the result of suffix operator on [e],
      -- or false if no operator was found.
      ------------------------------------------------------
      local function handle_suffix (e)
         local p2_func, p2 = get_parser_info (self.suffix)
         if not p2 then return false end
         if not p2.prec or p2.prec>=prec then
            local op = p2_func(lx)
            if not op then return false end
            e = p2.builder (e, op)
            return transform (transform (e, p2), self)
         end
         return false
      end --</expr.parse.handle_suffix>

      ------------------------------------------------------
      -- Parser body: read suffix and (infix+operand) 
      -- extensions as long as we're able to fetch more at
      -- this precedence level.
      ------------------------------------------------------
      local e = handle_prefix()
      repeat
         local x = handle_suffix (e); e = x or e
         local y = handle_infix   (e); e = y or e
      until not (x or y)

      -- No transform: it already happened in operators handling
      return e
   end --</expr.parse>

   -------------------------------------------------------------------
   -- Construction
   -------------------------------------------------------------------
   if not p.primary then p.primary=p[1]; p[1]=nil end
   for _, t in ipairs{ "primary", "prefix", "infix", "suffix" } do
      if not p[t] then p[t] = { } end
      if not is_parser(p[t]) then multisequence(p[t]) end
   end
   function p:add(...) return self.primary:add(...) end
   return p
end --</expr>


-------------------------------------------------------------------------------
--
-- List parser generator
--
-------------------------------------------------------------------------------
-- In [p], the following fields can be provided in input:
--
-- * [builder]: takes list of subparser results, returns AST
-- * [transformers]: as usual
-- * [name]: as usual
--
-- * [terminators]: list of strings representing the keywords which
--   might mark the end of the list. When non-empty, the list is
--   allowed to be empty. A string is treated as a single-element
--   table, whose element is that string, e.g. ["do"] is the same as
--   [{"do"}].
--
-- * [separators]: list of strings representing the keywords which can
--   separate elements of the list. When non-empty, one of these
--   keyword has to be found between each element. Lack of a separator
--   indicates the end of the list. A string is treated as a
--   single-element table, whose element is that string, e.g. ["do"]
--   is the same as [{"do"}]. If [terminators] is empty/nil, then
--   [separators] has to be non-empty.
--
-- After creation, the following fields are added:
-- * [parse] the parsing function lexer->AST
-- * [kind] == "list"
--
-------------------------------------------------------------------------------
function list (p)
   make_parser ("list", p)

   -------------------------------------------------------------------
   -- Parsing method
   -------------------------------------------------------------------
   function p:parse (lx)

      ------------------------------------------------------
      -- Used to quickly check whether there's a terminator 
      -- or a separator immediately ahead
      ------------------------------------------------------
      local function peek_is_in (keywords) 
         return keywords and lx:is_keyword(lx:peek(), unpack(keywords)) end

      local x = { }

      -- if there's a terminator to start with, don't bother trying
      if not peek_is_in (self.terminators) then 
         repeat table.insert (x, self.primary (lx)) -- read one element
         until
            -- First reason to stop: There's a separator list specified,
            -- and next token isn't one. Otherwise, consume it with [lx:next()]
            self.separators and not(peek_is_in (self.separators) and lx:next()) or
            -- Other reason to stop: terminator token ahead
            peek_is_in (self.terminators) or
            -- Last reason: end of file reached
            lx:peek().tag=="Eof"
      end

      -- Apply the builder. It can be a string, or a callable value, 
      -- or simply nothing.
      local b = self.builder
      if b then
         if type(b)=="string" then x.tag = b -- b is a string, use it as a tag
         elseif type(b)=="function" then x=b(x)
         else
            local bmt = getmetatable(b)
            if bmt and bmt.__call then x=b(x) end
         end
      end
      return transform (x, self)
   end --</list.parse>

   -------------------------------------------------------------------
   -- Construction
   -------------------------------------------------------------------
   if not p.primary then p.primary = p[1]; p[1] = nil end
   if type(p.terminators) == "string" then p.terminators = { p.terminators }
   elseif p.terminators and #p.terminators == 0 then p.terminators = nil end
   if type(p.separators) == "string" then p.separators = { p.separators }
   elseif p.separators and #p.separators == 0 then p.separators = nil end

   return p
end --</list>


-------------------------------------------------------------------------------
--
-- Keyword-conditionned parser generator
--
-------------------------------------------------------------------------------
-- 
-- Only apply a parser if a given keyword is found. The result of
-- [gg.onkeyword] parser is the result of the subparser (modulo
-- [transformers] applications).
--
-- Input fields:
--
-- * [name]: as usual
--
-- * [transformers]: as usual
--
-- * [peek]: if non-nil, the conditionning keyword is left in the lexeme
--   stream instead of being consumed.
--
-- * [primary]: the subparser. 
--
-- * [keywords]: list of strings representing triggering keywords.
--
-- * Table-part entries can contain strings, and/or exactly one parser.
--   Strings are put in [keywords], and the parser is put in [primary].
--
-- After the call, the following fields will be set:
--   
-- * [parse] the parsing method
-- * [kind] == "onkeyword"
-- * [primary]
-- * [keywords]
--
-------------------------------------------------------------------------------
function onkeyword (p)
   make_parser ("onkeyword", p)

   -------------------------------------------------------------------
   -- Parsing method
   -------------------------------------------------------------------
   function p:parse(lx)
      if lx:is_keyword (lx:peek(), unpack(self.keywords)) then
         if not self.peek then lx:next() end
         return transform (self.primary(lx), p)
      else return false end
   end

   -------------------------------------------------------------------
   -- Construction
   -------------------------------------------------------------------
   if not p.keywords then p.keywords = { } end
   for _, x in ipairs(p) do
      if type(x)=="string" then table.insert (p.keywords, x)
      else assert (not p.primary and is_parser (x)); p.primary = x end
   end
   assert (p.primary)
   return p
end --</onkeyword>


-------------------------------------------------------------------------------
--
-- Optional keyword consummer pseudo-parser generator
--
-------------------------------------------------------------------------------
--
-- This doesn't return a real parser, just a function. That function parses
-- one of the keywords passed as parameters, and returns it. It returns 
-- [false] if no matching keyword is found.
--
-------------------------------------------------------------------------------
function optkeyword (...)
   local args = {...}
   if type (args[1]) == "table" then 
      assert (#args == 1)
      args = args[1]
   end
   for _, v in ipairs(args) do assert (type(v)=="string") end
   return function (lx)
      local x = lx:is_keyword (lx:peek(), unpack (args))
      if x then lx:next(); return x
      else return false end
   end
end