The document discusses using the Paco parsing library in Elixir. It shows how to define parsers for literals, sequences, repetitions, alternatives and more. It also demonstrates parsing expressions, references to elements with quantities, and converting terms to parsers via a Parsable protocol.
25. # It's common to have something non significant
# that follows or precedes something significant
!
# In module Paco.Parser...
!
# Builds a parser that runs `p` and then `right` as a sequence, with `right`
# wrapped in `skip` (presumably so that its match is left out of the
# result — confirm against `skip` in Paco.Parser).
parser followed_by(p, right),
as: sequence_of([p, skip(right)])
!
# Builds a parser that runs `left` and then `p` as a sequence, with `left`
# wrapped in `skip` (presumably so that its match is left out of the
# result — confirm against `skip` in Paco.Parser).
# The second parameter is named `left` so that it matches the name used in
# the body; it is still the second positional argument.
parser preceded_by(p, left),
  as: sequence_of([skip(left), p])
26. # An alternative and shorter version
!
# The significant part: one or more ASCII letters
what = while(ASCII.letter, at_least: 1)
!
# `what` must come after "Hello" plus a "," and may be followed by a "!";
# the expected outputs on this slide contain only what `what` matched
greetings = what
|> preceded_by(lit("Hello")
|> followed_by(lex(",")))
|> followed_by(maybe(lex("!")))
!
!
parse("Hello, BEAM!", greetings) |> IO.inspect
# {:ok, ["BEAM"]}
!
parse("Hello, BEAM", greetings) |> IO.inspect
# {:ok, ["BEAM"]}
27. # Parse a sequence of numbers separated by `+` or `-`
!
number = while(ASCII.digit, at_least: 1)
operator = one_of([lex("+"), lex("-")])
!
expression = number |> separated_by(operator)
!
parse("1", expression) |> IO.inspect
# {:ok, ["1"]}
!
parse("1 + 2", expression) |> IO.inspect
# {:ok, ["1", "2"]}
!
parse("1 + 2 - 3", expression) |> IO.inspect
# {:ok, ["1", "2", "3"]}
!
# Small problem... to compute the value we need the operators!
28. # Parse a sequence of numbers separated by `+` or `-`
!
number = while(ASCII.digit, at_least: 1)
operator = one_of([lex("+"), lex("-")])
!
expression = number |> separated_by(keep(operator))
!
parse("1", expression) |> IO.inspect
# {:ok, ["1"]}
!
parse("1 + 2", expression) |> IO.inspect
# {:ok, ["1", "+", "2"]}
!
parse("1 + 2 - 3", expression) |> IO.inspect
# {:ok, ["1", "+", "2", "-", "3"]}
!
# Ok, but we need numbers not strings
29. # In module Paco.Parser...
# Runs `p`; on success calls `f` with the parsed result and the whole success
# struct. A `%Failure{}` or `%Success{}` returned by `f` is passed through
# unchanged; any other value replaces the `result` of `p`'s success.
# A failure of `p` is returned as is.
parser bind(p, f) do
  fn state, _ ->
    case p.parse.(state, p) do
      %Failure{} = failed ->
        failed

      %Success{result: parsed} = matched ->
        case f.(parsed, matched) do
          %Failure{} = rejected -> rejected
          %Success{} = replaced -> replaced
          value -> %Success{matched | result: value}
        end
    end
  end
end
30. # Parse a sequence of numbers separated by `+` or `-`
!
number = while(ASCII.digit, at_least: 1)
|> bind(&String.to_integer/1)
!
operator = one_of([lex("+"), lex("-")])
!
expression = number |> separated_by(keep(operator))
!
parse("1 + 2 - 3", expression) |> IO.inspect
# {:ok, [1, "+", 2, "-", 3]}
!
# Missing only the last step... compute the result :-)
31. # Parse a sequence of numbers separated by `+` or `-`
!
# One or more ASCII digits, converted to an integer as soon as they are parsed
number = while(ASCII.digit, at_least: 1)
|> bind(&String.to_integer/1)
!
operator = one_of([lex("+"), lex("-")])
!
# Keep the operators in the parsed list, then hand the list to
# Paco.Transform.separated_by together with a function that combines two
# numbers according to the operator found between them
# (presumably applied left to right — confirm in Paco.Transform)
expression = number
|> separated_by(keep(operator))
|> bind(&Paco.Transform.separated_by(&1,
fn("+", n, m) -> n + m
("-", n, m) -> n - m
end))
!
!
parse("1 + 2 - 3", expression) |> IO.inspect
# {:ok, 0}
32. # Parse a{n}b{n}c{n} where n ∈ ℕ
!
# If you knew the `n` (ex. 3) it would be easy
!
p = sequence_of([while("a", 3), while("b", 3), while("c", 3)])
!
parse("aaabbbccc", p) |> IO.inspect
# {:ok, ["aaa", "bbb", "ccc"]}
!
# We need to be able to peek ahead and then create a parser
# with that knowledge
33. # In module Paco.Parser...
!
# Look-ahead: runs `p` on the current state and, when it succeeds, returns a
# success that carries `p`'s result but has `from`, `to` and `at` all set to
# the starting position and `tail` set to the original text.
# A failure of `p` is returned unchanged.
parser peek(box(p)) do
  fn %State{at: start, text: input} = state, _ ->
    case p.parse.(state, p) do
      %Failure{} = failed ->
        failed

      %Success{result: peeked} ->
        %Success{result: peeked, tail: input, from: start, to: start, at: start}
    end
  end
end
35. # Parse a{n}b{n}c{n} where n ∈ ℕ
!
p = peek(while("a"))
|> then(fn(a) ->
len = String.length(a)
sequence_of([while("a", len),
while("b", len),
while("c", len)])
end)
!
parse("aaabbbccc", p) |> IO.inspect
# {:ok, ["aaa", "bbb", "ccc"]}
!
parse("aaabbccc", p) |> IO.inspect
# {:error,
#  "expected exactly 3 characters in alphabet "b" at 1:4
#   but got "bbc""}
36. # An `element` is a word beginning with one uppercase letter
# followed by zero or more lowercase letters
element = sequence_of([while(ASCII.upper, 1),
while(ASCII.lower)])
!
# A `quantity` is a number greater than zero
# If the quantity is omitted assume the value of 1 as default
quantity = while(ASCII.digit, at_least: 1)
|> bind(&String.to_integer/1)
|> maybe(default: 1)
!
# A `reference` is an element optionally followed by a quantity
reference = sequence_of([element, quantity])
!
formula = repeat(reference, at_least: 1)
37. parse("H2O", formula) |> IO.inspect
# {:ok, [[["H", ""], 2], [["O", ""], 1]]}
!
# That's right but the output format sucks!
!
# What we really want is something like
# {:ok, [%{element: "H", quantity: 2},
#        %{element: "O", quantity: 1}]}
!
# Is that possible???
38. defprotocol Paco.Parsable do
@moduledoc """
A protocol that converts terms into Paco parsers
"""
# Terms without a dedicated implementation are dispatched to the `Any`
# implementation (not visible in this excerpt) instead of raising
# Protocol.UndefinedError at dispatch time.
@fallback_to_any true
@doc """
Returns a parser that parses `t` and keeps the shape of `t`
"""
@spec to_parser(t) :: Paco.Parser.t
def to_parser(t)
end
39. defimpl Paco.Parsable, for: BitString do
  import Paco.Parser

  # A binary (string) becomes a parser for that exact literal.
  def to_parser(text) when is_binary(text), do: lit(text)

  # Bitstrings that are not binaries are rejected as not parsable.
  def to_parser(other) do
    raise(Protocol.UndefinedError, protocol: @protocol, value: other)
  end
end
iex(1)> "aaa" |> parse(lit("aaa"))
{:ok, "aaa"}
iex(2)> "aaa" |> parse("aaa")
{:ok, "aaa"}
40. defimpl Paco.Parsable, for: List do
  import Paco.Parser

  # A list becomes a sequence of its elements.
  def to_parser(items), do: sequence_of(items)
end
iex(1)> "ab" |> parse(sequence_of([lit("a"), lit("b")]))
{:ok, ["a", "b"]}
iex(2)> "ab" |> parse(sequence_of(["a", "b"]))
{:ok, ["a", "b"]}
iex(3)> "ab" |> parse(["a", "b"])
{:ok, ["a", "b"]}
41. defimpl Paco.Parsable, for: Tuple do
  import Paco.Parser

  # A tuple is parsed as the sequence of its elements; the list of results
  # is then converted back into a tuple.
  def to_parser(tuple) do
    tuple
    |> Tuple.to_list()
    |> sequence_of()
    |> bind(&List.to_tuple/1)
  end
end
iex(1)> "ab" |> parse({"a", "b"})
{:ok, {"a", "b"}}
42. defimpl Paco.Parsable, for: Map do
  import Paco.Parser

  # A map is parsed as the sequence of its values; the results are then
  # zipped back with the keys to rebuild a map of the same shape.
  # `Map.keys/1` and `Map.values/1` enumerate the map in the same order, so
  # every result is paired with the key of the value that produced it.
  def to_parser(map) do
    keys = Map.keys(map)

    map
    |> Map.values()
    |> sequence_of()
    |> bind(fn results -> keys |> Enum.zip(results) |> Map.new() end)
  end
end
iex(1)> "ab" |> parse(%{first: "a", last: "b"})
{:ok, %{first: "a", last: "b"}}
45. element = [while(ASCII.upper, 1), while(ASCII.lower)]
|> join
!
# But a `quantity` is a number greater than zero!
quantity = while(ASCII.digit, at_least: 1)
|> bind(&String.to_integer/1)
|> maybe(default: 1)
!
reference = %{element: element, quantity: quantity}
!
formula = repeat(reference, at_least: 1)
!
parse("Na0", formula) |> IO.inspect
# {:ok, [%{element: "Na", quantity: 0}]}
!
# Ouch...
46. # ...
# A `quantity` is a number greater than zero
quantity = while(ASCII.digit, at_least: 1)
|> bind(&String.to_integer/1)
|> maybe(default: 1)
|> only_if(&(&1 > 0))
!
# ...
!
parse("Na0", formula) |> IO.inspect
# {:error, "0 is not acceptable at 1:3"}
47. # ...
# A `quantity` is a number greater than zero
!
# Custom failure message; the expected output below shows `%AT%` rendered as
# the position of the failure ("at 1:3")
error_message = "quantity must be greater than 0 %AT%"

# Predicate for `only_if`: returns a tuple of the check's outcome and the
# message to report
greater_than_zero = &{&1 > 0, error_message}

quantity = while(ASCII.digit, at_least: 1)
           |> bind(&String.to_integer/1)
           |> maybe(default: 1)
           |> only_if(greater_than_zero)

# ...

parse("Na0", formula) |> IO.inspect
# {:error, "quantity must be greater than 0 at 1:3"}
48. # Parse something like `(1, (2, 3))`
!
number = while(ASCII.digit, at_least: 1)
|> bind(&String.to_integer/1)
!
# We need to name something that is not yet defined,
# actually we need to name something in its definition
!
list = one_of([number, ???])
|> separated_by(ASCII.comma)
|> surrounded_by(ASCII.round_brackets)
49. # In module Paco.Parser...
!
# Lets a parser refer to itself: `f` receives the parser being defined
# (`this`) and returns the actual parser, which is boxed and then run on the
# current state.
parser recursive(f) do
  fn state, this ->
    unfolded = box(f.(this))
    unfolded.parse.(state, this)
  end
end
51. defmodule ListOfLists do
# Example grammar module: comma-separated items in round brackets, where an
# item is a number or another such list.
use Paco
alias Paco.ASCII
!
# One or more ASCII digits, converted to an integer
parser number do
while(ASCII.digit, at_least: 1)
|> bind(&String.to_integer/1)
end
!
# Refers to itself by name for nesting
# NOTE(review): presumably the `parser` macro makes this self-reference
# work — confirm in Paco.Macro.ParserModuleDefinition
parser list do
one_of([number, list])
|> separated_by(ASCII.comma)
|> surrounded_by(ASCII.round_brackets)
end
end
!
Paco.parse("1", ListOfLists.number) |> IO.inspect
# {:ok, 1}
52. # In module Paco...
!
# Code injected into a module by `use Paco`: imports the parser-definition
# macros and the parser combinators, registers the `:paco_parsers` attribute
# and installs Paco's before-compile hook.
defmacro __using__(_) do
quote do
import Paco.Macro.ParserModuleDefinition
import Paco.Parser
!
# Accumulating attribute: every write adds an entry instead of replacing
# the previous one; it is read back in `__before_compile__/1`
Module.register_attribute(__MODULE__,
:paco_parsers,
accumulate: true)
!
@before_compile Paco
end
end
53. # In module Paco...
!
# Before-compile hook for modules that `use Paco`.
#
# Reads the parsers accumulated in `:paco_parsers`, reverses the list
# (presumably to restore definition order, since accumulated attributes are
# read newest first) and lets `pick_root_parser_between` choose the root one.
# It then injects `parse/2` and `parse!/2` into the module; both delegate to
# `Paco.parse` / `Paco.parse!` with the root parser, and `opts` defaults to `[]`.
defmacro __before_compile__(env) do
  root_parser =
    env.module
    |> Module.get_attribute(:paco_parsers)
    |> Enum.reverse()
    |> pick_root_parser_between()

  quote do
    def parse(s, opts \\ []) do
      Paco.parse(s, apply(__MODULE__, unquote(root_parser), []), opts)
    end

    def parse!(s, opts \\ []) do
      Paco.parse!(s, apply(__MODULE__, unquote(root_parser), []), opts)
    end
  end
end
54. # Everything we saw until now works with streams of text!
!
["a", "b", "", "ab", "", "a", "", "", "b", "", ""]
|> Paco.Stream.parse(lit("ab"))
|> Enum.to_list
|> IO.inspect
# ["ab", "ab", "ab"]
!
[~s|{"foo|, ~s|": "bar"|, ~s|}[1, 2|, ~s|, 3]|]
|> Paco.Parser.JSON.stream
|> Enum.to_list
|> IO.inspect
# [%{"foo" => "bar"}, [1, 2, 3]]
55. parser lit(s) do
  # Matches the literal string `s` at the current position of the text.
  fn %State{at: from, text: text, stream: stream} = state, this ->
    case Paco.String.consume(text, s, from) do
      # 4-tuple: `s` was consumed; the result is the literal itself
      {tail, _, to, at} ->
        %Success{from: from, to: to, at: at, tail: tail, result: s}

      # Not enough text yet while a stream process is attached:
      # ask for more input and run this same parser again
      {:not_enough, _, _, _, _} when is_pid(stream) ->
        wait_for_more_and_continue(state, this)

      # Any other 5-tuple is a failure at the starting position
      # NOTE(review): `n` presumably counts what was matched before the
      # mismatch — confirm against Paco.String.consume
      {_, _, _, _, {n, _, _}} ->
        %Failure{at: from, tail: text, expected: s, rank: n+1}
    end
  end
end
!
56. defp wait_for_more_and_continue(state, this) do
  # Asks the stream process for more input, blocks until it answers, then
  # runs `this` parser again on the updated state.
  %State{text: text, stream: stream} = state
  send(stream, {self(), :more})

  receive do
    {:load, more_text} ->
      # Retry with the new text appended to what was already buffered
      this.parse.(%State{state | text: text <> more_text}, this)

    :halted ->
      # The stream is over, switching to a non stream mode
      # is the same as to tell the parser to behave knowing
      # that more input will never come
      this.parse.(%State{state | stream: nil}, this)
  end
end
57.
58. defmodule Paco.Parser.JSON do
alias Paco.ASCII
use Paco
!
# Top-level rule: an object or an array.
# NOTE(review): `root` presumably marks this as the parser used by the
# generated `parse/2` — confirm in Paco.Macro.ParserModuleDefinition
root parser all, do: one_of([object, array])
!
# An object: `string`/`value` pairs with a colon between the two parts,
# separated by commas, surrounded by curly brackets; the parsed pairs are
# handed to `to_map/1`.
parser object do
pair(string, value, separated_by: ASCII.colon)
|> separated_by(ASCII.comma)
|> surrounded_by(ASCII.curly_brackets)
|> bind(&to_map/1)
end
!
# An array: values separated by commas, surrounded by square brackets.
parser array do
value
|> separated_by(ASCII.comma)
|> surrounded_by(ASCII.square_brackets)
end
# ...
59. defmodule Paco.Parser.JSON do
# ...
!
# Any JSON value. `object` and `array` refer back to `value`, so the grammar
# is recursive.
# NOTE(review): alternatives are presumably tried in the order listed —
# confirm against `one_of` in Paco.Parser
parser value do
one_of([
string,
number,
object,
array,
literal_true,
literal_false,
literal_null])
end
!
# A JSON string: text between double quotes, where a backslash escapes the
# following character (so `\"` does not end the string). The delimiters are
# kept (`strip: false`) and the matched text is handed to
# `replace_escapes_in_string/1`.
parser string do
  between(ASCII.double_quotes, escaped_with: "\\", strip: false)
  |> bind(&replace_escapes_in_string/1)
end
# ...