require 'rley' # Load Rley library
require 'strscan'


########################################
# Step 1. Create the facade object of the Rley library.
# It provides a unified, higher-level interface to the library.
engine = Rley::Engine.new

########################################
# Step 2. Define a grammar for a pico English-like language,
# based on an example from the NLTK book (chapter 8):
# Bird, Steven, Edward Loper and Ewan Klein: "Natural Language Processing
# with Python"; 2009, O’Reilly Media Inc., ISBN 978-0596516499
# The grammar defines the syntax of a sentence in a mini English-like language
# with a very simplified syntax and vocabulary.
engine.build_grammar do
  # In the next 2 lines we declare the terminal symbols
  # (= word categories in the lexicon). These names are the same strings
  # that the Lexicon hash uses as values.
  add_terminals('Noun', 'Proper-Noun', 'Verb')
  add_terminals('Determiner', 'Preposition')

  # Here we define the productions (= grammar rules).
  # Each rule maps a non-terminal (left of =>) to a space-separated sequence
  # of symbols (right of =>). A non-terminal that appears in several rules
  # (NP, VP) has that many alternatives.
  # Names follow the usual linguistic abbreviations: S = sentence,
  # NP = noun phrase, VP = verb phrase, PP = prepositional phrase.
  # Note that a trailing PP is allowed both inside an NP and inside a VP;
  # this is what lets some sentences be analysed in more than one way.
  rule 'S' => 'NP VP'
  rule 'NP' => 'Proper-Noun'
  rule 'NP' => 'Determiner Noun'
  rule 'NP' => 'Determiner Noun PP'
  rule 'VP' => 'Verb NP'
  rule 'VP' => 'Verb NP PP'
  rule 'PP' => 'Preposition NP'
end

########################################
# Step 3. Creating a lexicon
# To simplify things, lexicon is implemented as a Hash with pairs of the form:
# word => terminal symbol name
# The vocabulary is written down per word category and then inverted, so that
# the resulting (frozen) Hash answers the question "which category is this word?".
Lexicon = {
  'Noun' => %w[man dog cat telescope park],
  'Verb' => %w[saw ate walked],
  'Proper-Noun' => %w[John Mary Bob],
  'Determiner' => %w[a an the my],
  'Preposition' => %w[in on by with]
}.each_with_object({}) do |(category, words), lexicon|
  words.each { |word| lexicon[word] = category }
end.freeze

# Value object locating a piece of text by its line and column numbers.
Position = Struct.new(:line, :column) do
  # @return [String] human-readable form, e.g. "line 1, column 6"
  def to_s
    "line #{line}, column #{column}"
  end
end

# A Rley token that additionally remembers where its lexeme was found.
class NLPToken < Rley::Lexical::Token
  # @return [Position] location handed over at construction time
  attr_reader :position

  # @param theLexeme [String] the word as it appears in the input
  # @param aTerminal [String] name of the terminal symbol (word category)
  # @param aPosition [Position] location of the lexeme in the input text
  def initialize(theLexeme, aTerminal, aPosition)
    # Lexeme and terminal are handled by the Rley base class;
    # only the position is kept here.
    super(theLexeme, aTerminal)
    @position = aPosition
  end
end

########################################
# Step 4. Create a tokenizer
# A tokenizer reads the input string and converts it into a sequence of tokens.
# Remark: Rley doesn't provide tokenizer functionality.
# Highly simplified tokenizer implementation
# Splits the input text into whitespace-separated words and converts each word
# into a token carrying its lexicon category and its location in the text.
#
# @param aTextToParse [String] the text to tokenize (may span several lines)
# @return [Array<NLPToken>] tokens in input order; empty when the text
#   contains no word
# @raise [StandardError] when a word is absent from Lexicon
#   (the lookup is case-sensitive)
def tokenizer(aTextToParse)
  scanner = StringScanner.new(aTextToParse)
  tokens = []
  line_num = 1
  line_start = 0 # Character offset at which the current line begins

  loop do
    blanks = scanner.scan(/\s+/)
    if blanks && blanks.include?("\n")
      line_num += blanks.count("\n")
      # Whitespace located after the last newline already belongs to the new line
      trailing = blanks.length - blanks.rindex("\n") - 1
      line_start = scanner.charpos - trailing
    end

    # charpos counts characters whereas pos counts bytes; using pos would
    # shift the column of every word following a multibyte character
    word_start = scanner.charpos
    word = scanner.scan(/\S+/)
    break unless word

    term_name = Lexicon[word]
    raise StandardError, "Word '#{word}' not found in lexicon" if term_name.nil?

    column = word_start - line_start + 1 # Lines and columns are 1-based
    tokens << NLPToken.new(word, term_name, Position.new(line_num, column))
  end

  tokens
end


########################################
# Step 5. Parse the input
# The sentence to analyse; enable the commented-out line to try another one
input_to_parse = 'John saw Mary with a telescope'
# input_to_parse = 'the dog saw a man in the park' # This one is ambiguous
# Turn the raw text into token objects, then hand them over to the parser
tokens = tokenizer(input_to_parse)
result = engine.parse(tokens)

puts "Parsing successful? #{result.success?}"
unless result.success?
  # Print the reason of the failure and stop with a non-zero exit status
  puts result.failure_reason.message
  exit(1)
end

########################################
# Step 6. Generating a parse tree from parse result
parse_tree = engine.to_ptree(result)

# The visitor walks the parse tree; a formatter listens to the visit events
tree_visitor = engine.ptree_visitor(parse_tree)

# Choose exactly one formatter (i.e. visit event listener):
# - Debug formatter
# formatter = Rley::Formatter::Debug.new($stdout)
# - labelled bracket notation
# formatter = Rley::Formatter::BracketNotation.new($stdout)
# - parse tree drawn with characters (the one in use)
formatter = Rley::Formatter::Asciitree.new($stdout)

# Subscribe the formatter to the visitor's events and launch the visit
formatter.render(tree_visitor)
# End of file