examples/NLP/pico_en_demo.rb in rley-0.5.14 vs examples/NLP/pico_en_demo.rb in rley-0.6.00
- old
+ new
@@ -1,45 +1,46 @@
require 'rley' # Load Rley library
+
########################################
-# Step 1. Define a grammar for a pico English-like language
+# Step 1. Create a facade object for the Rley library.
+# It provides a unified, higher-level interface to the library.
+engine = Rley::Engine.new
+
+########################################
+# Step 2. Define a grammar for a pico English-like language
# based on example from NLTK book (chapter 8 of the book).
-# Bird, Steven, Edward Loper and Ewan Klein: "Natural Language Processing
+# Bird, Steven, Edward Loper and Ewan Klein: "Natural Language Processing
# with Python"; 2009, O’Reilly Media Inc., ISBN 978-0596516499
-# It defines the syntax of a sentence in a mini English-like language
+# It defines the syntax of a sentence in a mini English-like language
# with a very simplified syntax and vocabulary
-
-# Instantiate a builder object that will build the grammar for us
-builder = Rley::Syntax::GrammarBuilder.new do
- # Next 2 lines we define the terminal symbols
+engine.build_grammar do
+ # In the next 2 lines, we define the terminal symbols
# (= word categories in the lexicon)
- add_terminals('Noun', 'Proper-Noun', 'Verb')
+ add_terminals('Noun', 'Proper-Noun', 'Verb')
add_terminals('Determiner', 'Preposition')
# Here we define the productions (= grammar rules)
rule 'S' => %w[NP VP]
rule 'NP' => 'Proper-Noun'
rule 'NP' => %w[Determiner Noun]
rule 'NP' => %w[Determiner Noun PP]
rule 'VP' => %w[Verb NP]
rule 'VP' => %w[Verb NP PP]
rule 'PP' => %w[Preposition NP]
-end
+end
-# And now, let's build the grammar...
-grammar = builder.grammar
-
########################################
-# Step 2. Creating a lexicon
+# Step 3. Creating a lexicon
# To simplify things, the lexicon is implemented as a Hash with pairs of the form:
# word => terminal symbol name
Lexicon = {
'man' => 'Noun',
'dog' => 'Noun',
'cat' => 'Noun',
'telescope' => 'Noun',
- 'park' => 'Noun',
+ 'park' => 'Noun',
'saw' => 'Verb',
'ate' => 'Verb',
'walked' => 'Verb',
'John' => 'Proper-Noun',
'Mary' => 'Proper-Noun',
@@ -53,56 +54,52 @@
'by' => 'Preposition',
'with' => 'Preposition'
}.freeze
########################################
-# Step 3. Creating a tokenizer
-# A tokenizer reads the input string and converts it into a sequence of tokens
-# Highly simplified tokenizer implementation.
-def tokenizer(aTextToParse, aGrammar)
+# Step 4. Create a tokenizer
+# A tokenizer reads the input string and converts it into a sequence of tokens.
+# Remark: Rley doesn't provide tokenizer functionality.
+# Highly simplified tokenizer implementation
+def tokenizer(aTextToParse)
tokens = aTextToParse.scan(/\S+/).map do |word|
term_name = Lexicon[word]
raise StandardError, "Word '#{word}' not found in lexicon" if term_name.nil?
- terminal = aGrammar.name2symbol[term_name]
- Rley::Lexical::Token.new(word, terminal)
+ Rley::Lexical::Token.new(word, term_name)
end
-
+
return tokens
end
-########################################
-# Step 4. Create a parser for that grammar
-# Easy with Rley...
-parser = Rley::Parser::GFGEarleyParser.new(grammar)
########################################
-# Step 5. Parsing the input
+# Step 5. Parse the input
input_to_parse = 'John saw Mary with a telescope'
# input_to_parse = 'the dog saw a man in the park' # This one is ambiguous
# Convert input text into a sequence of token objects...
-tokens = tokenizer(input_to_parse, grammar)
-result = parser.parse(tokens)
+tokens = tokenizer(input_to_parse)
+result = engine.parse(tokens)
puts "Parsing successful? #{result.success?}"
unless result.success?
puts result.failure_reason.message
exit(1)
end
########################################
# Step 6. Generate a parse tree from the parse result
-ptree = result.parse_tree
+ptree = engine.to_ptree(result)
# Let's create a parse tree visitor
-visitor = Rley::ParseTreeVisitor.new(ptree)
+visitor = engine.ptree_visitor(ptree)
# Let's create a formatter (i.e. visit event listener)
# renderer = Rley::Formatter::Debug.new($stdout)
# Let's create a formatter that will render the parse tree with characters
renderer = Rley::Formatter::Asciitree.new($stdout)
-# Let's create a formatter that will render the parse tree in labelled
+# Let's create a formatter that will render the parse tree in labelled
# bracket notation
# renderer = Rley::Formatter::BracketNotation.new($stdout)
# Subscribe the formatter to the visitor's events and launch the visit
renderer.render(visitor)