# encoding: utf-8
require 'set'
module Piglet
# Evaluates a block of DSL calls against itself (via +instance_eval+) and
# collects the resulting top-level statements, which #to_pig_latin renders
# as a string of semicolon-terminated statements.
class Interpreter
  # Creates an interpreter with an empty statement list and, when a block
  # is given, immediately interprets it.
  def initialize(&block)
    @top_level_statements = []
    interpret(&block) if block_given?
  end

  # Evaluates the block (if any) in the context of this interpreter, so
  # that the protected DSL methods below can be called without a receiver.
  #
  # Returns +self+.
  def interpret(&block)
    instance_eval(&block) if block_given?
    self
  end

  # Renders every collected top-level statement as a string, one statement
  # per line, each terminated by a semicolon.
  #
  # A statement that responds to +relation+ with a non-nil value is
  # preceded by assignments for that relation and, recursively, for its
  # sources. Each relation is assigned at most once per call.
  #
  # An optional block is interpreted first. Returns an empty string when
  # there are no top-level statements.
  def to_pig_latin(&block)
    interpret(&block) if block_given?
    return '' if @top_level_statements.empty?
    handled_relations = Set.new
    statements = []
    @top_level_statements.each do |top_level_statement|
      if top_level_statement.respond_to?(:relation) && !top_level_statement.relation.nil?
        statements.concat(assignments(top_level_statement.relation, handled_relations))
      end
      statements << top_level_statement
    end
    statements.flatten.map { |s| s.to_s }.join(";\n") + ";\n"
  end

  # Returns a new alias of the form <code>relation_N</code>, where N is a
  # per-interpreter counter that starts at 1.
  def next_relation_alias
    @counter ||= 0
    @counter += 1
    "relation_#{@counter}"
  end

  protected

  # LOAD
  #
  #   load('some/path') # => LOAD 'some/path'
  #   load('some/path', :using => 'Xyz') # => LOAD 'some/path' USING Xyz
  #   load('some/path', :using => :pig_storage) # => LOAD 'some/path' USING PigStorage
  #   load('some/path', :schema => [:a, :b]) # => LOAD 'some/path' AS (a, b)
  #   load('some/path', :schema => %w(a b c d)) # => LOAD 'some/path' AS (a, b, c, d)
  #   load('some/path', :schema => [%w(a chararray), %w(b int)]) # => LOAD 'some/path' AS (a:chararray, b:int)
  #
  # Unlike the other statements, the result is returned to the caller and
  # not added to the list of top-level statements.
  #
  #--
  #
  # NOTE: the syntax load('path', :schema => {:a => :chararray, :b => :int})
  # would be nice, but the order of the keys can't be guaranteed in Ruby 1.8.
  def load(path, options={})
    Inout::Load.new(path, self, options)
  end

  # STORE
  #
  #   store(x, 'some/path') # => STORE x INTO 'some/path'
  #   store(x, 'some/path', :using => 'Xyz') # => STORE x INTO 'some/path' USING Xyz
  #   store(x, 'some/path', :using => :pig_storage) # => STORE x INTO 'some/path' USING PigStorage
  def store(relation, path, options={})
    @top_level_statements << Inout::Store.new(relation, path, options)
  end

  # DUMP
  #
  #   dump(x) # => DUMP x
  def dump(relation)
    @top_level_statements << Inout::Dump.new(relation)
  end

  # ILLUSTRATE
  #
  #   illustrate(x) # => ILLUSTRATE x
  def illustrate(relation)
    @top_level_statements << Inout::Illustrate.new(relation)
  end

  # DESCRIBE
  #
  #   describe(x) # => DESCRIBE x
  def describe(relation)
    @top_level_statements << Inout::Describe.new(relation)
  end

  # EXPLAIN
  #
  #   explain # => EXPLAIN
  #   explain(x) # => EXPLAIN(x)
  def explain(relation=nil)
    @top_level_statements << Inout::Explain.new(relation)
  end

  # REGISTER
  #
  #   register 'path/to/lib.jar' # => REGISTER path/to/lib.jar
  def register(path)
    @top_level_statements << Udf::Register.new(path)
  end

  # DEFINE
  #
  #   define('test', :function => 'com.example.Test') # => DEFINE test com.example.Test
  #   define('test', :command => 'test.rb') # => DEFINE test `test.rb`
  #   define('test', :command => 'test.rb', :input => :stdin) # => DEFINE test `test.rb` INPUT(stdin)
  #   define('test', :command => 'test.rb', :input => 'path/x') # => DEFINE test `test.rb` INPUT('path/x')
  #   define('test', :command => 'test.rb', :output => :stdout) # => DEFINE test `test.rb` OUTPUT(stdout)
  #   define('test', :command => 'test.rb', :ship => 'a/b/c') # => DEFINE test `test.rb` SHIP('a/b/c')
  #   define('test', :command => 'test.rb', :cache => ['x', 'y']) # => DEFINE test `test.rb` CACHE('x', 'y')
  #
  # The :input and :output options can take pretty
  # complicated definitions in addition to the examples above:
  #
  #   :input => {:from => :stdin, :using => :pig_storage} # => INPUT(stdin USING PigStorage)
  #   :output => {:to => :stdout, :using => 'MySerializer'} # => OUTPUT(stdout USING MySerializer)
  #   :output => [{:to => :stdout, :using => 'MySerializer'}, 'some/path'] # => OUTPUT(stdout USING MySerializer, 'some/path')
  #
  # Besides recording the statement, this adds a method named after the
  # alias to this interpreter instance (unless it already responds to that
  # name). Calling that method builds a Field::UdfExpression from the alias
  # and the given arguments.
  def define(ali4s, options=nil)
    @top_level_statements << Udf::Define.new(ali4s, options)
    unless respond_to?(ali4s)
      # Define the helper on this instance's singleton class only, so that
      # neither other interpreters nor the Interpreter class are affected.
      eigenclass = class << self; self; end
      eigenclass.send(:define_method, ali4s) do |*args|
        Field::UdfExpression.new(ali4s, *args)
      end
    end
  end

  # %declare
  #
  #   declare(:my_var, 'value') # => %declare my_var 'value'
  #   declare('quote', "He said 'hello!'") # => %declare quote 'He said \'hello!\''
  #   declare('cmd', 'uniq', :backticks => true) # => %declare cmd `uniq`
  def declare(name, value, options=nil)
    @top_level_statements << Param::Declare.new(name, value, options)
  end

  # %default
  #
  #   default(:my_var, 'value') # => %default my_var 'value'
  #   default('quote', "He said 'hello!'") # => %default quote 'He said \'hello!\''
  #   default('cmd', 'uniq', :backticks => true) # => %default cmd `uniq`
  def default(name, value, options=nil)
    @top_level_statements << Param::Default.new(name, value, options)
  end

  private

  # Returns a flat array of Assignment objects for +relation+ and,
  # recursively, its sources, with the sources' assignments first.
  #
  # Relations already present in +ignore_set+ yield an empty array; every
  # relation visited is added to +ignore_set+ so it is emitted only once.
  def assignments(relation, ignore_set)
    return [] if ignore_set.include?(relation)
    assignment = Assignment.new(relation)
    ignore_set << relation
    if relation.sources
      (relation.sources.map { |source| assignments(source, ignore_set) } + [assignment]).flatten
    else
      [assignment]
    end
  end
end
private
class Assignment # :nodoc:
  # The relation being assigned.
  attr_reader :target

  # Wraps +relation+ so that it can be rendered as an assignment statement.
  def initialize(relation)
    @target = relation
  end

  # Renders "<alias> = <relation>", using the relation's +alias+ and +to_s+.
  def to_s
    lhs = target.alias
    rhs = target.to_s
    "#{lhs} = #{rhs}"
  end
end
end