spec/piglet_spec.rb in piglet-0.2.5 vs spec/piglet_spec.rb in piglet-0.3.0
- old
+ new
@@ -241,65 +241,110 @@
end
end
# Specs for the single-statement FOREACH … GENERATE form.
# In this diff each removed ("-") line uses a block parameter |r|, and the paired
# added ("+") line uses a parameterless block with bare field names or self[...].
describe 'FOREACH … GENERATE' do
it 'outputs a FOREACH … GENERATE statement' do
- @interpreter.interpret { dump(load('in').foreach { |r| :a }) }
+ @interpreter.interpret { dump(load('in').foreach { :a }) }
@interpreter.to_pig_latin.should match(/FOREACH \w+ GENERATE a/)
end
it 'outputs a FOREACH … GENERATE statement with a list of fields' do
- @interpreter.interpret { dump(load('in').foreach { |r| [:a, :b, :c] }) }
+ @interpreter.interpret { dump(load('in').foreach { [:a, :b, :c] }) }
@interpreter.to_pig_latin.should match(/FOREACH \w+ GENERATE a, b, c/)
end
it 'outputs a FOREACH … GENERATE statement with fields resolved from the relation' do
- @interpreter.interpret { dump(load('in').foreach { |r| [r.a, r.b, r.c] }) }
+ @interpreter.interpret { dump(load('in').foreach { [a, b, c] }) }
@interpreter.to_pig_latin.should match(/FOREACH (\w+) GENERATE a, b, c/)
end
it 'outputs a FOREACH … GENERATE statement with fields resolved from the relation with positional syntax' do
- @interpreter.interpret { dump(load('in').foreach { |r| [r[0], r[1], r[2]] }) }
+ @interpreter.interpret { dump(load('in').foreach { [self[0], self[1], self[2]] }) }
# Positional access is expected to render as $0, $1, $2.
@interpreter.to_pig_latin.should match(/FOREACH (\w+) GENERATE \$0, \$1, \$2/)
end
it 'outputs a FOREACH … GENERATE statement with aggregate functions applied to the fields' do
- @interpreter.interpret { dump(load('in').foreach { |r| [r.a.max, r.b.min, r.c.avg] }) }
+ @interpreter.interpret { dump(load('in').foreach { [a.max, b.min, c.avg] }) }
@interpreter.to_pig_latin.should match(/FOREACH (\w+) GENERATE MAX\(a\), MIN\(b\), AVG\(c\)/)
end
it 'outputs a FOREACH … GENERATE statement with fields that access inner fields' do
- @interpreter.interpret { dump(load('in').foreach { |r| [r.a.b, r.b.c, r.c.d] }) }
+ @interpreter.interpret { dump(load('in').foreach { [a.b, b.c, c.d] }) }
# Dots are escaped so only a literal "a.b" projection matches, not any character.
@interpreter.to_pig_latin.should match(/FOREACH (\w+) GENERATE a\.b, b\.c, c\.d/)
end
it 'outputs a FOREACH … GENERATE statement that includes field aliasing' do
- @interpreter.interpret { dump(load('in').foreach { |r| [r.a.b.as(:c), r.a.b.as(:d)] }) }
+ @interpreter.interpret { dump(load('in').foreach { [a.b.as(:c), a.b.as(:d)] }) }
# Dots are escaped so only a literal "a.b" projection matches, not any character.
@interpreter.to_pig_latin.should match(/FOREACH (\w+) GENERATE a\.b AS c, a\.b AS d/)
end
end
+
+ describe 'FOREACH ... { ... GENERATE }' do
+ it 'outputs a FOREACH ... { ... GENERATE } statement for named fields' do
+ @interpreter.interpret { dump(load('in').nested_foreach { [a, b, c] }) }
+ @interpreter.to_pig_latin.should match(/FOREACH \w+ \{\s+(\w+) = a;\s+(\w+) = b;\s+(\w+) = c;\s+GENERATE \1, \2, \3;\s+\}/m)
+ end
+
+ it 'outputs a FOREACH ... { ... GENERATE } statement for positional fields' do
+ @interpreter.interpret { dump(load('in').nested_foreach { [self[0], self[1], self[2]] }) }
+ @interpreter.to_pig_latin.should match(/FOREACH \w+ \{\s+(\w+) = \$0\;\s+(\w+) = \$1\;\s+(\w+) = \$2\;\s+GENERATE \1, \2, \3\;\s+\}/m)
+ end
+
+ it 'outputs a FOREACH ... { ... GENERATE } statement with aggregate functions applied to fields' do
+ @interpreter.interpret { dump(load('in').nested_foreach { [a.max, b.min, c.avg] }) }
+ @interpreter.to_pig_latin.should match(/FOREACH \w+ \{\s+(\w+) = a;\s+(\w+) = MAX\(\1\);\s+(\w+) = b;\s+(\w+) = MIN\(\3\);\s+(\w+) = c;\s+(\w+) = AVG\(\5\);\s+GENERATE \2, \4, \6;\s+\}/m)
+ end
+
+ it 'outputs a FOREACH ... { ... GENERATE } statement with fields that access inner fields' do
+ @interpreter.interpret { dump(load('in').nested_foreach { [a.b, b.c]}) }
+ @interpreter.to_pig_latin.should match(/FOREACH \w+ \{\s+(\w+) = a;\s+(\w+) = \1.b;\s+(\w+) = b;\s+(\w+) = \3.c;\s+GENERATE \2, \4;\s+\}/m)
+ end
+
+ it 'outputs a FOREACH ... { ... GENERATE } statement with user defined functions' do
+ @interpreter.interpret do
+ define('my_udf', :function => 'com.example.My')
+ dump(load('in').nested_foreach { [my_udf(a, 3, "hello")] })
+ end
+ @interpreter.to_pig_latin.should match(/FOREACH \w+ \{\s+(\w+) = a;\s+(\w+) = my_udf\(\1, 3, 'hello'\);\s+GENERATE \2;\s+\}/)
+ end
+
+ it 'outputs a FOREACH ... { ... GENERATE } statement with bag methods' do
+ @interpreter.interpret { dump(load('in').nested_foreach { [self[1].distinct.sample(0.3).limit(5).order(:x).filter { x == 5 }] }) }
+ @interpreter.to_pig_latin.should match(/FOREACH \w+ \{\s+(\w+) = \$1;\s+(\w+) = DISTINCT \1;\s+(\w+) = SAMPLE \2 0.3;\s+(\w+) = LIMIT \3 5;\s+(\w+) = ORDER \4 BY x;\s+(\w+) = FILTER \5 BY x == 5;\s+GENERATE \6;\s+\}/m)
+ end
+
+ it 'outputs a FOREACH ... { ... GENERATE } statement with field aliasing' do
+ @interpreter.interpret { dump(load('in').nested_foreach { a = b.distinct; [a.as(:c)] }) }
+ @interpreter.to_pig_latin.should match(/FOREACH \w+ \{\s+(\w+) = b;\s+(\w+) = DISTINCT \1;\s+GENERATE \2 AS c;\s+\}/)
+ end
+
+ it 'outputs a FOREACH ... { ... GENERATE } statement with flatten' do
+ @interpreter.interpret { dump(load('in').nested_foreach { [a.flatten] }) }
+ @interpreter.to_pig_latin.should match(/FOREACH \w+ \{\s+(\w+) = a;\s+(\w+) = FLATTEN\(\1\);\s+GENERATE \2;\s+\}/m)
+ end
+ end
# Specs for FILTER output. The removed ("-") lines use a block parameter |r|;
# the added ("+") lines use a parameterless block with bare field names.
describe 'FILTER' do
it 'outputs a FILTER statement' do
- @interpreter.interpret { dump(load('in').filter { |r| r.a == 3 }) }
+ @interpreter.interpret { dump(load('in').filter { a == 3 }) }
@interpreter.to_pig_latin.should match(/FILTER \w+ BY a == 3/)
end
it 'outputs a FILTER statement with a complex test' do
- @interpreter.interpret { dump(load('in').filter { |r| (r.a > r.b).and(r.c.ne(3)) }) }
+ @interpreter.interpret { dump(load('in').filter { (a > b).and(c.ne(3)) }) }
# Both operands of AND are expected to be parenthesized, and ne(3) to render as "!= 3".
@interpreter.to_pig_latin.should match(/FILTER \w+ BY \(a > b\) AND \(c != 3\)/)
end
end
# Spec for SPLIT output: split returns one relation per condition, and both are dumped.
describe 'SPLIT' do
it 'outputs a SPLIT statement' do
@interpreter.interpret do
# NOTE(review): the field names change from `a` to `first`/`second` in the added
# line. Presumably this is because `a` and `b` are Ruby locals assigned on this
# very line, so a bare `a` inside a parameterless block would resolve to the
# local instead of a field — confirm against the 0.3.0 implementation.
- a, b = load('in').split { |r| [r.a >= 0, r.a < 0]}
+ a, b = load('in').split { [first >= 0, second < 0] }
dump(a)
dump(b)
end
- @interpreter.to_pig_latin.should match(/SPLIT \w+ INTO \w+ IF a >= 0, \w+ IF a < 0/)
+ @interpreter.to_pig_latin.should match(/SPLIT \w+ INTO \w+ IF first >= 0, \w+ IF second < 0/)
end
end
describe 'ORDER' do
it 'outputs an ORDER statement' do
@@ -515,11 +560,11 @@
# Checks that a defined UDF can be called inside FOREACH and aliased with AS.
# The expected pattern requires the single quotes embedded in the string argument
# to appear backslash-escaped in the output, and the positional field to render as $0.
it 'makes the defined UDF available as a method in the interpreter scope, so that it can be used in a FOREACH and it\'s result renamed using AS' do
output = @interpreter.to_pig_latin do
define('my_udf', :function => 'com.example.My')
a = load('in')
- b = a.foreach { |r| [my_udf('foo', 3, 'hello \'world\'', r[0]).as(:bar)]}
+ b = a.foreach { [my_udf('foo', 3, 'hello \'world\'', self[0]).as(:bar)]}
store(b, 'out')
end
output.should match(/FOREACH \w+ GENERATE my_udf\('foo', 3, 'hello \\'world\\'', \$0\) AS bar/)
end
end
@@ -566,39 +611,39 @@
end
# Specs for how compound field expressions are parenthesized in the output.
# The removed ("-") lines reach fields through a block parameter |r|; the added
# ("+") lines reach them through an explicit `self.` receiver instead.
context 'field expressions' do
it 'parenthesizes expressions with different operators' do
output = @interpreter.to_pig_latin do
- store(load('in').filter { |r| r.x.and(r.y.or(r.z)).and(r.w) }, 'out')
+ store(load('in').filter { self.x.and(self.y.or(self.z)).and(self.w) }, 'out')
end
# Only the nested OR is expected to be wrapped; the surrounding ANDs are not.
output.should include('x AND (y OR z) AND w')
end
it 'doesn\'t parenthesizes expressions with the same operator' do
output = @interpreter.to_pig_latin do
- store(load('in').filter { |r| r.x.and(r.y.and(r.z)).and(r.w) }, 'out')
+ store(load('in').filter { self.x.and(self.y.and(self.z)).and(self.w) }, 'out')
end
output.should include('x AND y AND z AND w')
end
it 'doesn\'t parenthesize function calls' do
output = @interpreter.to_pig_latin do
- store(load('in').foreach { |r| [r.x.max + r.y.min] }, 'out')
+ store(load('in').foreach { [self.x.max + self.y.min] }, 'out')
end
output.should include('MAX(x) + MIN(y)')
end
it 'doesn\'t parenthesize a suffix expression followed by an infix expression' do
output = @interpreter.to_pig_latin do
- store(load('in').foreach { |r| [r.x.null?.or(r.y)] }, 'out')
+ store(load('in').foreach { [self.x.null?.or(self.y)] }, 'out')
end
# null? is expected to render as the suffix form "is null".
output.should include('x is null OR y')
end
it 'parenthesizes a prefix expression followed by an infix expression' do
output = @interpreter.to_pig_latin do
- store(load('in').foreach { |r| [r.x.not.and(r.y)] }, 'out')
+ store(load('in').foreach { [self.x.not.and(self.y)] }, 'out')
end
output.should include('(NOT x) AND y')
end
end
@@ -613,19 +658,19 @@
[:impression, :int],
[:engagement, :int],
[:click_thru, :int]
])
%w(site size name).each do |dimension|
- result = sessions.group(:ad_id, dimension).foreach do |r|
+ result = sessions.group(:ad_id, dimension).foreach do
[
- r[0].ad_id.as(:ad_id),
+ self[0].ad_id.as(:ad_id),
literal(dimension).as(:dimension),
- r[0].field(dimension).as(:value),
- r[1].exposure.sum.as(:exposures),
- r[1].impression.sum.as(:impressions),
- r[1].engagement.sum.as(:engagements),
- r[1].click_thru.sum.as(:click_thrus)
+ self[0].field(dimension).as(:value),
+ self[1].exposure.sum.as(:exposures),
+ self[1].impression.sum.as(:impressions),
+ self[1].engagement.sum.as(:engagements),
+ self[1].click_thru.sum.as(:click_thrus)
]
end
store(result, "report_metrics-#{dimension}")
end
end
@@ -798,11 +843,11 @@
it 'knows the schema of a relation projection' do
schema = catch(:schema) do
@interpreter.interpret do
relation1 = load('in1', :schema => [[:a, :float], [:b, :int]])
- relation2 = relation1.foreach { |r| [r.a] }
+ relation2 = relation1.foreach { [a] }
throw :schema, relation2.schema
end
end
schema.field_names.should eql([:a])
schema.field_type(:a).should eql(:float)
@@ -810,11 +855,11 @@
it 'knows the schema of a relation projection containing a call to MAX' do
schema = catch(:schema) do
@interpreter.interpret do
relation1 = load('in1', :schema => [[:a, :float], [:b, :int]])
- relation2 = relation1.foreach { |r| [r.a.max] }
+ relation2 = relation1.foreach { [a.max] }
throw :schema, relation2.schema
end
end
schema.field_names.should eql([nil])
schema.field_type(0).should eql(:float)
@@ -822,11 +867,11 @@
it 'knows the schema of a relation projection containing a call to COUNT' do
schema = catch(:schema) do
@interpreter.interpret do
relation1 = load('in1', :schema => [[:a, :float], [:b, :int]])
- relation2 = relation1.foreach { |r| [r.a.count] }
+ relation2 = relation1.foreach { [a.count] }
throw :schema, relation2.schema
end
end
schema.field_names.should eql([nil])
schema.field_type(0).should eql(:long)
@@ -834,43 +879,43 @@
# Checks that a field renamed with as(:x) in a projection is named :x in the resulting schema.
it 'knows the schema of a relation projection containing a field rename' do
# throw/catch carries the schema out of the interpret block to the assertion below.
schema = catch(:schema) do
@interpreter.interpret do
relation1 = load('in1', :schema => [[:a, :float], [:b, :int]])
- relation2 = relation1.foreach { |r| [r.a.count.as(:x)] }
+ relation2 = relation1.foreach { [a.count.as(:x)] }
throw :schema, relation2.schema
end
end
schema.field_names.should eql([:x])
end
# Checks that a projected string literal is given the :chararray type in the schema.
it 'knows the schema of a relation projection containing a literal string' do
# throw/catch carries the schema out of the interpret block to the assertion below.
schema = catch(:schema) do
@interpreter.interpret do
relation1 = load('in1', :schema => [[:a, :float], [:b, :int]])
- relation2 = relation1.foreach { |r| [literal('blipp')] }
+ relation2 = relation1.foreach { [literal('blipp')] }
throw :schema, relation2.schema
end
end
schema.field_type(0).should eql(:chararray)
end
# Checks that a projected integer literal is given the :int type in the schema.
it 'knows the schema of a relation projection containing a literal integer' do
# throw/catch carries the schema out of the interpret block to the assertion below.
schema = catch(:schema) do
@interpreter.interpret do
relation1 = load('in1', :schema => [[:a, :float], [:b, :int]])
- relation2 = relation1.foreach { |r| [literal(4)] }
+ relation2 = relation1.foreach { [literal(4)] }
throw :schema, relation2.schema
end
end
schema.field_type(0).should eql(:int)
end
it 'knows the schema of a relation projection containing a literal float' do
schema = catch(:schema) do
@interpreter.interpret do
relation1 = load('in1', :schema => [[:a, :float], [:b, :int]])
- relation2 = relation1.foreach { |r| [literal(3.14)] }
+ relation2 = relation1.foreach { [literal(3.14)] }
throw :schema, relation2.schema
end
end
schema.field_type(0).should eql(:double)
end