lib/alf.rb in alf-0.9.3 vs lib/alf.rb in alf-0.10.0
- old
+ new
@@ -10,343 +10,42 @@
#
# Classy data-manipulation dressed in a DSL (+ commandline)
#
module Alf
-
+
#
+ # Encapsulates all types; each one is loaded from its own file under alf/types/
+ #
+ module Types
+ require 'alf/types/attr_name'
+ require 'alf/types/boolean'
+ require 'alf/types/heading'
+ require 'alf/types/ordering'
+ require 'alf/types/attr_list'
+ require 'alf/types/renaming'
+ require 'alf/types/tuple_expression'
+ require 'alf/types/tuple_predicate'
+ require 'alf/types/summarization'
+ require 'alf/types/tuple_computation'
+
+ # Re-export every constant defined on Types so far directly on Alf (Alf::X then aliases Alf::Types::X)
+ constants.each do |s|
+ Alf.const_set(s, const_get(s))
+ end
+ end
+
+ #
# Provides tooling methods that are used here and there in Alf.
#
module Tools
-
- #
- # Parses a string of commandline arguments and returns them as an array.
- # Words enclosed in single or double quotes are joined with one space into a single argument, quotes removed.
- # Example:
- #
- # parse_commandline_args("--text --size=10") # => ['--text', '--size=10']
- # NOTE(review): with an unterminated quote, `args.first` becomes nil in the inner while loop and a NoMethodError is raised.
- def parse_commandline_args(args)
- args = args.split(/\s+/)
- result = []
- until args.empty?
- if args.first[0,1] == '"'
- if args.first[-1,1] == '"'
- result << args.shift[1...-1]
- else
- block = [ args.shift[1..-1] ]
- while args.first[-1,1] != '"'
- block << args.shift
- end
- block << args.shift[0...-1]
- result << block.join(" ")
- end
- elsif args.first[0,1] == "'"
- if args.first[-1,1] == "'"
- result << args.shift[1...-1]
- else
- block = [ args.shift[1..-1] ]
- while args.first[-1,1] != "'"
- block << args.shift
- end
- block << args.shift[0...-1]
- result << block.join(" ")
- end
- else
- result << args.shift
- end
- end
- result
- end
+ require 'alf/tools/coerce'
+ require 'alf/tools/to_ruby_literal'
+ require 'alf/tools/to_lispy'
+ require 'alf/tools/tuple_handle'
+ require 'alf/tools/miscellaneous'
- # Helper to define methods with multiple signatures.
- # For each type in turn, shifts the first element out of `args` when it matches the type (===), or yields nil for that position.
- # Example:
- #
- # varargs([1, "hello"], [Integer, String]) # => [1, "hello"]
- # varargs(["hello"], [Integer, String]) # => [nil, "hello"]
- # Note that `args` is mutated: matched values are removed from it.
- def varargs(args, types)
- types.collect{|t| t===args.first ? args.shift : nil}
- end
-
- #
- # Attempts to require(who) in the friendliest possible way: the gem is activated first when a requirement `dep` is given, and a LoadError triggers one retry after loading rubygems.
- # Raises a RuntimeError suggesting 'gem install' when the retry fails as well.
- def friendly_require(who, dep = nil, retried = false)
- gem(who, dep) if dep && defined?(Gem)
- require who
- rescue LoadError => ex
- if retried
- raise "Unable to require #{who}, which is now needed\n"\
- "Try 'gem install #{who}'"
- else
- require 'rubygems' unless defined?(Gem)
- friendly_require(who, dep, true)
- end
- end
-
- # Returns the unqualified name of a ruby class or module, as a Symbol
- #
- # Example
- #
- # class_name(Alf::Tools) -> :Tools
- #
- def class_name(clazz)
- clazz.name.to_s =~ /([A-Za-z0-9_]+)$/
- $1.to_sym
- end
-
- #
- # Converts an unqualified (CamelCase) class or module name to a snake_case method name, as a String.
- #
- # Example
- #
- # ruby_case(:Alf) -> "alf"
- # ruby_case(:HelloWorld) -> "hello_world"
- # Assumes the name starts with an uppercase letter: the first character of the converted string is always dropped.
- def ruby_case(s)
- s.to_s.gsub(/[A-Z]/){|x| "_#{x.downcase}"}[1..-1]
- end
-
- #
- # Returns the first non-nil value among the arguments (nil when there is none)
- #
- # Example
- #
- # coalesce(nil, 1, "abc") -> 1
- #
- def coalesce(*args)
- args.find{|x| !x.nil?}
- end
-
- #
- # Iterates over enum and yields each element to the block.
- # The block must return a [key, value] pair; pairs are collected and returned
- # as a Hash (a later pair overrides an earlier one with the same key).
- #
- def tuple_collect(enum)
- tuple = {}
- enum.each do |elm|
- k, v = yield(elm)
- tuple[k] = v
- end
- tuple
- end
-
- #
- # Provides a handle, implementing a flyweight design pattern on tuples.
- #
- class TupleHandle
-
- # Creates a handle instance, with no tuple set yet
- def initialize
- @tuple = nil
- end
-
- #
- # Sets the next tuple to use and returns the handle itself.
- #
- # On first call, this method also installs one reader method per key
- # of the tuple on the handle (see build), as a side effect.
- #
- def set(tuple)
- build(tuple) if @tuple.nil?
- @tuple = tuple
- self
- end
-
- #
- # Compiles a tuple expression (Proc, nil, Hash, Array, String or Symbol) and
- # returns a callable that can be passed to evaluate later (a Proc is returned as is).
- # NOTE(review): String and Symbol expressions go through Kernel#eval; never pass untrusted input.
- def self.compile(expr)
- case expr
- when Proc
- expr
- when NilClass
- compile('true')
- when Hash
- if expr.empty?
- compile(nil)
- else
- compile expr.each_pair.collect{|k,v|
- "(self.#{k} == #{Myrrha.to_ruby_literal(v)})"
- }.join(" && ")
- end
- when Array
- compile(Hash[*expr])
- when String, Symbol
- eval("lambda{ #{expr} }")
- else
- raise ArgumentError, "Unable to compile #{expr} to a TupleHandle"
- end
- end
-
- #
- # Evaluates an expression on the current tuple. Expression
- # can be a lambda or a string (immediately compiled in the
- # latter case).
- #
- def evaluate(expr)
- if RUBY_VERSION < "1.9"
- instance_eval(&TupleHandle.compile(expr))
- else
- instance_exec(&TupleHandle.compile(expr))
- end
- end
-
- private
-
- #
- # Builds this handle with a tuple.
- #
- # This method should be called only once and installs
- # instance methods on the handle with keys of _tuple_.
- #
- def build(tuple)
- tuple.keys.each do |k|
- (class << self; self; end).send(:define_method, k) do
- @tuple[k]
- end
- end
- end
-
- end # class TupleHandle
-
- #
- # Defines a projection key: a list of attribute names used to project or split tuples, optionally in allbut mode
- #
- class ProjectionKey
- include Tools
-
- # Projection attributes (the names looked up in tuples by split)
- attr_accessor :attributes
-
- # Allbut projection? When true, split returns its two parts in swapped order
- attr_accessor :allbut
-
- def initialize(attributes, allbut = false)
- @attributes = attributes
- @allbut = allbut
- end
-
- def self.coerce(arg)
- case arg
- when Array
- ProjectionKey.new(arg, false)
- when OrderingKey
- ProjectionKey.new(arg.attributes, false)
- when ProjectionKey
- arg
- else
- raise ArgumentError, "Unable to coerce #{arg} to a projection key"
- end
- end
-
- def to_ordering_key
- OrderingKey.new attributes.collect{|arg|
- [arg, :asc]
- }
- end
-
- def project(tuple)
- split(tuple).first
- end
-
- def split(tuple)
- projection, rest = {}, tuple.dup
- attributes.each do |a|
- projection[a] = tuple[a]
- rest.delete(a)
- end
- @allbut ? [rest, projection] : [projection, rest]
- end
-
- end # class ProjectionKey
-
- #
- # Encapsulates tools for computing orders on tuples
- #
- class OrderingKey
-
- attr_reader :ordering
-
- def initialize(ordering = [])
- @ordering = ordering
- @sorter = nil
- end
-
- #
- # Coerces `arg` to an ordering key.
- #
- # Implemented coercions are:
- # * Array of symbols (all attributes in ascending order)
- # * Array of [Symbol, :asc|:desc] pairs (obvious semantics)
- # * ProjectionKey (all its attributes in ascending order)
- # * OrderingKey (self)
- #
- # @return [OrderingKey]
- # @raise [ArgumentError] when `arg` is not an Array, a ProjectionKey or an OrderingKey
- # NOTE(review): an Array mixing arrays and symbols matches neither inner branch, so nil is returned instead of raising.
- def self.coerce(arg)
- case arg
- when Array
- if arg.all?{|a| a.is_a?(Array)}
- OrderingKey.new(arg)
- elsif arg.all?{|a| a.is_a?(Symbol)}
- sliced = arg.each_slice(2)
- if sliced.all?{|a,o| [:asc,:desc].include?(o)}
- OrderingKey.new sliced.to_a
- else
- OrderingKey.new arg.collect{|a| [a, :asc]}
- end
- end
- when ProjectionKey
- arg.to_ordering_key
- when OrderingKey
- arg
- else
- raise ArgumentError, "Unable to coerce #{arg} to an ordering key"
- end
- end
-
- def attributes
- @ordering.collect{|arg| arg.first}
- end
-
- def order_by(attr, order = :asc)
- @ordering << [attr, order]
- @sorter = nil
- self
- end
-
- def order_of(attr)
- @ordering.find{|arg| arg.first == attr}.last
- end
-
- def compare(t1,t2)
- @ordering.each do |attr,order|
- x, y = t1[attr], t2[attr]
- comp = x.respond_to?(:<=>) ? (x <=> y) : (x.to_s <=> y.to_s)
- comp *= -1 if order == :desc
- return comp unless comp == 0
- end
- return 0
- end
-
- def sorter
- @sorter ||= lambda{|t1,t2| compare(t1, t2)}
- end
-
- def +(other)
- other = OrderingKey.coerce(other)
- OrderingKey.new(@ordering + other.ordering)
- end
-
- end # class OrderingKey
-
extend Tools
end # module Tools
#
# Encapsulates the interface with the outside world, providing base iterators
@@ -373,215 +72,15 @@
# detection and resolving of the --env=... option when alf is used in shell.
# See Environment.register, Environment.autodetect and Environment.recognizes?
# for details.
#
class Environment
-
- # Registered environments
- @@environments = []
-
- #
- # Register an environment class under a specific name.
- #
- # Registered class must implement a recognizes? method that takes an array
- # of arguments; it must return true if an environment instance can be built
- # using those arguments, false otherwise. Please be very specific in the
- # implementation for returning true. See also autodetect and recognizes?
- # A factory class method called `name`, delegating to clazz.new, is also installed on Environment.
- # @param [Symbol] name name of the environment kind
- # @param [Class] clazz class that implements the environment
- #
- def self.register(name, clazz)
- @@environments << [name, clazz]
- (class << self; self; end).
- send(:define_method, name) do |*args|
- clazz.new(*args)
- end
- end
-
- #
- # Auto-detect the environment to use for specific arguments.
- #
- # This method returns an instance of the first registered Environment class
- # that returns true to an invocation of recognizes?(args). It raises an
- # ArgumentError if no such class can be found.
- # When the only argument is already an Environment instance, it is returned as is.
- # @return [Environment] an environment instance
- # @raise [ArgumentError] when no registered class recognizes the arguments
- #
- def self.autodetect(*args)
- if (args.size == 1) && args.first.is_a?(Environment)
- return args.first
- else
- @@environments.each do |name,clazz|
- return clazz.new(*args) if clazz.recognizes?(args)
- end
- end
- raise ArgumentError, "Unable to auto-detect Environment with #{args.inspect}"
- end
-
- #
- # (see Environment.autodetect)
- #
- def self.coerce(*args)
- autodetect(*args)
- end
-
- #
- # Returns true if _args_ can be used for building an environment instance,
- # false otherwise.
- #
- # When returning true, an immediate invocation of new(*args) should
- # succeed. While runtime exceptions are admitted (no such database, for
- # example), argument errors should not occur (missing argument, wrong
- # typing, etc.).
- #
- # Please be specific in the implementation of this extension point, as
- # registered environments form a chain and each of them should have a
- # chance of being selected.
- #
- def self.recognizes?(args)
- false
- end
-
- #
- # Returns a dataset whose name is provided.
- #
- # This method resolves named datasets to tuple enumerables. When the
- # dataset exists, this method must return an Iterator, typically a
- # Reader instance. Otherwise, it must raise a NoSuchDatasetError.
- #
- # @param [Symbol] name the name of a dataset
- # @return [Iterator] an iterator, typically a Reader instance
- # @raise [NoSuchDatasetError] when the dataset does not exist
- # The empty definition below is undefined right away: it only carries this documentation, subclasses must implement the method.
- def dataset(name)
- end
- undef :dataset
-
- #
- # Branches this environment and puts some additional explicit
- # definitions.
- #
- # This method is provided for (with ...) expressions and should not
- # be overridden by subclasses.
- #
- # @param [Hash] defs a set of (name, Iterator) pairs.
- # @return [Environment] an Explicit environment with the new definitions set and self as child
- #
- def branch(defs)
- Explicit.new(defs, self)
- end
-
- #
- # Specialization of Environment that works with explicitly defined
- # datasources and allows branching and unbranching.
- #
- class Explicit < Environment
-
- #
- # Creates a new environment instance with initial definitions
- # and optional child environment.
- #
- def initialize(defs = {}, child = nil)
- @defs = defs
- @child = child
- end
-
- #
- # Unbranches this environment and returns its child (nil when there is none)
- #
- def unbranch
- @child
- end
-
- # (see Environment#dataset) Own definitions come first, then the child's. NOTE(review): a plain RuntimeError is raised here, not the NoSuchDatasetError announced by Environment#dataset.
- def dataset(name)
- if @defs.has_key?(name)
- @defs[name]
- elsif @child
- @child.dataset(name)
- else
- raise "No such dataset #{name}"
- end
- end
-
- end # class Explicit
-
- #
- # Specialization of Environment to work on files of a given folder.
- #
- # This kind of environment resolves datasets by simply looking at
- # recognized files in a specific folder. "Recognized" files are simply
- # those for which a Reader subclass has been previously registered.
- # This environment then serves reader instances.
- #
- class Folder < Environment
-
- #
- # (see Environment.recognizes?)
- #
- # Returns true if args contains only a String which is an existing
- # folder.
- #
- def self.recognizes?(args)
- (args.size == 1) &&
- args.first.is_a?(String) &&
- File.directory?(args.first.to_s)
- end
-
- #
- # Creates an environment instance, wired to the specified folder.
- #
- # @param [String] folder path to the folder to use as dataset source
- #
- def initialize(folder)
- @folder = folder
- end
-
- # (see Environment#dataset) NOTE(review): a plain RuntimeError is raised when no file is found, not a NoSuchDatasetError.
- def dataset(name)
- if file = find_file(name)
- Reader.reader(file, self)
- else
- raise "No such dataset #{name} (#{@folder})"
- end
- end
-
- protected
-
- def find_file(name)
- # TODO: refactor this, because it allows getting out of the folder (name is first tried as a path on its own). NOTE(review): File.exists? was removed in Ruby 3.2; File.exist? is the supported spelling.
- if File.exists?(name.to_s)
- name.to_s
- elsif File.exists?(explicit = File.join(@folder, name.to_s)) &&
- File.file?(explicit)
- explicit
- else
- Dir[File.join(@folder, "#{name}.*")].find do |f|
- File.file?(f)
- end
- end
- end
-
- Environment.register(:folder, self)
- end # class Folder
-
- #
- # Returns the default environment, which is the examples one (see examples)
- #
- def self.default
- examples
- end
-
- #
- # Returns the examples environment: a folder environment on examples/operators, resolved relative to this file
- #
- def self.examples
- folder File.expand_path('../../examples/operators', __FILE__)
- end
-
+ require 'alf/environment/class_methods'
+ require 'alf/environment/base'
+ require 'alf/environment/explicit'
+ require 'alf/environment/folder'
+
end # class Environment
#
# Marker module for all elements implementing tuple iterators.
#
@@ -598,52 +97,13 @@
# readers.
#
module Iterator
include Enumerable
- #
- # Wire the iterator input and an optional execution environment.
- #
- # Iterators (typically Reader and Operator instances) work from input data
- # that come from files, or other operators, and so on. This method wires
- # this input data to the iterator. Wiring is required before any attempt
- # to call each, unless autowiring occurs at construction. The exact kind of
- # input object is left at discretion of Iterator implementations.
- #
- # @param [Object] input the iterator input, at discretion of the Iterator
- # implementation.
- # @param [Environment] environment an optional environment for resolving
- # named datasets if needed.
- # @return [Object] self
- # The definition below is undefined right away: it only carries this documentation, implementations must provide the method.
- def pipe(input, environment = nil)
- self
- end
- undef :pipe
-
- #
- # Coerces arg to an iterator: Iterator and Array instances are returned as is, anything else is delegated to Reader.coerce
- #
- def self.coerce(arg, environment = nil)
- case arg
- when Iterator, Array
- arg
- else
- Reader.coerce(arg, environment)
- end
- end
-
- #
- # Converts this iterator to an in-memory Relation.
- #
- # @return [Relation] a relation instance, as the set of tuples
- # that would be yielded by this iterator.
- #
- def to_rel
- Relation::coerce(self)
- end
-
+ require 'alf/iterator/class_methods'
+ require 'alf/iterator/base'
+ require 'alf/iterator/proxy'
end # module Iterator
#
# Implements an Iterator at the interface with the outside world.
#
@@ -671,245 +131,16 @@
# # itself. This factory method can be used with a String, or an IO object.
# r = Reader.foo([a path or a IO object])
#
class Reader
include Iterator
-
- # Registered readers
- @@readers = []
-
- #
- # Registers a reader class associated with specific file extensions
- #
- # Registered class must provide a constructor with the following signature
- # <code>new(path_or_io, environment = nil)</code>. The name must be a symbol
- # which can safely be used as a ruby method name. A factory class method of
- # that name and same signature is automatically installed on the Reader
- # class.
- #
- # @param [Symbol] name a name for the kind of data decoded
- # @param [Array] extensions file extensions mapped to the registered reader
- # class (should include the '.', e.g. '.foo')
- # @param [Class] clazz Reader subclass used to decode this kind of files
- #
- def self.register(name, extensions, clazz)
- @@readers << [name, extensions, clazz]
- (class << self; self; end).
- send(:define_method, name) do |*args|
- clazz.new(*args)
- end
- end
-
- #
- # When filepath is a String, returns a reader instance for a specific file
- # whose path is given as argument. Otherwise, delegate the call to
- # <code>coerce(filepath)</code>
- #
- # @param [String] filepath path to a file for which extension is recognized
- # @param [Array] args optional additional arguments that must be passed at
- # reader's class new method.
- # @return [Reader] a reader instance
- #
- def self.reader(filepath, *args)
- if filepath.is_a?(String)
- ext = File.extname(filepath)
- if registered = @@readers.find{|r| r[1].include?(ext)}
- registered[2].new(filepath, *args)
- else
- raise "No registered reader for #{ext} (#{filepath})"
- end
- elsif args.empty?
- coerce(filepath)
- else
- raise ArgumentError, "Unable to return a reader for #{filepath} and #{args}"
- end
- end
-
- #
- # Coerces an argument to a reader, using an optional environment to convert
- # named datasets.
- #
- # This method automatically provides readers for Strings and Symbols through
- # passed environment (**not** through the reader factory) and for IO objects
- # (through Rash reader). It is part of Alf's internals and should be used
- # with care.
- #
- def self.coerce(arg, environment = nil)
- case arg
- when Reader
- arg
- when IO
- rash(arg, environment)
- when String, Symbol
- if environment
- environment.dataset(arg.to_sym)
- else
- raise "No environment set"
- end
- else
- raise ArgumentError, "Unable to coerce #{arg.inspect} to a reader"
- end
- end
-
- # Default reader options
- DEFAULT_OPTIONS = {}
-
- # @return [Environment] Wired environment
- attr_accessor :environment
-
- # @return [String or IO] Input IO, or file name
- attr_accessor :input
-
- # @return [Hash] Reader's options
- attr_accessor :options
-
- #
- # Creates a reader instance.
- #
- # @param [String or IO] input path to a file or IO object for input
- # @param [Environment] environment wired environment, serving this reader
- # @param [Hash] options Reader's options (see doc of subclasses)
- # Each argument may be omitted, but those given must come in that order (see Tools.varargs); options are merged over the class's DEFAULT_OPTIONS.
- def initialize(*args)
- @input, @environment, @options = case args.first
- when String, IO, StringIO
- Tools.varargs(args, [args.first.class, Environment, Hash])
- else
- Tools.varargs(args, [String, Environment, Hash])
- end
- @options = self.class.const_get(:DEFAULT_OPTIONS).merge(@options || {})
- end
-
- #
- # (see Iterator#pipe) Only the input is rewired here: the env argument is accepted but not stored.
- #
- def pipe(input, env = environment)
- @input = input
- self
- end
-
- #
- # (see Iterator#each)
- #
- # @private the default implementation reads lines of the input stream and
- # yields the block with <code>line2tuple(line)</code> on each of them (nil results are
- # skipped). This method may be overridden if this behavior does not fit reader's needs.
- #
- def each
- each_input_line do |line|
- tuple = line2tuple(line)
- yield tuple unless tuple.nil?
- end
- end
-
- protected
-
- #
- # Returns the input file path, or nil if this Reader is bound to an IO
- # directly.
- #
- def input_path
- input.is_a?(String) ? input : nil
- end
- #
- # Coerces the input object to an IO and yields the block with it.
- #
- # StringIO and IO inputs are yielded directly while file paths are first
- # opened in read mode and then yielded (the file is closed when the block returns).
- #
- def with_input_io
- case input
- when IO, StringIO
- yield input
- when String
- File.open(input, 'r'){|io| yield io}
- else
- raise "Unable to convert #{input} to an IO object"
- end
- end
-
- #
- # Returns the whole input text.
- #
- # This feature should only be used by subclasses on inputs that are
- # small enough to fit in memory. Consider implementing readers without this
- # feature on files that could be larger.
- #
- def input_text
- with_input_io{|io| io.readlines.join}
- end
-
- #
- # Yields the block with each line of the input text in turn.
- #
- # This method is a helper for files that capture one tuple on each input
- # line. It should be used in those cases, as the resulting reader will not
- # load all input in memory but serve tuples on demand.
- # NOTE(review): Proc.new without a block captures the caller's block; that form is no longer supported since Ruby 3.0.
- def each_input_line
- with_input_io{|io| io.each_line(&Proc.new)}
- end
-
- #
- # Converts a line previously read from the input stream to a tuple.
- #
- # The line is simply ignored if this method returns nil. Errors should be
- # properly handled by raising exceptions. This method MUST be implemented
- # by subclasses unless each is overridden (the empty definition below is undefined right away).
- #
- def line2tuple(line)
- end
- undef :line2tuple
-
- #
- # Specialization of the Reader contract for .rash files.
- #
- # A .rash file/stream contains one ruby hash literal on each line. This
- # reader simply decodes each of them in turn with Kernel.eval, providing a
- # state-less reader (that is, tuples are not all loaded in memory at once).
- # NOTE(review): Kernel.eval executes arbitrary ruby code found in the input; only read trusted .rash data.
- class Rash < Reader
-
- # (see Reader#line2tuple) Lines that fail to evaluate to a Hash are reported on $stderr and skipped. NOTE(review): `rescue Exception` also traps interrupts and exit requests.
- def line2tuple(line)
- begin
- h = Kernel.eval(line)
- raise "hash expected, got #{h}" unless h.is_a?(Hash)
- rescue Exception => ex
- $stderr << "Skipping #{line.strip}: #{ex.message}\n"
- nil
- else
- return h
- end
- end
-
- Reader.register(:rash, [".rash"], self)
- end # class Rash
-
- #
- # Specialization of the Reader contract for .alf files.
- #
- # A .alf file simply contains a query expression in the Lispy DSL. This
- # reader decodes and compiles the expression and delegates the enumeration
- # to the obtained operator.
- #
- # Note that an Environment must be wired at creation or piping time.
- # NoSuchDatasetError will certainly occur otherwise.
- #
- class AlfFile < Reader
-
- # (see Reader#each) NOTE(review): Proc.new without a block captures the caller's block; that form is no longer supported since Ruby 3.0.
- def each
- op = Alf.lispy(environment).compile(input_text, input_path)
- op.each(&Proc.new)
- end
-
- Reader.register(:alf, [".alf"], self)
- end # class AlfFile
-
- end # module Reader
+ require 'alf/reader/class_methods'
+ require 'alf/reader/base'
+ require 'alf/reader/rash'
+ require 'alf/reader/alf_file'
+ end # class Reader
#
# Renders a relation (given by any Iterator) in a specific format.
#
# A renderer takes an Iterator instance as input and renders it on an output
@@ -931,2659 +162,161 @@
# # Also, a factory method is automatically installed on the Renderer class
# # itself.
# r = Renderer.foo([an Iterator])
#
class Renderer
+ require 'alf/renderer/class_methods'
+ require 'alf/renderer/base'
+ require 'alf/renderer/rash'
+ require 'alf/renderer/text'
- # Registered renderers
- @@renderers = []
-
- #
- # Register a rendering class with a given name and description.
- #
- # Registered class must at least provide a constructor with an empty
- # signature. The name must be a symbol which can safely be used as a ruby
- # method name. A factory class method of that name and delegation signature
- # is automatically installed on the Renderer class.
- #
- # @param [Symbol] name a name for the output format
- # @param [String] description an output format description (for 'alf show')
- # @param [Class] clazz Renderer subclass used to render in this format
- #
- def self.register(name, description, clazz)
- @@renderers << [name, description, clazz]
- (class << self; self; end).
- send(:define_method, name) do |*args|
- clazz.new(*args)
- end
- end
-
- #
- # Returns a Renderer instance for the given output format name.
- #
- # @param [Symbol] name name of an output format previously registered
- # @param [...] args other arguments to pass to the renderer constructor
- # @return [Renderer] a Renderer instance, already wired if args are
- # provided
- #
- def self.renderer(name, *args)
- if r = @@renderers.find{|triple| triple[0] == name}
- r[2].new(*args)
- else
- raise "No renderer registered for #{name}"
- end
- end
+ end # class Renderer
- #
- # Yields each (name,description,clazz) previously registered in turn. NOTE(review): relies on Proc.new without a block, which is no longer supported since Ruby 3.0.
- #
- def self.each_renderer
- @@renderers.each(&Proc.new)
- end
-
- # Default renderer options
- DEFAULT_OPTIONS = {}
+ #
+ # Marker module and namespace for Alf main commands, those that are **not**
+ # operators at all.
+ #
+ module Command
+ require 'alf/command/class_methods'
+ require 'alf/command/doc_manager'
- # Renderer input (typically an Iterator)
- attr_accessor :input
-
- # @return [Environment] Optional wired environment
- attr_accessor :environment
+ # This is the main documentation extractor
+ DOC_EXTRACTOR = DocManager.new
- # @return [Hash] Renderer's options
- attr_accessor :options
-
#
- # Creates a reader instance.
+ # Delegator command factory
#
- # @param [Iterator] iterator an Iterator of tuples to render
- # @param [Environment] environment wired environment, serving this reader
- # @param [Hash] options Reader's options (see doc of subclasses)
- #
- def initialize(*args)
- @input, @environment, @options = case args.first
- when Array
- Tools.varargs(args, [Array, Environment, Hash])
- else
- Tools.varargs(args, [Iterator, Environment, Hash])
- end
- @options = self.class.const_get(:DEFAULT_OPTIONS).merge(@options || {})
+ def Alf.Delegator()
+ Quickl::Delegator(){|builder|
+ builder.doc_extractor = DOC_EXTRACTOR
+ yield(builder) if block_given?
+ }
end
-
- #
- # Sets the renderer input.
- #
- # This method mimics {Iterator#pipe} and have the same contract.
- #
- def pipe(input, env = environment)
- self.environment = env
- self.input = input
- self
- end
#
- # Executes the rendering, outputting the resulting tuples on the provided
- # output buffer.
+ # Command factory
#
- # The default implementation simply coerces the input as an Iterator and
- # delegates the call to {#render}.
- #
- def execute(output = $stdout)
- render(Iterator.coerce(input, environment), output)
- end
-
- protected
-
- #
- # Renders tuples served by the iterator to the output buffer provided and
- # returns the latter.
- #
- # This method must be implemented by subclasses unless {#execute} is
- # overriden.
- #
- def render(iterator, output)
- end
- undef :render
-
- #
- # Implements the Renderer contract through Myrrha.to_ruby_literal: one ruby literal per tuple and per line
- #
- class Rash < Renderer
-
- # (see Renderer#render)
- def render(input, output)
- input.each do |tuple|
- output << Myrrha.to_ruby_literal(tuple) << "\n"
- end
- output
- end
-
- Renderer.register(:rash, "as ruby hashes", self)
- end # class Rash
-
- end # module Renderer
-
- #
- # Provides a factory over Alf operators and handles the interface with
- # Quickl for commandline support.
- #
- # This module is part of Alf's internal architecture and should not be used
- # at all by third-party projects.
- #
- module Factory
-
- # @see Quickl::Command
- def Command(file, line)
- Quickl::Command(file, line){|builder|
+ def Alf.Command()
+ Quickl::Command(){|builder|
builder.command_parent = Alf::Command::Main
+ builder.doc_extractor = DOC_EXTRACTOR
yield(builder) if block_given?
}
end
-
- # @see Operator
- def Operator(file, line)
- Command(file, line) do |b|
- b.instance_module Alf::Operator
- end
- end
-
- extend Factory
- end # module Factory
-
- #
- # Marker module and namespace for Alf main commands, those that are **not**
- # operators at all.
- #
- module Command
- #
- # alf - Classy data-manipulation dressed in a DSL (+ commandline)
- #
- # SYNOPSIS
- # alf [--version] [--help]
- # alf -e '(lispy command)'
- # alf [FILE.alf]
- # alf [alf opts] OPERATOR [operator opts] ARGS ...
- # alf help OPERATOR
- #
- # OPTIONS
- # #{summarized_options}
- #
- # RELATIONAL COMMANDS
- # #{summarized_subcommands subcommands.select{|cmd|
- # cmd.include?(Alf::Operator::Relational) &&
- # !cmd.include?(Alf::Operator::Experimental)
- # }}
- #
- # EXPERIMENTAL OPERATORS
- # #{summarized_subcommands subcommands.select{|cmd|
- # cmd.include?(Alf::Operator::Relational) &&
- # cmd.include?(Alf::Operator::Experimental)
- # }}
- #
- # NON-RELATIONAL COMMANDS
- # #{summarized_subcommands subcommands.select{|cmd|
- # cmd.include?(Alf::Operator::NonRelational)
- # }}
- #
- # OTHER NON-RELATIONAL COMMANDS
- # #{summarized_subcommands subcommands.select{|cmd|
- # cmd.include?(Alf::Command)
- # }}
- #
- # See '#{program_name} help COMMAND' for details about a specific command.
- #
- class Main < Quickl::Delegator(__FILE__, __LINE__)
- include Command
-
- # Environment instance to use to get base iterators
- attr_accessor :environment
-
- # Output renderer
- attr_accessor :renderer
-
- # Creates a command instance
- def initialize(env = Environment.default)
- @environment = env
- end
-
- # Install options
- options do |opt|
- @execute = false
- opt.on("-e", "--execute", "Execute one line of script (Lispy API)") do
- @execute = true
- end
-
- @renderer = nil
- Renderer.each_renderer do |name,descr,clazz|
- opt.on("--#{name}", "Render output #{descr}"){
- @renderer = clazz.new
- }
- end
-
- opt.on('--env=ENV',
- "Set the environment to use") do |value|
- @environment = Environment.autodetect(value)
- end
-
- opt.on('-rlibrary', "require the library, before executing alf") do |value|
- require(value)
- end
-
- opt.on_tail('-h', "--help", "Show help") do
- raise Quickl::Help
- end
-
- opt.on_tail('-v', "--version", "Show version") do
- raise Quickl::Exit, "alf #{Alf::VERSION}"\
- " (c) 2011, Bernard Lambeau"
- end
- end # Alf's options
-
- # Normalizes args (which is consumed): leading options are kept first, then "exec" is inserted when no argument remains or the only one is an existing file, then the remaining arguments follow.
- def _normalize(args)
- opts = []
- while !args.empty? && (args.first =~ /^\-/)
- opts << args.shift
- end
- if args.empty? or (args.size == 1 && File.exists?(args.first))
- opts << "exec"
- end
- opts += args
- end
-
- #
- # Overridden because Quickl only keeps --options but modifying it there
- # should probably be considered a broken API.
- #
- def _run(argv = [])
- argv = _normalize(argv)
-
- # 1) Extract my options and parse them
- my_argv = []
- while argv.first =~ /^-/
- my_argv << argv.shift
- end
- parse_options(my_argv)
-
- # 2) build the operator according to -e option
- operator = if @execute
- Alf.lispy(environment).compile(argv.first)
- else
- super
- end
-
- # 3) if there is a requester, then we do the job (assuming bin/alf)
- # with the renderer to use. Otherwise, we simply return the built operator
- if operator && requester
- renderer = self.renderer ||= Renderer::Rash.new
- renderer.pipe(operator, environment).execute($stdout)
- else
- operator
- end
- end
-
- end
-
- #
- # Output input tuples through a specific renderer (text, yaml, ...)
- #
- # SYNOPSIS
- # #{program_name} #{command_name} DATASET
- #
- # OPTIONS
- # #{summarized_options}
- #
- # DESCRIPTION
- #
- # When a dataset name is specified as commandline arg, request the
- # environment to provide this dataset and prints it. Otherwise, take what
- # comes on standard input.
- #
- # Note that this command is not an operator and should not be piped anymore.
- #
- class Show < Factory::Command(__FILE__, __LINE__)
- include Command
-
- options do |opt|
- @renderer = nil
- Renderer.each_renderer do |name,descr,clazz|
- opt.on("--#{name}", "Render output #{descr}"){
- @renderer = clazz.new
- }
- end
- end
-
- def execute(args)
- requester.renderer = (@renderer || requester.renderer || Text::Renderer.new)
- args = [ $stdin ] if args.empty?
- args.first
- end
-
- end # class Show
-
- #
- # Executes an .alf file on current environment
- #
- # SYNOPSIS
- # #{program_name} #{command_name} [FILE]
- #
- # OPTIONS
- # #{summarized_options}
- #
- # DESCRIPTION
- #
- # This command executes the .alf file passed as first argument (or what comes
- # on standard input) as a alf query to be executed on the current environment.
- #
- class Exec < Factory::Command(__FILE__, __LINE__)
- include Command
-
- def execute(args)
- Reader.alf(args.first || $stdin, requester.environment)
- end
-
- end # class Exec
-
- #
- # Show help about a specific command
- #
- # SYNOPSIS
- # #{program_name} #{command_name} COMMAND
- #
- class Help < Factory::Command(__FILE__, __LINE__)
- include Command
-
- # Let NoSuchCommandError be passed to higher stage
- no_react_to Quickl::NoSuchCommand
-
- # Command execution
- def execute(args)
- if args.size != 1
- puts super_command.help
- else
- cmd = has_command!(args.first, super_command)
- puts cmd.help
- end
- nil
- end
-
- end # class Help
-
- end
+ require 'alf/command/main'
+ require 'alf/command/exec'
+ require 'alf/command/help'
+ require 'alf/command/show'
+ end # module Command
#
# Marker for all operators, relational and non-relational ones.
#
module Operator
include Iterator, Tools
-
- #
- # Yields non-relational then relational operators, in turn.
- #
- def self.each
- Operator::NonRelational.each{|x| yield(x)}
- Operator::Relational.each{|x| yield(x)}
- end
- #
- # Encapsulates method that allows making operator introspection, that is,
- # knowing operator cardinality and similar stuff.
- #
- module Introspection
-
- #
- # Returns true if this operator is an unary operator, false otherwise
- #
- def unary?
- ancestors.include?(Operator::Unary)
- end
-
- #
- # Returns true if this operator is a binary operator, false otherwise
- #
- def binary?
- ancestors.include?(Operator::Binary)
- end
-
- end # module Introspection
-
- # Ensures that the Introspection module is set on real operators
- def self.included(mod)
- mod.extend(Introspection) if mod.is_a?(Class)
- end
-
#
- # Encapsulates method definitions that convert operators to Quickl
- # commands
+ # Operator factory
#
- module CommandMethods
-
- protected
-
- #
- # Configures the operator from arguments taken from command line.
- #
- # This method is intended to be overridden by subclasses and must return the
- # operator itself.
- #
- def set_args(args)
- self
+ def Alf.Operator()
+ Alf.Command() do |b|
+ b.instance_module Alf::Operator
end
-
- #
- # Overrides Quickl::Command::Single#_run to handle the '--' separator
- # correctly.
- #
- # This is because parse_options tends to eat the '--' separator... This
- # could be handled in Quickl itself, but it should be considered a broken
- # API and will only be available in quickl >= 0.3.0 (probably)
- #
- def _run(argv = [])
- operands, args = split_command_args(argv).collect do |arr|
- parse_options(arr)
- end
- self.set_args(args)
- if operands = command_line_operands(operands)
- env = environment || (requester ? requester.environment : nil)
- self.pipe(operands, env)
- end
- self
- end
-
- def split_command_args(args)
- case (i = args.index("--"))
- when NilClass
- [args, []]
- when 0
- [[ $stdin ], args[1..-1]]
- else
- [args[0...i], args[i+1..-1]]
- end
- end
-
- def command_line_operands(operands)
- operands
- end
-
- end # module CommandMethods
- include CommandMethods
-
- # Operators input datasets
- attr_accessor :datasets
-
- # Optional environment
- attr_reader :environment
-
- # Sets the environment on this operator and propagate on
- # datasets
- def environment=(env)
- # this is to avoid infinite loop (TODO: why is there infinite loops??)
- return if @environment == env
-
- # set and propagate on children
- @environment = env
- datasets.each do |dataset|
- if dataset.respond_to?(:environment)
- dataset.environment = env
- end
- end if datasets
-
- env
end
-
- #
- # Sets the operator input
- #
- def pipe(input, env = environment)
- raise NotImplementedError, "Operator#pipe should be overriden"
- end
-
- #
- # Yields each tuple in turn
- #
- # This method is implemented in a way that ensures that all operators are
- # thread safe. It is not intended to be overridden; use _each instead.
- #
- def each
- op = self.dup
- op._prepare
- op._each(&Proc.new)
- end
-
- protected
-
- #
- # Prepares the iterator before subsequent call to _each.
- #
- # This method is intended to be overridden by subclasses to install what's
- # needed for successful iteration. The default implementation does nothing.
- #
- def _prepare
- end
- # Internal implementation of the iterator.
- #
- # This method must be implemented by subclasses. It is safe to use instance
- # variables (typically initialized in _prepare) here.
- #
- def _each
- end
+ require 'alf/operator/class_methods'
+ require 'alf/operator/signature'
+ require 'alf/operator/base'
+ require 'alf/operator/nullary'
+ require 'alf/operator/unary'
+ require 'alf/operator/binary'
+ require 'alf/operator/cesure'
+ require 'alf/operator/transform'
+ require 'alf/operator/shortcut'
+ require 'alf/operator/experimental'
- #
- # Specialization of Operator for operators that work on a unary input
- #
- module Unary
- include Operator
-
- #
- # Sets the operator input
- #
- def pipe(input, env = environment)
- self.environment = env
- self.datasets = [ input ]
- self
- end
-
- protected
-
- def command_line_operands(operands)
- operands.first || $stdin
- end
- #
- # Simply returns the first dataset
- #
- def input
- Iterator.coerce(datasets.first, environment)
- end
-
- #
- # Yields the block with each input tuple.
- #
- # This method should be preferred to <code>input.each</code> when possible.
- #
- def each_input_tuple
- input.each(&Proc.new)
- end
-
- end # module Unary
-
- #
- # Specialization of Operator for operators that work on a binary input
#
- module Binary
- include Operator
-
- #
- # Sets the operator input
- #
- def pipe(input, env = environment)
- self.environment = env
- self.datasets = input
- self
- end
-
- protected
-
- def command_line_operands(operands)
- (operands.size < 2) ? ([$stdin] + operands) : operands
- end
-
- # Returns the left operand
- def left
- Iterator.coerce(datasets.first, environment)
- end
-
- # Returns the right operand
- def right
- Iterator.coerce(datasets.last, environment)
- end
-
- end # module Binary
-
+ # Marker module and namespace for non relational operators
#
- # Specialization of Operator for operators that simply convert single tuples
- # to single tuples.
- #
- module Transform
- include Unary
+ module NonRelational
+ require 'alf/operator/non_relational/autonum'
+ require 'alf/operator/non_relational/defaults'
+ require 'alf/operator/non_relational/compact'
+ require 'alf/operator/non_relational/sort'
+ require 'alf/operator/non_relational/clip'
+ require 'alf/operator/non_relational/coerce'
+ require 'alf/operator/non_relational/generator'
- protected
-
- # (see Operator#_each)
- def _each
- each_input_tuple do |tuple|
- yield _tuple2tuple(tuple)
- end
- end
-
#
- # Transforms an input tuple to an output tuple
+ # Yields the block with each operator module in turn
#
- def _tuple2tuple(tuple)
- end
-
- end # module Transform
-
- #
- # Specialization of Operator for implementing operators that rely on a
- # cesure algorithm.
- #
- module Cesure
- include Unary
-
- protected
-
- # (see Operator#_each)
- def _each
- receiver, proj_key, prev_key = Proc.new, cesure_key, nil
- each_input_tuple do |tuple|
- cur_key = proj_key.project(tuple)
- if cur_key != prev_key
- flush_cesure(prev_key, receiver) unless prev_key.nil?
- start_cesure(cur_key, receiver)
- prev_key = cur_key
- end
- accumulate_cesure(tuple, receiver)
+ def self.each
+ constants.each do |c|
+ val = const_get(c)
+ yield(val) if val.ancestors.include?(Operator::NonRelational)
end
- flush_cesure(prev_key, receiver) unless prev_key.nil?
end
-
- def cesure_key
- end
-
- def start_cesure(key, receiver)
- end
-
- def accumulate_cesure(tuple, receiver)
- end
-
- def flush_cesure(key, receiver)
- end
-
- end # module Cesure
-
- #
- # Specialization of Operator for operators that are shortcuts for longer
- # expressions.
- #
- module Shortcut
- include Operator
-
- #
- # Sets the operator input
- #
- def pipe(input, env = environment)
- self.environment = env
- self.datasets = input
- self
- end
-
- protected
-
- # (see Operator#_each)
- def _each
- longexpr.each(&Proc.new)
- end
- #
- # Compiles the longer expression and returns it.
- #
- # @return (Iterator) the compiled longer expression, typically another
- # Operator instance
- #
- def longexpr
- end
- undef :longexpr
-
- #
- # This is a helper ala Lispy#chain for implementing (#longexpr).
- #
- # @param [Array] elements a list of Iterator-able
- # @return [Operator] the first element of the list, but piped with the
- # next one, and so on.
- #
- def chain(*elements)
- elements = elements.reverse
- elements[1..-1].inject(elements.first) do |c, elm|
- elm.pipe(c, environment)
- elm
- end
- end
-
- end # module Shortcut
-
- # Marker for experimental operators
- module Experimental; end
+ end # NonRelational
- end # module Operator
-
- #
- # Marker module and namespace for non relational operators
- #
- module Operator::NonRelational
-
#
- # Yields the block with each operator module in turn
+ # Marker module and namespace for relational operators
#
- def self.each
- constants.each do |c|
- val = const_get(c)
- yield(val) if val.ancestors.include?(Operator::NonRelational)
- end
- end
-
- #
- # Extend its operand with an unique autonumber attribute
- #
- # SYNOPSIS
- #
- # #{program_name} #{command_name} [OPERAND] -- [ATTRNAME]
- #
- # DESCRIPTION
- #
- # This non-relational operator guarantees uniqueness of output tuples by
- # adding an attribute called 'ATTRNAME' whose value is an Integer. No
- # guarantee is given about the ordering of output tuples, nor that the
- # autonumber is sequential; only that all values are different.
- # If the presence of duplicates was the only "non-relational" aspect of
- # input tuples, the result may be considered a valid relation representation.
- #
- # IN RUBY
- #
- # (autonum OPERAND, ATTRNAME = :autonum)
- #
- # (autonum :suppliers)
- # (autonum :suppliers, :unique_id)
- #
- # IN SHELL
- #
- # #{program_name} #{command_name} [OPERAND] -- [ATTRNAME]
- #
- # alf autonum suppliers
- # alf autonum suppliers -- unique_id
- #
- class Autonum < Factory::Operator(__FILE__, __LINE__)
- include Operator::NonRelational, Operator::Transform
-
- # Name of the new attribute to add
- attr_accessor :attrname
-
- def initialize(attrname = :autonum)
- @attrname = attrname
- end
-
- protected
-
- # (see Operator::CommandMethods#set_args)
- def set_args(args)
- @attrname = args.last.to_sym unless args.empty?
- end
-
- # (see Operator#_prepare)
- def _prepare
- @autonum = -1
- end
-
- # (see Operator::Transform#_tuple2tuple)
- def _tuple2tuple(tuple)
- tuple.merge(@attrname => (@autonum += 1))
- end
-
- end # class Autonum
-
- #
- # Force default values on missing/nil attributes
- #
- # SYNOPSIS
- # #{program_name} #{command_name} [OPERAND] -- ATTR1 VAL1 ...
- #
- # OPTIONS
- # #{summarized_options}
- #
- # API & EXAMPLE
- #
- # # Non strict mode
- # (defaults :suppliers, :country => 'Belgium')
- #
- # # Strict mode (--strict)
- # (defaults :suppliers, {:country => 'Belgium'}, true)
- #
- # DESCRIPTION
- #
- # This operator rewrites tuples so as to ensure that all values for specified
- # attributes ATTRx are defined and not nil. Missing or nil attributes are
- # replaced by the associated default value VALx.
- #
- # When used in shell, the hash of default values is built from commandline
- # arguments ala Hash[...]. However, to keep type safety VALx are interpreted
- # as ruby literals and built with Kernel.eval. This means that strings must
- # be doubly quoted. For the example of the API section:
- #
- # alf defaults suppliers -- country "'Belgium'"
- #
- # When used in --strict mode, the operator simply projects resulting tuples on
- # attributes for which a default value has been specified. Using the strict
- # mode guarantees that the heading of all tuples is the same, and that no nil
- # value ever remains. However, this operator never removes duplicates.
- #
- class Defaults < Factory::Operator(__FILE__, __LINE__)
- include Operator::NonRelational, Operator::Transform
+ module Relational
+ require 'alf/operator/relational/project'
+ require 'alf/operator/relational/extend'
+ require 'alf/operator/relational/rename'
+ require 'alf/operator/relational/restrict'
+ require 'alf/operator/relational/join'
+ require 'alf/operator/relational/intersect'
+ require 'alf/operator/relational/minus'
+ require 'alf/operator/relational/union'
+ require 'alf/operator/relational/matching'
+ require 'alf/operator/relational/not_matching'
+ require 'alf/operator/relational/wrap'
+ require 'alf/operator/relational/unwrap'
+ require 'alf/operator/relational/group'
+ require 'alf/operator/relational/ungroup'
+ require 'alf/operator/relational/summarize'
+ require 'alf/operator/relational/rank'
+ require 'alf/operator/relational/quota'
- # Default values as a ATTR -> VAL hash
- attr_accessor :defaults
-
- # Strict mode?
- attr_accessor :strict
-
- # Builds a Defaults operator instance
- def initialize(defaults = {}, strict = false)
- @defaults = defaults
- @strict = strict
- end
-
- options do |opt|
- opt.on('-s', '--strict', 'Strictly restrict to default attributes'){
- self.strict = true
- }
- end
-
- protected
-
- # (see Operator::CommandMethods#set_args)
- def set_args(args)
- @defaults = tuple_collect(args.each_slice(2)) do |k,v|
- [k.to_sym, Kernel.eval(v)]
- end
- self
- end
-
- # (see Operator::Transform#_tuple2tuple)
- def _tuple2tuple(tuple)
- if strict
- tuple_collect(@defaults){|k,v|
- [k, coalesce(tuple[k], v)]
- }
- else
- @defaults.merge tuple_collect(tuple){|k,v|
- [k, coalesce(v, @defaults[k])]
- }
- end
- end
-
- end # class Defaults
-
- #
- # Remove tuple duplicates
- #
- # SYNOPSIS
- # #{program_name} #{command_name} [OPERAND]
- #
- # API & EXAMPLE
- #
- # # clip, unlike project, typically leaves duplicates
- # (compact (clip :suppliers, [ :city ]))
- #
- # DESCRIPTION
- #
- # This operator removes duplicates from input tuples. As defaults, it is a non
- # relational operator that helps normalizing input for implementing relational
- # operators. This one is centric in converting bags of tuples to sets of
- # tuples, as required by true relations.
- #
- # alf compact ...
- #
- class Compact < Factory::Operator(__FILE__, __LINE__)
- include Operator::NonRelational, Operator::Shortcut, Operator::Unary
-
- # Removes duplicates according to a complete order
- class SortBased
- include Operator::Cesure
-
- def cesure_key
- @cesure_key ||= ProjectionKey.new([],true)
- end
-
- def accumulate_cesure(tuple, receiver)
- @tuple = tuple
- end
-
- def flush_cesure(key, receiver)
- receiver.call(@tuple)
- end
-
- end # class SortBased
-
- # Removes duplicates by loading all in memory and filtering
- # them there
- class BufferBased
- include Operator::Unary
-
- def _prepare
- @tuples = input.to_a.uniq
- end
-
- def _each
- @tuples.each(&Proc.new)
- end
-
- end # class BufferBased
-
- protected
-
- def longexpr
- chain BufferBased.new,
- datasets
- end
-
- end # class Compact
-
- #
- # Sort input tuples according to an order relation
- #
- # SYNOPSIS
- # #{program_name} #{command_name} [OPERAND] -- ATTR1 ORDER1 ATTR2 ORDER2...
- #
- # API & EXAMPLE
- #
- # # sort on supplier name in ascending order
- # (sort :suppliers, [:name])
- #
- # # sort on city then on name
- # (sort :suppliers, [:city, :name])
- #
- # # sort on city DESC then on name ASC
- # (sort :suppliers, [[:city, :desc], [:name, :asc]])
- #
- # => See OrderingKey about specifying orderings
- #
- # DESCRIPTION
- #
- # This operator sorts input tuples on ATTR1 then ATTR2, etc. and outputs
- # them sorted after that. This is, of course, a non relational operator as
- # relations are unordered sets. It is provided to implement operators that
- # need tuples to be sorted to work correctly. When used in shell, the key
- # ordering must be specified in its longest form:
- #
- # alf sort suppliers -- name asc
- # alf sort suppliers -- city desc name asc
- #
- # LIMITATIONS
- #
- # The fact that the ordering must be completely specified with commandline
- # arguments is a limitation, shortcuts could be provided in the future.
- #
- class Sort < Factory::Operator(__FILE__, __LINE__)
- include Operator::NonRelational, Operator::Unary
-
- def initialize(ordering_key = [])
- @ordering_key = OrderingKey.coerce(ordering_key)
- yield self if block_given?
- end
-
- def ordering=(ordering)
- @ordering_key = OrderingKey.coerce(ordering)
- end
-
- protected
-
- def set_args(args)
- self.ordering = args.collect{|c| c.to_sym}.each_slice(2).to_a
- self
- end
-
- def _prepare
- @buffer = Buffer::Sorted.new(@ordering_key)
- @buffer.add_all(input)
- end
-
- def _each
- @buffer.each(&Proc.new)
- end
-
- end # class Sort
-
- #
- # Clip input tuples to a subset of attributes
- #
- # SYNOPSIS
- # #{program_name} #{command_name} [OPERAND] -- ATTR1 ATTR2 ...
- #
- # OPTIONS
- # #{summarized_options}
- #
- # API & EXAMPLE
- #
- # # Keep only name and city attributes
- # (clip :suppliers, [:name, :city])
- #
- # # Keep all but name and city attributes
- # (clip :suppliers, [:name, :city], true)
- #
- # DESCRIPTION
- #
- # This operator clips tuples on attributes whose names are specified as
- # arguments. This is similar to the relational PROJECT operator, except
- # that this one does not remove duplicates that can occur from clipping.
- # In other words, clipping may lead to bags of tuples instead of sets.
- #
- # When used in shell, the clipping/projection key is simply taken from
- # commandline arguments:
- #
- # alf clip suppliers -- name city
- # alf clip suppliers --allbut -- name city
- #
- class Clip < Factory::Operator(__FILE__, __LINE__)
- include Operator::NonRelational, Operator::Transform
-
- # Builds a Clip operator instance
- def initialize(attributes = [], allbut = false)
- @projection_key = ProjectionKey.new(attributes, allbut)
- yield self if block_given?
- end
-
- def attributes=(attrs)
- @projection_key.attributes = attrs
- end
-
- def allbut=(allbut)
- @projection_key.allbut = allbut
- end
-
- # Installs the options
- options do |opt|
- opt.on('-a', '--allbut', 'Apply a ALLBUT clipping') do
- self.allbut = true
- end
- end
-
- protected
-
- # (see Operator::CommandMethods#set_args)
- def set_args(args)
- self.attributes = args.collect{|a| a.to_sym}
- self
- end
-
- # (see Operator::Transform#_tuple2tuple)
- def _tuple2tuple(tuple)
- @projection_key.project(tuple)
- end
-
- end # class Clip
-
- end # Operator::NonRelational
-
- #
- # Marker module and namespace for relational operators
- #
- module Operator::Relational
-
- #
- # Yields the block with each operator module in turn
- #
- def self.each
- constants.each do |c|
- val = const_get(c)
- yield(val) if val.ancestors.include?(Operator::Relational)
- end
- end
-
- # Relational projection (clip + compact)
- #
- # SYNOPSIS
- # #{program_name} #{command_name} [OPERAND] -- ATTR1 ATTR2 ...
- #
- # OPTIONS
- # #{summarized_options}
- #
- # API & EXAMPLE
- #
- # # Project on name and city attributes
- # (project :suppliers, [:name, :city])
- #
- # # Project on all but name and city attributes
- # (allbut :suppliers, [:name, :city])
- #
- # DESCRIPTION
- #
- # This operator projects tuples on attributes whose names are specified as
- # arguments. This is similar to clip, except that this one is a truly
- # relational one, that is, it also removes duplicate tuples.
- #
- # When used in shell, the clipping/projection key is simply taken from
- # commandline arguments:
- #
- # alf project suppliers -- name city
- # alf project --allbut suppliers -- name city
- #
- class Project < Factory::Operator(__FILE__, __LINE__)
- include Operator::Relational, Operator::Shortcut, Operator::Unary
-
- # Builds a Project operator instance
- def initialize(attributes = [], allbut = false)
- @projection_key = ProjectionKey.new(attributes, allbut)
- yield self if block_given?
- end
-
- def attributes=(attrs)
- @projection_key.attributes = attrs
- end
-
- def allbut=(allbut)
- @projection_key.allbut = allbut
- end
-
- # Installs the options
- options do |opt|
- opt.on('-a', '--allbut', 'Apply a ALLBUT projection') do
- self.allbut = true
- end
- end
-
- protected
-
- # (see Operator::CommandMethods#set_args)
- def set_args(args)
- self.attributes = args.collect{|a| a.to_sym}
- self
- end
-
- # (see Operator::Shortcut#longexpr)
- def longexpr
- chain Operator::NonRelational::Compact.new,
- Operator::NonRelational::Clip.new(@projection_key.attributes,
- @projection_key.allbut),
- datasets
- end
-
- end # class Project
-
- #
- # Relational extension (additional, computed attributes)
- #
- # SYNOPSIS
- # #{program_name} #{command_name} [OPERAND] -- ATTR1 EXPR1 ATTR2 EXPR2...
- #
- # API & EXAMPLE
- #
- # (extend :supplies, :sp => lambda{ sid + "/" + pid },
- # :big => lambda{ qty > 100 ? true : false })
- #
- # DESCRIPTION
- #
- # This command extends input tuples with new attributes (named ATTR1, ...)
- # whose value is the result of evaluating tuple expressions (i.e. EXPR1, ...).
- # See main documentation about the semantics of tuple expressions. When used
- # in shell, the hash of extensions is built from commandline arguments ala
- # Hash[...]. Tuple expressions must be specified as code literals there:
- #
- # alf extend supplies -- sp 'sid + "/" + pid' big "qty > 100 ? true : false"
- #
- # Attributes ATTRx should not already exist, no behavior is guaranteed if
- # this precondition is not respected.
- #
- class Extend < Factory::Operator(__FILE__, __LINE__)
- include Operator::Relational, Operator::Transform
-
- # Extensions as a Hash attr => lambda{...}
- attr_accessor :extensions
-
- # Builds an Extend operator instance
- def initialize(extensions = {})
- @extensions = extensions
- end
-
- protected
-
- # (see Operator::CommandMethods#set_args)
- def set_args(args)
- @extensions = tuple_collect(args.each_slice(2)){|k,v|
- [k.to_sym, TupleHandle.compile(v)]
- }
- self
- end
-
- # (see Operator#_prepare)
- def _prepare
- @handle = TupleHandle.new
- end
-
- # (see Operator::Transform#_tuple2tuple)
- def _tuple2tuple(tuple)
- tuple.merge tuple_collect(@extensions){|k,v|
- [k, @handle.set(tuple).evaluate(v)]
- }
- end
-
- end # class Extend
-
- #
- # Relational renaming (rename some attributes)
- #
- # SYNOPSIS
- # #{program_name} #{command_name} [OPERAND] -- OLD1 NEW1 ...
- #
- # OPTIONS
- # #{summarized_options}
- #
- # API & EXAMPLE
- #
- # (rename :suppliers, :name => :supplier_name, :city => :supplier_city)
- #
- # DESCRIPTION
- #
- # This command renames OLD attributes as NEW as specified by arguments.
- # Attributes OLD should exist in source tuples while attributes NEW should
- # not. When used in shell, renaming attributes are built ala Hash[...] from
- # commandline arguments:
- #
- # alf rename suppliers -- name supplier_name city supplier_city
- #
- class Rename < Factory::Operator(__FILE__, __LINE__)
- include Operator::Relational, Operator::Transform
-
- # Hash of source -> target attribute renamings
- attr_accessor :renaming
-
- # Builds a Rename operator instance
- def initialize(renaming = {})
- @renaming = renaming
- end
-
- protected
-
- # (see Operator::CommandMethods#set_args)
- def set_args(args)
- @renaming = Hash[*args.collect{|c| c.to_sym}]
- self
- end
-
- # (see Operator::Transform#_tuple2tuple)
- def _tuple2tuple(tuple)
- tuple_collect(tuple){|k,v| [@renaming[k] || k, v]}
- end
-
- end # class Rename
-
- #
- # Relational restriction (aka where, predicate filtering)
- #
- # SYNOPSIS
- # #{program_name} #{command_name} [OPERAND] -- EXPR
- # #{program_name} #{command_name} [OPERAND] -- ATTR1 VAL1 ...
- #
- # API & EXAMPLE
- #
- # # Restrict to suppliers with status greater than 20
- # (restrict :suppliers, lambda{ status > 20 })
- #
- # # Restrict to suppliers that live in London
- # (restrict :suppliers, lambda{ city == 'London' })
- #
- # DESCRIPTION
- #
- # This command restricts tuples to those for which EXPR evaluates to true.
- # EXPR must be a valid tuple expression that should return a truth-value.
- # When used in shell, the predicate is taken as a string and compiled with
- # TupleHandle.compile. We also provide a shortcut for equality expressions.
- # Note that, in that case, values are expected to be ruby code literals,
- # evaluated with Kernel.eval. Therefore, strings must be doubly quoted.
- #
- # alf restrict suppliers -- "status > 20"
- # alf restrict suppliers -- city "'London'"
- #
- class Restrict < Factory::Operator(__FILE__, __LINE__)
- include Operator::Relational, Operator::Unary
-
- # Restriction predicate
- attr_accessor :predicate
-
- # Builds a Restrict operator instance
- def initialize(predicate = "true")
- @predicate = TupleHandle.compile(predicate)
- yield self if block_given?
- end
-
- protected
-
- # (see Operator::CommandMethods#set_args)
- def set_args(args)
- @predicate = if args.size > 1
- TupleHandle.compile tuple_collect(args.each_slice(2)){|a,expr|
- [a, Kernel.eval(expr)]
- }
- else
- TupleHandle.compile(args.first)
- end
- self
- end
-
- # (see Operator#_each)
- def _each
- handle = TupleHandle.new
- each_input_tuple{|t| yield(t) if handle.set(t).evaluate(@predicate) }
- end
-
- end # class Restrict
-
- #
- # Relational join (and cross-join)
- #
- # SYNOPSIS
- # #{program_name} #{command_name} [LEFT] RIGHT
- #
- # API & EXAMPLE
- #
- # (join :suppliers, :parts)
- #
- # DESCRIPTION
- #
- # This operator computes the (natural) join of two input iterators. Natural
- # join means that, unlike what is commonly used in SQL, the default behavior
- # is to join on common attributes. You can use the rename operator if this
- # behavior does not fit your needs.
- #
- # alf join suppliers supplies
- #
- class Join < Factory::Operator(__FILE__, __LINE__)
- include Operator::Relational, Operator::Shortcut, Operator::Binary
-
+ #
+ # Yields the block with each operator module in turn
#
- # Performs a Join of two relations through a Hash buffer on the right
- # one.
- #
- class HashBased
- include Operator::Binary
-
- #
- # Implements a special Buffer for join-based relational operators.
- #
- # Example:
- #
- # buffer = JoinBuffer.new(...) # pass the right part of the join
- # left.each do |left_tuple|
- # key, rest = buffer.split(left_tuple)
- # buffer.each(key) do |right_tuple|
- # #
- # # do whatever you want with left and right tuples
- # #
- # end
- # end
- #
- class JoinBuffer
-
- #
- # Creates a buffer instance with the right part of the join.
- #
- # @param [Iterator] enum a tuple iterator, right part of the join.
- #
- def initialize(enum)
- @buffer = nil
- @key = nil
- @enum = enum
- end
-
- #
- # Splits a left tuple according to the common key.
- #
- # @param [Hash] tuple a left tuple of the join
- # @return [Array] an array of two elements, the key and the rest
- # @see ProjectionKey#split
- #
- def split(tuple)
- _init(tuple) unless @key
- @key.split(tuple)
- end
-
- #
- # Yields each right tuple that matches a given key value.
- #
- # @param [Hash] key a tuple that matches elements of the common key
- # (typically the first element returned by #split)
- #
- def each(key)
- @buffer[key].each(&Proc.new) if @buffer.has_key?(key)
- end
-
- private
-
- # Initializes the buffer; the argument is the left tuple given to #split
- def _init(right)
- @buffer = Hash.new{|h,k| h[k] = []}
- @enum.each do |left|
- @key = Tools::ProjectionKey.coerce(left.keys & right.keys) unless @key
- @buffer[@key.project(left)] << left
- end
- @key = Tools::ProjectionKey.coerce([]) unless @key
- end
-
- end # class JoinBuffer
-
- protected
-
- # (see Operator#_each)
- def _each
- buffer = JoinBuffer.new(right)
- left.each do |left_tuple|
- key, rest = buffer.split(left_tuple)
- buffer.each(key) do |right|
- yield(left_tuple.merge(right))
- end
- end
+ def self.each
+ constants.each do |c|
+ val = const_get(c)
+ yield(val) if val.ancestors.include?(Operator::Relational)
end
-
end
-
- protected
-
- # (see Shortcut#longexpr)
- def longexpr
- chain HashBased.new,
- datasets
- end
-
- end # class Join
- #
- # Relational intersection (aka a logical and)
- #
- # SYNOPSIS
- # #{program_name} #{command_name} [LEFT] RIGHT
- #
- # API & EXAMPLE
- #
- # # Give suppliers that live in Paris and have status >= 20
- # (intersect \\
- # (restrict :suppliers, lambda{ status >= 20 }),
- # (restrict :suppliers, lambda{ city == 'Paris' }))
- #
- # DESCRIPTION
- #
- # This operator computes the intersection between its two operands. The
- # intersection is simply the set of common tuples between them. Both operands
- # must have the same heading.
- #
- # alf intersect ... ...
- #
- class Intersect < Factory::Operator(__FILE__, __LINE__)
- include Operator::Relational, Operator::Shortcut, Operator::Binary
-
- class HashBased
- include Operator::Binary
-
- protected
-
- def _prepare
- @index = Hash.new
- right.each{|t| @index[t] = true}
- end
-
- def _each
- left.each do |left_tuple|
- yield(left_tuple) if @index.has_key?(left_tuple)
- end
- end
-
- end
-
- protected
-
- # (see Shortcut#longexpr)
- def longexpr
- chain HashBased.new,
- datasets
- end
-
- end # class Intersect
-
- #
- # Relational minus (aka difference)
- #
- # SYNOPSIS
- # #{program_name} #{command_name} [LEFT] RIGHT
- #
- # API & EXAMPLE
- #
- # # Give all suppliers but those living in Paris
- # (minus :suppliers,
- # (restrict :suppliers, lambda{ city == 'Paris' }))
- #
- # DESCRIPTION
- #
- # This operator computes the difference between its two operands. The
- # difference is simply the set of tuples in the left operand not shared by
- # the right one.
- #
- # alf minus ... ...
- #
- class Minus < Factory::Operator(__FILE__, __LINE__)
- include Operator::Relational, Operator::Shortcut, Operator::Binary
-
- class HashBased
- include Operator::Binary
-
- protected
-
- def _prepare
- @index = Hash.new
- right.each{|t| @index[t] = true}
- end
-
- def _each
- left.each do |left_tuple|
- yield(left_tuple) unless @index.has_key?(left_tuple)
- end
- end
-
- end
-
- protected
-
- # (see Shortcut#longexpr)
- def longexpr
- chain HashBased.new,
- datasets
- end
-
- end # class Minus
-
- #
- # Relational union
- #
- # SYNOPSIS
- # #{program_name} #{command_name} [LEFT] RIGHT
- #
- # API & EXAMPLE
- #
- # (union (project :suppliers, [:city]),
- # (project :parts, [:city]))
- #
- # DESCRIPTION
- #
- # This operator computes the union of two input iterators. Input
- # iterators should have the same heading. The result never contains duplicates.
- #
- # alf union ... ...
- #
- class Union < Factory::Operator(__FILE__, __LINE__)
- include Operator::Relational, Operator::Shortcut, Operator::Binary
-
- class DisjointBased
- include Operator::Binary
-
- protected
-
- def _each
- left.each(&Proc.new)
- right.each(&Proc.new)
- end
-
- end
-
- protected
-
- # (see Shortcut#longexpr)
- def longexpr
- chain Operator::NonRelational::Compact.new,
- DisjointBased.new,
- datasets
- end
-
- end # class Union
-
- #
- # Relational matching
- #
- # SYNOPSIS
- # #{program_name} #{command_name} [LEFT] RIGHT
- #
- # API & EXAMPLE
- #
- # (matching :suppliers, :supplies)
- #
- # DESCRIPTION
- #
- # This operator restricts left tuples to those for which there exists at
- # least one right tuple that joins. This is a shortcut operator for the
- # longer expression:
- #
- # (project (join xxx, yyy), [xxx's attributes])
- #
- # In shell:
- #
- # alf matching suppliers supplies
- #
- class Matching < Factory::Operator(__FILE__, __LINE__)
- include Operator::Relational, Operator::Shortcut, Operator::Binary
-
- #
- # Performs a Matching of two relations through a Hash buffer on the right
- # one.
- #
- class HashBased
- include Operator::Binary
-
- # (see Operator#_each)
- def _each
- seen, key = nil, nil
- left.each do |left_tuple|
- seen ||= begin
- h = Hash.new
- right.each do |right_tuple|
- key ||= Tools::ProjectionKey.coerce(left_tuple.keys & right_tuple.keys)
- h[key.project(right_tuple)] = true
- end
- key ||= Tools::ProjectionKey.coerce([])
- h
- end
- yield(left_tuple) if seen.has_key?(key.project(left_tuple))
- end
- end
-
- end # class HashBased
-
- protected
-
- # (see Shortcut#longexpr)
- def longexpr
- chain HashBased.new,
- datasets
- end
-
- end # class Matching
-
- #
- # Relational not matching
- #
- # SYNOPSIS
- # #{program_name} #{command_name} [LEFT] RIGHT
- #
- # API & EXAMPLE
- #
- # (not_matching :suppliers, :supplies)
- #
- # DESCRIPTION
- #
- # This operator restricts left tuples to those for which there does not
- # exist any right tuple that joins. This is a shortcut operator for the
- # longer expression:
- #
- # (minus xxx, (matching xxx, yyy))
- #
- # In shell:
- #
- # alf not-matching suppliers supplies
- #
- class NotMatching < Factory::Operator(__FILE__, __LINE__)
- include Operator::Relational, Operator::Shortcut, Operator::Binary
-
- #
- # Performs a NotMatching of two relations through a Hash buffer on the
- # right one.
- #
- class HashBased
- include Operator::Binary
-
- # (see Operator#_each)
- def _each
- seen, key = nil, nil
- left.each do |left_tuple|
- seen ||= begin
- h = Hash.new
- right.each do |right_tuple|
- key ||= Tools::ProjectionKey.coerce(left_tuple.keys & right_tuple.keys)
- h[key.project(right_tuple)] = true
- end
- key ||= Tools::ProjectionKey.coerce([])
- h
- end
- yield(left_tuple) unless seen.has_key?(key.project(left_tuple))
- end
- end
-
- end # class HashBased
-
- protected
-
- # (see Shortcut#longexpr)
- def longexpr
- chain HashBased.new,
- datasets
- end
-
- end # class NotMatching
+ end # module Relational
- #
- # Relational wrapping (tuple-valued attributes)
- #
- # SYNOPSIS
- # #{program_name} #{command_name} [OPERAND] -- ATTR1 ATTR2 ... NEWNAME
- #
- # API & EXAMPLE
- #
- # (wrap :suppliers, [:city, :status], :loc_and_status)
- #
- # DESCRIPTION
- #
- # This operator wraps attributes ATTR1 to ATTRN as a new, tuple-based
- # attribute whose name is NEWNAME. When used in shell, names of wrapped
- # attributes are taken from commandline arguments, except the last one
- # which defines the new name to use:
- #
- # alf wrap suppliers -- city status loc_and_status
- #
- class Wrap < Factory::Operator(__FILE__, __LINE__)
- include Operator::Relational, Operator::Transform
-
- # Array of wrapping attributes
- attr_accessor :attributes
-
- # New name for the wrapped attribute
- attr_accessor :as
-
- # Builds a Wrap operator instance
- def initialize(attributes = [], as = :wrapped)
- @attributes = attributes
- @as = as
- end
-
- protected
-
- # (see Operator::CommandMethods#set_args)
- def set_args(args)
- @as = args.pop.to_sym
- @attributes = args.collect{|a| a.to_sym}
- self
- end
-
- # (see Operator::Transform#_tuple2tuple)
- def _tuple2tuple(tuple)
- others = tuple_collect(tuple.keys - @attributes){|k| [k,tuple[k]] }
- others[as] = tuple_collect(attributes){|k| [k, tuple[k]] }
- others
- end
-
- end # class Wrap
-
- #
- # Relational un-wrapping (inverse of wrap)
- #
- # SYNOPSIS
- # #{program_name} #{command_name} [OPERAND] -- ATTR
- #
- # API & EXAMPLE
- #
- # # Assuming wrapped = (wrap :suppliers, [:city, :status], :loc_and_status)
- # (unwrap wrapped, :loc_and_status)
- #
- # DESCRIPTION
- #
- # This operator unwraps the tuple-valued attribute named ATTR so as to
- # flatten its pairs with 'upstream' tuple. The latter should be such that
- # no name collision occurs. When used in shell, the name of the attribute to
- # unwrap is taken as the first commandline argument:
- #
- # alf unwrap wrap -- loc_and_status
- #
- class Unwrap < Factory::Operator(__FILE__, __LINE__)
- include Operator::Relational, Operator::Transform
-
- # Name of the attribute to unwrap
- attr_accessor :attribute
-
- # Builds an Unwrap operator instance
- def initialize(attribute = :wrapped)
- @attribute = attribute
- end
-
- protected
-
- # (see Operator::CommandMethods#set_args)
- def set_args(args)
- @attribute = args.first.to_sym
- self
- end
-
- # (see Operator::Transform#_tuple2tuple)
- def _tuple2tuple(tuple)
- tuple = tuple.dup
- wrapped = tuple.delete(@attribute) || {}
- tuple.merge(wrapped)
- end
-
- end # class Unwrap
-
- #
- # Relational grouping (relation-valued attributes)
- #
- # SYNOPSIS
- # #{program_name} #{command_name} [OPERAND] -- ATTR1 ATTR2 ... NEWNAME
- #
- # API & EXAMPLE
- #
- # (group :supplies, [:pid, :qty], :supplying)
- # (group :supplies, [:sid], :supplying, true)
- #
- # DESCRIPTION
- #
- # This operator groups attributes ATTR1 to ATTRN as a new, relation-valued
- # attribute whose name is NEWNAME. When used in shell, names of grouped
- # attributes are taken from commandline arguments, except the last one
- # which defines the new name to use:
- #
- # alf group supplies -- pid qty supplying
- # alf group supplies --allbut -- sid supplying
- #
- class Group < Factory::Operator(__FILE__, __LINE__)
- include Operator::Relational, Operator::Unary
-
- # Attributes on which grouping applies
- attr_accessor :attributes
-
- # Attribute name for grouping tuple
- attr_accessor :as
-
- # Group all but attributes?
- attr_accessor :allbut
-
- # Creates a Group instance
- def initialize(attributes = [], as = :group, allbut = false)
- @attributes = attributes
- @as = as
- @allbut = allbut
- end
-
- options do |opt|
- opt.on('--allbut', "Group all but specified attributes"){ @allbut = true }
- end
-
- protected
-
- # (see Operator::CommandMethods#set_args)
- def set_args(args)
- @as = args.pop.to_sym
- @attributes = args.collect{|a| a.to_sym}
- self
- end
-
- # See Operator#_prepare
- def _prepare
- pkey = ProjectionKey.new(attributes, !allbut)
- @index = Hash.new{|h,k| h[k] = Set.new}
- each_input_tuple do |tuple|
- key, rest = pkey.split(tuple)
- @index[key] << rest
- end
- end
-
- # See Operator#_each
- def _each
- @index.each_pair do |k,v|
- yield(k.merge(@as => Relation.coerce(v)))
- end
- end
-
- end # class Group
-
- #
- # Relational un-grouping (inverse of group)
- #
- # SYNOPSIS
- # #{program_name} #{command_name} [OPERAND] -- ATTR
- #
- # API & EXAMPLE
- #
- # # Assuming grouped = (group enum, [:pid, :qty], :supplying)
- # (ungroup grouped, :supplying)
- #
- # DESCRIPTION
- #
- # This operator ungroups the relation-valued attribute named ATTR and outputs
- # tuples as the flattening of each of its tuples merged with the upstream
- # one. Sub relation should be such that no name collision occurs. When
- # used in shell, the name of the attribute to ungroup is taken as the first
- # commandline argument:
- #
- # alf ungroup group -- supplying
- #
- class Ungroup < Factory::Operator(__FILE__, __LINE__)
- include Operator::Relational, Operator::Unary
-
- # Relation-value attribute to ungroup
- attr_accessor :attribute
-
- # Creates an Ungroup instance
- def initialize(attribute = :grouped)
- @attribute = attribute
- end
-
- protected
-
- # (see Operator::CommandMethods#set_args)
- def set_args(args)
- @attribute = args.pop.to_sym
- self
- end
-
- # See Operator#_each
- def _each
- each_input_tuple do |tuple|
- tuple = tuple.dup
- subrel = tuple.delete(@attribute)
- subrel.each do |subtuple|
- yield(tuple.merge(subtuple))
- end
- end
- end
-
- end # class Ungroup
-
- #
- # Relational summarization (group-by + aggregate ops)
- #
- # SYNOPSIS
- # #{program_name} #{command_name} [OPERAND] [--allbut] --by=KEY1,KEY2... -- AGG1 EXPR1...
- #
- # OPTIONS
- # #{summarized_options}
- #
- # API & EXAMPLE
- #
- # (summarize :supplies, [:sid],
- # :total_qty => Aggregator.sum(:qty))
- #
- # # Or, to specify an allbut projection
- # (summarize :supplies, [:qty, :pid],
- # :total_qty => Aggregator.sum(:qty), true)
- #
- # DESCRIPTION
- #
- # This operator summarizes input tuples on the projection on KEY1,KEY2,...
- # attributes and applies aggregate operators on sets of matching tuples.
- # Introduced names AGG should be disjoint from KEY attributes.
- #
- # When used in shell, the aggregations are taken from commandline arguments
- # AGG and EXPR, where AGG is the name of a new attribute and EXPR is an
- # aggregation expression evaluated on Aggregator:
- #
- # alf summarize supplies --by=sid -- total_qty "sum(:qty)"
- # alf summarize supplies --allbut --by=pid,qty -- total_qty "sum(:qty)"
- #
- class Summarize < Factory::Operator(__FILE__, __LINE__)
- include Operator::Relational, Operator::Shortcut, Operator::Unary
-
- # By attributes
- attr_accessor :by
-
- # Allbut on by?
- attr_accessor :allbut
-
- # Aggregations as a AGG => Aggregator(EXPR) hash
- attr_accessor :aggregators
-
- def initialize(by = [], aggregators = {}, allbut = false)
- @by = by
- @allbut = allbut
- @aggregators = aggregators
- end
-
- # Installs the options
- options do |opt|
- opt.on('--by=x,y,z', 'Specify by attributes', Array) do |args|
- @by = args.collect{|a| a.to_sym}
- end
- opt.on('--allbut', 'Make an allbut projection/summarization') do
- @allbut = true
- end
- end
-
- # Summarizes according to a complete order
- class SortBased
- include Alf::Operator::Cesure
-
- attr_reader :cesure_key
- attr_reader :aggregators
-
- def initialize(by_key, aggregators)
- @cesure_key, @aggregators = by_key, aggregators
- end
-
- protected
-
- def start_cesure(key, receiver)
- @aggs = tuple_collect(@aggregators) do |a,agg|
- [a, agg.least]
- end
- end
-
- def accumulate_cesure(tuple, receiver)
- @aggs = tuple_collect(@aggregators) do |a,agg|
- [a, agg.happens(@aggs[a], tuple)]
- end
- end
-
- def flush_cesure(key, receiver)
- @aggs = tuple_collect(@aggregators) do |a,agg|
- [a, agg.finalize(@aggs[a])]
- end
- receiver.call key.merge(@aggs)
- end
-
- end # class SortBased
+ end # module Operator
- # Summarizes in-memory with a hash
- class HashBased
- include Operator::Relational, Operator::Unary
-
- attr_reader :by_key
- attr_reader :aggregators
-
- def initialize(by_key, aggregators)
- @by_key, @aggregators = by_key, aggregators
- end
-
- protected
-
- def _each
- index = Hash.new do |h,k|
- h[k] = tuple_collect(@aggregators) do |a,agg|
- [a, agg.least]
- end
- end
- each_input_tuple do |tuple|
- key, rest = by_key.split(tuple)
- index[key] = tuple_collect(@aggregators) do |a,agg|
- [a, agg.happens(index[key][a], tuple)]
- end
- end
- index.each_pair do |key,aggs|
- aggs = tuple_collect(@aggregators) do |a,agg|
- [a, agg.finalize(aggs[a])]
- end
- yield key.merge(aggs)
- end
- end
-
- end
-
- protected
-
- # (see Operator::CommandMethods#set_args)
- def set_args(args)
- @aggregators = tuple_collect(args.each_slice(2)) do |a,expr|
- [a.to_sym, Aggregator.compile(expr)]
- end
- self
- end
-
- def longexpr
- if @allbut
- by_key = Tools::ProjectionKey.new(@by, @allbut)
- chain HashBased.new(by_key, @aggregators),
- datasets
- else
- by_key = Tools::ProjectionKey.new(@by, @allbut)
- chain SortBased.new(by_key, @aggregators),
- Operator::NonRelational::Sort.new(by_key.to_ordering_key),
- datasets
- end
- end
-
- end # class Summarize
-
- #
- # Relational ranking (explicit tuple positions)
- #
- # SYNOPSIS
- # #{program_name} #{command_name} [OPERAND] --order=OR1... -- [RANKNAME]
- #
- # OPTIONS
- # #{summarized_options}
- #
- # API & EXAMPLE
- #
- # # Position attribute => # of tuples with smaller weight
- # (rank :parts, [:weight], :position)
- #
- # # Position attribute => # of tuples with greater weight
- # (rank :parts, [[:weight, :desc]], :position)
- #
- # DESCRIPTION
- #
- # This operator computes the ranking of input tuples, according to an order
- # relation. Precisely, it extends the input tuples with a RANKNAME attribute
- # whose value is the number of tuples which are considered strictly less
- # according to the specified order. For the two examples above:
- #
- # alf rank parts --order=weight -- position
- # alf rank parts --order=weight,desc -- position
- #
- # Note that, unless the ordering key includes a candidate key for the input
- # relation, the new RANKNAME attribute is not necessarily a candidate key
- # for the output one. In the example below, adding the :pid attribute
- # ensures that position will contain all different values:
- #
- # alf rank parts --order=weight,pid -- position
- #
- # Or even:
- #
- # alf rank parts --order=weight,desc,pid,asc -- position
- #
- class Rank < Factory::Operator(__FILE__, __LINE__)
- include Operator::Relational, Operator::Shortcut, Operator::Unary
-
- # Ranking order
- attr_accessor :order
-
- # Ranking attribute name
- attr_accessor :ranking_name
-
- def initialize(order = [], ranking_name = :rank)
- @order, @ranking_name = order, ranking_name
- end
-
- options do |opt|
- opt.on('--order=x,y,z', 'Specify ranking order', Array) do |args|
- @order = args.collect{|a| a.to_sym}
- end
- end
-
- class SortBased
- include Operator::Cesure
-
- def initialize(order, ranking_name)
- @order, @ranking_name = order, ranking_name
- end
-
- def ordering_key
- OrderingKey.coerce @order
- end
-
- def cesure_key
- ProjectionKey.coerce(ordering_key)
- end
-
- def start_cesure(key, receiver)
- @rank ||= 0
- @last_block = 0
- end
-
- def accumulate_cesure(tuple, receiver)
- receiver.call tuple.merge(@ranking_name => @rank)
- @last_block += 1
- end
-
- def flush_cesure(key, receiver)
- @rank += @last_block
- end
-
- end # class SortBased
-
- protected
-
- # (see Operator::CommandMethods#set_args)
- def set_args(args)
- unless args.empty?
- self.ranking_name = args.first.to_sym
- end
- self
- end
-
- def ordering_key
- OrderingKey.coerce @order
- end
-
- def longexpr
- sort_key = ordering_key
- chain SortBased.new(sort_key, @ranking_name),
- Operator::NonRelational::Sort.new(sort_key),
- datasets
- end
-
- end # class Rank
-
- #
- # Relational quota-queries (position, sum progression, etc.)
- #
- # SYNOPSIS
- # #{program_name} #{command_name} [OPERAND] --by=KEY1,... --order=OR1... AGG1 EXPR1...
- #
- # OPTIONS
- # #{summarized_options}
- #
- # API & EXAMPLE
- #
- # (quota :supplies, [:sid], [:qty],
- # :position => Aggregator.count,
- # :sum_qty => Aggregator.sum(:qty))
- #
- # DESCRIPTION
- #
- # This operator computes quota values on input tuples.
- #
- # alf quota supplies --by=sid --order=qty -- position count sum_qty "sum(:qty)"
- #
- class Quota < Factory::Operator(__FILE__, __LINE__)
- include Operator::Relational, Operator::Experimental,
- Operator::Shortcut, Operator::Unary
-
- # Quota by
- attr_accessor :by
-
- # Quota order
- attr_accessor :order
-
- # Quota aggregations
- attr_accessor :aggregators
-
- def initialize(by = [], order = [], aggregators = {})
- @by, @order, @aggregators = by, order, aggregators
- end
-
- options do |opt|
- opt.on('--by=x,y,z', 'Specify by attributes', Array) do |args|
- @by = args.collect{|a| a.to_sym}
- end
- opt.on('--order=x,y,z', 'Specify order attributes', Array) do |args|
- @order = args.collect{|a| a.to_sym}
- end
- end
-
- class SortBased
- include Operator::Cesure
-
- def initialize(by, order, aggregators)
- @by, @order, @aggregators = by, order, aggregators
- end
-
- def cesure_key
- ProjectionKey.coerce @by
- end
-
- def ordering_key
- OrderingKey.coerce @order
- end
-
- def start_cesure(key, receiver)
- @aggs = tuple_collect(@aggregators) do |a,agg|
- [a, agg.least]
- end
- end
-
- def accumulate_cesure(tuple, receiver)
- @aggs = tuple_collect(@aggregators) do |a,agg|
- [a, agg.happens(@aggs[a], tuple)]
- end
- thisone = tuple_collect(@aggregators) do |a,agg|
- [a, agg.finalize(@aggs[a])]
- end
- receiver.call tuple.merge(thisone)
- end
-
- end # class SortBased
-
- protected
-
- # (see Operator::CommandMethods#set_args)
- def set_args(args)
- @aggregators = tuple_collect(args.each_slice(2)) do |a,expr|
- [a.to_sym, Aggregator.compile(expr)]
- end
- self
- end
-
- def cesure_key
- ProjectionKey.coerce @by
- end
-
- def ordering_key
- OrderingKey.coerce @order
- end
-
- def longexpr
- sort_key = cesure_key.to_ordering_key + ordering_key
- chain SortBased.new(@by, @order, @aggregators),
- Operator::NonRelational::Sort.new(sort_key),
- datasets
- end
-
- end # class Quota
-
- end
-
#
# Aggregation operator.
#
class Aggregator
-
- # Aggregate options
- attr_reader :options
-
- #
- # Automatically installs factory methods for inherited classes.
- #
- # Example:
- # class Sum < Aggregator # will give a method Aggregator.sum
- # ...
- # end
- # Aggregator.sum(:size) # factor a Sum aggregator on tuple[:size]
- # Aggregator.sum{ size } # idem but works on any tuple expression
- #
- def self.inherited(clazz)
- basename = Tools.ruby_case(Tools.class_name(clazz))
- instance_eval <<-EOF
- def #{basename}(*args, &block)
- #{clazz}.new(*args, &block)
- end
- EOF
- end
-
- def self.compile(expr, &block)
- instance_eval(expr, &block)
- end
-
- #
- # Creates an Aggregator instance.
- #
- # This constructor can be used either by passing an attribute
- # argument or a block that will be evaluated on a TupleHandle
- # instance set on each aggregated tuple.
- #
- # Aggregator.new(:size) # will aggregate on tuple[:size]
- # Aggregator.new{ size * price } # ... on tuple[:size] * tuple[:price]
- #
- def initialize(attribute = nil, options = {}, &block)
- attribute, options = nil, attribute if attribute.is_a?(Hash)
- @handle = Tools::TupleHandle.new
- @options = default_options.merge(options)
- @functor = Tools::TupleHandle.compile(attribute || block)
- end
-
- #
- # Returns the default options to use
- #
- def default_options
- {}
- end
-
- #
- # Returns the least value, which is the one to use on an empty
- # set.
- #
- # This method is intended to be overridden by subclasses; default
- # implementation returns nil.
- #
- def least
- nil
- end
-
- #
- # This method is called on each aggregated tuple and must return
- # an updated _memo_ value. It can be seen as the block typically
- # given to Enumerable.inject.
- #
- # The default implementation collects the pre-value on the tuple
- # and delegates to _happens.
- #
- def happens(memo, tuple)
- _happens(memo, @handle.set(tuple).evaluate(@functor))
- end
-
- #
- # This method finalizes a computation.
- #
- # Argument _memo_ is either _least_ or the result of aggregating
- # through _happens_. The default implementation simply returns
- # _memo_. The method is intended to be overridden for complex
- # aggregations that need stateful information. See Avg for an
- # example
- #
- def finalize(memo)
- memo
- end
-
- #
- # Aggregates over an enumeration of tuples.
- #
- def aggregate(enum)
- finalize(
- enum.inject(least){|memo,tuple|
- happens(memo, tuple)
- })
- end
-
- protected
-
- #
- # @see happens.
- #
- # This method is intended to be overridden and returns _value_
- # by default, making this aggregator a "Last" one...
- #
- def _happens(memo, value)
- value
- end
-
- #
- # Defines a COUNT aggregation operator
- #
- class Count < Aggregator
- def least(); 0; end
- def happens(memo, tuple) memo + 1; end
- end # class Count
-
- #
- # Defines a SUM aggregation operator
- #
- class Sum < Aggregator
- def least(); 0; end
- def _happens(memo, val) memo + val; end
- end # class Sum
-
- #
- # Defines an AVG aggregation operator
- #
- class Avg < Aggregator
- def least(); [0.0, 0.0]; end
- def _happens(memo, val) [memo.first + val, memo.last + 1]; end
- def finalize(memo) memo.first / memo.last end
- end # class Avg
-
- #
- # Defines a MIN aggregation operator
- #
- class Min < Aggregator
- def least(); nil; end
- def _happens(memo, val)
- memo.nil? ? val : (memo < val ? memo : val)
- end
- end # class Min
-
- #
- # Defines a MAX aggregation operator
- #
- class Max < Aggregator
- def least(); nil; end
- def _happens(memo, val)
- memo.nil? ? val : (memo > val ? memo : val)
- end
- end # class Max
-
- #
- # Defines a GROUP aggregation operator
- #
- class Group < Aggregator
- def initialize(*attrs)
- super(nil, {}){
- Tools.tuple_collect(attrs){|k| [k, self.send(k)] }
- }
- end
- def least(); Set.new; end
- def _happens(memo, val)
- memo << val
- end
- def finalize(memo)
- Relation.coerce memo
- end
- end
-
- #
- # Defines a COLLECT aggregation operator
- #
- class Collect < Aggregator
- def least(); []; end
- def _happens(memo, val)
- memo << val
- end
- end
-
- #
- # Defines a CONCAT aggregation operator
- #
- class Concat < Aggregator
- def least(); ""; end
- def default_options
- {:before => "", :after => "", :between => ""}
- end
- def _happens(memo, val)
- memo << options[:between].to_s unless memo.empty?
- memo << val.to_s
- end
- def finalize(memo)
- options[:before].to_s + memo + options[:after].to_s
- end
- end
-
+ require 'alf/aggregator/class_methods'
+ require 'alf/aggregator/base'
+ require 'alf/aggregator/aggregators'
+
end # class Aggregator
#
# Base class for implementing buffers.
#
class Buffer
-
- #
- # Keeps tuples ordered on a specific key
- #
- # Example:
- #
- # sorted = Buffer::Sorted.new OrderingKey.new(...)
- # sorted.add_all(...)
- # sorted.each do |tuple|
- # # tuples are ordered here
- # end
- #
- class Sorted < Buffer
-
- #
- # Creates a buffer instance with an ordering key
- #
- def initialize(ordering_key)
- @ordering_key = ordering_key
- @buffer = []
- end
-
- #
- # Adds all elements of an iterator to the buffer
- #
- def add_all(enum)
- sorter = @ordering_key.sorter
- @buffer = merge_sort(@buffer, enum.to_a.sort(&sorter), sorter)
- end
-
- #
- # (see Buffer#each)
- #
- def each
- @buffer.each(&Proc.new)
- end
-
- private
-
- # Implements a merge sort between two iterators s1 and s2
- def merge_sort(s1, s2, sorter)
- (s1 + s2).sort(&sorter)
- end
-
- end # class Buffer::Sorted
-
+ require 'alf/buffer/sorted'
+
end # class Buffer
#
- # Defines a Heading, that is, a set of attribute (name,domain) pairs.
- #
- class Heading
-
- #
- # Creates a Heading instance
- #
- # @param [Hash] a hash of attribute (name, type) pairs where name is
- # a Symbol and type is a Class
- #
- def self.[](attributes)
- Heading.new(attributes)
- end
-
- # @return [Hash] a (frozen) hash of (name, type) pairs
- attr_reader :attributes
-
- #
- # Creates a Heading instance
- #
- # @param [Hash] a hash of attribute (name, type) pairs where name is
- # a Symbol and type is a Class
- #
- def initialize(attributes)
- @attributes = attributes.dup.freeze
- end
-
- #
- # Returns heading's cardinality
- #
- def cardinality
- attributes.size
- end
- alias :size :cardinality
- alias :count :cardinality
-
- #
- # Returns heading's hash code
- #
- def hash
- @hash ||= attributes.hash
- end
-
- #
- # Checks equality with other heading
- #
- def ==(other)
- other.is_a?(Heading) && (other.attributes == attributes)
- end
- alias :eql? :==
-
- #
- # Converts this heading to a Hash of (name,type) pairs
- #
- def to_hash
- attributes.dup
- end
-
- #
- # Returns a Heading literal
- #
- def to_ruby_literal
- attributes.empty? ?
- "Alf::Heading::EMPTY" :
- "Alf::Heading[#{Myrrha.to_ruby_literal(attributes)[1...-1]}]"
- end
- alias :inspect :to_ruby_literal
-
- EMPTY = Alf::Heading.new({})
- end # class Heading
-
- #
# Defines an in-memory relation data structure.
#
# A relation is a set of tuples; a tuple is a set of attribute (name, value)
# pairs. The class implements such a data structure with full relational
# algebra installed as instance methods.
@@ -3598,154 +331,14 @@
#
# See main Alf documentation about relational operators.
#
class Relation
include Iterator
-
- protected
-
- # @return [Set] the set of tuples
- attr_reader :tuples
-
- public
-
- #
- # Creates a Relation instance.
- #
- # @param [Set] tuples a set of tuples
- #
- def initialize(tuples)
- raise ArgumentError unless tuples.is_a?(Set)
- @tuples = tuples
- end
-
- #
- # Coerces `val` to a relation.
- #
- # Recognized arguments are: Relation (identity coercion), Set of ruby hashes,
- # Array of ruby hashes, Alf::Iterator.
- #
- # @return [Relation] a relation instance for the given set of tuples
- # @raise [ArgumentError] when `val` is not recognized
- #
- def self.coerce(val)
- case val
- when Relation
- val
- when Set
- Relation.new(val)
- when Array
- Relation.new val.to_set
- when Iterator
- Relation.new val.to_set
- else
- raise ArgumentError, "Unable to coerce #{val} to a Relation"
- end
- end
-
- # (see Relation.coerce)
- def self.[](*tuples)
- coerce(tuples)
- end
-
- #
- # (see Iterator#each)
- #
- def each(&block)
- tuples.each(&block)
- end
-
- #
- # Returns relation's cardinality (number of tuples).
- #
- # @return [Integer] relation's cardinality
- #
- def cardinality
- tuples.size
- end
- alias :size :cardinality
- alias :count :cardinality
-
- # Returns true if this relation is empty
- def empty?
- cardinality == 0
- end
-
- #
- # Install the DSL through iteration over defined operators
- #
- Operator::each do |op_class|
- meth_name = Tools.ruby_case(Tools.class_name(op_class)).to_sym
- if op_class.unary?
- define_method(meth_name) do |*args|
- op = op_class.new(*args).pipe(self)
- Relation.coerce(op)
- end
- elsif op_class.binary?
- define_method(meth_name) do |right, *args|
- op = op_class.new(*args).pipe([self, Iterator.coerce(right)])
- Relation.coerce(op)
- end
- else
- raise "Unexpected operator #{op_class}"
- end
- end # Operators::each
-
- alias :+ :union
- alias :- :minus
- # Shortcut for project(attributes, true)
- def allbut(attributes)
- project(attributes, true)
- end
-
- #
- # (see Object#hash)
- #
- def hash
- @tuples.hash
- end
-
- #
- # (see Object#==)
- #
- def ==(other)
- return nil unless other.is_a?(Relation)
- other.tuples == self.tuples
- end
- alias :eql? :==
-
- #
- # Returns a textual representation of this relation
- #
- def to_s
- Alf::Renderer.text(self).execute("")
- end
-
- #
- # Returns an array with all tuples in this relation.
- #
- # @param [Tools::OrderingKey] an optional ordering key (any argument
- # recognized by OrderingKey.coerce is supported here).
- # @return [Array] an array of hashes, in requested order (if specified)
- #
- def to_a(okey = nil)
- okey = Tools::OrderingKey.coerce(okey) if okey
- ary = tuples.to_a
- ary.sort!(&okey.sorter) if okey
- ary
- end
-
- #
- # Returns a literal representation of this relation
- #
- def to_ruby_literal
- "Alf::Relation[" +
- tuples.collect{|t| Myrrha.to_ruby_literal(t)}.join(', ') + "]"
- end
- alias :inspect :to_ruby_literal
-
+ require "alf/relation/class_methods"
+ require "alf/relation/instance_methods"
+
DEE = Relation.coerce([{}])
DUM = Relation.coerce([])
end # class Relation
# Implements a small LISP-like DSL on top of Alf.
@@ -3762,127 +355,14 @@
# conditions are not part of the DSL itself, thus not considered as part of
# the API, and may therefore evolve at any time. In other words, this module
# is not intended to be directly included by third-party classes.
#
module Lispy
+ require 'alf/lispy/instance_methods'
- alias :ruby_extend :extend
-
- # The environment
- attr_accessor :environment
-
- #
- # Compiles a query expression given by a String or a block and returns
- # the result (typically a tuple iterator)
- #
- # Example
- #
- # # with a string
- # op = compile "(restrict :suppliers, lambda{ city == 'London' })"
- #
- # # or with a block
- # op = compile {
- # (restrict :suppliers, lambda{ city == 'London' })
- # }
- #
- # @param [String] expr a Lispy expression to compile
- # @return [Iterator] the iterator resulting from compilation
- #
- def compile(expr = nil, path = nil, &block)
- if expr.nil?
- instance_eval(&block)
- else
- b = _clean_binding
- (path ? Kernel.eval(expr, b, path) : Kernel.eval(expr, b))
- end
- end
-
- #
- # Evaluates a query expression given by a String or a block and returns
- # the result as an in-memory relation (Alf::Relation)
- #
- # Example:
- #
- # # with a string
- # rel = evaluate "(restrict :suppliers, lambda{ city == 'London' })"
- #
- # # or with a block
- # rel = evaluate {
- # (restrict :suppliers, lambda{ city == 'London' })
- # }
- #
- def evaluate(expr = nil, path = nil, &block)
- compile(expr, path, &block).to_rel
- end
-
- #
- # Delegated to the current environment
- #
- # This method returns the dataset associated to a given name. The result
- # may depend on the current environment, but is generally an Iterator,
- # often a Reader instance.
- #
- # @param [Symbol] name name of the dataset to retrieve
- # @return [Iterator] the dataset as an iterator
- # @see Environment#dataset
- #
- def dataset(name)
- raise "Environment not set" unless @environment
- @environment.dataset(name)
- end
-
- # Functional equivalent to Alf::Relation[...]
- def relation(*tuples)
- Relation.coerce(tuples)
- end
-
- #
- # Install the DSL through iteration over defined operators
- #
- Operator::each do |op_class|
- meth_name = Tools.ruby_case(Tools.class_name(op_class)).to_sym
- if op_class.unary?
- define_method(meth_name) do |child, *args|
- child = Iterator.coerce(child, environment)
- op_class.new(*args).pipe(child, environment)
- end
- elsif op_class.binary?
- define_method(meth_name) do |left, right, *args|
- operands = [left, right].collect{|x| Iterator.coerce(x, environment)}
- op_class.new(*args).pipe(operands, environment)
- end
- else
- raise "Unexpected operator #{op_class}"
- end
- end # Operators::each
-
- def allbut(child, attributes)
- (project child, attributes, true)
- end
-
- #
- # Runs a command as in shell.
- #
- # Example:
- #
- # lispy = Alf.lispy(Alf::Environment.examples)
- # op = lispy.run(['restrict', 'suppliers', '--', "city == 'Paris'"])
- #
- def run(argv, requester = nil)
- Alf::Command::Main.new(environment).run(argv, requester)
- end
-
- Agg = Alf::Aggregator
DUM = Relation::DUM
DEE = Relation::DEE
-
- private
-
- def _clean_binding
- binding
- end
-
end # module Lispy
#
# Builds and returns a lispy engine on a specific environment.
#
@@ -3904,7 +384,6 @@
lispy.environment = Environment.coerce(env)
lispy
end
end # module Alf
-require "alf/text"
-require "alf/yaml"
\ No newline at end of file
+require "alf/extra"