module Sequel class Dataset # Allows you to join multiple datasets/tables and have the result set # split into component tables. # # This differs from the usual usage of join, which returns the result set # as a single hash. For example: # # # CREATE TABLE artists (id INTEGER, name TEXT); # # CREATE TABLE albums (id INTEGER, name TEXT, artist_id INTEGER); # DB[:artists].left_outer_join(:albums, :artist_id=>:id).first # => {:id=>(albums.id||artists.id), :name=>(albums.name||artist.names), :artist_id=>albums.artist_id} # DB[:artists].graph(:albums, :artist_id=>:id).first # => {:artists=>{:id=>artists.id, :name=>artists.name}, :albums=>{:id=>albums.id, :name=>albums.name, :artist_id=>albums.artist_id}} # # Using a join such as left_outer_join, the attribute names that are shared between # the tables are combined in the single return hash. You can get around that by # using .select with correct aliases for all of the columns, but it is simpler to # use graph and have the result set split for you. In addition, graph respects # any row_proc or transform attributes of the current dataset and the datasets # you use with graph. # # Arguments: # * dataset - Can be a symbol (specifying a table), another dataset, # or an object that responds to .dataset and yields a symbol or a dataset # * join_conditions - A conditions hash that is passed to the join_table method # * options - A hash of graph options. The following options are currently used: # * :table_alias - The alias to use for the table. If not specified, doesn't # alias the table. You will get an error if the the alias (or table) name is # used more than once. # * :join_type - The type of join to use (passed to join_table). Defaults to # :left_outer. # * :select - Whether to select the columns from the you are joining, and # include them as a separate hash in the output. With this set to false, # it is like simply joining the tables. This is designed to be used for # many_to_many join tables, where the columns are just foreign keys to primary # keys in other tables. def graph(dataset, join_conditions, options = {}) # Allow the use of a model, dataset, or symbol as the first argument # Find the table name/dataset based on the argument dataset = dataset.dataset if dataset.respond_to?(:dataset) case dataset when Symbol table = dataset dataset = @db[dataset] when ::Sequel::Dataset table = dataset.first_source else raise Error, "The dataset argument should be a symbol, dataset, or model" end # Raise Sequel::Error with explanation that the table alias has been used raise_alias_error = lambda do raise(Error, "this #{options[:table_alias] ? 'alias' : 'table'} has already been been used, please specify " \ "#{options[:table_alias] ? 'a different alias' : 'an alias via the :table_alias option'}") end # Only allow table aliases that haven't been used table_alias = options[:table_alias] || table raise_alias_error.call if @opts[:graph] && @opts[:graph][:table_aliases] && @opts[:graph][:table_aliases].include?(table_alias) # Join the table early in order to avoid cloning the dataset twice ds = join_table(options[:join_type] || :left_outer, table == table_alias ? table : "#{table} #{table_alias}", join_conditions) opts = ds.opts # Whether to include the table in the result set add_table = options[:select] == false ? false : true # Whether to add the columns to the list of column aliases add_columns = !ds.opts.include?(:graph_aliases) # Setup the initial graph data structure if it doesn't exist unless graph = opts[:graph] master = ds.first_source raise_alias_error.call if master == table_alias # Master hash storing all .graph related information graph = opts[:graph] = {} # Associates column aliases back to tables and columns column_aliases = graph[:column_aliases] = {} # Associates table alias (the master is never aliased) table_aliases = graph[:table_aliases] = {master=>self} # Keep track of the alias numbers used ca_num = graph[:column_alias_num] = {} # All columns in the master table are never # aliased, but are not included if set_graph_aliases # has been used. if add_columns select = (opts[:select] ||= []) columns.each do |column| column_aliases[column] = [master, column] select.push(:"#{master}__#{column}") end end end # Add the table alias to the list of aliases # Even if it isn't been used in the result set, # we add a key for it with a nil value so we can check if it # is used more than once table_aliases = graph[:table_aliases] table_aliases[table_alias] = add_table ? dataset : nil # Add the columns to the selection unless we are ignoring them if add_table && add_columns select = opts[:select] column_aliases = graph[:column_aliases] ca_num = graph[:column_alias_num] # If the column hasn't been used yet, don't alias it. # If it has been used, try table_column. # If that has been used, try table_column_N # using the next value of N that we know hasn't been # used dataset.columns.each do |column| col_alias, c = if column_aliases[column] tc = :"#{table_alias}_#{column}" if column_aliases[tc] if can = ca_num[tc] ca_num[tc] += 1 tc = :"#{tc}_#{can}" else ca_num[tc] = 1 tc = :"#{tc}_0" end end [tc, :"#{table_alias}__#{column}___#{tc}"] else [column, :"#{table_alias}__#{column}"] end column_aliases[col_alias] = [table_alias, column] select.push(c) end end ds end # This allows you to manually specify the graph aliases to use # when using graph. You can use it to only select certain # columns, and have those columns mapped to specific aliases # in the result set. This is the equivalent of .select for a # graphed dataset, and must be used instead of .select whenever # graphing is used. Example: # # DB[:artists].graph(:albums, :artist_id=>:id).set_graph_aliases(:artist_name=>[:artists, :name], :album_name=>[:albums, :name]).first # => {:artists=>{:name=>artists.name}, :albums=>{:name=>albums.name}} # # Arguments: # * graph_aliases - Should be a hash with keys being symbols of # column aliases, and values being arrays with two symbol elements. # The first element of the array should be the table alias, # and the second should be the actual column name. def set_graph_aliases(graph_aliases) ds = select(*graph_aliases.collect{|col_alias, tc| :"#{tc[0]}__#{tc[1]}#{"___#{col_alias}" unless tc[1] == col_alias}"}) ds.opts[:graph_aliases]=graph_aliases ds end private # Fetch the rows, split them into component table parts, # tranform and run the row_proc on each part (if applicable), # and yield a hash of the parts. def graph_each(opts, &block) # Reject tables with nil datasets, as they are excluded from # the result set datasets = @opts[:graph][:table_aliases].to_a.reject{|ta,ds| ds.nil?} # Get just the list of table aliases into a local variable, for speed table_aliases = datasets.collect{|ta,ds| ta} # Get an array of arrays, one for each dataset, with # the necessary information about each dataset, for speed datasets = datasets.collect do |ta, ds| [ta, ds, ds.instance_variable_get(:@transform), ds.row_proc] end # Use the manually set graph aliases, if any, otherwise # use the ones automatically created by .graph column_aliases = @opts[:graph_aliases] || @opts[:graph][:column_aliases] fetch_rows(select_sql(opts)) do |r| graph = {} # Create the sub hashes, one per table table_aliases.each{|ta| graph[ta]={}} # Split the result set based on the column aliases # If there are columns in the result set that are # not in column_aliases, they are ignored column_aliases.each do |col_alias, tc| ta, column = tc graph[ta][column] = r[col_alias] end # For each dataset, transform and run the row # row_proc if applicable datasets.each do |ta,ds,tr,rp| g = graph[ta] g = ds.transform_load(g) if tr g = rp[g] if rp graph[ta] = g end yield graph end self end end end