require 'ostruct' begin require 'rubygems' rescue LoadError; end require 'activerecord' require 'xml/libxml' module VORuby module ActiveVOTable class Callbacks #:nodoc: include XML::SaxParser::Callbacks attr_accessor :base_class, :meta_location, :data_location, :metadata, :row, :data_tables, :fields_tables, :inside_td, :in_error def initialize(base_class) self.base_class = base_class self.row = [] self.metadata = [] self.meta_location = OpenStruct.new(:resource => 0, :table => 0, :field => 0) self.data_location = OpenStruct.new(:resource => 0, :table => 0, :row => 0, :column => 0) self.inside_td = false self.in_error = false end def on_start_element(name, attrs) case name when 'RESOURCE' self.meta_location.resource += 1 self.data_location.resource += 1 self.meta_location.table = 0 self.data_location.table = 0 when 'TABLE' self.meta_location.table += 1 self.meta_location.field = 0 self.data_location.table += 1 self.data_location.row = 0 self.data_location.column = 0 create_db_tables() when 'FIELD' self.meta_location.field += 1 self.metadata << attrs when 'DATA' create_data_schema() # create the table and make it available create_metadata_table() when 'TABLEDATA' self.base_class.connection.begin_db_transaction() when 'TR' self.data_location.resource = self.meta_location.resource self.data_location.table = self.meta_location.table self.data_location.row += 1 self.data_location.column = 0 self.row.clear when 'TD' self.data_location.column += 1 self.inside_td = true when 'INFO' self.in_error = true if attrs['name'] == 'QUERY_STATUS' and attrs['value'] == 'ERROR' end end def on_end_element(name) case name when 'TABLE' self.metadata.clear when 'TABLEDATA' self.base_class.connection.commit_db_transaction() when 'TR' create_record() when 'TD' self.inside_td = false when 'INFO' self.in_error = false end end def on_characters(chars) raise "Query status error: #{chars}" if self.in_error self.row << chars if self.inside_td end def data_table_name "#{self.base_class.table_name}_#{Base::DATA_ID}_#{self.data_location.resource}_#{self.data_location.table}" end def fields_table_name "#{self.base_class.table_name}_#{Base::SCHEMA_ID}_#{self.meta_location.resource}_#{self.meta_location.table}" end private def create_db_tables self.base_class.connection.create_table(data_table_name()){} self.base_class.connection.create_table(fields_table_name()){} end def create_data_schema # A couple additional columns representing the position of the table in the votable as a whole. self.base_class.connection.add_column(self.data_table_name, :resource_num, :integer, :null => false, :default => 1) self.base_class.connection.add_column(self.data_table_name, :table_num, :integer, :null => false, :default => 1) # The columns in the votable table itself. self.metadata.each do |md| column_name = (md['name'] || md['ID'] || '').downcase.gsub(/\W+/, '_') #column_name = 'record_type' if column_name == 'type' # 'type' triggers ActiveRecord's self.base_class.connection.add_column( self.data_table_name, column_name, db_data_type(md['datatype'], md['arraysize']) ) end # Add a simple index. self.base_class.connection.add_index(self.data_table_name, [:resource_num, :table_num]) end def create_metadata_table # A couple additional columns representing the position of the table in the votable as a whole. self.base_class.connection.add_column(self.fields_table_name, :resource_num, :integer, :null => false, :default => 1) self.base_class.connection.add_column(self.fields_table_name, :table_num, :integer, :null => false, :default => 1) # The columns defined by the votable table fields itself. self.base_class.connection.add_column(self.fields_table_name, :vid, :string) self.base_class.connection.add_column(self.fields_table_name, :unit, :string) self.base_class.connection.add_column(self.fields_table_name, :datatype, :string) self.base_class.connection.add_column(self.fields_table_name, :precision, :string) self.base_class.connection.add_column(self.fields_table_name, :width, :integer) self.base_class.connection.add_column(self.fields_table_name, :ref, :string) self.base_class.connection.add_column(self.fields_table_name, :name, :string) self.base_class.connection.add_column(self.fields_table_name, :ucd, :string) self.base_class.connection.add_column(self.fields_table_name, :utype, :string) self.base_class.connection.add_column(self.fields_table_name, :arraysize, :string) self.base_class.connection.add_column(self.fields_table_name, :type, :string) # A simple index. self.base_class.connection.add_index(self.fields_table_name, [:resource_num, :table_num]) # and its columns column_names = [ self.base_class.connection.quote_column_name('resource_num'), self.base_class.connection.quote_column_name('table_num'), self.base_class.connection.quote_column_name('vid'), self.base_class.connection.quote_column_name('unit'), self.base_class.connection.quote_column_name('datatype'), self.base_class.connection.quote_column_name('precision'), self.base_class.connection.quote_column_name('width'), self.base_class.connection.quote_column_name('ref'), self.base_class.connection.quote_column_name('name'), self.base_class.connection.quote_column_name('ucd'), self.base_class.connection.quote_column_name('utype'), self.base_class.connection.quote_column_name('arraysize'), self.base_class.connection.quote_column_name('type') ] self.base_class.connection.begin_db_transaction() columns = self.base_class.connection.columns(self.fields_table_name) self.metadata.each do |md| # quote each value column_values = [ self.base_class.connection.quote(self.meta_location.resource, columns.find{ |c| c.name == 'resource_num'}), self.base_class.connection.quote(self.meta_location.table, columns.find{ |c| c.name == 'table_num' }), self.base_class.connection.quote(md['ID'], columns.find{ |c| c.name == 'vid'}), self.base_class.connection.quote(md['unit'], columns.find{ |c| c.name == 'unit'}), self.base_class.connection.quote(md['datatype'], columns.find{ |c| c.name == 'datatype'}), self.base_class.connection.quote(md['precision'], columns.find{ |c| c.name == 'precision'}), self.base_class.connection.quote(md['width'], columns.find{ |c| c.name == 'width'}), self.base_class.connection.quote(md['ref'], columns.find{ |c| c.name == 'ref'}), self.base_class.connection.quote(md['name'], columns.find{ |c| c.name == 'name'}), self.base_class.connection.quote(md['ucd'], columns.find{ |c| c.name == 'ucd'}), self.base_class.connection.quote(md['utype'], columns.find{ |c| c.name == 'utype'}), self.base_class.connection.quote(md['arraysize'], columns.find{ |c| c.name == 'arraysize'}), self.base_class.connection.quote(md['type'], columns.find{ |c| c.name == 'type'}) ] column_names = columns.reject{ |c| c.name == 'id' }.collect{ |c| self.base_class.connection.quote_column_name(c.name) } self.base_class.connection.insert( "INSERT INTO #{self.base_class.connection.quote_table_name(self.fields_table_name)} (#{column_names.join(', ')}) VALUES (#{column_values.join(', ')})" ) end self.base_class.connection.commit_db_transaction() end def create_record column_names = [ self.base_class.connection.quote_column_name('resource_num'), self.base_class.connection.quote_column_name('table_num') ] column_values = [ self.base_class.connection.quote(self.data_location.resource), self.base_class.connection.quote(self.data_location.table) ] # The first three columns are id (which is auto-incremented), # resource_num and table_num (which we've taken care of above). self.base_class.connection.columns(self.data_table_name)[3..-1].each_with_index { |c, i| column_values << self.base_class.connection.quote(self.row[i], c) column_names << self.base_class.connection.quote_column_name(c.name) } self.base_class.connection.insert( "INSERT INTO #{self.base_class.connection.quote_table_name(self.data_table_name)} (#{column_names.join(', ')}) VALUES (#{column_values.join(', ')})" ) end def db_data_type(xml_datatype, xml_arraysize=nil) return :string if xml_arraysize # cop out if the arraysize is specified case xml_datatype when 'boolean' then :boolean when 'bit' then :integer when 'unsignedByte' then :text when 'short' then :integer when 'int' then :integer when 'log' then :float when 'char' then :string when 'unicodeChar' then :string when 'float' then :float when 'double' then :float when 'floatComplex' then :string when 'doubleComplex' then :string else :string end end end # Paging methods for data. module Pager DEFAULT_PER_PAGE = 25 def per_page=(n) @per_page = n end def per_page @per_page || DEFAULT_PER_PAGE end def page(p=1, page_size=nil, options={}) options ||= {} options[:limit] = page_size || per_page options[:offset] = options[:limit] * (p - 1) find(:all, options) end def each_page(page_size=nil, options={}) num_per_page = page_size || per_page (1..(count().to_f / num_per_page.to_f).ceil()).each do |n| yield(page(n, num_per_page), n) end end end # Have you ever wished you could treat your VOTable[http://www.ivoa.net/Documents/latest/VOT.html] # as a database? Well, now you can. ActiveVOTable is a package for reading votables into # a relational database. It uses the SAX parser in LibXML[http://libxml.rubyforge.org/] so it can # handle votables of arbitrary size, and it wraps the resulting database tables in ActiveRecord[http://ar.rubyonrails.com/] # so that you can easily execute queries against it. It can even handle votables with multiple tables # in multiple resources. # # ActiveVOTable::Base.logger = Logger.new(STDOUT) # set the logger to output to STDOUT, exactly as in ActiveRecord # ActiveVOTable::Base.establish_connection(:adapter => 'sqlite3', :database => 'votables.sqlite3') # connect to the database # # # read in the file results.vot into a SQLite database # vot = ActiveVOTable::Base.from(:xml, File.new('results.vot'), 'results') # # table = vot.first # a votable can have more than one TABLE element, but we're interested in the first one. # # # every table has a data section and schema section, both of which are accessible # data = table.data # schema = table.schema # # # both data and schemata are ultimately simply subclasses of ActiveRecord::Base and have all the same methods # puts data.find(:first).inspect # # => # puts schema.find(:first).inspect # # => # # # Note that in the above example the #vid method of the ResultSchema11 instance corresponds to the ID attribute of the FIELD # and the #vtype method corresponds to the FIELD's type attribute. # # # when you're done, you can optionally delete any tables hanging around in the database # vot.cleanup() # # # if the tables are already sitting around in the database (perhaps from a previous run)... # vot = ActiveVOTable::Base.from(:db, 'results') # # ActiveVOTable::Base#from and ActiveVOTable::Base#cleanup both take an optional hash of connection parameters # (the same kind ActiveRecord::Base#establish_connection does). This allows you the flexibility to upload # your votables to different databases if desired. So something like this works: # # vot1 = ActiveVOTable::Base.from(:xml, # File.new('results1.vot'), # 'results1', # this *must* be different from below # :adapter => 'sqlite3', :database => 'votables.sqlite3' # ) # # vot2 = ActiveVOTable::Base.from(:xml, # File.new('results2.vot'), # 'results2', # *must* be different from above # :adapter => 'mysql', :host => 'localhost', :username => 'me', :password => 'secret', :database => 'votables' # ) # # vot1.cleanup # vot2.cleanup class Base < ActiveRecord::Base SCHEMA_ID = 'schema' DATA_ID = 'data' def self.table_search_pattern #:nodoc: "_(#{SCHEMA_ID}|#{DATA_ID})_(\\d+)_(\\d+)" end # List the names of all the database tables associated with the # votable in question. And addition regex may be given # to further refine the list, if desired. # # puts vot.dbtables.inspect # # => ['result_data_1_1', 'result_schema_1_1'] def self.dbtables(regex=nil) raise "#{connection.adapter_name} does not support #tables" if !connection.respond_to?(:tables) self.connection.tables.find_all{ |t| base_match = t.match(/^(#{self.table_name()})#{self.table_search_pattern()}$/) regex ? (base_match and t.match(regex)) : base_match }.sort { |a, b| a_matches = a.match(/^(#{self.table_name})#{self.table_search_pattern()}$/) b_matches = b.match(/^(#{self.table_name})#{self.table_search_pattern()}$/) a_matches[1] <=> b_matches[1] and a_matches[2] <=> a_matches[2] and a_matches[3].to_i <=> a_matches[3].to_i and b_matches[4].to_i <=> b_matches[4].to_i } end def self.find_or_create_class(klass_name, subclass=Base) #:nodoc: const_defined?(klass_name) ? const_get(klass_name) : const_set(klass_name, Class.new(subclass)) end # List all the schema objects associated with the votable. # Typically, you'll use #tables instead (which associates # a schema with its corresponding data), but this is occassionally # useful. # # puts vot.schemata.inspect # # => [ActiveVOTable::Base::ResultSchema11] def self.schemata self.dbtables(/_#{SCHEMA_ID}_\d+_\d+$/).collect do |t| schema_klass = self.find_or_create_class(t.classify, self) schema_klass.set_table_name(t) schema_klass end end # List all the data objects associated with the votable. # Typically, you'll use #tables instead (which associates # a schema with its corresponding data), but this is occassionally # useful. # # puts vot.data.inspect # # => [ActiveVOTable::Base::ResultData11] def self.data self.dbtables(/_#{DATA_ID}_\d+_\d+$/).collect do |t| data_klass = self.find_or_create_class(t.classify, self) data_klass.extend(Pager) data_klass.set_table_name(t) data_klass end end # List all the tables associated with the votable. # Each member of the turned array responds to a #schema and # #data method. # # vot.tables.each do |t| # puts t.schema # puts t.data # end def self.tables data_list = self.data list = [] self.schemata.each_with_index do |s, i| list << OpenStruct.new(:schema => s, :data => data_list[i]) end list end # Delete any tables and (optionally) Ruby constants associated with an already existing votable. # # # ActiveVOTable::Base::Result11, ActiveVOTable::Base::Result11Schema # # and ActiveVOTable::Base::Result11Data are still hanging around after this # vot.cleanup # # or... # # # ActiveVOTable::Base::Result11, ActiveVOTable::Base::Result11Schema # # and ActiveVOTable::Base::Result11Data no longer exist... # vot.cleanup(true) # # There is also a block form which allows you access to the classes after their database # tables have been destroyed but before the classes themselves have been disposed of. This # is mostly for testing purposes and very rarely used in real life. # # vot.cleanup(true) do |v| # # the db tables are gone, but I can still play with the classes if I want... # end def self.cleanup(remove_class_on_cleanup=false) self.dbtables.each { |t| self.connection.drop_table(t) } yield self if block_given? superclass.send(:remove_const, self.table_name.classify) if remove_class_on_cleanup and superclass.send(:const_defined?, self.table_name.classify) end # Instantiate a votable from XML. # # +xml+:: May be a string or a File object. # +name+:: A string representing the root names of the tables that will be created in the database. If none is specified the string 'votable' + a timestamp will be used. # +dboptions+:: An optional hash of database connection parameters (exactly as you'd pass to ActiveRecord::Base#establish_connection). Necessary only if ActiveVOTable::Base#establish_connection hasn't been called. # # An array of objects representing the tables in the votables is returned. # Each of these objects has a #data method and a #schema method corresponding # to the TABLEDATA and FIELD elements in the VOTable specification. # # vot = ActiveVOTable::Base.from_xml( # File.new('votable.xml'), # parse the file votable.xml # 'my_votable' # every table created will be prefixed with 'my_votable', # :adapter => 'mysql', :host => 'localhost', :username => 'me', :password => 'secret', :database => 'votables', # stick it into a MySQL database # ) # # vot.each do |table| # puts table.schema.find(:all).inspect # retrieve rich semantic information about each column # puts table.data.find(:all, :conditions => ['ra > ?', 10.2]).inspect # find the actual data # end # # Assuming votable.xml had 2 resources, each with one table (for example) the table structure created # would look like: my_votable_fields_1_1, my_votable_rows_1_1, my_votable_fields_2_1, my_votable_rows_2_1. def self.from_xml(xml, name=nil, conn_params=nil, logger=nil) name ||= "votable_#{DateTime.now.strftime('%Y%m%d%H%M%S%L')}" raise "XML source must be a string or a File object, not '#{xml}'" if !xml.is_a?(String) and !xml.is_a?(File) parser = XML::SaxParser.new xml.is_a?(String) ? parser.string = xml : parser.filename = xml.path k = self.from_database(name, conn_params, logger, false) callbacks = Callbacks.new(k) parser.callbacks = callbacks parser.parse k end # Instantiate a votable from a database. # # Identical to #from_xml, except that it is assumed the appropriate tables already # exist in the database. # # vot = ActiveVOTable::Base.from_database( # 'my_votable' # for those tables that begin with 'my_votable', # :adapter => 'mysql', :host => 'localhost', :username => 'me', :password => 'secret', :database => 'votables', # look in the MySQL database # ) def self.from_database(name, conn_params=nil, logger=nil, tables_must_exist=true) k = self.find_or_create_class(name.classify, Base) k.establish_connection(conn_params) if conn_params k.table_name = name k.logger = logger if logger k.inheritance_column = nil # 'type' is a common votable keyword, so we really want to turn simple inheritance off raise "tables corresponding to '#{name}' do not appear to exist" if tables_must_exist and k.dbtables.size == 0 k end # A convenience method around #from_xml and #from_database. # The connection parameters are only necessary if you haven't previously called # ActiveVOTable::Base#establish_connection. # # ActiveVOTable::Base.from(:xml, # File.new('votable.xml'), # 'my_votable' # :adapter => 'mysql', :host => 'localhost', :username => 'me', :password => 'secret', :database => 'votables', # ) # # ActiveVOTable::Base.from(:db, # 'my_votable', # :adapter => 'mysql', :host => 'localhost', :username => 'me', :password => 'secret', :database => 'votables', # ) def self.from(src, *args) case src when :xml then self.from_xml(*args) when :db then self.from_database(*args) else raise "Source must one of: :xml, :db (not '#{src}')" end end end end end