lib/picky/sources/db.rb in picky-1.5.2 vs lib/picky/sources/db.rb in picky-1.5.3
- old
+ new
@@ -1,7 +1,7 @@
module Sources
-
+
# Describes a database source. Needs a SELECT statement
# (with id in it), and a file option or the options from an AR config file.
#
# The select statement can be as complicated as you want,
# as long as it has an id in it and as long as it can be
@@ -13,42 +13,48 @@
# Sources::DB.new('SELECT id, title, author, year FROM books', file: 'app/some_db.yml')
# Sources::DB.new('SELECT b.id, b.title, b.author, b.publishing_year as year FROM books b INNER JOIN ON ...', file: 'app/some_db.yml')
# Sources::DB.new('SELECT id, title, author, year FROM books', adapter: 'mysql', host:'localhost', ...)
#
class DB < Base
-
+
# The select statement that was passed in.
#
attr_reader :select_statement
-
+
# The database adapter.
#
attr_reader :database
-
+
# The database connection options that were either passed in or loaded from the given file.
#
- attr_reader :connection_options
-
+ attr_reader :connection_options, :options
+
@@traversal_id = :__picky_id
-
+
def initialize select_statement, options = { file: 'app/db.yml' }
@select_statement = select_statement
@database = create_database_adapter
@options = options
end
-
+
+ def to_s
+ parameters = [select_statement.inspect]
+ parameters << options unless options.empty?
+ %Q{#{self.class.name}(#{parameters.join(', ')})}
+ end
+
# Creates a database adapter for use with this source.
def create_database_adapter # :nodoc:
# TODO Do not use ActiveRecord directly.
#
# TODO Use set_table_name etc.
#
adapter_class = Class.new ActiveRecord::Base
adapter_class.abstract_class = true
adapter_class
end
-
+
# Configure the backend.
#
# Options:
# Either
# * file => 'some/filename.yml' # With an active record configuration.
@@ -61,78 +67,78 @@
else
options
end
self
end
-
+
# Connect the backend.
#
# Will raise unless connection options have been given.
#
def connect_backend
configure @options
raise "Database backend not configured" unless connection_options
database.establish_connection connection_options
end
-
+
# Take a snapshot of the data.
#
# Uses CREATE TABLE AS with the given SELECT statement to create a snapshot of the data.
#
def take_snapshot index
connect_backend
-
+
origin = snapshot_table_name index
on_database = database.connection
-
+
# Drop the table if it exists.
#
on_database.drop_table origin if on_database.table_exists?(origin)
-
+
# The adapters currently do not support this.
#
on_database.execute "CREATE TABLE #{origin} AS #{select_statement}"
-
+
# Add a column that Picky uses to traverse the table's entries.
#
on_database.add_column origin, @@traversal_id, :primary_key, :null => :false
-
+
# Execute any special queries this index needs executed.
#
on_database.execute index.after_indexing if index.after_indexing
end
-
+
# Counts all the entries that are used for the index.
#
def count index
connect_backend
-
+
database.connection.select_value("SELECT COUNT(#{@@traversal_id}) FROM #{snapshot_table_name(index)}").to_i
end
-
+
# The name of the snapshot table created by Picky.
#
def snapshot_table_name index
"picky_#{index.name}_index"
end
-
+
# Harvests the data to index in chunks.
#
def harvest index, category, &block
connect_backend
-
+
(0..count(index)).step(chunksize) do |offset|
get_data index, category, offset, &block
end
end
-
+
# Gets the data from the backend.
#
def get_data index, category, offset, &block # :nodoc:
-
+
select_statement = harvest_statement_with_offset index, category, offset
-
+
# TODO Rewrite ASAP.
#
if database.connection.adapter_name == "PostgreSQL"
id_key = 'id'
text_key = category.from.to_s
@@ -144,31 +150,31 @@
database.connection.execute(select_statement).each do |id, text|
yield id, text if text
end
end
end
-
+
# Builds a harvest statement for getting data to index.
#
def harvest_statement_with_offset index, category, offset
statement = harvest_statement index, category
-
+
statement += statement.include?('WHERE') ? ' AND' : ' WHERE'
-
+
"#{statement} st.#{@@traversal_id} > #{offset} LIMIT #{chunksize}"
end
-
+
# The harvest statement used to pull data from the snapshot table.
#
def harvest_statement index, category
"SELECT id, #{category.from} FROM #{snapshot_table_name(index)} st"
end
-
+
# The amount of records that are loaded each chunk.
#
def chunksize
25_000
end
-
+
end
-
+
end
\ No newline at end of file