lib/picky/sources/db.rb in picky-0.0.5 vs lib/picky/sources/db.rb in picky-0.0.6
- old
+ new
@@ -1,20 +1,60 @@
module Sources
class DB < Base
- attr_reader :select_statement, :database
+ attr_reader :select_statement, :database, :connection_options
- def initialize select_statement, database_adapter
+ def initialize select_statement, with_options = { :file => 'app/db.yml' }
@select_statement = select_statement
- @database = database_adapter
+ @database = create_database_adapter
+ configure with_options
end
+ # Get a configured Database backend.
+ #
+ # Options:
+ # Either
+ # * file => 'some/filename.yml' # With an active record configuration.
+ # Or
+ # * The configuration as a hash.
+ #
+ def create_database_adapter
+ adapter_class = Class.new ActiveRecord::Base
+ adapter_class.abstract_class = true
+ adapter_class
+ end
+
+ # Configure the backend.
+ #
+ # Options:
+ # Either
+ # * file => 'some/filename.yml' # With an active record configuration.
+ # Or
+ # * The configuration as a hash.
+ #
+ def configure options
+ @connection_options = if filename = options[:file]
+ File.open(File.join(PICKY_ROOT, filename)) { |f| YAML::load(f) }
+ else
+ options
+ end
+ self
+ end
+
+ # Connect the backend.
+ #
+ def connect_backend
+ return if PICKY_ENVIRONMENT.to_s == 'test' # TODO Unclean.
+ raise "Database backend not configured" unless connection_options
+ database.establish_connection connection_options
+ end
+
# Take the snapshot.
#
def take_snapshot type
- database.connect
+ connect_backend
origin = snapshot_table_name type
database.connection.execute "DROP TABLE IF EXISTS #{origin}"
database.connection.execute "CREATE TABLE #{origin} AS #{select_statement}"
@@ -27,10 +67,12 @@
end
# Counts all the entries that are used for the index.
#
def count type
+ connect_backend
+
database.connection.select_value("SELECT COUNT(id) FROM #{snapshot_table_name(type)}").to_i
end
# Ok here?
#
@@ -42,26 +84,44 @@
#
# Subclasses should override harvest_statement to define how their data is found.
# Example:
# "SELECT indexed_id, value FROM bla_table st WHERE kind = 'bla'"
#
- def harvest type, field, offset, chunksize
- database.connect
+ def harvest type, field
+ connect_backend
- database.connection.execute harvest_statement_with_offset(type, field, offset, chunksize)
+ (0..count(type)).step(chunksize) do |offset|
+ get_data(type, field, offset).each do |indexed_id, text|
+ next unless text
+ text.force_encoding 'utf-8' # TODO Still needed?
+ yield indexed_id, text
+ end
+ end
end
+ # Override in subclasses.
+ #
+ def chunksize
+ 25_000
+ end
+
+ # Gets database from the backend.
+ #
+ def get_data type, field, offset
+ database.connection.execute harvest_statement_with_offset(type, field, offset)
+ end
+
# Base harvest statement for dbs.
#
def harvest_statement type, field
"SELECT indexed_id, #{field.name} FROM #{snapshot_table_name(type)} st"
end
# Builds a harvest statement for getting data to index.
#
# TODO Use the adapter for this.
#
- def harvest_statement_with_offset type, field, offset, chunksize
+ def harvest_statement_with_offset type, field, offset
statement = harvest_statement type, field
if statement.include? 'WHERE'
statement += ' AND'
else
\ No newline at end of file