lib/neo4j/migration.rb in neo4j-4.1.5 vs lib/neo4j/migration.rb in neo4j-5.0.0.rc.1
- old
+ new
@@ -1,5 +1,7 @@
+require 'benchmark'
+
module Neo4j
class Migration
def migrate
fail 'not implemented'
end
@@ -15,11 +17,11 @@
def default_path
Rails.root if defined? Rails
end
def joined_path(path)
- File.join(path, 'db', 'neo4j-migrate')
+ File.join(path.to_s, 'db', 'neo4j-migrate')
end
class AddIdProperty < Neo4j::Migration
attr_reader :models_filename
@@ -39,68 +41,76 @@
end
end
+ # Creates the db/neo4j-migrate directory and, unless the models file already
+ # exists, writes a commented template to models_filename.
def setup
FileUtils.mkdir_p('db/neo4j-migrate')
- unless File.file?(models_filename)
- File.open(models_filename, 'w') do |file|
- file.write("# Provide models to which IDs should be added.\n# It will only modify nodes that do not have IDs. There is no danger of overwriting data.\n# models: [Student,Lesson,Teacher,Exam]\nmodels: []")
- end
+
+ return if File.file?(models_filename)
+
+ File.open(models_filename, 'w') do |file|
+ # Heredoc body starts at column 0 on purpose: a plain <<MESSAGE heredoc keeps leading whitespace.
+ message = <<MESSAGE
+# Provide models to which IDs should be added.
+# It will only modify nodes that do not have IDs. There is no danger of overwriting data.
+# models: [Student,Lesson,Teacher,Exam]
+models: []
+MESSAGE
+ file.write(message)
end
end
private
def add_ids_to(model)
- require 'benchmark'
-
max_per_batch = (ENV['MAX_PER_BATCH'] || default_max_per_batch).to_i
label = model.mapped_label_name
- property = model.primary_key
- nodes_left = 1
last_time_taken = nil
- until nodes_left == 0
- nodes_left = Neo4j::Session.query.match(n: label).where("NOT has(n.#{property})").return('COUNT(n) AS ids').first.ids
+ until (nodes_left = idless_count(label, model.primary_key)) == 0
+ print_status(last_time_taken, max_per_batch, nodes_left)
- time_per_node = last_time_taken / max_per_batch if last_time_taken
- print_output "Running first batch...\r"
- if time_per_node
- eta_seconds = (nodes_left * time_per_node).round
- print_output "#{nodes_left} nodes left. Last batch: #{(time_per_node * 1000.0).round(1)}ms / node (ETA: #{eta_seconds / 60} minutes)\r"
+ count = [nodes_left, max_per_batch].min
+ last_time_taken = Benchmark.realtime do
+ max_per_batch = id_batch_set(label, model.primary_key, count.times.map { new_id_for(model) }, count)
end
+ end
+ end
- return if nodes_left == 0
- to_set = [nodes_left, max_per_batch].min
+ def idless_count(label, id_property)
+ Neo4j::Session.query.match(n: label).where("NOT has(n.#{id_property})").pluck('COUNT(n) AS ids').first
+ end
- new_ids = to_set.times.map { new_id_for(model) }
- begin
- last_time_taken = id_batch_set(label, property, new_ids, to_set)
- rescue Neo4j::Server::CypherResponse::ResponseError, Faraday::TimeoutError
- new_max_per_batch = (max_per_batch * 0.8).round
- output "Error querying #{max_per_batch} nodes. Trying #{new_max_per_batch}"
- max_per_batch = new_max_per_batch
- end
- end
+ def print_status(last_time_taken, max_per_batch, nodes_left)
+ time_per_node = last_time_taken / max_per_batch if last_time_taken
+ message = if time_per_node
+ eta_seconds = (nodes_left * time_per_node).round
+ "#{nodes_left} nodes left. Last batch: #{(time_per_node * 1000.0).round(1)}ms / node (ETA: #{eta_seconds / 60} minutes)\r"
+ else
+ "Running first batch...\r"
+ end
+
+ print_output message
end
- def id_batch_set(label, property, new_ids, to_set)
- Benchmark.realtime do
- begin
- tx = Neo4j::Transaction.new
- Neo4j::Session.query("MATCH (n:`#{label}`) WHERE NOT has(n.#{property})
- with COLLECT(n) as nodes, #{new_ids} as ids
- FOREACH(i in range(0,#{to_set - 1})|
- FOREACH(node in [nodes[i]]|
- SET node.#{property} = ids[i]))
- RETURN distinct(true)
- LIMIT #{to_set}")
- ensure
- tx.close
- end
- end
+
+ # Sets `id_property` on up to `count` nodes labelled `label` that do not have it yet,
+ # issuing one Cypher statement after opening a Neo4j::Transaction.
+ #
+ # label       - label interpolated into the MATCH clause.
+ # id_property - property name tested with `NOT has(...)` and then SET.
+ # new_ids     - collection interpolated into the query as the `ids` list.
+ # count       - number of nodes to update; also used as the query LIMIT.
+ #
+ # Returns `count` when the query succeeds. When the query raises ResponseError or
+ # Faraday::TimeoutError, reports the failure through `output` and returns 80% of
+ # `count` (rounded); add_ids_to stores the return value as its next batch size.
+ def id_batch_set(label, id_property, new_ids, count)
+ tx = Neo4j::Transaction.new
+
+ Neo4j::Session.query("MATCH (n:`#{label}`) WHERE NOT has(n.#{id_property})
+ with COLLECT(n) as nodes, #{new_ids} as ids
+ FOREACH(i in range(0,#{count - 1})|
+ FOREACH(node in [nodes[i]]|
+ SET node.#{id_property} = ids[i]))
+ RETURN distinct(true)
+ LIMIT #{count}")
+
+ count
+ rescue Neo4j::Server::CypherResponse::ResponseError, Faraday::TimeoutError
+ # `max_per_batch` is a local of add_ids_to and is not visible in this method;
+ # `count` is the batch size that was just attempted.
+ reduced_count = (count * 0.8).round
+ output "Error querying #{count} nodes. Trying #{reduced_count}"
+ reduced_count
+ ensure
+ # tx is still nil if Neo4j::Transaction.new itself raised; do not mask that error.
+ tx.close if tx
end
def default_max_per_batch
900
end
@@ -131,13 +141,14 @@
end
def setup
output "Creating file #{classnames_filepath}. Please use this as the migration guide."
FileUtils.mkdir_p('db/neo4j-migrate')
- unless File.file?(classnames_filepath)
- source = File.join(File.dirname(__FILE__), '..', '..', 'config', 'neo4j', classnames_filename)
- FileUtils.copy_file(source, classnames_filepath)
- end
+
+ return if File.file?(classnames_filepath)
+
+ source = File.join(File.dirname(__FILE__), '..', '..', 'config', 'neo4j', classnames_filename)
+ FileUtils.copy_file(source, classnames_filepath)
end
private
attr_reader :classnames_filename, :classnames_filepath, :model_map