lib/neo4j/migration.rb in neo4j-4.1.5 vs lib/neo4j/migration.rb in neo4j-5.0.0.rc.1

- old
+ new

@@ -1,5 +1,7 @@ +require 'benchmark' + module Neo4j class Migration def migrate fail 'not implemented' end @@ -15,11 +17,11 @@ def default_path Rails.root if defined? Rails end def joined_path(path) - File.join(path, 'db', 'neo4j-migrate') + File.join(path.to_s, 'db', 'neo4j-migrate') end class AddIdProperty < Neo4j::Migration attr_reader :models_filename @@ -39,68 +41,76 @@ end end def setup FileUtils.mkdir_p('db/neo4j-migrate') - unless File.file?(models_filename) - File.open(models_filename, 'w') do |file| - file.write("# Provide models to which IDs should be added.\n# It will only modify nodes that do not have IDs. There is no danger of overwriting data.\n# models: [Student,Lesson,Teacher,Exam]\nmodels: []") - end + + return if File.file?(models_filename) + + File.open(models_filename, 'w') do |file| + message = <<MESSAGE +# Provide models to which IDs should be added. +# # It will only modify nodes that do not have IDs. There is no danger of overwriting data. +# # models: [Student,Lesson,Teacher,Exam]\nmodels: [] +MESSAGE + file.write(message) end end private def add_ids_to(model) - require 'benchmark' - max_per_batch = (ENV['MAX_PER_BATCH'] || default_max_per_batch).to_i label = model.mapped_label_name - property = model.primary_key - nodes_left = 1 last_time_taken = nil - until nodes_left == 0 - nodes_left = Neo4j::Session.query.match(n: label).where("NOT has(n.#{property})").return('COUNT(n) AS ids').first.ids + until (nodes_left = idless_count(label, model.primary_key)) == 0 + print_status(last_time_taken, max_per_batch, nodes_left) - time_per_node = last_time_taken / max_per_batch if last_time_taken - print_output "Running first batch...\r" - if time_per_node - eta_seconds = (nodes_left * time_per_node).round - print_output "#{nodes_left} nodes left. Last batch: #{(time_per_node * 1000.0).round(1)}ms / node (ETA: #{eta_seconds / 60} minutes)\r" + count = [nodes_left, max_per_batch].min + last_time_taken = Benchmark.realtime do + max_per_batch = id_batch_set(label, model.primary_key, count.times.map { new_id_for(model) }, count) end + end + end - return if nodes_left == 0 - to_set = [nodes_left, max_per_batch].min + def idless_count(label, id_property) + Neo4j::Session.query.match(n: label).where("NOT has(n.#{id_property})").pluck('COUNT(n) AS ids').first + end - new_ids = to_set.times.map { new_id_for(model) } - begin - last_time_taken = id_batch_set(label, property, new_ids, to_set) - rescue Neo4j::Server::CypherResponse::ResponseError, Faraday::TimeoutError - new_max_per_batch = (max_per_batch * 0.8).round - output "Error querying #{max_per_batch} nodes. Trying #{new_max_per_batch}" - max_per_batch = new_max_per_batch - end - end + def print_status(last_time_taken, max_per_batch, nodes_left) + time_per_node = last_time_taken / max_per_batch if last_time_taken + message = if time_per_node + eta_seconds = (nodes_left * time_per_node).round + "#{nodes_left} nodes left. Last batch: #{(time_per_node * 1000.0).round(1)}ms / node (ETA: #{eta_seconds / 60} minutes)\r" + else + "Running first batch...\r" + end + + print_output message end - def id_batch_set(label, property, new_ids, to_set) - Benchmark.realtime do - begin - tx = Neo4j::Transaction.new - Neo4j::Session.query("MATCH (n:`#{label}`) WHERE NOT has(n.#{property}) - with COLLECT(n) as nodes, #{new_ids} as ids - FOREACH(i in range(0,#{to_set - 1})| - FOREACH(node in [nodes[i]]| - SET node.#{property} = ids[i])) - RETURN distinct(true) - LIMIT #{to_set}") - ensure - tx.close - end - end + + def id_batch_set(label, id_property, new_ids, count) + tx = Neo4j::Transaction.new + + Neo4j::Session.query("MATCH (n:`#{label}`) WHERE NOT has(n.#{id_property}) + with COLLECT(n) as nodes, #{new_ids} as ids + FOREACH(i in range(0,#{count - 1})| + FOREACH(node in [nodes[i]]| + SET node.#{id_property} = ids[i])) + RETURN distinct(true) + LIMIT #{count}") + + count + rescue Neo4j::Server::CypherResponse::ResponseError, Faraday::TimeoutError + new_max_per_batch = (max_per_batch * 0.8).round + output "Error querying #{max_per_batch} nodes. Trying #{new_max_per_batch}" + new_max_per_batch + ensure + tx.close end def default_max_per_batch 900 end @@ -131,13 +141,14 @@ end def setup output "Creating file #{classnames_filepath}. Please use this as the migration guide." FileUtils.mkdir_p('db/neo4j-migrate') - unless File.file?(classnames_filepath) - source = File.join(File.dirname(__FILE__), '..', '..', 'config', 'neo4j', classnames_filename) - FileUtils.copy_file(source, classnames_filepath) - end + + return if File.file?(classnames_filepath) + + source = File.join(File.dirname(__FILE__), '..', '..', 'config', 'neo4j', classnames_filename) + FileUtils.copy_file(source, classnames_filepath) end private attr_reader :classnames_filename, :classnames_filepath, :model_map