lib/mongo/cluster/sdam_flow.rb in mongo-2.11.6 vs lib/mongo/cluster/sdam_flow.rb in mongo-2.12.0.rc0

- old
+ new

@@ -56,78 +56,30 @@ # Updates descriptions on all servers whose address matches # updated_desc's address. def update_server_descriptions servers_list.each do |server| if server.address == updated_desc.address - changed = server.description != updated_desc + @server_description_changed = server.description != updated_desc + # Always update server description, so that fields that do not # affect description equality comparisons but are part of the # description are updated. server.update_description(updated_desc) server.update_last_scan - # But return if there was a content difference between - # descriptions, and if there wasn't we'll skip the remainder of - # sdam flow - return changed + + # If there was no content difference between descriptions, we + # still need to run sdam flow, but if the flow produces no change + # in topology we will omit sending events. + return true end end false end def server_description_changed - if updated_desc.me_mismatch? && updated_desc.primary? && - (topology.unknown? || topology.replica_set?) - then - # When the driver receives a description claiming to be a primary, - # we are obligated by spec tests to add and remove hosts in that - # description even if it also has a me mismatch. The me mismatch - # scenario though presents a number of problems: - # - # 1. Effectively, the server's address changes, meaning we cannot - # update the description of the server whose description change we - # are processing (instead servers are added and removed), but we - # behave to an extent as if we are updating the description, which - # causes a bunch of awkwardness. - # 2. The server for which we are processing the response will be - # removed from topology, which may cause the current thread to terminate - # prior to running the entire sdam flow. To deal with this we separate - # the removal event publication from actually removing the server - # from topology, which again complicates the flow. - - # Primary-with-me-mismatch response could be the first one we receive - # when the topology is still unknown. Change to RS without primary - # in this case. - if topology.unknown? - @topology = Topology::ReplicaSetNoPrimary.new( - topology.options.merge(replica_set_name: updated_desc.replica_set_name), - topology.monitoring, self) - end - - servers = add_servers_from_desc(updated_desc) - # Spec tests require us to remove servers based on data in descrptions - # with me mismatches. The driver will be more resilient if it only - # removed servers from descriptions with matching mes. - remove_servers_not_in_desc(updated_desc) - - servers.each do |server| - server.start_monitoring - end - - # The rest of sdam flow assumes the server being removed is not the one - # whose description we are processing, and publishes description update - # event. Since we are removing the server whose response we are - # processing, do not publish description change event but mark it - # published (by assigning to @previous_desc). - do_remove(updated_desc.address.to_s) - @previous_desc = updated_desc - - # We may have removed the current primary, check if there is a primary. - check_if_has_primary - # Publish topology change event. - commit_changes - disconnect_servers - return + @previous_server_descriptions = servers_list.map do |server| + [server.address.to_s, server.description] end unless update_server_descriptions # All of the transitions require that server whose updated_desc we are # processing is still in the cluster (i.e., was not removed as a result @@ -156,14 +108,20 @@ topology.monitoring, self) update_rs_without_primary end when Topology::Sharded unless updated_desc.unknown? || updated_desc.mongos? + log_warn( + "Removing server #{updated_desc.address.to_s} because it is a #{updated_desc.server_type.to_s.upcase} and not a MONGOS" + ) remove end when Topology::ReplicaSetWithPrimary if updated_desc.standalone? || updated_desc.mongos? + log_warn( + "Removing server #{updated_desc.address.to_s} because it is a #{updated_desc.server_type.to_s.upcase} and not a replica set member" + ) remove check_if_has_primary elsif updated_desc.primary? update_rs_from_primary elsif updated_desc.secondary? || updated_desc.arbiter? || updated_desc.other? @@ -171,10 +129,13 @@ else check_if_has_primary end when Topology::ReplicaSetNoPrimary if updated_desc.standalone? || updated_desc.mongos? + log_warn( + "Removing server #{updated_desc.address.to_s} because it is a #{updated_desc.server_type.to_s.upcase} and not a replica set member" + ) remove elsif updated_desc.primary? # Here we change topology type to RS with primary, however # while processing updated_desc we may find that its RS name # does not match our existing RS name. For this reason @@ -438,10 +399,20 @@ ) end end def publish_description_change_event + # This method may be invoked when server description definitely changed + # but prior to the topology getting updated. Therefore we check both + # server description changes and overall topology changes. When this + # method is called at the end of SDAM flow as part of "commit changes" + # step, server description change is incorporated into the topology + # change. + unless @server_description_changed || topology_effectively_changed? + return + end + # updated_desc here may not be the description we received from # the server - in case of a stale primary, the server reported itself # as being a primary but updated_desc here will be unknown. # We do not notify on unknown -> unknown changes. @@ -490,11 +461,11 @@ publish_description_change_event start_pool_if_data_bearing topology_changed_event_published = false - if topology.object_id != cluster.topology.object_id || @need_topology_changed_event + if !topology.equal?(cluster.topology) || @need_topology_changed_event # We are about to publish topology changed event. # Recreate the topology instance to get its server descriptions # up to date. @topology = topology.class.new(topology.options, topology.monitoring, cluster) # This sends the SDAM event @@ -515,10 +486,14 @@ end if updated_desc.object_id == previous_desc.object_id return end + unless topology_effectively_changed? + return + end + # If we are here, there has been a change in the server descriptions # in our topology, but topology class has not changed. # Publish the topology changed event and recreate the topology to # get the new list of server descriptions into it. @topology = topology.class.new(topology.options, topology.monitoring, cluster) @@ -581,8 +556,29 @@ # Returns whether the server whose description this flow processed # was not previously unknown, and is now. Used to decide, in particular, # whether to clear the server's connection pool. def became_unknown? updated_desc.unknown? && !original_desc.unknown? + end + + # Returns whether topology meaningfully changed as a result of running + # SDAM flow. + # + # The spec defines topology equality through equality of topology types + # and server descriptions in each topology; this definition is not usable + # by us because our topology objects do not hold server descriptions and + # are instead "live". Thus we have to store the full list of server + # descriptions at the beginning of SDAM flow and compare them to the + # current ones. + def topology_effectively_changed? + unless topology.equal?(cluster.topology) + return true + end + + server_descriptions = servers_list.map do |server| + [server.address.to_s, server.description] + end + + @previous_server_descriptions != server_descriptions end end end