module Pacer module Filter class KeyIndex attr_reader :graph, :element_type def initialize(graph, element_type) @graph = graph @element_type = element_type end # FIXME: there is no count for key indices anymore. I'm counting # up to 10 elements to try to find good indices, otherwise using # anything... def count(key, value) iter = get(key, value).iterator 10.times do |n| if iter.hasNext iter.next else return n end end 1_000_000 # Random high number. end def get(key, value) if element_type == :vertex graph.blueprints_graph.getVertices(key, value) else graph.blueprints_graph.getEdges(key, value) end end end module PropertyFilter class Filters NodeVisitor = Pacer::Filter::WhereFilter::NodeVisitor attr_reader :properties, :extensions, :route_modules, :best_index_value attr_accessor :wrapper, :blocks # Allow Pacer to use index counts to determine which index has # the least number elements for the available keys in the query. # # @attr [Boolean] choose_best_index attr_accessor :choose_best_index # Allow Pacer to use manual indices without explicitly # referencing them by name. # # @example Explicit manual index: # # graph.v(:index_name => { :index_key => value }) # # @example Non-explicit index lookup: # # graph.v(:index_key => value) # # @attr [Boolean] search_manual_indices attr_accessor :search_manual_indices def initialize(graph, filters) if graph.is_a? Pacer::Wrappers::ElementWrapper # this can happen if the starting route is actually a single element. @graph = graph.graph else @graph = graph end @properties = [] @blocks = [] @extensions = [] @wrapper = nil @route_modules = [] @non_ext_props = [] @best_index_value = nil add_filters filters, nil combine_sets end def combine_sets is_set = properties.group_by { |p| p[1].is_a? Set } sets = is_set[true] # [["x", Set[]] ...] if sets is_multi = sets.group_by { |a| a.count > 1 } multi = is_multi[true] if multi multi = multi.group_by { |m| m.first } # {"x": [["x", Set[]], ...]} result = multi.map do |multi_set| multi_set[1].reduce do |result, pair| [result[0], result[1].intersection(pair[1])] end end result = result.concat(is_multi[false]) if is_multi[false] result = result.concat(is_set[false]) if is_set[false] @properties = result end end end # Set which graph this filter is currently operating on # # @note this is not threadsafe if you are reusing predefined # routes on multiple graphs. # # @attr [PacerGraph] g a graph attr_reader :graph def graph=(g) if graph != g reset_properties @graph = g end end def remove_property_keys(keys) properties.delete_if { |a| keys.include? a.first } non_ext_props.delete_if { |a| keys.include? a.first } end def property_keys properties.map(&:first).uniq end # Set which indices are available to be used to determine the # best_index. # # @attr [Array] i an array of indices attr_reader :indices def indices=(i) if indices != i reset_properties @indices = i end end # Not used for regular filtering, but useful to test an element against # a complex set of conditions without having to build a route. # # Returns a proc that can be called with an element to test and returns # true if the element matches the conditions. def to_predicate expando, pipe = build_pipeline(nil, com.tinkerpop.pipes.util.iterators.ExpandableIterator.new) proc do |e| expando.add e pipe.next if pipe.hasNext end end def build_pipeline(route, start_pipe, pipe = nil) pipe ||= start_pipe if route self.graph = route.graph route_modules.each do |mod| extension_route = mod.route(Pacer::Route.empty(route)) s, e = extension_route.send :build_pipeline s.setStarts(pipe) if pipe start_pipe ||= s pipe = e end end encoded_properties.each do |key, value| case value when NodeVisitor::Pipe new_pipe = value.build when NodeVisitor::Value # no op else new_pipe = PropertyFilterPipe.new(key, Pacer::Pipes::EQUAL, value) end if new_pipe new_pipe.set_starts pipe if pipe Pacer.debug_pipes << { :name => key, :start => pipe, :end => new_pipe } if Pacer.debug_pipes pipe = new_pipe start_pipe ||= pipe end end blocks.each do |block| # Will work if route is nil. block_pipe = Pacer::Pipes::BlockFilterPipe.new(route, block) block_pipe.set_starts pipe if pipe Pacer.debug_pipes << { :name => 'block', :start => pipe, :end => block_pipe } if Pacer.debug_pipes pipe = block_pipe start_pipe ||= pipe end [start_pipe, pipe] end def any? properties.any? or blocks.any? or route_modules.any? or extensions.any? end def extensions_only? properties.none? and blocks.none? and route_modules.none? and (extensions.any? or wrapper) end def to_s strings = [] strings << [wrapper.name] if wrapper strings.concat extensions.map { |e| e.name } strings.concat((non_ext_props - [@best_index_value]).map { |k, v| if v.is_a? Set "#{ k } IN (#{ v.sort.map { |s| s.inspect }.join ', ' })" else "#{ k }==#{ v.inspect }" end }) strings.concat blocks.map { '&block' } strings.concat route_modules.map { |mod| mod.name } strings.join ', ' end def best_index(element_type) result = find_best_index(element_type) # the call to find_best_index produces @best_index_value: if properties.delete @best_index_value @encoded_properties = nil end result end # Check #lookup in extensions, in preerence to #route_conditions # for filters to apply. # # Designed to allow filters to be defined in extensions that # will not be used on every appearance of the extension (too # many property filters in a traversal can impose a serious # performance penalty. It is expected that lookup filters will # only be used for index lookups. def use_lookup! extensions.each do |ext| if ext.respond_to? :lookup add_filters ext.lookup(graph), ext end end if wrapper and wrapper.respond_to? :lookup add_filters wrapper.lookup(graph), nil end end protected attr_accessor :non_ext_props def add_filter(filter, extension) case filter when Hash reset_properties filter.each do |k, v| self.non_ext_props << [k.to_s, v] unless extension self.properties << [k.to_s, v] end when Module, Class if filter.is_a? Class and filter.ancestors.include? Pacer::Wrappers::ElementWrapper self.wrapper = filter else self.extensions << filter end extract_conditions(filter) if filter.respond_to? :route self.route_modules << filter end when Array add_filters(filter, extension) when nil else if filter.respond_to? :wrapper self.wrapper = filter.wrapper extract_conditions(filter) elsif filter.respond_to? :parts self.extensions.concat filter.parts.to_a extract_conditions(filter) else raise "Unknown filter: #{ filter.class }: #{ filter.inspect }" end end end def extract_conditions(filter) if filter.respond_to? :route_conditions add_filters filter.route_conditions(graph), filter end end def add_filters(filters, extension) if filters.is_a? Array filters.each do |filter| add_filter filter, extension end else add_filter filters, extension end end def encode_value(key, value) if value.is_a? Set encoded = value.map.with_index { |v, i| graph.encode_property(v) } str = encoded.map { |v| "`#{ key.to_s }` == #{ v.inspect }" }.join " or " parsed = JRuby.parse str visitor = NodeVisitor.new(nil, {}) parsed.accept visitor else value = graph.encode_property(value) if value.respond_to? :to_java value.to_java elsif value.respond_to? :to_java_string value.to_java_string else value end end end def find_best_index(element_type) return @best_index if @best_index avail = available_indices(element_type) return nil if avail.empty? index_options = [] properties.each do |k, v| if v and index_for_property(avail, index_options, k, v) return @best_index end end index_options = index_options.sort_by do |a| count = a.first if count == 0 and search_manual_indices # Only use 0 result indices if there is no alternative # because most manual indices won't be populating the # data in question. java.lang.Integer::MAX_VALUE else count end end _, @best_index, @best_index_value = index_options.first || [nil, [], []] @best_index end def index_for_property(avail, index_options, k, v) if v.is_a? Hash if (idxs = avail["name:#{k}"]).any? v.each do |k2, v2| return true if check_index(index_options, idxs, k2.to_s, v2, [k, v]) end end false elsif (idxs = (avail["key:#{k}"] + avail[:all])).any? true if check_index(index_options, idxs, k, v, [k, v]) end end def check_index(index_options, idxs, k, v, index_value) if v.is_a? Set # By default we assume that indices can't do OR type operations, so Set values are ruled out. If # the index can do that (i.e. Neo4j's lucene engine), the adapter needs to do its own index selection. false elsif choose_best_index idxs.each do |idx| index_options << [idx.count(k, encode_value(k, v)), [idx, k, v], index_value] end false else @best_index_value = index_value @best_index = [idxs.first, k, v] true end end def reset_properties @encoded_properties = nil if @best_index_value # put removed index property back... properties << @best_index_value end @best_index = nil @best_index_value = nil @available_indices = nil end def encoded_properties @encoded_properties ||= properties.map do |k, v| [k, encode_value(k, v)] end end def available_indices(element_type) return @available_indices if @available_indices @available_indices = Hash.new { |h, k| h[k] = [] } key_index = KeyIndex.new(graph, element_type) graph.key_indices(element_type).each do |key| @available_indices["key:#{key}"] = [key_index] end if search_manual_indices indices.each do |index| next unless graph.index_class? element_type, index.index_class @available_indices["name:#{index.index_name}"] = [index] @available_indices[:all] << index end end @available_indices end end end end end