module Gummi
  module Index
    extend ActiveSupport::Concern

    module ClassMethods
      # Create the index with the configured settings.
      #
      # Return true if created or false if already created
      # (Elasticsearch answers an existing index with a BadRequest).
      #
      def setup
        created_settings = client.indices.create index: name, body: { settings: settings }
        refresh
        # Return the creation result, not the refresh/health response.
        created_settings.present?
      rescue ::Elasticsearch::Transport::Transport::Errors::BadRequest
        false
      end

      # Return true if successful or already teared down.
      #
      # Raises NotImplementedError in production.
      #
      def teardown
        raise NotImplementedError unless Gummi.env == 'development' || Gummi.env == 'test'
        response = client.indices.delete index: name
        response.present?
      rescue ::Elasticsearch::Transport::Transport::Errors::NotFound
        # The index is already gone — treat that as a successful teardown.
        true
      end

      # Subclasses must override this with their index name.
      def name
        raise "Implement me"
      end

      # Force a refresh so freshly indexed documents become searchable,
      # then block until the cluster reaches at least yellow status.
      def refresh
        client.indices.refresh
        client.cluster.health wait_for_status: :yellow
      end

      # Override this to customize the index settings.
      def settings
        default_settings
      end

      # The default Elasticsearch index settings, including all analyzers.
      def default_settings
        {
          index: {
            # Main Settings
            number_of_shards: '3',
            number_of_replicas: (Gummi.env == 'production' ? '2' : '0'),
            refresh_interval: '1s',
            store: { type: (Gummi.env == 'test' ? :memory : :niofs) },
            mapper: { dynamic: false },
            analysis: {
              # Tokenizers are just some sort of "tool" or "module" that can be applied to analyzers.
              tokenizer: {
                # This one is a little bit more general and is able to chop any word into all of its components.
                ngram_tokenizer: {
                  type: 'nGram',
                  min_gram: 1,
                  max_gram: 7,
                  token_chars: [ 'letter', 'digit' ],
                }
              },
              # Now we are ready to use our tokenizers.
              # Let's create the most important thing: Analyzers.
              analyzer: {
                path_hierarchy_analyzer: {
                  type: 'custom',
                  tokenizer: 'path_hierarchy',
                },
                # When adding long text to Elastic, we most likely are going to use this
                # analyzer. This is commonly used for titles and descriptions.
                text_index_analyzer: {
                  type: 'custom',
                  tokenizer: 'ngram_tokenizer', # Chopping every word up into tokens
                  filter: {
                    0 => 'standard',       # Some default transformations
                    1 => 'lowercase',      # Make everything lowercase
                    2 => 'word_delimiter', # E.g. "O'Neil" -> "O Neil", "Victoria's" -> "Victoria"
                    3 => 'asciifolding',   # Transform everything into ASCII
                  },
                },
                # For smaller texts, such as the city "stockholm", we don't want any
                # tokenizing. It's enough to explicitly save the word as it is.
                # As a matter of fact, if we would tokenize the city, then the facets
                # would report that we have Transports in "st" "sto" "stoc" etc.
                string_index_analyzer: {
                  type: 'custom',
                  tokenizer: 'standard',
                  filter: {
                    # The filters, however, are identical to the other analyzer.
                    0 => 'standard',
                    1 => 'lowercase',
                    2 => 'word_delimiter',
                    3 => 'asciifolding',
                  },
                },
                # For finding Slugs
                keyword_index_analyzer: {
                  type: 'custom',
                  tokenizer: 'keyword',
                  filter: {
                    0 => 'lowercase',
                    1 => 'asciifolding',
                  },
                },
                # This is an analyzer that we apply to the search query itself.
                text_search_analyzer: {
                  type: 'custom',
                  tokenizer: 'standard',
                  filter: {
                    0 => 'standard',
                    1 => 'lowercase',
                    2 => 'word_delimiter',
                    3 => 'asciifolding',
                  },
                },
                # This is an analyzer that we apply to the search query itself.
                keyword_search_analyzer: {
                  type: 'custom',
                  tokenizer: 'keyword',
                  filter: {
                    0 => 'lowercase',
                    1 => 'asciifolding',
                  },
                },
              }
            }
          }
        }
      end

      # The shared Elasticsearch client.
      def client
        Gummi::API.client
      end
    end
  end
end