Sha256: 39bb83f4109b1f8bba34ad43a74c783d341cb6e056f30f6f20ef1bdeba683061

Contents?: true

Size: 1.96 KB

Versions: 27

Compression:

Stored size: 1.96 KB

Contents

require 'pragmatic_tokenizer/processor'

require 'pragmatic_tokenizer/languages/common'

require 'pragmatic_tokenizer/languages/english'
require 'pragmatic_tokenizer/languages/arabic'
require 'pragmatic_tokenizer/languages/bulgarian'
require 'pragmatic_tokenizer/languages/catalan'
require 'pragmatic_tokenizer/languages/czech'
require 'pragmatic_tokenizer/languages/danish'
require 'pragmatic_tokenizer/languages/deutsch'
require 'pragmatic_tokenizer/languages/greek'
require 'pragmatic_tokenizer/languages/spanish'
require 'pragmatic_tokenizer/languages/persian'
require 'pragmatic_tokenizer/languages/finnish'
require 'pragmatic_tokenizer/languages/french'
require 'pragmatic_tokenizer/languages/indonesian'
require 'pragmatic_tokenizer/languages/italian'
require 'pragmatic_tokenizer/languages/latvian'
require 'pragmatic_tokenizer/languages/dutch'
require 'pragmatic_tokenizer/languages/norwegian'
require 'pragmatic_tokenizer/languages/polish'
require 'pragmatic_tokenizer/languages/portuguese'
require 'pragmatic_tokenizer/languages/romanian'
require 'pragmatic_tokenizer/languages/russian'
require 'pragmatic_tokenizer/languages/slovak'
require 'pragmatic_tokenizer/languages/swedish'
require 'pragmatic_tokenizer/languages/turkish'

module PragmaticTokenizer
  # Registry that maps two-letter language codes to the language modules
  # loaded by the `require` lines at the top of this file.
  module Languages
    # Lookup table from lowercase language code to language module.
    # 'nn', 'nb' and 'no' all resolve to the same Norwegian module.
    # Frozen so the shared table cannot be modified at runtime.
    LANGUAGE_CODES = {
      'en' => English,
      'ar' => Arabic,
      'bg' => Bulgarian,
      'ca' => Catalan,
      'cs' => Czech,
      'da' => Danish,
      'de' => Deutsch,
      'el' => Greek,
      'es' => Spanish,
      'fa' => Persian,
      'fi' => Finnish,
      'fr' => French,
      'id' => Indonesian,
      'it' => Italian,
      'lv' => Latvian,
      'nl' => Dutch,
      'nn' => Norwegian,
      'nb' => Norwegian,
      'no' => Norwegian,
      'pl' => Polish,
      'pt' => Portuguese,
      'ro' => Romanian,
      'ru' => Russian,
      'sk' => Slovak,
      'sv' => Swedish,
      'tr' => Turkish
    }.freeze

    # Returns the language module registered for +code+.
    #
    # The code is converted to a String and downcased before the lookup, so
    # Symbols (:en) and upper/mixed-case Strings ('EN') resolve the same way
    # as 'en'.
    #
    # @param code [String, Symbol, nil] language code such as 'en' or :de
    # @return [Module] the matching entry of LANGUAGE_CODES, or Common when
    #   the code is nil or not registered
    def self.get_language_by_code(code)
      # Block form keeps Common resolved only on a miss, as before.
      LANGUAGE_CODES.fetch(code.to_s.downcase) { Common }
    end
  end
end

Version data entries

27 entries across 27 versions & 1 rubygem

Version Path
pragmatic_tokenizer-0.5.0 lib/pragmatic_tokenizer/languages.rb
pragmatic_tokenizer-0.4.2 lib/pragmatic_tokenizer/languages.rb
pragmatic_tokenizer-0.4.1 lib/pragmatic_tokenizer/languages.rb
pragmatic_tokenizer-0.4.0 lib/pragmatic_tokenizer/languages.rb
pragmatic_tokenizer-0.3.4 lib/pragmatic_tokenizer/languages.rb
pragmatic_tokenizer-0.3.3 lib/pragmatic_tokenizer/languages.rb
pragmatic_tokenizer-0.3.2 lib/pragmatic_tokenizer/languages.rb
pragmatic_tokenizer-0.3.1 lib/pragmatic_tokenizer/languages.rb
pragmatic_tokenizer-0.3.0 lib/pragmatic_tokenizer/languages.rb
pragmatic_tokenizer-0.2.4 lib/pragmatic_tokenizer/languages.rb
pragmatic_tokenizer-0.2.3 lib/pragmatic_tokenizer/languages.rb
pragmatic_tokenizer-0.2.2 lib/pragmatic_tokenizer/languages.rb
pragmatic_tokenizer-0.2.1 lib/pragmatic_tokenizer/languages.rb
pragmatic_tokenizer-0.2.0 lib/pragmatic_tokenizer/languages.rb
pragmatic_tokenizer-0.1.12 lib/pragmatic_tokenizer/languages.rb
pragmatic_tokenizer-0.1.11 lib/pragmatic_tokenizer/languages.rb
pragmatic_tokenizer-0.1.10 lib/pragmatic_tokenizer/languages.rb
pragmatic_tokenizer-0.1.9 lib/pragmatic_tokenizer/languages.rb
pragmatic_tokenizer-0.1.8 lib/pragmatic_tokenizer/languages.rb
pragmatic_tokenizer-0.1.7 lib/pragmatic_tokenizer/languages.rb