Sha256: 30236f774ff5c276a3e69a4e3ac87d56a54469b963fc605293f16e46e53bb9c2

Contents?: true

Size: 1.89 KB

Versions: 3

Compression:

Stored size: 1.89 KB

Contents

require 'opener/pos_taggers/base'
require 'opener/pos_taggers/en'
require 'nokogiri'
require 'open3'
require 'slop'

require_relative 'pos_tagger/version'
require_relative 'pos_tagger/cli'

module Opener
  ##
  # Primary POS tagger class that delegates work to the various POS tagging
  # kernels. The kernel is picked at runtime from the language of the input
  # document: a language code such as "en" maps to the constant
  # `Opener::POSTaggers::EN`.
  #
  # @!attribute [r] options
  #  @return [Hash]
  #
  class POSTagger
    attr_reader :options

    ##
    # Hash containing the default options to use.
    #
    # @return [Hash]
    #
    DEFAULT_OPTIONS = {
      :args => []
    }.freeze

    ##
    # @param [Hash] options
    #
    # @option options [Array] :args Arbitrary arguments to pass to the
    #  underlying kernel.
    #
    def initialize(options = {})
      @options = DEFAULT_OPTIONS.merge(options)
    end

    ##
    # Processes the input using the kernel that matches the document language
    # and returns whatever that kernel's `run` method returns (documented
    # upstream as an Array containing the output of STDOUT, STDERR and an
    # object containing process information).
    #
    # @param [String] input The input to process.
    # @return [Array]
    # @raise [ArgumentError] When the document language is missing or when no
    #  kernel is registered for it.
    #
    def run(input)
      language = language_from_kaf(input)

      unless valid_language?(language)
        raise ArgumentError, "The specified language (#{language}) is invalid"
      end

      kernel = language_constant(language).new(:args => options[:args])

      kernel.run(input)
    end

    alias tag run

    protected

    ##
    # Extracts the language from a KAF document by reading the language of
    # the first node returned by the XML reader.
    #
    # @param [String] input
    # @return [String, nil] The language, or nil when the node has none.
    #
    def language_from_kaf(input)
      reader = Nokogiri::XML::Reader(input)

      reader.read.lang
    end

    ##
    # Returns the kernel class for the given language.
    #
    # @param [String] language
    # @return [Class, nil] nil when no language is given.
    #
    def language_constant(language)
      # `false` restricts the lookup to POSTaggers itself so that top-level
      # constants (e.g. ENV for the language "env") are never returned.
      language && POSTaggers.const_get(language.upcase, false)
    end

    ##
    # Checks whether a kernel is registered for the given language. Missing,
    # empty or malformed language codes are reported as invalid instead of
    # raising.
    #
    # @param [String, nil] language
    # @return [TrueClass|FalseClass]
    #
    def valid_language?(language)
      return false if language.nil? || language.empty?

      # Resolved outside of the rescue below so that a missing POSTaggers
      # namespace still surfaces as a NameError instead of being hidden.
      namespace = Opener::POSTaggers

      begin
        # `false` disables the lookup in Object, see #language_constant.
        namespace.const_defined?(language.upcase, false)
      rescue NameError
        # Raised for codes that are not valid constant names, e.g. "en-GB".
        false
      end
    end
  end # POSTagger
end # Opener

Version data entries

3 entries across 3 versions & 1 rubygems

Version Path
opener-pos-tagger-3.1.2 lib/opener/pos_tagger.rb
opener-pos-tagger-3.1.1 lib/opener/pos_tagger.rb
opener-pos-tagger-3.1.0 lib/opener/pos_tagger.rb