Sha256: 2ae2c130bc9c37af69fa75ae286af1950e6b329fb976d14b7333456b7018e29f

Contents?: true

Size: 1.24 KB

Versions: 3

Compression:

Stored size: 1.24 KB

Contents

require "csv"
require_relative "zip-extractor"

module Datasets
  class AFINN < Dataset
    Record = Struct.new(:word,
                        :valence)

    def initialize
      super()
      @metadata.id = "afinn"
      @metadata.name = "AFINN"
      @metadata.url = "http://www2.imm.dtu.dk/pubdb/pubs/6010-full.html"
      @metadata.licenses = ["ODbL-1.0"]
      @metadata.description = lambda do
        extract_file("AFINN/AFINN-README.txt") do |input|
          readme = input.read
          readme.force_encoding("UTF-8")
          readme.
            gsub(/^AFINN-96:.*?\n\n/m, "").
            gsub(/^In Python.*$/m, "").
            strip
        end
      end
    end

    def each
      return to_enum(__method__) unless block_given?

      extract_file("AFINN/AFINN-111.txt") do |input|
        csv = CSV.new(input, col_sep: "\t", converters: :numeric)
        csv.each do |row|
          yield(Record.new(*row))
        end
      end
    end

    private
    def extract_file(file_path, &block)
      data_path = cache_dir_path + "imm6010.zip"
      data_url = "http://www2.imm.dtu.dk/pubdb/edoc/imm6010.zip"
      download(data_path, data_url)

      extractor = ZipExtractor.new(data_path)
      extractor.extract_file(file_path, &block)
    end
  end
end

Version data entries

3 entries across 3 versions & 1 rubygems

Version Path
red-datasets-0.1.7 lib/datasets/afinn.rb
red-datasets-0.1.6 lib/datasets/afinn.rb
red-datasets-0.1.5 lib/datasets/afinn.rb