Sha256: f887cd934798582858ef35d015aa2e07df92b38635515d4cb7efee84f68f17f5

Contents?: true

Size: 1.7 KB

Versions: 3

Compression:

Stored size: 1.7 KB

Contents

#!/usr/bin/env ruby
require 'json'
require 'logger'
require './lib/jamnagar'

# Load the captured tweet stream. File.read releases the file handle as soon as
# the contents are in memory (a bare File.open is never closed), and JSON.parse
# treats the payload strictly as plain data.
raw_items = JSON.parse(File.read("tweet_stream.json"))

# Keep only distinct entries that carry at least one URL entity. Entries that
# are not hashes, or that have no "entities"/"urls" array, are skipped instead
# of raising NoMethodError.
with_urls = raw_items.select do |raw|
  next false unless raw.is_a?(Hash)

  entities = raw["entities"]
  urls     = entities["urls"] if entities.is_a?(Hash)
  urls.is_a?(Array) && !urls.empty?
end.uniq

# Every item in this run is attributed to one randomly picked account id.
account_id = [44, 55, 66].sample

# Wrap at most the first 50 entries: each item keeps the raw entry, the current
# epoch time, the chosen account id and the expanded form of its first URL.
items = with_urls.first(50).map do |tweet|
  {
    "raw"        => tweet,
    "created_at" => Time.now.to_i,
    "account_id" => account_id,
    "url"        => tweet["entities"]["urls"].first["expanded_url"]
  }
end

# Item store: the "items" collection of the "jamnagar" database, wrapped in a
# MongoAdapter.
item_collection = Jamnagar::Initializers::Mongo.new({ database: "jamnagar", collection: "items" }).collection
item_adapter    = Jamnagar::Adapters::MongoAdapter.new(item_collection)
item_storage    = Jamnagar::Storage::ItemStore.new(item_adapter)

# Source store: the "sources" collection of the same database, wrapped the
# same way.
source_collection = Jamnagar::Initializers::Mongo.new({ database: "jamnagar", collection: "sources" }).collection
source_adapter    = Jamnagar::Adapters::MongoAdapter.new(source_collection)
source_storage    = Jamnagar::Storage::SourceStore.new(source_adapter)


# Logger writing to standard output at DEBUG level.
logger = Logger.new(STDOUT).tap { |log| log.level = Logger::DEBUG }

# Refiners, appended in the order in which they appear in the array passed to
# the refinery. Three of them are given a store to work against.
refiners = []
refiners << Jamnagar::Refiners::PrimaryKeyGeneration.new
refiners << Jamnagar::Refiners::UrlExpansion.new
refiners << Jamnagar::Refiners::DuplicateDetection.new(store: item_storage)
refiners << Jamnagar::Refiners::SourceDetail.new(store: source_storage)
refiners << Jamnagar::Refiners::PopularityIncrementation.new(store: item_storage)
refiners << Jamnagar::Refiners::MetaDataExtraction.new

# A single verifier, backed by the item store.
uniqueness_verifier = Jamnagar::Verifiers::Twitter::UniquenessVerifier.new(store: item_storage)
verifiers = [uniqueness_verifier]

# Assemble the refinery from the prepared items, refiners, item store and
# verifiers.
refinery = Jamnagar::Refineries::ContentRefinery.new({
  :items     => items,
  :refiners  => refiners,
  :storage   => item_storage,
  :verifiers => verifiers
})

# Time the run with the monotonic clock: unlike Time.now it is unaffected by
# wall-clock adjustments (NTP, DST, manual changes) during the run.
started = Process.clock_gettime(Process::CLOCK_MONOTONIC)
refinery.refine
elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started

# Report the elapsed time, truncated to whole seconds.
puts "\n------------------------\n"
puts "Refining took: #{elapsed.to_i} seconds"
puts "\n------------------------\n"

Version data entries

3 entries across 3 versions & 1 rubygem

Version Path
jamnagar-1.3.9.1 run.rb
jamnagar-1.3.9 run.rb
jamnagar-1.3.8 run.rb