module MarketBot
module Android
class Leaderboard
attr_reader :identifier, :category
attr_reader :hydra
MAX_STARS = 5
PERCENT_DENOM = 100
def self.parse(html)
if html.include?('
Editor's Choice')
parse_editors_choice_page(html)
else
parse_normal_page(html)
end
end
def self.parse_normal_page(html)
results = []
doc = Nokogiri::HTML(html)
doc.css('.card').each do |snippet_node|
result = {}
details_node = snippet_node.css('.details')
unless snippet_node.css('.current-rating').empty?
stars_style = snippet_node.css('.current-rating').first.attributes['style'].value
stars_width_percent = stars_style[/width:\s+([0-9.]+)%/, 1].to_f
result[:stars] = (MAX_STARS * stars_width_percent/PERCENT_DENOM).round(1).to_s
else
result[:stars] = nil
end
result[:title] = details_node.css('.title').first.attributes['title'].to_s
if (price_elem = details_node.css('.buy span').first)
result[:price_usd] = price_elem.text
end
result[:developer] = details_node.css('.subtitle').first.attributes['title'].to_s
result[:market_id] = details_node.css('.title').first.attributes['href'].to_s.gsub('/store/apps/details?id=', '').gsub(/&feature=.*$/, '')
result[:market_url] = "https://play.google.com/store/apps/details?id=#{result[:market_id]}&hl=en"
result[:price_usd] = '$0.00' if result[:price_usd] == 'Install'
results << result
end
results
end
def self.parse_editors_choice_page(html)
results = []
doc = Nokogiri::HTML(html)
doc.css('.fsg-snippet').each do |snippet_node|
result = {}
result[:title] = snippet_node.css('.title').text
result[:price_usd] = nil
result[:developer] = snippet_node.css('.attribution').text
result[:market_id] = snippet_node.attributes['data-docid'].text
result[:market_url] = "https://play.google.com/store/apps/details?id=#{result[:market_id]}&hl=en"
results << result
end
results
end
# This is the initializer method for the Leaderboard class.
#
# Leaderboard gets initialized by default with a specified identifier, an optional app category, along with optional
# request options.
#
# * *Args* :
# - +identifier+ -> The identifier is used to get the results for distinct leaderboards.
# Valid identifiers include:
# :topselling_paid
# :topselling_free
# :topselling_new_free
# :topselling_new_paid
# :editors_choice
# :topselling_paid_game
# :movers_shakers
# :featured
# :tablet_featured
# :topgrossing
# - +category+ -> The category switches between the actual categories, or genres, of apps within a given leaderboard.
# Valid categories include:
# :game
# :arcade
# :brain
# :cards
# :casual
# :game_wallpaper
# :racing
# :sports_games
# :game_widgets
# :application
# :books_and_reference
# :business
# :comics
# :communication
# :education
# :entertainment
# :finance
# :health_and_fitness
# :libraries_and_demo
# :lifestyle
# :app_wallpaper
# :media_and_video
# :medical
# :music_and_audio
# :news_and_magazines
# :personalization
# :photography
# :productivity
# :shopping
# :social
# :sports
# :tools
# :transportation
# :travel_and_local
# :weather
# :app_widgets
# - +options+ -> The optional options Hash contains keys :hydra and :request_opts. :hydra can be used to specify
# a custom Hydra instance, while :request_opts is a Hash containing further options for the Play
# Store HTTP request.
#
def initialize(identifier, category=nil, options={})
@identifier = identifier
@category = category
@hydra = options[:hydra] || MarketBot.hydra
@request_opts = options[:request_opts] || {}
@parsed_results = []
@pending_pages = []
end
def market_urls(options={})
results = []
min_page = options[:min_page] || 1
max_page = options[:max_page] || 25
(min_page..max_page).each do |page|
start_val = (page - 1) * 24
url = 'https://play.google.com/store/apps'
url << "/category/#{category.to_s.upcase}" if category
url << "/collection/#{identifier.to_s}?"
url << "start=#{start_val}"
url << "&num=24&hl=en"
results << url
end
results
end
def enqueue_update(options={},&block)
@callback = block
if @identifier.to_s.downcase == 'editors_choice' && category == nil
url = 'https://play.google.com/store/apps/collection/editors_choice?&hl=en'
process_page(url, 1)
else
min_rank = options[:min_rank] || 1
max_rank = options[:max_rank] || 500
min_page = rank_to_page(min_rank)
max_page = rank_to_page(max_rank)
@parsed_results = []
urls = market_urls(:min_page => min_page, :max_page => max_page)
urls.each_index{ |i| process_page(urls[i], i+1) }
end
self
end
def update(options={})
enqueue_update(options)
@hydra.run
self
end
def rank_to_page(rank)
((rank - 1) / 24) + 1
end
def results
raise 'Results do not exist yet.' unless @parsed_results
@parsed_results.reject{ |page| page.nil? || page.empty? }.flatten
end
private
def process_page(url, page_num)
@pending_pages << page_num
request = Typhoeus::Request.new(url, @request_opts)
request.on_complete do |response|
# HACK: Typhoeus <= 0.4.2 returns a response, 0.5.0pre returns the request.
response = response.response if response.is_a?(Typhoeus::Request)
result = Leaderboard.parse(response.body)
update_callback(result, page_num)
end
@hydra.queue(request)
end
def update_callback(result, page)
@parsed_results[page] = result
@pending_pages.delete(page)
@callback.call(self) if @callback and @pending_pages.empty?
end
end
end
end