## Page Structure
# activity_doc(whole)
#   tournament_doc
#     record_doc
#     record_doc
#     record_doc
#     record_doc
#   tournament_doc
#     record_doc
#     record_doc
#     record_doc
###################
require 'atp_scraper/activities/record'

module AtpScraper
  # Scrape activity data
  #
  # Walks a player's activity page: every tournament table found in the page
  # yields one hash per match-record row, combining tournament-level data
  # (name, location, dates, surface, the player's rank taken from the caption)
  # with the row-level data returned by Record.pickup_record.
  class Activity
    include Activities

    # @param html [String] raw HTML of the activity page
    # @param html_charset [String] charset handed to AtpScraper::Html.parse
    # NOTE(review): the parsed document is presumably a Nokogiri document
    # (it is queried with #css below) -- confirm against AtpScraper::Html.
    def initialize(html, html_charset = 'utf-8')
      @activity_doc = AtpScraper::Html.parse(html, html_charset)
      @player_name = pickup_player_name(@activity_doc)
    end

    # Builds the flat list of match records for the whole page.
    #
    # @return [Array<Hash>] one hash per record row, in page order
    #   (see #create_record for the keys)
    def pickup_activity_data
      search_tournaments_doc(@activity_doc).flat_map do |tournament_doc|
        tournament = pickup_tournament_info(tournament_doc)
        # The rank is per tournament: it is parsed from that tournament's caption.
        player = { rank: pickup_player_rank(tournament[:caption]) }

        search_records_doc(tournament_doc).map do |record_doc|
          create_record(Record.pickup_record(record_doc), player, tournament)
        end
      end
    end

    private

    # @return the nodes matching ".activity-tournament-table"
    def search_tournaments_doc(activity_doc)
      activity_doc.css(".activity-tournament-table")
    end

    # @return the nodes matching ".mega-table tbody tr" inside one tournament
    def search_records_doc(tournament_doc)
      tournament_doc.css(".mega-table tbody tr")
    end

    # Merges record-, player- and tournament-level values into one flat hash.
    #
    # @param record [Hash] reads :opponent_name, :opponent_rank, :round,
    #   :score and :win_loss
    # @param player [Hash] reads :rank
    # @param tournament [Hash] as returned by #pickup_tournament_info
    # @return [Hash]
    def create_record(record, player, tournament)
      {
        year: tournament[:year],
        player_name: @player_name,
        player_rank: player[:rank],
        opponent_name: record[:opponent_name],
        opponent_rank: record[:opponent_rank],
        round: record[:round],
        score: record[:score],
        win_loss: record[:win_loss],
        tournament_name: tournament[:name],
        tournament_location: tournament[:location],
        tournament_start_date: tournament[:date][:start],
        tournament_end_date: tournament[:date][:end],
        tournament_surface: tournament[:surface],
        tournament_surface_inout: tournament[:surface_inout]
      }
    end

    # Reads the player name from the "content" attribute of the
    # meta[property="pageTransitionTitle"] element.
    def pickup_player_name(activity_doc)
      activity_doc
        .css("meta[property=\"pageTransitionTitle\"]")
        .attr("content").value
    end

    # Collects the tournament-level fields of one tournament table.
    #
    # @return [Hash] with :name, :location, :date ({ start:, end: }), :year
    #   (first four characters of the date text), :caption, :surface and
    #   :surface_inout
    def pickup_tournament_info(tournament_doc)
      tournament_date = pickup_text(tournament_doc, ".tourney-dates")
      surface = pickup_surface(tournament_doc)
      {
        name: pickup_text(tournament_doc, ".tourney-title"),
        location: pickup_text(tournament_doc, ".tourney-location"),
        date: divide_tournament_date(tournament_date),
        year: tournament_date[0, 4],
        caption: pickup_text(tournament_doc, ".activity-tournament-caption"),
        surface: surface[:surface],
        surface_inout: surface[:inout]
      }
    end

    # Extracts the text between "ATP Ranking:" and ", Prize" from a caption.
    #
    # @param tournament_caption [String]
    # @return [String, nil] the stripped rank text, or nil when the caption
    #   does not contain that pattern (previously this raised NoMethodError
    #   and aborted the whole scrape)
    def pickup_player_rank(tournament_caption)
      rank = tournament_caption[/ATP Ranking:(.+), Prize/, 1]
      rank && rank.strip
    end

    # Before: String "2011.01.03 - 2011.01.08"
    # After: Hash { start: 2011.01.03, end: 2011.01.08 }
    def divide_tournament_date(date)
      start_date, end_date = date.split('-').map(&:strip)
      { start: start_date, end: end_date }
    end

    # Stripped text content of the first node matching +selector+.
    # Raises NoMethodError when nothing matches (there is no first node).
    def pickup_text(doc, selector)
      doc.css(selector).first.content.strip
    end

    # Reads the surface text from the first ".item-details" node inside the
    # second ".tourney-details" node, removes all whitespace from it and
    # splits it with #divide_surface.
    def pickup_surface(tournament_doc)
      surface = tournament_doc
                .css(".tourney-details")[1]
                .css(".item-details")
                .first.content.gsub(/\s/, "") # \s already covers tabs
      divide_surface(surface)
    end

    # Splits a whitespace-free surface string such as "OutdoorHard".
    #
    # @param surface [String]
    # @return [Hash] { surface: "Hard", inout: "Outdoor" }; when the string
    #   does not start with "Outdoor" or "Indoor" it is returned untouched
    #   with inout: nil
    def divide_surface(surface)
      inout = surface.match(/\A(Outdoor|Indoor)/)
      return { surface: surface, inout: nil } if inout.nil?

      # post_match drops only the leading prefix; a gsub on the matched word
      # would also remove any later occurrence of it inside the surface name.
      { surface: inout.post_match, inout: inout[0] }
    end
  end
end