lib/linkedin-scraper/profile.rb in linkedin-scraper-0.0.6 vs lib/linkedin-scraper/profile.rb in linkedin-scraper-0.0.7

- old
+ new

@@ -1,38 +1,89 @@ USER_AGENTS = ["Windows IE 6", "Windows IE 7", "Windows Mozilla", "Mac Safari", "Mac FireFox", "Mac Mozilla", "Linux Mozilla", "Linux Firefox", "Linux Konqueror"] module Linkedin class Profile #the First name of the contact attr_accessor :first_name,:last_name,:title,:location,:country, - :industry, :linkedin_url,:recommended_visitors,:profile, - :page + :industry, :linkedin_url,:recommended_visitors,:page + #Array of hashes for education + # [ + # [0] { + # :name => "Vishwakarma Institute of Technology", + # :description => "B.Tech, Computer Engineering", + # :period => "2007 – 2011" + # }, + # [1] { + # :name => "St Ursula's High School", + # :description => "Secondary School Education", + # :period => nil + # } + # ] + attr_accessor :education + #Array of websites + #[ + #[0] "http://www.yatishmehta.in" + #] + attr_accessor :websites + #array of hashes containing group name and link + # [ + # [ 0] { + # :name => "Business on Rails", + # :link => "http://www.linkedin.com/groups/Business-on-Rails-27822" + # }, + # [ 1] { + # :name => "HTML5 Technologies", + # :link => "http://www.linkedin.com/groups/HTML5-Technologies-2868882" + # }, + # [ 2] { + # :name => "India on Rails", + # :link => "http://www.linkedin.com/groups/India-on-Rails-149940" + # }, + # [ 3] { + # :name => "Open Source", + # :link => "http://www.linkedin.com/groups?gid=43875" + # }, + # [ 4] { + # :name => "Rails Developers", + # :link => "http://www.linkedin.com/groups?gid=77764" + # }, + # ] + attr_accessor:groups + #Array of hash containing its past job companies and job profile #Example # [ - # [0] { - # :past_title => "Intern", - # :past_company => "Sungard" - # }, - # [1] { - # :past_title => "Software Developer", - # :past_company => "Microsoft" - # } - # ] + # [0] { + # :past_company => "Consumyze Software", + # :past_title => "Trainee", + # :past_company_website => "http://www.consumyze.com", + # :description => "Responsible for design and development" + # }, + # [1] { + # :past_company => 
"SunGard Global Services", + # :past_title => "Project Intern", + # :past_company_website => "http://www.sungard.com/globalservices/learnmore", + # :description => "Fame PassPoint. Developed an entirely Ajax based online control panel for user management and Data access for Fame" + # } + # ] + attr_accessor :past_companies #Array of hash containing its current job companies and job profile #Example # [ # [0] { # :current_title => "Intern", # :current_company => "Sungard" + # :current_company_url=>"http://www.betterlabs.net", + # :description=>"Responsible for design and development of projects on Ruby on Rails." # }, # [1] { - # :current_title => "Software Developer", - # :current_company => "Microsoft" + # :current_title => "Software Developer", + # :current_company => "Microsoft" + # :current_company_url =>"http://www.microsoft.net", + # :description =>"Development and design" + # } # ] attr_accessor :current_companies #url of the profile @@ -45,14 +96,18 @@ @country=get_country(page) @industry=get_industry(page) @current_companies=get_current_companies page @past_companies=get_past_companies page @recommended_visitors=get_recommended_visitors page + @education=get_education page @linkedin_url=url + @websites=get_websites page + @groups=get_groups page @page=page end #returns:nil if it gives a 404 request + def self.get_profile url begin @agent=Mechanize.new @agent.user_agent_alias = USER_AGENTS.sample @agent.max_history = 0 @@ -61,10 +116,21 @@ rescue=>e puts e end end + def get_company_url node + if node.at("h4/strong/a") + link=node.at("h4/strong/a")["href"] + @agent=Mechanize.new + @agent.user_agent_alias = USER_AGENTS.sample + @agent.max_history = 0 + page=@agent.get("http://www.linkedin.com"+link) + url=page.at(".basic-info/div/dl/dd/a").text if page.at(".basic-info/div/dl/dd/a") + end + end + private def get_first_name page return page.at(".given-name").text.strip if page.search(".given-name").first end @@ -89,36 +155,81 @@ return 
page.at(".industry").text.gsub(/\s+/, " ").strip if page.search(".industry").first end def get_past_companies page past_cs=[] - if page.search(".past").first - page.search(".past").search("li").each do |past_company| - title,company=past_company.text.strip.split(" at ") - company=company.gsub(/\s+/, " ").strip if company - title=title.gsub(/\s+/, " ").strip if title - past_company={:past_company=>company,:past_title=> title} + if page.search(".position.experience.vevent.vcard.summary-past").first + page.search(".position.experience.vevent.vcard.summary-past").each do |past_company| + url=get_company_url past_company + title=past_company.at("h3").text.gsub(/\s+|\n/, " ").strip if past_company.at("h3") + company=past_company.at("h4").text.gsub(/\s+|\n/, " ").strip if past_company.at("h4") + description=past_company.at(".description.past-position").text.gsub(/\s+|\n/, " ").strip if past_company.at(".description.past-position") + past_company={:past_company=>company,:past_title=> title,:past_company_website=>url,:description=>description} past_cs<<past_company end return past_cs end end def get_current_companies page current_cs=[] - if page.search(".current").first - page.search(".current").search("li").each do |past_company| - title,company=past_company.text.strip.split(" at ") - company=company.gsub(/\s+/, " ").strip if company - title=title.gsub(/\s+/, " ").strip if title - current_company={:current_company=>company,:current_title=> title} + if page.search(".position.experience.vevent.vcard.summary-current").first + page.search(".position.experience.vevent.vcard.summary-current").each do |current_company| + url=get_company_url current_company + title=current_company.at("h3").text.gsub(/\s+|\n/, " ").strip if current_company.at("h3") + company=current_company.at("h4").text.gsub(/\s+|\n/, " ").strip if current_company.at("h4") + description=current_company.at(".description.current-position").text.gsub(/\s+|\n/, " ").strip if 
current_company.at(".description.current-position") + current_company={:current_company=>company,:current_title=> title,:current_company_url=>url,:description=>description} current_cs<<current_company end return current_cs end end + def get_education page + education=[] + if page.search(".position.education.vevent.vcard").first + page.search(".position.education.vevent.vcard").each do |item| + name=item.at("h3").text.gsub(/\s+|\n/, " ").strip if item.at("h3") + desc=item.at("h4").text.gsub(/\s+|\n/, " ").strip if item.at("h4") + period=item.at(".period").text.gsub(/\s+|\n/, " ").strip if item.at(".period") + edu={:name=>name,:description=>desc,:period=>period} + education<<edu + end + return education + end + end + + def get_websites page + websites=[] + if page.search(".website").first + page.search(".website").each do |site| + url=site.at("a")["href"] + url="http://www.linkedin.com"+url + url=CGI.parse(URI.parse(url).query)["url"] + websites<<url + end + return websites.flatten! + end + end + + def get_groups page + groups=[] + if page.search(".group-data").first + page.search(".group-data").each do |item| + name=item.text.gsub(/\s+|\n/, " ").strip + link="http://www.linkedin.com"+item.at("a")["href"] + groups<<{:name=>name,:link=>link} + end + return groups + end + + end + + + + def get_recommended_visitors page recommended_vs=[] if page.search(".browsemap").first page.at(".browsemap").at("ul").search("li").each do |visitor| v={} @@ -128,9 +239,8 @@ v[:company]=visitor.at('.headline').text.split(" at ").last recommended_vs<<v end return recommended_vs end - end end end