lib/linkedin-scraper/profile.rb in linkedin-scraper-0.0.6 vs lib/linkedin-scraper/profile.rb in linkedin-scraper-0.0.7
- old
+ new
@@ -1,38 +1,89 @@
USER_AGENTS = ["Windows IE 6", "Windows IE 7", "Windows Mozilla", "Mac Safari", "Mac FireFox", "Mac Mozilla", "Linux Mozilla", "Linux Firefox", "Linux Konqueror"]
module Linkedin
class Profile
#the First name of the contact
attr_accessor :first_name,:last_name,:title,:location,:country,
- :industry, :linkedin_url,:recommended_visitors,:profile,
- :page
+ :industry, :linkedin_url,:recommended_visitors,:page
+ #Array of hashes for education
+ # [
+ # [0] {
+ # :name => "Vishwakarma Institute of Technology",
+ # :description => "B.Tech, Computer Engineering",
+ # :period => "2007 – 2011"
+ # },
+ # [1] {
+ # :name => "St Ursula's High School",
+ # :description => "Secondary School Education",
+ # :period => nil
+ # }
+ # ]
+ attr_accessor :education
+ #Array of websites
+ #[
+ #[0] "http://www.yatishmehta.in"
+ #]
+ attr_accessor :websites
+ #array of hashes containing group name and link
+ # [
+ # [ 0] {
+ # :name => "Business on Rails",
+ # :link => "http://www.linkedin.com/groups/Business-on-Rails-27822"
+ # },
+ # [ 1] {
+ # :name => "HTML5 Technologies",
+ # :link => "http://www.linkedin.com/groups/HTML5-Technologies-2868882"
+ # },
+ # [ 2] {
+ # :name => "India on Rails",
+ # :link => "http://www.linkedin.com/groups/India-on-Rails-149940"
+ # },
+ # [ 3] {
+ # :name => "Open Source",
+ # :link => "http://www.linkedin.com/groups?gid=43875"
+ # },
+ # [ 4] {
+ # :name => "Rails Developers",
+ # :link => "http://www.linkedin.com/groups?gid=77764"
+ # },
+ # ]
+ attr_accessor:groups
+
#Array of hash containing its past job companies and job profile
#Example
# [
- # [0] {
- # :past_title => "Intern",
- # :past_company => "Sungard"
- # },
- # [1] {
- # :past_title => "Software Developer",
- # :past_company => "Microsoft"
- # }
- # ]
+ # [0] {
+ # :past_company => "Consumyze Software",
+ # :past_title => "Trainee",
+ # :past_company_website => "http://www.consumyze.com",
+ # :description => "Responsible for design and development"
+ # },
+ # [1] {
+ # :past_company => "SunGard Global Services",
+ # :past_title => "Project Intern",
+ # :past_company_website => "http://www.sungard.com/globalservices/learnmore",
+ # :description => "Fame PassPoint. Developed an entirely Ajax based online control panel for user management and Data access for Fame"
+ # }
+ # ]
+
attr_accessor :past_companies
#Array of hash containing its current job companies and job profile
#Example
# [
# [0] {
# :current_title => "Intern",
# :current_company => "Sungard",
+ # :current_company_url=>"http://www.betterlabs.net",
+ # :description=>"Responsible for design and development of projects on Ruby on Rails."
# },
# [1] {
- # :current_title => "Software Developer",
- # :current_company => "Microsoft"
+ # :current_title => "Software Developer",
+ # :current_company => "Microsoft",
+ # :current_company_url =>"http://www.microsoft.net",
+ # :description =>"Development and design"
+
# }
# ]
attr_accessor :current_companies
#url of the profile
@@ -45,14 +96,18 @@
@country=get_country(page)
@industry=get_industry(page)
@current_companies=get_current_companies page
@past_companies=get_past_companies page
@recommended_visitors=get_recommended_visitors page
+ @education=get_education page
@linkedin_url=url
+ @websites=get_websites page
+ @groups=get_groups page
@page=page
end
#Returns nil if the request results in a 404
+
def self.get_profile url
begin
@agent=Mechanize.new
@agent.user_agent_alias = USER_AGENTS.sample
@agent.max_history = 0
@@ -61,10 +116,21 @@
rescue=>e
puts e
end
end
+ def get_company_url node
+ if node.at("h4/strong/a")
+ link=node.at("h4/strong/a")["href"]
+ @agent=Mechanize.new
+ @agent.user_agent_alias = USER_AGENTS.sample
+ @agent.max_history = 0
+ page=@agent.get("http://www.linkedin.com"+link)
+ url=page.at(".basic-info/div/dl/dd/a").text if page.at(".basic-info/div/dl/dd/a")
+ end
+ end
+
private
def get_first_name page
return page.at(".given-name").text.strip if page.search(".given-name").first
end
@@ -89,36 +155,81 @@
return page.at(".industry").text.gsub(/\s+/, " ").strip if page.search(".industry").first
end
#Builds the list of past positions found on the profile page.
#In this diff the "-" lines are the previous implementation (split the text of each
#".past" li on " at ") and the "+" lines are the replacement described below.
#Each ".position.experience.vevent.vcard.summary-past" node becomes a hash with
#:past_company (h4 text), :past_title (h3 text), :past_company_website (result of
#get_company_url for the node) and :description (".description.past-position" text).
#Whitespace runs are collapsed to single spaces; a missing element leaves its value nil.
#Returns the array of hashes, or nil when the page has no matching node.
def get_past_companies page
past_cs=[]
- if page.search(".past").first
- page.search(".past").search("li").each do |past_company|
- title,company=past_company.text.strip.split(" at ")
- company=company.gsub(/\s+/, " ").strip if company
- title=title.gsub(/\s+/, " ").strip if title
- past_company={:past_company=>company,:past_title=> title}
+ if page.search(".position.experience.vevent.vcard.summary-past").first
+ page.search(".position.experience.vevent.vcard.summary-past").each do |past_company|
+ url=get_company_url past_company
+ title=past_company.at("h3").text.gsub(/\s+|\n/, " ").strip if past_company.at("h3")
+ company=past_company.at("h4").text.gsub(/\s+|\n/, " ").strip if past_company.at("h4")
+ description=past_company.at(".description.past-position").text.gsub(/\s+|\n/, " ").strip if past_company.at(".description.past-position")
#the block variable is reused here to hold the resulting hash
+ past_company={:past_company=>company,:past_title=> title,:past_company_website=>url,:description=>description}
past_cs<<past_company
end
return past_cs
end
end
#Builds the list of current positions found on the profile page.
#In this diff the "-" lines are the previous implementation (split the text of each
#".current" li on " at ") and the "+" lines are the replacement described below.
#Each ".position.experience.vevent.vcard.summary-current" node becomes a hash with
#:current_company (h4 text), :current_title (h3 text), :current_company_url (result of
#get_company_url for the node) and :description (".description.current-position" text).
#Whitespace runs are collapsed to single spaces; a missing element leaves its value nil.
#Returns the array of hashes, or nil when the page has no matching node.
def get_current_companies page
current_cs=[]
- if page.search(".current").first
- page.search(".current").search("li").each do |past_company|
- title,company=past_company.text.strip.split(" at ")
- company=company.gsub(/\s+/, " ").strip if company
- title=title.gsub(/\s+/, " ").strip if title
- current_company={:current_company=>company,:current_title=> title}
+ if page.search(".position.experience.vevent.vcard.summary-current").first
+ page.search(".position.experience.vevent.vcard.summary-current").each do |current_company|
+ url=get_company_url current_company
+ title=current_company.at("h3").text.gsub(/\s+|\n/, " ").strip if current_company.at("h3")
+ company=current_company.at("h4").text.gsub(/\s+|\n/, " ").strip if current_company.at("h4")
+ description=current_company.at(".description.current-position").text.gsub(/\s+|\n/, " ").strip if current_company.at(".description.current-position")
#the block variable is reused here to hold the resulting hash
+ current_company={:current_company=>company,:current_title=> title,:current_company_url=>url,:description=>description}
current_cs<<current_company
end
return current_cs
end
end
+ def get_education page
+ education=[]
+ if page.search(".position.education.vevent.vcard").first
+ page.search(".position.education.vevent.vcard").each do |item|
+ name=item.at("h3").text.gsub(/\s+|\n/, " ").strip if item.at("h3")
+ desc=item.at("h4").text.gsub(/\s+|\n/, " ").strip if item.at("h4")
+ period=item.at(".period").text.gsub(/\s+|\n/, " ").strip if item.at(".period")
+ edu={:name=>name,:description=>desc,:period=>period}
+ education<<edu
+ end
+ return education
+ end
+ end
+
+ def get_websites page
+ websites=[]
+ if page.search(".website").first
+ page.search(".website").each do |site|
+ url=site.at("a")["href"]
+ url="http://www.linkedin.com"+url
+ url=CGI.parse(URI.parse(url).query)["url"]
+ websites<<url
+ end
+ return websites.flatten!
+ end
+ end
+
+ def get_groups page
+ groups=[]
+ if page.search(".group-data").first
+ page.search(".group-data").each do |item|
+ name=item.text.gsub(/\s+|\n/, " ").strip
+ link="http://www.linkedin.com"+item.at("a")["href"]
+ groups<<{:name=>name,:link=>link}
+ end
+ return groups
+ end
+
+ end
+
+
+
+
def get_recommended_visitors page
recommended_vs=[]
if page.search(".browsemap").first
page.at(".browsemap").at("ul").search("li").each do |visitor|
v={}
@@ -128,9 +239,8 @@
v[:company]=visitor.at('.headline').text.split(" at ").last
recommended_vs<<v
end
return recommended_vs
end
-
end
end
end