lib/linkedin-scraper/profile.rb in linkedin-scraper-0.1.0 vs lib/linkedin-scraper/profile.rb in linkedin-scraper-0.1.1
- old
+ new
@@ -68,117 +68,75 @@
# Returns the cached result of get_companies('current').
#
# The lookup runs on the first call and again only while the cached value in
# @current_companies is nil or false; otherwise the stored object is handed
# back unchanged.
def current_companies
  return @current_companies if @current_companies

  @current_companies = get_companies('current')
end
def education
- unless @education
- @education = []
- if @page.search('.position.education.vevent.vcard').first
- @education = @page.search('.position.education.vevent.vcard').map do |item|
- name = item.at('h3').text.gsub(/\s+|\n/, ' ').strip if item.at('h3')
- desc = item.at('h4').text.gsub(/\s+|\n/, ' ').strip if item.at('h4')
- period = item.at('.period').text.gsub(/\s+|\n/, ' ').strip if item.at('.period')
+ @education ||= @page.search('.position.education.vevent.vcard').map do |item|
+ name = item.at('h3').text.gsub(/\s+|\n/, ' ').strip if item.at('h3')
+ desc = item.at('h4').text.gsub(/\s+|\n/, ' ').strip if item.at('h4')
+ period = item.at('.period').text.gsub(/\s+|\n/, ' ').strip if item.at('.period')
- {:name => name, :description => desc, :period => period}
- end
- end
+ {:name => name, :description => desc, :period => period}
end
- @education
end
def websites
- unless @websites
- @websites = []
- if @page.search('.website').first
- @websites = @page.search('.website').map do |site|
- url = site.at('a')['href']
- url = "http://www.linkedin.com#{url}"
- CGI.parse(URI.parse(url).query)['url']
- end.flatten!
- end
+ @websites ||= @page.search('.website').flat_map do |site|
+ url = "http://www.linkedin.com#{site.at('a')['href']}"
+ CGI.parse(URI.parse(url).query)['url']
end
- @websites
+
end
def groups
- unless @groups
- @groups = []
- if page.search('.group-data').first
- @groups = page.search('.group-data').map do |item|
- name = item.text.gsub(/\s+|\n/, ' ').strip
- link = "http://www.linkedin.com#{item.at('a')['href']}"
- {:name => name, :link => link}
- end
- end
+ @groups ||= @page.search('.group-data').map do |item|
+ name = item.text.gsub(/\s+|\n/, ' ').strip
+ link = "http://www.linkedin.com#{item.at('a')['href']}"
+ {:name => name, :link => link}
end
- @groups
end
def organizations
- unless @organizations
- @organizations = []
- if @page.search('ul.organizations/li.organization').first
- @organizations = @page.search('ul.organizations/li.organization').map do |item|
-
- name = item.search('h3').text.gsub(/\s+|\n/, ' ').strip rescue nil
- start_date, end_date = item.search('ul.specifics li').text.gsub(/\s+|\n/, ' ').strip.split(' to ')
- start_date = Date.parse(start_date) rescue nil
- end_date = Date.parse(end_date) rescue nil
- {:name => name, :start_date => start_date, :end_date => end_date}
- end
- end
+ @organizations ||= @page.search('ul.organizations/li.organization').map do |item|
+ name = item.search('h3').text.gsub(/\s+|\n/, ' ').strip rescue nil
+ start_date, end_date = item.search('ul.specifics li').text.gsub(/\s+|\n/, ' ').strip.split(' to ')
+ start_date = Date.parse(start_date) rescue nil
+ end_date = Date.parse(end_date) rescue nil
+ {:name => name, :start_date => start_date, :end_date => end_date}
end
- @organizations
end
def languages
- unless @languages
- @languages = []
- if @page.at('ul.languages/li.language')
- @languages = @page.search('ul.languages/li.language').map do |item|
- language = item.at('h3').text rescue nil
- proficiency = item.at('span.proficiency').text.gsub(/\s+|\n/, ' ').strip rescue nil
- {:language=> language, :proficiency => proficiency }
- end
- end
+ @languages ||= @page.search('ul.languages/li.language').map do |item|
+ language = item.at('h3').text rescue nil
+ proficiency = item.at('span.proficiency').text.gsub(/\s+|\n/, ' ').strip rescue nil
+ {:language=> language, :proficiency => proficiency }
end
- @languages
end
def certifications
- unless @certtifications
- @certifications = []
- if @page.at('ul.certifications/li.certification')
- @certifications = @page.search('ul.certifications/li.certification').map do |item|
+ @certifications ||= @page.search('ul.certifications/li.certification').map do |item|
name = item.at('h3').text.gsub(/\s+|\n/, ' ').strip rescue nil
authority = item.at('.specifics/.org').text.gsub(/\s+|\n/, ' ').strip rescue nil
license = item.at('.specifics/.licence-number').text.gsub(/\s+|\n/, ' ').strip rescue nil
start_date = item.at('.specifics/.dtstart').text.gsub(/\s+|\n/, ' ').strip rescue nil
{:name => name, :authority => authority, :license => license, :start_date => start_date}
end
- end
- end
- @certifications
+
end
def recommended_visitors
- unless @recommended_visitors
- @recommended_visitors = []
- if @page.at('.browsemap/.content/ul/li')
- @recommended_visitors = @page.search('.browsemap/.content/ul/li').map do |visitor|
- v = {}
- v[:link] = visitor.at('a')['href']
- v[:name] = visitor.at('strong/a').text
- v[:title] = visitor.at('.headline').text.gsub('...',' ').split(' at ').first
- v[:company] = visitor.at('.headline').text.gsub('...',' ').split(' at ')[1]
- v
- end
- end
+ @recommended_visitors ||= @page.search('.browsemap/.content/ul/li').map do |visitor|
+ v = {}
+ v[:link] = visitor.at('a')['href']
+ v[:name] = visitor.at('strong/a').text
+ v[:title] = visitor.at('.headline').text.gsub('...',' ').split(' at ').first
+ v[:company] = visitor.at('.headline').text.gsub('...',' ').split(' at ')[1]
+ v
end
- @recommended_visitors
end
def to_json
require 'json'
ATTRIBUTES.reduce({}){ |hash,attr| hash[attr.to_sym] = self.send(attr.to_sym);hash }.to_json
@@ -194,14 +152,14 @@
company = {}
company[:title] = node.at('h3').text.gsub(/\s+|\n/, ' ').strip if node.at('h3')
company[:company] = node.at('h4').text.gsub(/\s+|\n/, ' ').strip if node.at('h4')
company[:description] = node.at(".description.#{type}-position").text.gsub(/\s+|\n/, ' ').strip if node.at(".description.#{type}-position")
- start_date = node.at('.dtstart').text.gsub(/\s+|\n/, ' ').strip rescue nil
+
+ start_date = node.at('.dtstart')['title'] rescue nil
company[:start_date] = parse_date(start_date) rescue nil
- end_date = node.at('.dtend').text.gsub(/\s+|\n/, ' ').strip rescue nil
- end_date ||= node.at('.dtstamp').text.gsub(/\s+|\n/, ' ').strip rescue nil
+ end_date = node.at('.dtend')['title'] rescue nil
company[:end_date] = parse_date(end_date) rescue nil
company_link = node.at('h4/strong/a')['href'] if node.at('h4/strong/a')
result = get_company_details(company_link)