profile.rb in linkedin-scraper-0.0.10

- old
+ new

@@ -45,17 +45,17 @@
         @agent = Mechanize.new
         @agent.user_agent_alias = USER_AGENTS.sample
         @agent.max_history = 0
         page = @agent.get(url)
         return Linkedin::Profile.new(page, url)
-      rescue=>e
+      rescue => e
         puts e
       end
     end
 
     def get_skills(page)
-      page.search('.competency.show-bean').map{|skill|skill.text.strip if skill.text}
+      page.search('.competency.show-bean').map{|skill|skill.text.strip if skill.text} rescue nil
     end
 
     def get_company_url(node)
       result={}
       if node.at("h4/strong/a")
@@ -186,103 +186,117 @@
 
     def get_organizations(page)
       organizations = []
       # if the profile contains org data
       if page.search('ul.organizations li.organization').first
-
         # loop over each element with org data
         page.search('ul.organizations li.organization').each do |item|
-          # find the h3 element within the above section and get the text with excess white space stripped
-          name = item.search('h3').text.gsub(/\s+|\n/, " ").strip
-          position = nil # add this later
-          occupation = nil # add this latetr too, this relates to the experience/work
-          start_date = Date.parse(item.search('ul.specifics li').text.gsub(/\s+|\n/, " ").strip.split(' to ').first)
-          if item.search('ul.specifics li').text.gsub(/\s+|\n/, " ").strip.split(' to ').last == 'Present'
-            end_date = nil
-          else
-            Date.parse(item.search('ul.specifics li').text.gsub(/\s+|\n/, " ").strip.split(' to ').last)
-          end
 
-          organizations << { name: name, start_date: start_date, end_date: end_date }
-        end
+          begin
+            # find the h3 element within the above section and get the text with excess white space stripped
+            name = item.search('h3').text.gsub(/\s+|\n/, " ").strip
+            position = nil # add this later
+            occupation = nil # add this later too, this relates to the experience/work
+            start_date = Date.parse(item.search('ul.specifics li').text.gsub(/\s+|\n/, " ").strip.split(' to ').first)
+            if item.search('ul.specifics li').text.gsub(/\s+|\n/, " ").strip.split(' to ').last == 'Present'
+              end_date = nil
+            else
+              Date.parse(item.search('ul.specifics li').text.gsub(/\s+|\n/, " ").strip.split(' to ').last)
+            end
 
+            organizations << { name: name, start_date: start_date, end_date: end_date }
+          rescue => e
+
+          end
+        end
         return organizations
-      end # page.search('ul.organizations li.organization').first
+      end
     end
 
     def get_languages(page)
       languages = []
       # if the profile contains org data
       if page.search('ul.languages li.language').first
 
         # loop over each element with org data
         page.search('ul.languages li.language').each do |item|
-          # find the h3 element within the above section and get the text with excess white space stripped
-          language = item.at('h3').text
-          proficiency = item.at('span.proficiency').text.gsub(/\s+|\n/, " ").strip
-          languages << { language:language, proficiency:proficiency }
+          begin
+            # find the h3 element within the above section and get the text with excess white space stripped
+            language = item.at('h3').text
+            proficiency = item.at('span.proficiency').text.gsub(/\s+|\n/, " ").strip
+            languages << { language:language, proficiency:proficiency }
+          rescue => e
+          end
         end
 
         return languages
       end # page.search('ul.organizations li.organization').first
     end
 
     def get_certifications(page)
       certifications = []
+
       # search string to use with Nokogiri
       query = 'ul.certifications li.certification'
       months = 'January|February|March|April|May|June|July|August|September|November|December'
       regex = /(#{months}) (\d{4})/
 
       # if the profile contains cert data
       if page.search(query).first
 
         # loop over each element with cert data
         page.search(query).each do |item|
-          item_text = item.text.gsub(/\s+|\n/, " ").strip
-          name = item_text.split(" #{item_text.scan(/#{months} \d{4}/)[0]}")[0]
-          authority = nil # we need a profile with an example of this and probably will need to use the API to accuratetly get this data
-          license = nil # we need a profile with an example of this and probably will need to use the API to accuratetly get this data
-          start_date = Date.parse(item_text.scan(regex)[0].join(' '))
+          begin
+            item_text = item.text.gsub(/\s+|\n/, " ").strip
+            name = item_text.split(" #{item_text.scan(/#{months} \d{4}/)[0]}")[0]
+            authority = nil # we need a profile with an example of this and probably will need to use the API to accurately get this data
+            license = nil # we need a profile with an example of this and probably will need to use the API to accurately get this data
+            start_date = Date.parse(item_text.scan(regex)[0].join(' '))
 
-          includes_end_date = item_text.scan(regex).count > 1
-          end_date = includes_end_date ? Date.parse(item_text.scan(regex)[0].join(' ')) : nil # we need a profile with an example of this and probably will need to use the API to accuratetly get this data
+            includes_end_date = item_text.scan(regex).count > 1
+            end_date = includes_end_date ? Date.parse(item_text.scan(regex)[0].join(' ')) : nil # we need a profile with an example of this and probably will need to use the API to accurately get this data
 
-          certifications << { name:name, authority:authority, license:license, start_date:start_date, end_date:end_date }
+            certifications << { name:name, authority:authority, license:license, start_date:start_date, end_date:end_date }
+          rescue => e
+          end
         end
         return certifications
       end
+
     end
 
 
     def get_organizations(page)
       organizations = []
       # if the profile contains org data
       if page.search('ul.organizations li.organization').first
 
         # loop over each element with org data
         page.search('ul.organizations li.organization').each do |item|
-          # find the h3 element within the above section and get the text with excess white space stripped
-          name = item.search('h3').text.gsub(/\s+|\n/, " ").strip
-          position = nil # add this later
-          occupation = nil # add this latetr too, this relates to the experience/work
-          start_date = Date.parse(item.search('ul.specifics li').text.gsub(/\s+|\n/, " ").strip.split(' to ').first)
-          if item.search('ul.specifics li').text.gsub(/\s+|\n/, " ").strip.split(' to ').last == 'Present'
-            end_date = nil
-          else
-            Date.parse(item.search('ul.specifics li').text.gsub(/\s+|\n/, " ").strip.split(' to ').last)
-          end
+          begin
+            # find the h3 element within the above section and get the text with excess white space stripped
+            name = item.search('h3').text.gsub(/\s+|\n/, " ").strip
+            position = nil # add this later
+            occupation = nil # add this later too, this relates to the experience/work
+            start_date = Date.parse(item.search('ul.specifics li').text.gsub(/\s+|\n/, " ").strip.split(' to ').first)
+            if item.search('ul.specifics li').text.gsub(/\s+|\n/, " ").strip.split(' to ').last == 'Present'
+              end_date = nil
+            else
+              Date.parse(item.search('ul.specifics li').text.gsub(/\s+|\n/, " ").strip.split(' to ').last)
+            end
 
-          organizations << { name: name, start_date: start_date, end_date: end_date }
+            organizations << { name: name, start_date: start_date, end_date: end_date }
+          rescue => e
+          end
         end
-
-        return organizations
-      end # page.search('ul.organizations li.organization').first
+      end
+      return organizations
     end
 
 
 
+
     def get_recommended_visitors(page)
       recommended_vs=[]
       if page.search(".browsemap").first
         page.at(".browsemap").at("ul").search("li").each do |visitor|
           v = {}
@@ -293,7 +307,8 @@
           recommended_vs << v
         end
         return recommended_vs
       end
     end
+
   end
 end