lib/linsc/parsers.rb in linsc-0.0.5 vs lib/linsc/parsers.rb in linsc-0.0.6

- old
+ new

@@ -1,65 +1,65 @@ module Parsers def scrape_contact(input_row, page, mode) row = CSV::Row.new(@headers, []) - name = page.at_css("#name")&.text&.split + name = page.at_css("#name").text.split contact_id = input_row["Contact ID"] lin_id = input_row["LIN ID"] cv_tr = input_row["CV TR"] acc_name = input_row["Account Name"] import_status = input_row["Linkedin Import Status"] email = input_row["Email"] lin_profile = input_row["Linkedin Profile"] cand_id = input_row["Candidate ID"] cand_source = input_row["LIN 1st Degree"] - title = page.at_css(".headline.title")&.text - country = page.at_css("#demographics .locality")&.text - sector = page.at_css("#demographics .descriptor:not(.adr)")&.text + title = page.at_css(".headline.title").text + country = page.at_css("#demographics .locality").text + sector = page.at_css("#demographics .descriptor:not(.adr)").text positions = page.css("#experience .positions .position") if positions - e1_title = positions[0]&.at_css(".item-title")&.text - e1_org = positions[0]&.at_css(".item-subtitle")&.text - e1_start = positions[0]&.css(".date-range time")[0]&.text - e1_end = positions[0]&.css(".date-range time")[1]&.text - e1_loc = positions[0]&.at_css(".location")&.text - e1_desc = positions[0]&.at_css(".description")&.text - e2_title = positions[1]&.at_css(".item-title")&.text - e2_org = positions[1]&.at_css(".item-subtitle")&.text - e2_start = positions[1]&.css(".date-range time")[0]&.text - e2_end = positions[1]&.css(".date-range time")[1]&.text - e2_loc = positions[1]&.at_css(".location")&.text - e2_desc = positions[1]&.at_css(".description")&.text - e3_title = positions[2]&.at_css(".item-title")&.text - e3_org = positions[2]&.at_css(".item-subtitle")&.text - e3_start = positions[2]&.css(".date-range time")[0]&.text - e3_end = positions[2]&.css(".date-range time")[1]&.text - e3_loc = positions[2]&.at_css(".location")&.text - e3_desc = positions[2]&.at_css(".description")&.text + e1_title = positions[0].at_css(".item-title").text + e1_org = positions[0].at_css(".item-subtitle").text + e1_start = positions[0].css(".date-range time")[0].text + e1_end = positions[0].css(".date-range time")[1].text + e1_loc = positions[0].at_css(".location").text + e1_desc = positions[0].at_css(".description").text + e2_title = positions[1].at_css(".item-title").text + e2_org = positions[1].at_css(".item-subtitle").text + e2_start = positions[1].css(".date-range time")[0].text + e2_end = positions[1].css(".date-range time")[1].text + e2_loc = positions[1].at_css(".location").text + e2_desc = positions[1].at_css(".description").text + e3_title = positions[2].at_css(".item-title").text + e3_org = positions[2].at_css(".item-subtitle").text + e3_start = positions[2].css(".date-range time")[0].text + e3_end = positions[2].css(".date-range time")[1].text + e3_loc = positions[2].at_css(".location").text + e3_desc = positions[2].at_css(".description").text end certs = page.css(".certifications .certification") if certs - c1_name = certs[0]&.at_css(".item-title")&.text - c2_name = certs[1]&.at_css(".item-title")&.text - c_type = certs[0]&.at_css(".item-subtitle")&.text + c1_name = certs[0].at_css(".item-title").text + c2_name = certs[1].at_css(".item-title").text + c_type = certs[0].at_css(".item-subtitle").text end schools = page.css("#education .schools .school") if schools - s1_name = schools[0]&.at_css(".item-title")&.text - s2_name = schools[1]&.at_css(".item-title")&.text - s1_start = schools[0]&.css(".date-range time")[0]&.text - s2_start = schools[1]&.css(".date-range time")[0]&.text - s1_end = schools[0]&.css(".date-range time")[1]&.text - s2_end = schools[1]&.css(".date-range time")[1]&.text - s1_degree = schools[0]&.at_css(".item-subtitle")&.text - s2_degree = schools[1]&.at_css(".item-subtitle")&.text + s1_name = schools[0].at_css(".item-title").text + s2_name = schools[1].at_css(".item-title").text + s1_start = schools[0].css(".date-range time")[0].text + s2_start = schools[1].css(".date-range time")[0].text + s1_end = schools[0].css(".date-range time")[1].text + s2_end = schools[1].css(".date-range time")[1].text + s1_degree = schools[0].at_css(".item-subtitle").text + s2_degree = schools[1].at_css(".item-subtitle").text end summary = page.at_css("#summary .description") - summary&.css('br').each{|br| br.replace "\n"} if summary + summary.css('br').each{|br| br.replace "\n"} if summary text_resume = "\n\n***IMPORTED FROM LINKEDIN***\n#{lin_profile}\n\n" text_resume += name.join(" ") text_resume += "\n#{email}" text_resume += "\nTitle: #{title}" if title @@ -103,31 +103,31 @@ end interests = page.css("#interests .pills .interest") text_resume += "\nINTERESTS\n" if interests && interests.length > 0 ints = [] interests.each do |interest| - int = interest.at_css(".wrap")&.text + int = interest.at_css(".wrap").text if int ints << int unless (int == "See less") || (int.match(/See \d+\+/)) end end text_resume += "#{ints.join(", ")}\n\n" skills = page.css("#skills .pills .skill") text_resume += "\n\nSKILLS\n" if skills && skills.length > 0 sks = [] skills.each do |skill| - sk = skill.at_css(".wrap")&.text + sk = skill.at_css(".wrap").text if sk sks << sk unless (sk == "See less") || (sk.match(/See \d+\+/)) end end text_resume += "#{sks.join(", ")}\n\n" languages = page.css("#languages .language") text_resume += "\n\nLANGUAGES\n" if languages.length > 0 langs = [] languages.each do |language| - lang = language.at_css(".name")&.text + lang = language.at_css(".name").text prof = language.at_css(".proficiency") lang += " (#{prof.text})" if prof && prof.text.length > 0 langs << lang if lang end text_resume += "#{langs.join(", ")}\n\n" @@ -228,47 +228,47 @@ row["Contact ID"] = contact_id row["LIN ID"] = lin_id row["CV TR"] = "1" row["Account Name"] = acc_name row["Linkedin Import Status"] = import_status - row["First Name"] = name[0]&.slice(0, 39) - row["Last Name"] = name[1..-1]&.join(" ")&.slice(0, 79) + row["First Name"] = name[0].slice(0, 39) + row["Last Name"] = name[1..-1].join(" ").slice(0, 79) row["Email"] = email row["Candidate ID"] = cand_id row["LIN 1st Degree"] = cand_source - row["Title"] = title&.slice(0, 127) + row["Title"] = title.slice(0, 127) row["Contact Country"] = country - row["Contact LIN Sector"] = sector&.slice(0, 99) - row["Employer 1 Title"] = e1_title&.slice(0, 31999) - row["Employer Organization Name 1"] = e1_org&.slice(0, 254) + row["Contact LIN Sector"] = sector.slice(0, 99) + row["Employer 1 Title"] = e1_title.slice(0, 31999) + row["Employer Organization Name 1"] = e1_org.slice(0, 254) row["Employer 1 Start Date"] = format_date(e1_start) #format row["Employer 1 End Date"] = format_date(e1_end) #format - row["Employer 1 Location"] = e1_loc&.slice(0, 254) - row["Employer 1 Description"] = e1_desc&.slice(0, 31999) - row["Employer 2 Title"] = e2_title&.slice(0, 31999) - row["Employer Organization Name 2"] = e2_org&.slice(0, 254) + row["Employer 1 Location"] = e1_loc.slice(0, 254) + row["Employer 1 Description"] = e1_desc.slice(0, 31999) + row["Employer 2 Title"] = e2_title.slice(0, 31999) + row["Employer Organization Name 2"] = e2_org.slice(0, 254) row["Employer 2 Start Date"] = format_date(e2_start) #format row["Employer 2 End Date"] = format_date(e2_end) #format - row["Employer 2 Location"] = e2_loc&.slice(0, 254) - row["Employer 2 Description"] = e2_desc&.slice(0, 31999) - row["Employer 3 Title"] = e3_title&.slice(0, 31999) - row["Employer Organization Name 3"] = e3_org&.slice(0, 254) + row["Employer 2 Location"] = e2_loc.slice(0, 254) + row["Employer 2 Description"] = e2_desc.slice(0, 31999) + row["Employer 3 Title"] = e3_title.slice(0, 31999) + row["Employer Organization Name 3"] = e3_org.slice(0, 254) row["Employer 3 Start Date"] = format_date(e3_start) #format row["Employer 3 End Date"] = format_date(e3_end) #format - row["Employer 3 Location"] = e3_loc&.slice(0, 254) - row["Employer 3 Description"] = e3_desc&.slice(0, 31999) - row["License or Certification Name 1"] = c1_name&.slice(0, 254) - row["License or Certification Name 2"] = c2_name&.slice(0, 254) - row["License or Certification Credential Type"] = c_type&.slice(0, 254) - row["Education School 1"] = s1_name&.slice(0, 124) - row["Education Degree Name 1"] = s1_degree&.slice(0, 254) + row["Employer 3 Location"] = e3_loc.slice(0, 254) + row["Employer 3 Description"] = e3_desc.slice(0, 31999) + row["License or Certification Name 1"] = c1_name.slice(0, 254) + row["License or Certification Name 2"] = c2_name.slice(0, 254) + row["License or Certification Credential Type"] = c_type.slice(0, 254) + row["Education School 1"] = s1_name.slice(0, 124) + row["Education Degree Name 1"] = s1_degree.slice(0, 254) row["Education Degree Date 1"] = format_date(s1_end) - row["Education School 2"] = s2_name&.slice(0, 124) - row["Education Degree Name 2"] = s2_degree&.slice(0, 254) + row["Education School 2"] = s2_name.slice(0, 124) + row["Education Degree Name 2"] = s2_degree.slice(0, 254) row["Education Degree Date 2"] = format_date(s2_end) - row["Text Resume"] = text_resume&.slice(0, 31999) - row["LinkedIn Profile"] = lin_profile&.slice(0, 254) + row["Text Resume"] = text_resume.slice(0, 31999) + row["LinkedIn Profile"] = lin_profile.slice(0, 254) row["Resume Last Updated"] = Time.now.strftime('%Y-%m-%d %H:%M:%S') row["LIN Import Date"] = Time.now.strftime('%Y-%m-%d') row["CV Uploaded"] = "1" row @@ -279,11 +279,11 @@ rows = [] schools = page.css("#education .schools .school") schools.each do |school| row = CSV::Row.new(@education_headers, []) - row["Contact"] = input_row["Contact ID"] + row["Contact ID"] = input_row["Contact ID"] row["LIN ID"] = input_row["LIN ID"] row["School Name"] = school.at_css(".item-title").text.slice(0, 149) row["Major"] = school.at_css(".item-subtitle").text.slice(0, 254) dstart = school.css(".date-range time")[0] dend = school.css(".date-range time")[1] @@ -301,10 +301,10 @@ rows = [] positions = page.css("#experience .positions .position") positions.each do |position| row = CSV::Row.new(@employment_headers, []) - row["Contact"] = input_row["Contact ID"] + row["Contact ID"] = input_row["Contact ID"] row["LIN ID"] = input_row["LIN ID"] row["Job Title"] = position.at_css(".item-title").text.slice(0, 74) row["Employer Name"] = position.at_css(".item-subtitle").text.slice(0, 149) jstart = position.css(".date-range time")[0] jend = position.css(".date-range time")[1]