lib/relaton_ieee/scrapper.rb in relaton-ieee-1.3.0 vs lib/relaton_ieee/scrapper.rb in relaton-ieee-1.4.0

- old
+ new

@@ -16,11 +16,11 @@ abstract: fetch_abstract(doc), contributor: fetch_contributor(doc), language: ["en"], script: ["Latn"], date: fetch_date(doc), - committee: fetch_committee(doc), + committee: fetch_committee(doc) ) end # rubocop:enable Metrics/MethodLength, Metrics/AbcSize private @@ -28,19 +28,19 @@ # @param title [String] # @return [Array<RelatonBib::TypedTitleString>] def fetch_title(title) [ RelatonBib::TypedTitleString.new( - type: "main", content: title, language: "en", script: "Latn", + type: "main", content: title, language: "en", script: "Latn" ), ] end # @param title [String] # @return [Array<RelatonBib::DocumentIdentifier>] def fetch_docid(title) - /^(?<identifier>\S+)/ =~ title + /^(?<identifier>(?:\w+\s)?\S+)/ =~ title [RelatonBib::DocumentIdentifier.new(id: identifier, type: "IEEE")] end # @param url [String] # @return [Array<RelatonBib::TypedUri>] @@ -76,26 +76,26 @@ # @param doc [Nokogiri::HTML::Document] # @return [Array<RelatonBib::ContributionInfo>] def fetch_contributor(doc) name = doc.at( - "//td[.='IEEE Program Manager']/following-sibling::td/div/a", + "//td[.='IEEE Program Manager']/following-sibling::td/div/a" ) return [] unless name [personn_contrib(name.text)] end # @param name [String] # @return [RelatonBib::ContributionInfo] def personn_contrib(name) fname = RelatonBib::FullName.new( - completename: RelatonBib::LocalizedString.new(name), + completename: RelatonBib::LocalizedString.new(name) ) entity = RelatonBib::Person.new(name: fname) RelatonBib::ContributionInfo.new( - entity: entity, role: [type: "author"], + entity: entity, role: [type: "author"] ) end # @param name [String] # @return [RelatonBib::ContributionInfo] @@ -115,12 +115,12 @@ issued = doc.at "//td[.='Board Approval']/following-sibling::td/div" if issued dates << RelatonBib::BibliographicDate.new(type: "issued", on: issued.text) end - published = doc.at("//td[.='History']/following-sibling::td/div")&. 
- text&.match(/(?<=Published Date:)[\d-]+/)&.to_s + published = doc.at("//td[.='History']/following-sibling::td/div") + &.text&.match(/(?<=Published Date:)[\d-]+/)&.to_s if published dates << RelatonBib::BibliographicDate.new(type: "published", on: published) end dates @@ -133,9 +133,13 @@ def fetch_committee(doc) committees = [] sponsor = doc.at "//td[.='Sponsor Committee']/following-sibling::td/div" if sponsor committees << Committee.new(type: "sponsor", name: sponsor.text) + end + sponsor = doc.at "//td[.='Standards Committee']/following-sibling::td/div/a" + if sponsor + committees << Committee.new(type: "standard", name: sponsor.text) end working = doc.at "//td[.='Working Group']/following-sibling::td/div" chair = doc.at "//td[.='Working Group Chair']/following-sibling::td/div" if working committees << Committee.new(type: "working", name: working.text,