lib/relaton_ieee/scrapper.rb in relaton-ieee-1.3.0 vs lib/relaton_ieee/scrapper.rb in relaton-ieee-1.4.0
- old
+ new
@@ -16,11 +16,11 @@
abstract: fetch_abstract(doc),
contributor: fetch_contributor(doc),
language: ["en"],
script: ["Latn"],
date: fetch_date(doc),
- committee: fetch_committee(doc),
+ committee: fetch_committee(doc)
)
end
# rubocop:enable Metrics/MethodLength, Metrics/AbcSize
private
@@ -28,19 +28,19 @@
# Wraps the given title text in a single typed title of type "main",
# tagged with language "en" and script "Latn".
#
# @param title [String] text used as the content of the title
# @return [Array<RelatonBib::TypedTitleString>] one-element array
def fetch_title(title)
[
RelatonBib::TypedTitleString.new(
- type: "main", content: title, language: "en", script: "Latn",
+ type: "main", content: title, language: "en", script: "Latn"
),
]
end
# Extracts the document identifier from the beginning of the title and
# wraps it in a document identifier of type "IEEE".
#
# @param title [String] title whose leading token(s) form the identifier
# @return [Array<RelatonBib::DocumentIdentifier>] one-element array
def fetch_docid(title)
# A regexp literal with a named group on the left of `=~` assigns the
# capture to the local variable `identifier` (nil when nothing matches).
# Old pattern: the first run of non-whitespace characters at a line start.
# New pattern: optionally also takes one leading word and a single
# whitespace character before that run, e.g. "AB 12-3 Title" yields
# "AB 12-3" instead of "AB".
- /^(?<identifier>\S+)/ =~ title
+ /^(?<identifier>(?:\w+\s)?\S+)/ =~ title
[RelatonBib::DocumentIdentifier.new(id: identifier, type: "IEEE")]
end
# @param url [String]
# @return [Array<RelatonBib::TypedUri>]
@@ -76,26 +76,26 @@
# Builds the contributor list from the link found in the table cell that
# follows the "IEEE Program Manager" label cell.
#
# @param doc [Nokogiri::HTML::Document] document that is searched
# @return [Array<RelatonBib::ContributionInfo>] one contributor built from
#   the link text, or an empty array when no such link is found
def fetch_contributor(doc)
# XPath: the <a> inside a <div> of a <td> that is a following sibling of
# the <td> whose text is exactly "IEEE Program Manager".
name = doc.at(
- "//td[.='IEEE Program Manager']/following-sibling::td/div/a",
+ "//td[.='IEEE Program Manager']/following-sibling::td/div/a"
)
return [] unless name
[personn_contrib(name.text)]
end
# Builds a contribution record for a person with the role type "author".
#
# @param name [String] the person's name, stored as the complete name
# @return [RelatonBib::ContributionInfo]
def personn_contrib(name)
# The name is kept as one localized string in `completename`; this method
# does not split it into separate name parts.
fname = RelatonBib::FullName.new(
- completename: RelatonBib::LocalizedString.new(name),
+ completename: RelatonBib::LocalizedString.new(name)
)
entity = RelatonBib::Person.new(name: fname)
# `role` is a one-element array holding the hash { type: "author" }.
RelatonBib::ContributionInfo.new(
- entity: entity, role: [type: "author"],
+ entity: entity, role: [type: "author"]
)
end
# @param name [String]
# @return [RelatonBib::ContributionInfo]
@@ -115,12 +115,12 @@
issued = doc.at "//td[.='Board Approval']/following-sibling::td/div"
if issued
dates << RelatonBib::BibliographicDate.new(type: "issued",
on: issued.text)
end
- published = doc.at("//td[.='History']/following-sibling::td/div")&.
- text&.match(/(?<=Published Date:)[\d-]+/)&.to_s
+ published = doc.at("//td[.='History']/following-sibling::td/div")
+ &.text&.match(/(?<=Published Date:)[\d-]+/)&.to_s
if published
dates << RelatonBib::BibliographicDate.new(type: "published",
on: published)
end
dates
@@ -133,9 +133,13 @@
def fetch_committee(doc)
committees = []
sponsor = doc.at "//td[.='Sponsor Committee']/following-sibling::td/div"
if sponsor
committees << Committee.new(type: "sponsor", name: sponsor.text)
+ end
+ sponsor = doc.at "//td[.='Standards Committee']/following-sibling::td/div/a"
+ if sponsor
+ committees << Committee.new(type: "standard", name: sponsor.text)
end
working = doc.at "//td[.='Working Group']/following-sibling::td/div"
chair = doc.at "//td[.='Working Group Chair']/following-sibling::td/div"
if working
committees << Committee.new(type: "working", name: working.text,