lib/bolognese/readers/schema_org_reader.rb in bolognese-1.0.33 vs lib/bolognese/readers/schema_org_reader.rb in bolognese-1.0.34
- old
+ new
@@ -14,11 +14,11 @@
def get_schema_org(id: nil, **options)
return { "string" => nil, "state" => "not_found" } unless id.present?
id = normalize_id(id)
- response = Maremma.get(id)
+ response = Maremma.get(id, host: true)
doc = Nokogiri::XML(response.body.fetch("data", nil), nil, 'UTF-8')
# workaround for xhtml documents
nodeset = doc.css("script")
string = nodeset.find { |element| element["type"] == "application/ld+json" }
@@ -66,11 +66,11 @@
publisher = parse_attributes(meta.fetch("publisher", nil), content: "name", first: true)
ct = (schema_org == "Dataset") ? "includedInDataCatalog" : "Periodical"
container = if meta.fetch(ct, nil).present?
url = parse_attributes(from_schema_org(meta.fetch(ct, nil)), content: "url", first: true)
-
+
{
"type" => (schema_org == "Dataset") ? "DataRepository" : "Periodical",
"title" => parse_attributes(from_schema_org(meta.fetch(ct, nil)), content: "name", first: true),
"identifier" => url,
"identifierType" => url.present? ? "URL" : nil,
@@ -112,10 +112,10 @@
dates = []
dates << { "date" => meta.fetch("datePublished"), "dateType" => "Issued" } if meta.fetch("datePublished", nil).present?
dates << { "date" => meta.fetch("dateCreated"), "dateType" => "Created" } if meta.fetch("dateCreated", nil).present?
dates << { "date" => meta.fetch("dateModified"), "dateType" => "Updated" } if meta.fetch("dateModified", nil).present?
publication_year = meta.fetch("datePublished")[0..3] if meta.fetch("datePublished", nil).present?
-
+
state = meta.present? || read_options.present? ? "findable" : "not_found"
geo_locations = Array.wrap(meta.fetch("spatialCoverage", nil)).map do |gl|
if gl.dig("geo", "box")
s, w, n, e = gl.dig("geo", "box").split(" ", 4)
geo_location_box = {