# encoding: UTF-8 $:.unshift "." require 'spec_helper' describe RDF::Tabular::Metadata do let(:logger) {RDF::Spec.logger} before(:each) do logger.formatter = lambda {|severity, datetime, progname, msg| "#{severity}: #{msg}\n"} WebMock.stub_request(:any, %r(.*example.org.*)). to_return(lambda {|request| file = request.uri.to_s.split('/').last content_type = case file when /\.json/ then 'application/json' when /\.csv/ then 'text/csv' else 'text/plain' end case file when "csv-metadata.json", "country-codes-and-names.csv-metadata.json" {status: 401} else { body: File.read(File.expand_path("../data/#{file}", __FILE__)), status: 200, headers: {'Content-Type' => content_type} } end }) end shared_examples "inherited properties" do |allowed = true| { aboutUrl: { valid: ["http://example.org/example.csv#row={_row}", "http://example.org/tree/{on%2Dstreet}/{GID}", "#row.{_row}"], invalid: [1, true, nil, %w(foo bar)] }, datatype: { valid: (%w(anyAtomicType string token language Name NCName boolean gYear number binary datetime any xml html json) + [{"base" => "string"}] ), invalid: [1, true, "http://example.org/", {"base" => "foo"}, {"base" => "anyType"}, {"base" => "anySimpleType"}, {"base" => "IDREFS"}, ], errors: [{"@id" => "_:foo"}, {"@id" => "http://www.w3.org/2001/XMLSchema#string"}, ] }, default: { valid: ["foo"], invalid: [1, %w(foo bar), true, nil] }, lang: { valid: %w(en en-US), invalid: %w(1 foo) }, null: { valid: ["foo", %w(foo bar)], invalid: [1, true, {}] }, ordered: { valid: [true, false], invalid: [nil, "foo", 1, 0, "true", "false", "TrUe", "fAlSe", "1", "0"], }, propertyUrl: { valid: [ "http://example.org/example.csv#col={_name}", "http://example.org/tree/{on%2Dstreet}/{GID}", "#row.{_row}" ], invalid: [1, true, %w(foo bar)] }, required: { valid: [true, false], invalid: [nil, "foo", 1, 0, "true", "false", "TrUe", "fAlSe", "1", "0"], }, separator: { valid: %w(, a | : foo ::) + [nil], invalid: [1, false] }, "textDirection" => { valid: %w(rtl ltr), invalid: %w(foo default) }, valueUrl: { valid: [ "http://example.org/example.csv#row={_row}", "http://example.org/tree/{on%2Dstreet}/{GID}", "#row.{_row}" ], invalid: [1, true, nil, %w(foo bar)] }, }.each do |prop, params| context prop.to_s do if allowed it "validates" do params.fetch(:valid, {}).each do |v| logger.clear subject.send("#{prop}=".to_sym, v) expect(subject).to be_valid # Causes re-validation expect(logger.to_s).not_to match(/ERROR|WARN/) end end it "invalidates" do params.fetch(:invalid, {}).each do |v| logger.clear subject.send("#{prop}=".to_sym, v) expect(subject).to be_valid # Causes re-validation expect(logger.to_s).not_to include "ERROR" end end it "errors" do params.fetch(:error, {}).each do |v| logger.clear subject.send("#{prop}=".to_sym, v) expect(subject).not_to be_valid # Causes re-validation expect(logger.to_s).to include "ERROR" end end else it "does not allow" do params[:valid].each do |v| logger.clear subject.send("#{prop}=".to_sym, v) expect(subject).to be_valid # Causes re-validation expect(logger.to_s).not_to include "ERROR" expect(logger.to_s).to include "WARN" end end end end end end shared_examples "common properties" do |allowed = true| let(:valid) {%w(dc:description dcat:keyword http://schema.org/copyrightHolder)} let(:invalid) {%w(foo bar:baz)} if allowed context "valid JSON-LD" do it "allows defined prefixed names and absolute URIs" do valid.each do |v| logger.clear subject[v.to_sym] = "foo" expect(subject).to be_valid # Causes re-validation expect(logger.to_s).not_to match(/ERROR|WARN/) end end { "value object" => %({"@value": "foo"}), "value with type" => %({"@value": "1", "@type": "xsd:integer"}), "value with language" => %({"@value": "foo", "@language": "en"}), "node" => %({"@id": "http://example/foo"}), "node with pname type" => %({"@type": "foaf:Person"}), "node with URL type" => %({"@type": "http://example/Person"}), "node with array type" => %({"@type": ["schema:Person", "foaf:Person"]}), "node with term type" => %({"@type": "Table"}), "node with term property" => %({"csvw:name": "foo"}), "boolean value" => true, "integer value" => 1, "double value" => 1.1, }.each do |name, value| specify(name) { subject["dc:object"] = value.is_a?(String) ? ::JSON.parse(value) : value expect(subject).to be_valid # Causes re-validation expect(logger.to_s).not_to match(/ERROR|WARN/) } end end context "invalid JSON-LD" do it "Does not allow unknown prefxies or unprefixed names" do invalid.each do |v| logger.clear subject[v.to_sym] = "foo" expect(subject).to be_valid # Causes re-validation expect(logger.to_s).not_to include "ERROR" expect(logger.to_s).to include "WARN" end end { "value with type and language" => %({"@value": "foo", "@type": "xsd:token", "@language": "en"}), "@id and @value" => %({"@id": "http://example/", "@value": "foo"}), "value with BNode @id" => %({"@id": "_:foo"}), "value with BNode @type" => %({"@type": "_:foo"}), "value with BNode property" => %({"_:foo": "bar"}), "value with @context" => %({"@context": {}, "@id": "http://example/"}), "value with @graph" => %({"@graph": {}}), }.each do |name, value| specify(name) { subject["dc:object"] = ::JSON.parse(value) expect(subject).not_to be_valid expect(logger.to_s).to include "ERROR" } end end else it "Does not allow defined prefixed names and absolute URIs" do (valid + invalid).each do |v| subject[v.to_sym] = "foo" expect(subject).to be_valid # Causes re-validation expect(logger.to_s).not_to include "ERROR" expect(logger.to_s).to include "WARN" end end end end describe RDF::Tabular::Column do subject {described_class.new({"name" => "foo"}, context: "http://www.w3.org/ns/csvw", base: RDF::URI("http://example.org/base"), logger: logger)} specify {is_expected.to be_valid} it_behaves_like("inherited properties") it_behaves_like("common properties") it "allows valid name" do %w( name abc.123 _col.1 ).each {|v| expect(described_class.new({"name" => v}, logger: logger)).to be_valid} expect(logger.to_s).not_to match(/ERROR|WARN/) end it "detects invalid names" do [1, true, nil, "_foo", "_col=1"].each do |v| md = described_class.new({"name" => v}, logger: logger) expect(md).to be_valid expect(logger.to_s).not_to include "ERROR" expect(logger.to_s).to include "WARN" end end it "allows absence of name" do expect(described_class.new({"@type" => "Column"}, logger: logger)).to be_valid expect(described_class.new({"@type" => "Column"}, logger: logger).name).to eql '_col.0' expect(logger.to_s).not_to match(/ERROR|WARN/) end its(:type) {is_expected.to eql :Column} { titles: { valid: ["foo", %w(foo bar), {"en" => "foo", "de" => "bar"}], warning: [1, true, nil] }, suppressOutput: { valid: [true, false], warning: [nil, "foo", 1, 0, "true", "false", "TrUe", "fAlSe", "1", "0"], }, virtual: { valid: [true, false], warning: [nil, 1, 0, "true", "false", "TrUe", "fAlSe", "1", "0", "foo"], }, }.each do |prop, params| context prop.to_s do it "validates" do params[:valid].each do |v| subject.send("#{prop}=".to_sym, v) expect(subject).to be_valid expect(logger.to_s).not_to include "ERROR" expect(logger.to_s).not_to include "WARN" end end it "invalidates" do params[:invalid].each do |v| subject.send("#{prop}=".to_sym, v) expect(subject).not_to be_valid expect(logger.to_s).to include "ERROR" end end if params[:invalid] it "warnings" do params[:warning].each do |v| subject.send("#{prop}=".to_sym, v) expect(subject).to be_valid expect(logger.to_s).not_to include "ERROR" expect(logger.to_s).to include "WARN" end end if params[:warning] end end context "titles" do { string: ["foo", {"und" => ["foo"]}], }.each do |name, (input, output)| it name do subject.titles = input expect(subject.normalize!.titles).to produce(output) expect(logger.to_s).not_to match(/ERROR|WARN/) end end end end describe RDF::Tabular::Schema do subject {described_class.new({}, context: "http://www.w3.org/ns/csvw", base: RDF::URI("http://example.org/base"), logger: logger)} specify {is_expected.to be_valid} it_behaves_like("inherited properties") it_behaves_like("common properties") its(:type) {is_expected.to eql :Schema} describe "columns" do let(:column) {{"name" => "foo"}} subject {described_class.new({"columns" => []}, base: RDF::URI("http://example.org/base"), logger: logger)} it {is_expected.to be_valid} its(:type) {is_expected.to eql :Schema} it "allows a valid column" do v = described_class.new({"columns" => [column]}, base: RDF::URI("http://example.org/base"), logger: logger) expect(v).to be_valid expect(logger.to_s).not_to include "ERROR" end it "is invalid with an invalid column" do v = described_class.new({"columns" => [{"name" => "_invalid"}]}, base: RDF::URI("http://example.org/base"), logger: logger) expect(v).to be_valid expect(logger.to_s).not_to include "ERROR" expect(logger.to_s).to include "WARN" end it "is invalid with an non-unique columns" do v = described_class.new({"columns" => [column, column]}, base: RDF::URI("http://example.org/base"), logger: logger) expect(v).not_to be_valid expect(logger.to_s).to include "ERROR" end end describe "primaryKey" do let(:column) {{"name" => "foo"}} let(:column2) {{"name" => "bar"}} subject {described_class.new({"columns" => [column], "primaryKey" => column["name"]}, base: RDF::URI("http://example.org/base"), logger: logger)} specify {is_expected.to be_valid} its(:type) {is_expected.to eql :Schema} it "is valid if referenced column does not exist" do subject[:columns] = [] expect(subject).to be_valid expect(logger.to_s).not_to include "ERROR" expect(logger.to_s).to include "WARN" end it "is valid with multiple names" do v = described_class.new({ "columns" => [column, column2], "primaryKey" => [column["name"], column2["name"]]}, base: RDF::URI("http://example.org/base"), logger: logger) expect(v).to be_valid expect(logger.to_s).not_to match(/ERROR|WARN/) end it "is valid with multiple names if any column missing" do v = described_class.new({ "columns" => [column], "primaryKey" => [column["name"], column2["name"]]}, base: RDF::URI("http://example.org/base"), logger: logger) expect(v).to be_valid expect(logger.to_s).to include "WARN" end end describe "foreignKeys" do subject { RDF::Tabular::TableGroup.new({ "@context" => 'http://www.w3.org/ns/csvw', tables: [{ url: "a", tableSchema: { "@id" => "a_s", columns: [{name: "a1"}, {name: "a2"}], foreignKeys: [] } }, { url: "b", tableSchema: { "@id" => "b_s", columns: [{name: "b1"}, {name: "b2"}], foreignKeys: [] } }]}, base: RDF::URI("http://example.org/base"), logger: logger ) } context "valid" do { "references single column with resource" => { "columnReference" => "a1", "reference" => { "resource" => "b", "columnReference" => "b1" } }, "references multiple columns with resource" => { "columnReference" => ["a1", "a2"], "reference" => { "resource" => "b", "columnReference" => ["b1", "b2"] } }, "references single column with schemaReference" => { "columnReference" => "a1", "reference" => { "schemaReference" => "b_s", "columnReference" => "b1" } } }.each do |name, fk| it name do subject.tables.first.tableSchema.foreignKeys << fk subject.normalize! expect(subject).to be_valid expect(logger.to_s).not_to match(/ERROR|WARN/) end end end context "invalid" do { "missing source column" => { "columnReference" => "not_here", "reference" => { "resource" => "b", "columnReference" => "b1" } }, "one missing source column" => { "columnReference" => ["a1", "not_here"], "reference" => { "resource" => "b", "columnReference" => ["b1", "b2"] } }, "missing destination column" => { "columnReference" => "a1", "reference" => { "resource" => "b", "columnReference" => "not_there" } }, "missing resource" => { "columnReference" => "a1", "reference" => { "resource" => "not_here", "columnReference" => "b1" } }, "missing tableSchema" => { "columnReference" => "a1", "reference" => { "schemaReference" => "not_here", "columnReference" => "b1" } }, "both resource and tableSchema" => { "columnReference" => "a1", "reference" => { "resource" => "b", "schemaReference" => "b_s", "columnReference" => "b1" } }, }.each do |name, fk| it name do subject.tables.first.tableSchema.foreignKeys << fk subject.normalize! subject.inspect expect(subject).not_to be_valid expect(logger.to_s).to include "ERROR" end end end end end describe RDF::Tabular::Transformation do let(:targetFormat) {"http://example.org/targetFormat"} let(:scriptFormat) {"http://example.org/scriptFormat"} subject { described_class.new({ "url" => "http://example/", "targetFormat" => targetFormat, "scriptFormat" => scriptFormat}, context: "http://www.w3.org/ns/csvw", base: RDF::URI("http://example.org/base"), logger: logger) } specify {is_expected.to be_valid} it_behaves_like("inherited properties", false) it_behaves_like("common properties") its(:type) {is_expected.to eql :Template} { source: { valid: %w(json rdf) + [nil], warning: [1, true, {}] }, }.each do |prop, params| context prop.to_s do it "validates" do params[:valid].each do |v| subject.send("#{prop}=".to_sym, v) expect(subject).to be_valid end expect(logger.to_s).not_to match(/ERROR|WARN/) end it "warnings" do params[:warning].each do |v| subject.send("#{prop}=".to_sym, v) expect(subject).to be_valid expect(logger.to_s).to include "WARN" end end end end context "titles" do { string: ["foo", {"und" => ["foo"]}], }.each do |name, (input, output)| it name do subject.titles = input expect(subject.normalize!.titles).to produce(output) expect(logger.to_s).not_to match(/ERROR|WARN/) end end end end describe RDF::Tabular::Dialect do subject {described_class.new({}, context: "http://www.w3.org/ns/csvw", base: RDF::URI("http://example.org/base"), logger: logger)} specify {is_expected.to be_valid} it_behaves_like("inherited properties", false) it_behaves_like("common properties", false) its(:type) {is_expected.to eql :Dialect} described_class.const_get(:DEFAULTS).each do |p, v| context "#{p}" do it "retrieves #{v.inspect} by default" do expect(logger.to_s).not_to match(/ERROR|WARN/) expect(subject.send(p)).to eql v end it "retrieves set value" do subject[p] = "foo" expect(subject.send(p)).to eql "foo" expect(logger.to_s).not_to match(/ERROR|WARN/) end end end describe "#embedded_metadata" do { "with defaults" => { input: "https://example.org/countries.csv", result: %({ "@context": "http://www.w3.org/ns/csvw", "@type": "Table", "url": "https://example.org/countries.csv", "tableSchema": { "@type": "Schema", "columns": [ {"titles": {"und": ["countryCode"]}}, {"titles": {"und": ["latitude"]}}, {"titles": {"und": ["longitude"]}}, {"titles": {"und": ["name"]}} ] } }) }, "with skipRows" => { input: "https://example.org/countries.csv", dialect: {skipRows: 1}, result: %({ "@context": "http://www.w3.org/ns/csvw", "@type": "Table", "url": "https://example.org/countries.csv", "tableSchema": { "@type": "Schema", "columns": [ {"titles": {"und": ["AD"]}}, {"titles": {"und": ["42.546245"]}}, {"titles": {"und": ["1.601554"]}}, {"titles": {"und": ["Andorra"]}} ] }, "rdfs:comment": ["countryCode,latitude,longitude,name"] }) }, "delimiter" => { input: "https://example.org/tree-ops.tsv", dialect: {delimiter: "\t"}, result: %({ "@context": "http://www.w3.org/ns/csvw", "@type": "Table", "url": "https://example.org/tree-ops.tsv", "tableSchema": { "@type": "Schema", "columns": [ {"titles": {"und": ["GID"]}}, {"titles": {"und": ["On Street"]}}, {"titles": {"und": ["Species"]}}, {"titles": {"und": ["Trim Cycle"]}}, {"titles": {"und": ["Inventory Date"]}} ] } }) }, }.each do |name, props| it name do dialect = if props[:dialect] described_class.new(props[:dialect], logger: logger) else subject end result = dialect.embedded_metadata(props[:input], nil, base: RDF::URI("http://example.org/base")) expect(::JSON.parse(result.to_json(JSON_STATE))).to produce(::JSON.parse(props[:result]), logger) expect(logger.to_s).not_to match(/ERROR|WARN/) end end end end describe RDF::Tabular::Table do subject {described_class.new({"url" => "http://example.org/table.csv"}, context: "http://www.w3.org/ns/csvw", base: RDF::URI("http://example.org/base"), logger: logger)} specify {is_expected.to be_valid} it_behaves_like("inherited properties") it_behaves_like("common properties") its(:type) {is_expected.to eql :Table} describe "#to_table_group" do it "should be tested" end { tableSchema: { valid: [RDF::Tabular::Schema.new({})], warning: [1, true, nil] }, notes: { valid: [{}, [{}]], invalid: [1, true, nil] }, tableDirection: { valid: %w(rtl ltr auto), warning: %w(foo true 1) }, transformations: { valid: [[RDF::Tabular::Transformation.new({url: "http://example", targetFormat: "http://example", scriptFormat: "http://example/"}, context: "http://www.w3.org/ns/csvw", base: RDF::URI("http://example.org/base"))]], warning: [RDF::Tabular::Transformation.new({url: "http://example", targetFormat: "http://example", scriptFormat: "http://example/"}, context: "http://www.w3.org/ns/csvw", base: RDF::URI("http://example.org/base"))] + %w(foo true 1) }, dialect: { valid: [{skipRows: 1}], warning: [1] }, suppressOutput: { valid: [true, false], warning: [nil, "foo", 1, 0, "true", "false", "TrUe", "fAlSe", "1", "0"], }, }.each do |prop, params| context prop.to_s do it "validates" do params[:valid].each do |v| subject.send("#{prop}=".to_sym, v) expect(subject).to be_valid expect(logger.to_s).not_to match(/ERROR|WARN/) end end it "invalidates" do params[:invalid].each do |v| subject.send("#{prop}=".to_sym, v) expect(subject).not_to be_valid expect(logger.to_s).to include("ERROR") end end if params[:invalid] it "warnings" do params[:warning].each do |v| subject.send("#{prop}=".to_sym, v) expect(subject).to be_valid expect(logger.to_s).to include("WARN") end end if params[:warning] end end end describe RDF::Tabular::TableGroup do let(:table) {{"url" => "http://example.org/table.csv"}} subject {described_class.new({"tables" => [table]}, context: "http://www.w3.org/ns/csvw", base: RDF::URI("http://example.org/base"), logger: logger)} specify {is_expected.to be_valid} it_behaves_like("inherited properties") it_behaves_like("common properties") its(:type) {is_expected.to eql :TableGroup} { tableSchema: { valid: [RDF::Tabular::Schema.new({})], warning: [1, true, nil] }, tableDirection: { valid: %w(rtl ltr auto), warning: %w(foo true 1) }, dialect: { valid: [{skipRows: 1}], warning: [1] }, transformations: { valid: [[RDF::Tabular::Transformation.new({url: "http://example", targetFormat: "http://example", scriptFormat: "http://example/"}, context: "http://www.w3.org/ns/csvw", base: RDF::URI("http://example.org/base"))]], warning: [RDF::Tabular::Transformation.new({url: "http://example", targetFormat: "http://example", scriptFormat: "http://example/"}, context: "http://www.w3.org/ns/csvw", base: RDF::URI("http://example.org/base"))] + %w(foo true 1) }, notes: { valid: [{}, [{}]], invalid: [1, true, nil] }, }.each do |prop, params| context prop.to_s do it "validates" do params[:valid].each do |v| subject.send("#{prop}=".to_sym, v) expect(subject).to be_valid expect(logger.to_s).not_to match(/ERROR|WARN/) end end it "invalidates" do params[:invalid].each do |v| subject.send("#{prop}=".to_sym, v) expect(subject).not_to be_valid expect(logger.to_s).to include("ERROR") end end if params[:invalid] it "warnings" do params[:warning].each do |v| subject.send("#{prop}=".to_sym, v) expect(subject).to be_valid expect(logger.to_s).to include("WARN") end end if params[:warning] end end end context "parses example metadata" do Dir.glob(File.expand_path("../data/*.json", __FILE__)).each do |filename| next if filename =~ /-(atd|standard|minimal|roles).json/ context filename do subject {RDF::Tabular::Metadata.open(filename, logger: logger)} it {is_expected.to be_valid} its(:filenames) {is_expected.to include("file:#{filename}")} end after(:each) do expect(logger.to_s).not_to match(/ERROR|WARN/) end end end context "parses invalid metadata" do Dir.glob(File.expand_path("../invalid_data/*.json", __FILE__)).each do |filename| context filename do subject {RDF::Tabular::Metadata.open(filename, logger: logger)} File.foreach(filename.sub(".json", "-errors.txt")) do |err| it {is_expected.not_to be_valid} end after(:each) do expect(logger.to_s).not_to include("ERROR") end end end end context "object properties" do let(:table) {{"url" => "http://example.org/table.csv", "@type" => "Table"}} it "loads referenced schema" do table[:tableSchema] = "http://example.org/schema" expect(described_class).to receive(:open).with(table[:tableSchema], kind_of(Hash)).and_return(RDF::Tabular::Schema.new({"@type" => "Schema"}, base: RDF::URI("http://example.org/base"))) allow_any_instance_of(described_class).to receive(:normalize!).and_return(true) described_class.new(table, base: RDF::URI("http://example.org/base"), logger: logger) end it "loads referenced dialect" do table[:dialect] = "http://example.org/dialect" expect(described_class).to receive(:open).with(table[:dialect], kind_of(Hash)).and_return(RDF::Tabular::Dialect.new({})) allow_any_instance_of(described_class).to receive(:normalize!).and_return(true) described_class.new(table, base: RDF::URI("http://example.org/base"), logger: logger) end end context "inherited properties" do let(:table) {{"url" => "http://example.org/table.csv", "tableSchema" => {"@type" => "Schema"}, "@type" => "Table"}} subject {described_class.new(table, base: RDF::URI("http://example.org/base"), logger: logger)} it "inherits properties from parent" do subject.lang = "en" expect(subject.tableSchema.lang).to eql "en" end it "overrides properties in parent" do subject.lang = "en" subject.tableSchema.lang = "de" expect(subject.tableSchema.lang).to eql "de" end end describe ".open" do context "validates example metadata" do Dir.glob(File.expand_path("../data/*.json", __FILE__)).each do |filename| next if filename =~ /-(atd|standard|minimal|roles).json/ context filename do subject {RDF::Tabular::Metadata.open(filename, logger: logger)} it {is_expected.to be_valid} it do subject.validate expect(logger.to_s).to be_empty end its(:filenames) {is_expected.to include("file:#{filename}")} end end end end describe ".from_input" do it "FIXME" end describe ".new" do context "intuits subclass" do { ":type TableGroup" => [{}, {type: :TableGroup}, RDF::Tabular::TableGroup], ":type Table" => [{}, {type: :Table}, RDF::Tabular::Table], ":type Template" => [{}, {type: :Template}, RDF::Tabular::Transformation], ":type Schema" => [{}, {type: :Schema}, RDF::Tabular::Schema], ":type Column" => [{}, {type: :Column}, RDF::Tabular::Column], ":type Dialect" => [{}, {type: :Dialect}, RDF::Tabular::Dialect], "tables TableGroup" => [{"tables" => []}, RDF::Tabular::TableGroup], "dialect Table" => [{"dialect" => {}}, RDF::Tabular::Table], "tableSchema Table" => [{"tableSchema" => {}}, RDF::Tabular::Table], "transformations Table" => [{"transformations" => []}, RDF::Tabular::Table], "targetFormat Transformation" => [{"targetFormat" => "http://foo"}, RDF::Tabular::Transformation], "scriptFormat Transformation" => [{"scriptFormat" => "http://foo"}, RDF::Tabular::Transformation], "source Transformation" => [{"source" => "json"}, RDF::Tabular::Transformation], "columns Schema" => [{"columns" => []}, RDF::Tabular::Schema], "primaryKey Schema" => [{"primaryKey" => "foo"}, RDF::Tabular::Schema], "foreignKeys Schema" => [{"foreignKeys" => []}, RDF::Tabular::Schema], "commentPrefix Dialect" => [{"commentPrefix" => "#"}, RDF::Tabular::Dialect], "delimiter Dialect" => [{"delimiter" => ","}, RDF::Tabular::Dialect], "doubleQuote Dialect" => [{"doubleQuote" => true}, RDF::Tabular::Dialect], "encoding Dialect" => [{"encoding" => "utf-8"}, RDF::Tabular::Dialect], "header Dialect" => [{"header" => true}, RDF::Tabular::Dialect], "headerRowCount Dialect" => [{"headerRowCount" => 1}, RDF::Tabular::Dialect], "lineTerminators Dialect" => [{"lineTerminators" => "\r\n"}, RDF::Tabular::Dialect], "quoteChar Dialect" => [{"quoteChar" => "\""}, RDF::Tabular::Dialect], "skipBlankRows Dialect" => [{"skipBlankRows" => true}, RDF::Tabular::Dialect], "skipColumns Dialect" => [{"skipColumns" => 0}, RDF::Tabular::Dialect], "skipInitialSpace Dialect" => [{"skipInitialSpace" => true}, RDF::Tabular::Dialect], "skipRows Dialect" => [{"skipRows" => 1}, RDF::Tabular::Dialect], "trim Dialect" => [{"trim" => true}, RDF::Tabular::Dialect], }.each do |name, args| it name do klass = args.pop input, options = args options ||= {} options[:logger] = logger options[:context] ||= 'http://www.w3.org/ns/csvw' expect(described_class.new(input, **options)).to be_a(klass) expect(logger.to_s).not_to match(/ERROR|WARN/) end end end end describe "#each_row" do subject { described_class.new(JSON.parse(%({ "url": "https://example.org/countries.csv", "@type": "Table", "tableSchema": { "@type": "Schema", "columns": [{ "name": "countryCode", "titles": "countryCode", "propertyUrl": "https://example.org/countries.csv#countryCode" }, { "name": "latitude", "titles": "latitude", "propertyUrl": "https://example.org/countries.csv#latitude" }, { "name": "longitude", "titles": "longitude", "propertyUrl": "https://example.org/countries.csv#longitude" }, { "name": "name", "titles": "name", "propertyUrl": "https://example.org/countries.csv#name" }] } })), base: RDF::URI("http://example.org/base"), logger: logger) } let(:input) {RDF::Util::File.open_file("https://example.org/countries.csv")} specify {expect {|b| subject.each_row(input, &b)}.to yield_control.exactly(3)} it "returns consecutive row numbers" do nums = subject.to_enum(:each_row, input).map(&:number) expect(nums).to eql([1, 2, 3]) end it "returns cells" do subject.each_row(input) do |row| expect(row).to be_a(RDF::Tabular::Row) expect(row.values.length).to eql 4 expect(row.values.map(&:class).compact).to include(RDF::Tabular::Row::Cell) end end it "has nil aboutUrls" do subject.each_row(input) do |row| expect(row.values[0].aboutUrl).to be_nil expect(row.values[1].aboutUrl).to be_nil expect(row.values[2].aboutUrl).to be_nil expect(row.values[3].aboutUrl).to be_nil end end it "has expected propertyUrls" do subject.each_row(input) do |row| expect(row.values[0].propertyUrl).to eq "https://example.org/countries.csv#countryCode" expect(row.values[1].propertyUrl).to eq "https://example.org/countries.csv#latitude" expect(row.values[2].propertyUrl).to eq "https://example.org/countries.csv#longitude" expect(row.values[3].propertyUrl).to eq "https://example.org/countries.csv#name" end end it "has expected valueUrls" do subject.each_row(input) do |row| expect(row.values[0].valueUrl).to be_nil expect(row.values[1].valueUrl).to be_nil expect(row.values[2].valueUrl).to be_nil expect(row.values[3].valueUrl).to be_nil end end it "has expected values" do rows = subject.to_enum(:each_row, input).to_a expect(rows[0].values.map(&:to_s)).to produce(%w(AD 42.546245 1.601554 Andorra), logger) expect(rows[1].values.map(&:to_s)).to produce((%w(AE 23.424076 53.847818) << "United Arab Emirates"), logger) expect(rows[2].values.map(&:to_s)).to produce(%w(AF 33.93911 67.709953 Afghanistan), logger) end context "URL expansion" do subject { JSON.parse(%({ "url": "https://example.org/countries.csv", "tableSchema": { "columns": [ {"titles": "addressCountry"}, {"titles": "latitude"}, {"titles": "longitude"}, {"titles": "name"} ] } })) } let(:input) {RDF::Util::File.open_file("https://example.org/countries.csv")} { "default titles" => { aboutUrl: [RDF::Node, RDF::Node, RDF::Node, RDF::Node], propertyUrl: [nil, nil, nil, nil], valueUrl: [nil, nil, nil, nil], md: {} }, "schema transformations" => { aboutUrl: %w(#addressCountry #latitude #longitude #name), propertyUrl: %w(?_name=addressCountry ?_name=latitude ?_name=longitude ?_name=name), valueUrl: %w(addressCountry latitude longitude name), md: { "aboutUrl" => "{#_name}", "propertyUrl" => '{?_name}', "valueUrl" => '{_name}' } }, "PNames" => { aboutUrl: [RDF::Vocab::SCHEMA.addressCountry, RDF::Vocab::SCHEMA.latitude, RDF::Vocab::SCHEMA.longitude, RDF::Vocab::SCHEMA.name], propertyUrl: [RDF::Vocab::SCHEMA.addressCountry, RDF::Vocab::SCHEMA.latitude, RDF::Vocab::SCHEMA.longitude, RDF::Vocab::SCHEMA.name], valueUrl: [RDF::Vocab::SCHEMA.addressCountry, RDF::Vocab::SCHEMA.latitude, RDF::Vocab::SCHEMA.longitude, RDF::Vocab::SCHEMA.name], md: { "aboutUrl" => "http://schema.org/{_name}", "propertyUrl" => 'schema:{_name}', "valueUrl" => 'schema:{_name}' } }, }.each do |name, props| context name do let(:md) {RDF::Tabular::Table.new(subject.merge(props[:md]), base: RDF::URI("http://example.org/base"), logger: logger).normalize!} let(:cells) {md.to_enum(:each_row, input).to_a.first.values} let(:aboutUrls) {props[:aboutUrl].map {|u| u.is_a?(String) ? md.url.join(u) : u}} let(:propertyUrls) {props[:propertyUrl].map {|u| u.is_a?(String) ? md.url.join(u) : u}} let(:valueUrls) {props[:valueUrl].map {|u| u.is_a?(String) ? md.url.join(u) : u}} it "aboutUrl is #{props[:aboutUrl]}" do if aboutUrls.first == RDF::Node expect(cells.map(&:aboutUrl)).to all(be_nil) else expect(cells.map(&:aboutUrl)).to include(*aboutUrls) end end it "propertyUrl is #{props[:propertyUrl]}" do expect(cells.map(&:propertyUrl)).to include(*propertyUrls) end it "valueUrl is #{props[:valueUrl]}" do expect(cells.map(&:valueUrl)).to include(*valueUrls) end end end end it "expands aboutUrl in cells" context "variations" do { "skipRows" => {dialect: {skipRows: 1}}, "headerRowCount" => {dialect: {headerRowCount: 0}}, "skipRows + headerRowCount" => {dialect: {skipRows: 1, headerRowCount: 0}}, "skipColumns" => {dialect: {skipColumns: 1}}, }.each do |name, props| context name do subject { raw = JSON.parse(%({ "url": "https://example.org/countries.csv", "@type": "Table", "tableSchema": { "@type": "Schema", "columns": [{ "name": "countryCode", "titles": "countryCode", "propertyUrl": "https://example.org/countries.csv#countryCode" }, { "name": "latitude", "titles": "latitude", "propertyUrl": "https://example.org/countries.csv#latitude" }, { "name": "longitude", "titles": "longitude", "propertyUrl": "https://example.org/countries.csv#longitude" }, { "name": "name", "titles": "name", "propertyUrl": "https://example.org/countries.csv#name" }] } })) raw["dialect"] = props[:dialect] described_class.new(raw, base: RDF::URI("http://example.org/base"), logger: logger) } let(:rows) {subject.to_enum(:each_row, input).to_a} let(:rowOffset) {props[:dialect].fetch(:skipRows, 0) + props[:dialect].fetch(:headerRowCount, 1)} let(:columnOffset) {props[:dialect].fetch(:skipColumns, 0)} it "has expected number attributes" do nums = [1, 2, 3, 4] nums = nums.first(nums.length - rowOffset) expect(rows.map(&:number)).to eql nums end it "has expected sourceNumber attributes" do nums = [1, 2, 3, 4].map {|n| n + rowOffset} nums = nums.first(nums.length - rowOffset) expect(rows.map(&:sourceNumber)).to eql nums end it "has expected column.number attributes" do nums = [1, 2, 3, 4] nums = nums.first(nums.length - columnOffset) expect(rows.first.values.map {|c| c.column.number}).to eql nums end it "has expected column.sourceNumber attributes" do nums = [1, 2, 3, 4].map {|n| n + columnOffset} nums = nums.first(nums.length - columnOffset) expect(rows.first.values.map {|c| c.column.sourceNumber}).to eql nums end end end context "virtual columns" do subject { described_class.new(JSON.parse(%({ "@context": "http://www.w3.org/ns/csvw", "url": "https://example.org/countries.csv", "aboutUrl": "https://example.org/countries", "@type": "Table", "tableSchema": { "@type": "Schema", "columns": [{ "name": "countryCode", "titles": "countryCode", "propertyUrl": "https://example.org/countries.csv#countryCode" }, { "name": "latitude", "titles": "latitude", "propertyUrl": "https://example.org/countries.csv#latitude" }, { "name": "longitude", "titles": "longitude", "propertyUrl": "https://example.org/countries.csv#longitude" }, { "name": "name", "titles": "name", "propertyUrl": "https://example.org/countries.csv#name" }, { "virtual": true, "propertyUrl": "https://example.org/countries.csv#virt1", "valueUrl": "https://example.org/countries.csv#virt1" }, { "virtual": true, "propertyUrl": "https://example.org/countries.csv#virt2", "default": "default", "datatype": "string" }] } })), base: RDF::URI("http://example.org/base"), logger: logger) } let(:input) {RDF::Util::File.open_file("https://example.org/countries.csv")} let(:rows) {subject.to_enum(:each_row, input).to_a} it "has expected aboutUrls" do subject.each_row(input) do |row| expect(row.values[0].aboutUrl).to eq "https://example.org/countries" expect(row.values[1].aboutUrl).to eq "https://example.org/countries" expect(row.values[2].aboutUrl).to eq "https://example.org/countries" expect(row.values[3].aboutUrl).to eq "https://example.org/countries" expect(row.values[4].aboutUrl).to eq "https://example.org/countries" expect(row.values[5].aboutUrl).to eq "https://example.org/countries" end end it "has expected propertyUrls" do subject.each_row(input) do |row| expect(row.values[0].propertyUrl).to eq "https://example.org/countries.csv#countryCode" expect(row.values[1].propertyUrl).to eq "https://example.org/countries.csv#latitude" expect(row.values[2].propertyUrl).to eq "https://example.org/countries.csv#longitude" expect(row.values[3].propertyUrl).to eq "https://example.org/countries.csv#name" expect(row.values[4].propertyUrl).to eq "https://example.org/countries.csv#virt1" expect(row.values[5].propertyUrl).to eq "https://example.org/countries.csv#virt2" end end it "has expected valueUrls" do subject.each_row(input) do |row| expect(row.values[0].valueUrl).to be_nil expect(row.values[1].valueUrl).to be_nil expect(row.values[2].valueUrl).to be_nil expect(row.values[3].valueUrl).to be_nil expect(row.values[4].valueUrl).to eq "https://example.org/countries.csv#virt1" expect(row.values[5].valueUrl).to be_nil end end it "has expected values" do rows = subject.to_enum(:each_row, input).to_a expect(rows[0].values.map(&:to_s)).to produce(%w(AD 42.546245 1.601554 Andorra).push("", "default"), logger) expect(rows[1].values.map(&:to_s)).to produce((%w(AE 23.424076 53.847818).push("United Arab Emirates", "", "default")), logger) expect(rows[2].values.map(&:to_s)).to produce(%w(AF 33.93911 67.709953 Afghanistan).push("", "default"), logger) end end end context "datatypes" do { # Strings "string with no constraints" => {base: "string", value: "foo", result: "foo"}, "string with matching length" => {base: "string", value: "foo", length: 3, result: "foo"}, "string matching null when required" => {base: "string", value: "NULL", null: "NULL", required: true}, "string with wrong length" => { base: "string", value: "foo", length: 4, errors: ["foo does not have length 4"] }, "string with wrong maxLength" => { base: "string", value: "foo", maxLength: 2, errors: ["foo does not have length <= 2"] }, "string with wrong minLength" => { base: "string", value: "foo", minLength: 4, errors: ["foo does not have length >= 4"] }, # Numbers "decimal with no constraints" => { base: "decimal", value: "4" }, "decimal with matching pattern" => { base: "decimal", format: {"pattern" => '000'}, value: "123" }, "decimal with explicit groupChar" => { base: "decimal", format: {"groupChar" => ";"}, value: "123;456.789", result: "123456.789" }, "decimal with repeated groupChar" => { base: "decimal", format: {"groupChar" => ";"}, value: "123;;456.789", result: "123;;456.789", errors: [/does not match numeric pattern/] }, "decimal with explicit decimalChar" => { base: "decimal", format: {"decimalChar" => ";"}, value: "123456;789", result: "123456.789" }, "invalid decimal" => { base: "decimal", value: "123456.789e10", result: "123456.789e10", errors: ["123456.789e10 is not a valid decimal"] }, "decimal with percent" => { base: "decimal", format: {"groupChar" => ","}, value: "123456.789%", result: "1234.56789" }, "decimal with per-mille" => { base: "decimal", format: {"groupChar" => ","}, value: "123456.789‰", result: "123.456789" }, "valid integer" => {base: "integer", value: "1234"}, "invalid integer" => {base: "integer", value: "1234.56", errors: ["1234.56 is not a valid integer"]}, "valid long" => {base: "long", value: "1234"}, "invalid long" => {base: "long", value: "1234.56", errors: ["1234.56 is not a valid long"]}, "valid short" => {base: "short", value: "1234"}, "invalid short" => {base: "short", value: "1234.56", errors: ["1234.56 is not a valid short"]}, "valid byte" => {base: "byte", value: "123"}, "invalid byte" => {base: "byte", value: "1234", errors: ["1234 is not a valid byte"]}, "valid unsignedLong" => {base: "unsignedLong", value: "1234"}, "invalid unsignedLong" => {base: "unsignedLong", value: "-1234", errors: ["-1234 is not a valid unsignedLong"]}, "valid unsignedShort" => {base: "unsignedShort", value: "1234"}, "invalid unsignedShort" => {base: "unsignedShort", value: "-1234", errors: ["-1234 is not a valid unsignedShort"]}, "valid unsignedByte" => {base: "unsignedByte", value: "123"}, "invalid unsignedByte" => {base: "unsignedByte", value: "-123", errors: ["-123 is not a valid unsignedByte"]}, "valid positiveInteger" => {base: "positiveInteger", value: "123"}, "invalid positiveInteger" => {base: "positiveInteger", value: "-123", errors: ["-123 is not a valid positiveInteger"]}, "valid negativeInteger" => {base: "negativeInteger", value: "-123"}, "invalid negativeInteger" => {base: "negativeInteger", value: "123", errors: ["123 is not a valid negativeInteger"]}, "valid nonPositiveInteger" => {base: "nonPositiveInteger", value: "0"}, "invalid nonPositiveInteger" => {base: "nonPositiveInteger", value: "1", errors: ["1 is not a valid nonPositiveInteger"]}, "valid nonNegativeInteger" => {base: "nonNegativeInteger", value: "0"}, "invalid nonNegativeInteger" => {base: "nonNegativeInteger", value: "-1", errors: ["-1 is not a valid nonNegativeInteger"]}, "valid double" => {base: "double", value: "1234.456e789"}, "invalid double" => {base: "double", value: "1z", errors: ["1z is not a valid double"]}, "NaN double" => {base: "double", value: "NaN", result: "NaN"}, "INF double" => {base: "double", value: "INF"}, "-INF double" => {base: "double", value: "-INF"}, "valid number" => {base: "number", value: "1234.456e789"}, "invalid number" => {base: "number", value: "1z", errors: ["1z is not a valid number"]}, "NaN number" => {base: "number", value: "NaN", result: "NaN"}, "INF number" => {base: "number", value: "INF"}, "-INF number" => {base: "number", value: "-INF"}, "valid float" => {base: "float", value: "1234.456e7"}, "invalid float" => {base: "float", value: "1z", errors: ["1z is not a valid float"]}, "NaN float" => {base: "float", value: "NaN", result: "NaN"}, "INF float" => {base: "float", value: "INF"}, "-INF float" => {base: "float", value: "-INF"}, # Booleans "valid boolean true" => {base: "boolean", value: "true"}, "valid boolean false" => {base: "boolean", value: "false"}, "valid boolean 1" => {base: "boolean", value: "1", result: "true"}, "valid boolean 0" => {base: "boolean", value: "0", result: "false"}, "valid boolean Y|N Y" => {base: "boolean", value: "Y", format: "Y|N", result: "true"}, "valid boolean Y|N N" => {base: "boolean", value: "N", format: "Y|N", result: "false"}, # Dates "valid date yyyy-MM-dd" => {base: "date", value: "2015-03-22", format: "yyyy-MM-dd", result: "2015-03-22"}, "valid date yyyyMMdd" => {base: "date", value: "20150322", format: "yyyyMMdd", result: "2015-03-22"}, "valid date dd-MM-yyyy" => {base: "date", value: "22-03-2015", format: "dd-MM-yyyy", result: "2015-03-22"}, "valid date d-M-yyyy" => {base: "date", value: "22-3-2015", format: "d-M-yyyy", result: "2015-03-22"}, "valid date MM-dd-yyyy" => {base: "date", value: "03-22-2015", format: "MM-dd-yyyy", result: "2015-03-22"}, "valid date M-d-yyyy" => {base: "date", value: "3-22-2015", format: "M-d-yyyy", result: "2015-03-22"}, "valid date dd/MM/yyyy" => {base: "date", value: "22/03/2015", format: "dd/MM/yyyy", result: "2015-03-22"}, "valid date d/M/yyyy" => {base: "date", value: "22/3/2015", format: "d/M/yyyy", result: "2015-03-22"}, "valid date MM/dd/yyyy" => {base: "date", value: "03/22/2015", format: "MM/dd/yyyy", result: "2015-03-22"}, "valid date M/d/yyyy" => {base: "date", value: "3/22/2015", format: "M/d/yyyy", result: "2015-03-22"}, "valid date dd.MM.yyyy" => {base: "date", value: "22.03.2015", format: "dd.MM.yyyy", result: "2015-03-22"}, "valid date d.M.yyyy" => {base: "date", value: "22.3.2015", format: "d.M.yyyy", result: "2015-03-22"}, "valid date MM.dd.yyyy" => {base: "date", value: "03.22.2015", format: "MM.dd.yyyy", result: "2015-03-22"}, "valid date M.d.yyyy" => {base: "date", value: "3.22.2015", format: "M.d.yyyy", result: "2015-03-22"}, # Times "valid time HH:mm:ss.S" => {base: "time", value: "15:02:37.1", format: "HH:mm:ss.S", result: "15:02:37.1"}, "valid time HH:mm:ss" => {base: "time", value: "15:02:37", format: "HH:mm:ss", result: "15:02:37"}, "valid time HHmmss" => {base: "time", value: "150237", format: "HHmmss", result: "15:02:37"}, "valid time HH:mm" => {base: "time", value: "15:02", format: "HH:mm", result: "15:02:00"}, "valid time HHmm" => {base: "time", value: "1502", format: "HHmm", result: "15:02:00"}, # DateTimes "valid dateTime yyyy-MM-ddTHH:mm:ss" => {base: "dateTime", value: "2015-03-15T15:02:37", format: "yyyy-MM-ddTHH:mm:ss", result: "2015-03-15T15:02:37"}, "valid dateTime yyyy-MM-ddTHH:mm:ss.S" => {base: "dateTime", value: "2015-03-15T15:02:37.1", format: "yyyy-MM-ddTHH:mm:ss.S", result: "2015-03-15T15:02:37.1"}, "valid dateTime yyyy-MM-dd HH:mm:ss" => {base: "dateTime", value: "2015-03-15 15:02:37", format: "yyyy-MM-dd HH:mm:ss", result: "2015-03-15T15:02:37"}, "valid dateTime yyyyMMdd HHmmss" => {base: "dateTime", value: "20150315 150237", format: "yyyyMMdd HHmmss", result: "2015-03-15T15:02:37"}, "valid dateTime dd-MM-yyyy HH:mm" => {base: "dateTime", value: "15-03-2015 15:02", format: "dd-MM-yyyy HH:mm", result: "2015-03-15T15:02:00"}, "valid dateTime d-M-yyyy HHmm" => {base: "dateTime", value: "15-3-2015 1502", format: "d-M-yyyy HHmm", result: "2015-03-15T15:02:00"}, "valid dateTime yyyy-MM-ddTHH:mm" => {base: "dateTime", value: "2015-03-15T15:02", format: "yyyy-MM-ddTHH:mm", result: "2015-03-15T15:02:00"}, "valid dateTimeStamp d-M-yyyy HHmm X" => {base: "dateTimeStamp", value: "15-3-2015 1502 Z", format: "d-M-yyyy HHmm X", result: "2015-03-15T15:02:00Z"}, "valid datetime yyyy-MM-ddTHH:mm:ss" => {base: "datetime", value: "2015-03-15T15:02:37", format: "yyyy-MM-ddTHH:mm:ss", result: "2015-03-15T15:02:37"}, "valid datetime yyyy-MM-dd HH:mm:ss" => {base: "datetime", value: "2015-03-15 15:02:37", format: "yyyy-MM-dd HH:mm:ss", result: "2015-03-15T15:02:37"}, "valid datetime yyyyMMdd HHmmss" => {base: "datetime", value: "20150315 150237", format: "yyyyMMdd HHmmss", result: "2015-03-15T15:02:37"}, "valid datetime dd-MM-yyyy HH:mm" => {base: "datetime", value: "15-03-2015 15:02", format: "dd-MM-yyyy HH:mm", result: "2015-03-15T15:02:00"}, "valid datetime d-M-yyyy HHmm" => {base: "datetime", value: "15-3-2015 1502", format: "d-M-yyyy HHmm", result: "2015-03-15T15:02:00"}, "valid datetime yyyy-MM-ddTHH:mm" => {base: "datetime", value: "2015-03-15T15:02", format: "yyyy-MM-ddTHH:mm", result: "2015-03-15T15:02:00"}, # Timezones "valid w/TZ yyyy-MM-ddX" => {base: "date", value: "2015-03-22Z", format: "yyyy-MM-ddX", result: "2015-03-22Z"}, "valid w/TZ HH:mm:ssX" => {base: "time", value: "15:02:37-05", format: "HH:mm:ssX", result: "15:02:37-05:00"}, "valid w/TZ yyyy-MM-dd HH:mm:ss X" => {base: "dateTimeStamp", value: "2015-03-15 15:02:37 +0800", format: "yyyy-MM-dd HH:mm:ss X", result: "2015-03-15T15:02:37+08:00"}, "valid w/TZ HHmm XX" => {base: "time", value: "1502 +0800", format: "HHmm XX", result: "15:02:00+08:00"}, "valid w/TZ yyyy-MM-dd HH:mm:ss XX" => {base: "dateTimeStamp", value: "2015-03-15 15:02:37 -0800", format: "yyyy-MM-dd HH:mm:ss XX", result: "2015-03-15T15:02:37-08:00"}, "valid w/TZ HHmm XXX" => {base: "time", value: "1502 +08:00", format: "HHmm XXX", result: "15:02:00+08:00"}, "valid w/TZ yyyy-MM-ddTHH:mm:ssXXX" => {base: "dateTime", value: "2015-03-15T15:02:37-05:00", format: "yyyy-MM-ddTHH:mm:ssXXX", result: "2015-03-15T15:02:37-05:00"}, "invalid w/TZ HH:mm:ssX" => {base: "time", value: "15:02:37-05:00", format: "HH:mm:ssX", errors: ["15:02:37-05:00 does not match format HH:mm:ssX"]}, "invalid w/TZ HH:mm:ssXX" => {base: "time", value: "15:02:37-05", format: "HH:mm:ssXX", errors: ["15:02:37-05 does not match format HH:mm:ssXX"]}, # Other date-like things "valid gDay" => {base: "gDay", value: "---31"}, "valid gMonth" => {base: "gMonth", value: "--02"}, "valid gMonthDay" => {base: "gMonthDay", value: "--02-21"}, "valid gYear" => {base: "gYear", value: "9999"}, "valid gYearMonth" => {base: "gYearMonth", value: "1999-05"}, # Durations "valid duration PT130S" => {base: "duration", value: "PT130S"}, "valid duration PT130M" => {base: "duration", value: "PT130M"}, "valid duration PT130H" => {base: "duration", value: "PT130H"}, "valid duration P130D" => {base: "duration", value: "P130D"}, "valid duration P130M" => {base: "duration", value: "P130M"}, "valid duration P130Y" => {base: "duration", value: "P130Y"}, "valid duration PT2M10S" => {base: "duration", value: "PT2M10S"}, "valid duration P0Y20M0D" => {base: "duration", value: "P0Y20M0D"}, "valid duration -P60D" => {base: "duration", value: "-P60D"}, "valid dayTimeDuration P1DT2H" => {base: "dayTimeDuration", value: "P1DT2H"}, "valid yearMonthDuration P0Y20M" => {base: "yearMonthDuration", value: "P0Y20M"}, # Other datatypes "valid anyAtomicType" => {base: "anyAtomicType", value: "some thing", result: RDF::Literal("some thing", datatype: RDF::XSD.anyAtomicType)}, "valid anyURI" => {base: "anyURI", value: "http://example.com/", result: RDF::Literal("http://example.com/", datatype: RDF::XSD.anyURI)}, "valid base64Binary" => {base: "base64Binary", value: "Tm93IGlzIHRoZSB0aW1lIGZvciBhbGwgZ29vZCBjb2RlcnMKdG8gbGVhcm4g", result: RDF::Literal("Tm93IGlzIHRoZSB0aW1lIGZvciBhbGwgZ29vZCBjb2RlcnMKdG8gbGVhcm4g", datatype: RDF::XSD.base64Binary)}, "base64Binary with matching length:" => { base: "base64Binary", value: "Tm93IGlzIHRoZSB0aW1lIGZvciBhbGwgZ29vZCBjb2RlcnMKdG8gbGVhcm4g", length: 45, result: RDF::Literal("Tm93IGlzIHRoZSB0aW1lIGZvciBhbGwgZ29vZCBjb2RlcnMKdG8gbGVhcm4g", datatype: RDF::XSD.base64Binary) }, "base64Binary with wrong maxLength:" => { base: "base64Binary", value: "Tm93IGlzIHRoZSB0aW1lIGZvciBhbGwgZ29vZCBjb2RlcnMKdG8gbGVhcm4g", maxLength: 1, errors: ["decoded Tm93IGlzIHRoZSB0aW1lIGZvciBhbGwgZ29vZCBjb2RlcnMKdG8gbGVhcm4g has length 45 not <= 1"] }, "base64Binary with wrong minLength" => { base: "base64Binary", value: "Tm93IGlzIHRoZSB0aW1lIGZvciBhbGwgZ29vZCBjb2RlcnMKdG8gbGVhcm4g", minLength: 50, errors: ["decoded Tm93IGlzIHRoZSB0aW1lIGZvciBhbGwgZ29vZCBjb2RlcnMKdG8gbGVhcm4g has length 45 not >= 50"] }, "valid hexBinary" => {base: "hexBinary", value: "0FB7", result: RDF::Literal("0FB7", datatype: RDF::XSD.hexBinary)}, "hexBinary with matching length:" => { base: "hexBinary", value: "0FB7", length: 2, result: RDF::Literal("0FB7", datatype: RDF::XSD.hexBinary) }, "hexBinary with wrong maxLength:" => { base: "hexBinary", value: "0FB7", maxLength: 1, errors: ["decoded 0FB7 has length 2 not <= 1"] }, "hexBinary with wrong minLength" => { base: "hexBinary", value: "0FB7", minLength: 4, errors: ["decoded 0FB7 has length 2 not >= 4"] }, "valid QName" => {base: "QName", value: "foo:bar", result: RDF::Literal("foo:bar", datatype: RDF::XSD.QName)}, "valid normalizedString" => {base: "normalizedString", value: "some thing", result: RDF::Literal("some thing", datatype: RDF::XSD.normalizedString)}, "valid token" => {base: "token", value: "some thing", result: RDF::Literal("some thing", datatype: RDF::XSD.token)}, "valid language" => {base: "language", value: "en", result: RDF::Literal("en", datatype: RDF::XSD.language)}, "valid Name" => {base: "Name", value: "someThing", result: RDF::Literal("someThing", datatype: RDF::XSD.Name)}, "valid NMTOKEN" => {base: "NMTOKEN", value: "someThing", result: RDF::Literal("someThing", datatype: RDF::XSD.NMTOKEN)}, # Aliases "number is alias for double" => {base: "number", value: "1234.456e789", result: RDF::Literal("1234.456e789", datatype: RDF::XSD.double)}, "binary is alias for base64Binary" => {base: "binary", value: "Tm93IGlzIHRoZSB0aW1lIGZvciBhbGwgZ29vZCBjb2RlcnMKdG8gbGVhcm4g", result: RDF::Literal("Tm93IGlzIHRoZSB0aW1lIGZvciBhbGwgZ29vZCBjb2RlcnMKdG8gbGVhcm4g", datatype: RDF::XSD.base64Binary)}, "datetime is alias for dateTime" => {base: "dateTime", value: "15-3-2015 1502", format: "d-M-yyyy HHmm", result: RDF::Literal("2015-03-15T15:02:00", datatype: RDF::XSD.dateTime)}, "any is alias for anyAtomicType" => {base: "any", value: "some thing", result: RDF::Literal("some thing", datatype: RDF::XSD.anyAtomicType)}, "xml is alias for rdf:XMLLiteral" => {base: "xml", value: "", result: RDF::Literal("", datatype: RDF.XMLLiteral)}, "html is alias for rdf:HTML" => {base: "html", value: "", result: RDF::Literal("", datatype: RDF.HTML)}, #"json is alias for csvw:JSON" => {base: "json", value: %({""foo"": ""bar""}), result: RDF::Literal(%({"foo": "bar"}), datatype: RDF::Tabular::CSVW.json)}, }.each do |name, props| context name do let(:value) {props[:value]} let(:result) { if props[:errors] RDF::Literal(props.fetch(:result, value)) else RDF::Literal(props.fetch(:result, value), datatype: md.context.expand_iri(props[:base], vocab: true)) end } let(:md) { RDF::Tabular::Table.new({ url: "http://example.com/table.csv", dialect: {header: false}, tableSchema: { columns: [{ name: "name", datatype: props.dup.delete_if {|k, v| [:value, :valid, :result].include?(k)} }] } }, logger: logger) } subject {md.to_enum(:each_row, "#{value}\n").to_a.first.values.first} if props[:errors] it {is_expected.not_to be_valid} it "has expected errors" do props[:errors].each do |e| expect(subject.errors.to_s).to match(e) end end else it {is_expected.to be_valid} it "has no errors" do expect(subject.errors).to be_empty end end specify {expect(subject.value).to eql result} end end context "Unsupported datatypes" do %w(anyType anySimpleType ENTITIES IDREFS NMTOKENS ENTITY ID IDREF NOTATAION foo).each do |base| it "detects #{base} as unsupported" do RDF::Tabular::Table.new({ url: "http://example.com/table.csv", tableSchema: { columns: [{ name: "name", datatype: base }] } }, logger: logger) expect(subject).to be_valid expect(logger.to_s).to include("WARN") end end end end end context "Number formats" do { '0' => {valid: %w(1 -1 +1 12), invalid: %w(1.2), base: "integer", re: /^(?[+-]?)(?\d{1,})(?)$/}, '00' => {valid: %w(12 123), invalid: %w(1 1,2), base: "integer", re: /^(?[+-]?)(?\d{2,})(?)$/}, '#' => {valid: %w(1 12 123), invalid: %w(1.2), base: "integer", re: /^(?[+-]?)(?\d{0,})(?)$/}, '##' => {re: /^(?[+-]?)(?\d{0,})(?)$/}, '#0' => {re: /^(?[+-]?)(?\d{1,})(?)$/}, '0.0' => {valid: %w(1.1 -1.1 12.1), invalid: %w(1.12), base: "decimal", re: /^(?[+-]?)(?\d{1,}\.\d{1})(?)$/}, '0.00' => {valid: %w(1.12 +1.12 12.12), invalid: %w(1.1 1.123), base: "decimal", re: /^(?[+-]?)(?\d{1,}\.\d{2})(?)$/}, '0.#' => {valid: %w(1 1.1 12.1), invalid: %w(1.12), base: "decimal", re: /^(?[+-]?)(?\d{1,}(?:\.\d{0,1})?)(?)$/}, '-0' => {valid: %w(-1 -10), invalid: %w(1 +1), base: "decimal", re: /^(?\-)(?\d{1,})(?)$/}, '%000' => {valid: %w(%123 %+123 %-123 %1234), invalid: %w(%12 123%), base: "decimal", re: /^(?%[+-]?)(?\d{3,})(?)$/}, '‰000' => {valid: %w(‰123 ‰+123 ‰-123 ‰1234), invalid: %w(‰12 123‰), base: "decimal", re: /^(?‰[+-]?)(?\d{3,})(?)$/}, '000%' => {valid: %w(123% +123% -123% 1234%), invalid: %w(12% %123), base: "decimal", re: /^(?[+-]?)(?\d{3,})(?%)$/}, '000‰' => {valid: %w(123‰ +123‰ -123‰ 1234‰), invalid: %w(12‰ ‰123), base: "decimal", re: /^(?[+-]?)(?\d{3,})(?‰)$/}, '000.0%' => {base: "decimal", re: /^(?[+-]?)(?\d{3,}\.\d{1})(?%)$/}, '###0.#####' => {valid: %w(1 1.1 12345.12345), invalid: %w(1,234.1 1.123456), base: "decimal", re: /^(?[+-]?)(?\d{1,}(?:\.\d{0,5})?)(?)$/}, '###0.0000#' => {valid: %w(1.1234 1.12345 12345.12345), invalid: %w(1,234.1234 1.12), base: "decimal", re: /^(?[+-]?)(?\d{1,}\.\d{4,5})(?)$/}, '00000.0000' => {valid: %w(12345.1234), invalid: %w(1.2 1,234.123,4), base: "decimal", re: /^(?[+-]?)(?\d{5,}\.\d{4})(?)$/}, '#0.0#E#0' => {base: "double", re: /^(?[+-]?)(?\d{1,}\.\d{1,2}E[+-]?\d{1,2})(?)$/}, '#0.0#E+#0' => {base: "double", re: /^(?[+-]?)(?\d{1,}\.\d{1,2}E\+\d{1,2})(?)$/}, '#0.0#E#0%' => {base: "double", re: /^(?[+-]?)(?\d{1,}\.\d{1,2}E[+-]?\d{1,2})(?%)$/}, # Grouping '#,##,##0' => {base: "integer", re: /^(?[+-]?)(?(?:(?:(?:\d{1,2},)?(?:\d{2},)*\d)?\d)?\d{1})(?)$/}, '#,##,#00' => {base: "integer", re: /^(?[+-]?)(?(?:(?:\d{1,2},)?(?:\d{2},)*\d)?\d{2})(?)$/}, '#,##,000' => {base: "integer", re: /^(?[+-]?)(?(?:\d{1,2},)?(?:\d{2},)*\d{3})(?)$/}, '#,#0,000' => {base: "integer", re: /^(?[+-]?)(?(?:(?:\d{1,2},)?(?:\d{2},)*\d)?\d{1},\d{3})(?)$/}, '#,00,000' => {base: "integer", re: /^(?[+-]?)(?(?:\d{1,2},)?(?:\d{2},)*\d{2},\d{3})(?)$/}, '0,00,000' => {base: "integer", re: /^(?[+-]?)(?(?:(?:\d{1,2},)?(?:\d{2},)*\d)?\d{1},\d{2},\d{3})(?)$/}, '0.0##,###' => {base: "decimal", re: /^(?[+-]?)(?\d{1,}\.\d{1}(?:\d(?:\d(?:,\d(?:\d(?:\d)?)?)?)?)?)(?)$/}, '0.00#,###' => {base: "decimal", re: /^(?[+-]?)(?\d{1,}\.\d{2}(?:\d(?:,\d(?:\d(?:\d)?)?)?)?)(?)$/}, '0.000,###' => {base: "decimal", re: /^(?[+-]?)(?\d{1,}\.\d{3}(?:,\d(?:\d(?:\d)?)?)?)(?)$/}, '0.000,0##' => {base: "decimal", re:/^(?[+-]?)(?\d{1,}\.\d{3},\d{1}(?:\d(?:\d)?)?)(?)$/}, '0.000,00#' => {base: "decimal", re: /^(?[+-]?)(?\d{1,}\.\d{3},\d{2}(?:\d)?)(?)$/}, '0.000,000' => {base: "decimal", re: /^(?[+-]?)(?\d{1,}\.\d{3},\d{3})(?)$/}, # Jeni's '##0' => {valid: %w(1 12 123 1234), invalid: %w(1,234 123.4), base: "integer", re: /^(?[+-]?)(?\d{1,})(?)$/}, '#,#00' => {valid: %w(12 123 1,234 1,234,567), invalid: %w(1 1234 12,34 12,34,567), base: "integer", re: /^(?[+-]?)(?(?:(?:\d{1,3},)?(?:\d{3},)*\d)?\d{2})(?)$/}, '#0.#' => {valid: %w(1 1.2 1234.5), invalid: %w(12.34 1,234.5), base: "decimal", re: /^(?[+-]?)(?\d{1,}(?:\.\d{0,1})?)(?)$/}, '#0.0#,#' => {valid: %w(12.3 12.34 12.34,5), invalid: %w(1 12.345 12.34,56,7 12.34,567), base: "decimal", re: /^(?[+-]?)(?\d{1,}\.\d{1}(?:\d(?:,\d)?)?)(?)$/}, }.each do |pattern, props| context pattern do subject {RDF::Tabular::Datatype.new({})} describe "#build_number_re" do it "generates #{props[:re]} for #{pattern}" do expect(subject.build_number_re(pattern, ",", ".")).to eql props[:re] end if props[:re].is_a?(Regexp) it "recognizes bad pattern #{pattern}" do expect{subject.build_number_re(pattern, ",", ".")}.to raise_error(ArgumentError) end if props[:re] == ArgumentError end describe "Metadata" do let(:md) { RDF::Tabular::Table.new({ url: "http://example.com/table.csv", dialect: {header: false}, tableSchema: { columns: [{ name: "name", datatype: {"base" => props[:base], "format" => {"pattern" => pattern}} }] } }, logger: logger) } describe "valid" do Array(props[:valid]).each do |num| it "for #{num}" do cell = md.to_enum(:each_row, "\"#{num}\"\n").to_a.first.values.first expect(cell).to be_valid end end end describe "invalid" do Array(props[:invalid]).each do |num| it "for #{num}" do cell = md.to_enum(:each_row, "\"#{num}\"\n").to_a.first.values.first expect(cell).not_to be_valid end end end end end end end describe "#common_properties" do describe "#normalize!" do { "string with no language" => [ %({ "@context": "http://www.w3.org/ns/csvw", "dc:title": "foo" }), %({ "@context": "http://www.w3.org/ns/csvw", "dc:title": {"@value": "foo"} }) ], "string with language" => [ %({ "@context": ["http://www.w3.org/ns/csvw", {"@language": "en"}], "dc:title": "foo" }), %({ "@context": "http://www.w3.org/ns/csvw", "dc:title": {"@value": "foo", "@language": "en"} }) ], "relative URL" => [ %({ "@context": "http://www.w3.org/ns/csvw", "dc:source": {"@id": "foo"} }), %({ "@context": "http://www.w3.org/ns/csvw", "dc:source": {"@id": "http://example.com/foo"} }) ], "array of values" => [ %({ "@context": ["http://www.w3.org/ns/csvw", {"@language": "en"}], "dc:title": [ "foo", {"@value": "bar"}, {"@value": "baz", "@language": "de"}, 1, true, {"@value": 1}, {"@value": true}, {"@value": "1", "@type": "xsd:integer"}, {"@id": "foo"} ] }), %({ "@context": "http://www.w3.org/ns/csvw", "dc:title": [ {"@value": "foo", "@language": "en"}, {"@value": "bar"}, {"@value": "baz", "@language": "de"}, 1, true, {"@value": 1}, {"@value": true}, {"@value": "1", "@type": "xsd:integer"}, {"@id": "http://example.com/foo"} ] }) ], }.each do |name, (input, result)| it name do a = RDF::Tabular::Table.new(input, base: "http://example.com/A", logger: logger) b = RDF::Tabular::Table.new(result, base: "http://example.com/A", logger: logger) expect(a.normalize!).to eq b expect(logger.to_s).not_to match(/ERROR|WARN/) end end end context "transformation" do it "FIXME" end end describe "#verify_compatible!" do { "two tables with same id" => { A: %({ "@context": "http://www.w3.org/ns/csvw", "@type": "Table", "url": "http://example.org/table", "tableSchema": {"columns": []} }), B: %({ "@context": "http://www.w3.org/ns/csvw", "@type": "Table", "url": "http://example.org/table", "tableSchema": {"columns": []} }), R: true }, "two tables with different id" => { A: %({ "@context": "http://www.w3.org/ns/csvw", "@type": "Table", "url": "http://example.org/table1", "tableSchema": {"columns": []} }), B: %({ "@context": "http://www.w3.org/ns/csvw", "@type": "Table", "url": "http://example.org/table2", "tableSchema": {"columns": []} }), R: false }, "table-group and table with same url" => { A: %({ "@context": "http://www.w3.org/ns/csvw", "@type": "TableGroup", "tables": [{ "@type": "Table", "url": "http://example.org/table1", "tableSchema": {"columns": []} }] }), B: %({ "@context": "http://www.w3.org/ns/csvw", "@type": "Table", "url": "http://example.org/table1", "tableSchema": {"columns": []} }), R: true }, "table-group and table with different url" => { A: %({ "@context": "http://www.w3.org/ns/csvw", "@type": "TableGroup", "tables": [{ "@type": "Table", "url": "http://example.org/table1", "tableSchema": {"columns": []} }] }), B: %({ "@context": "http://www.w3.org/ns/csvw", "@type": "Table", "url": "http://example.org/table2", "tableSchema": {"columns": []} }), R: false }, "table-group with two tables" => { A: %({ "@context": "http://www.w3.org/ns/csvw", "@type": "TableGroup", "tables": [{ "@type": "Table", "url": "http://example.org/table1", "tableSchema": {"columns": []} }, { "@type": "Table", "url": "http://example.org/table2", "tableSchema": {"columns": []} }] }), B: %({ "@context": "http://www.w3.org/ns/csvw", "@type": "Table", "url": "http://example.org/table2", "tableSchema": {"columns": []} }), R: true }, "tables with matching columns" => { A: %({ "@context": "http://www.w3.org/ns/csvw", "@type": "Table", "url": "http://example.org/table1", "tableSchema": {"columns": [{"name": "foo"}]} }), B: %({ "@context": "http://www.w3.org/ns/csvw", "@type": "Table", "url": "http://example.org/table1", "tableSchema": {"columns": [{"name": "foo"}]} }), R: true }, "tables with virtual columns otherwise matching" => { A: %({ "@context": "http://www.w3.org/ns/csvw", "@type": "Table", "url": "http://example.org/table1", "tableSchema": {"columns": [{"name": "foo"}, {"virtual": true}]} }), B: %({ "@context": "http://www.w3.org/ns/csvw", "@type": "Table", "url": "http://example.org/table1", "tableSchema": {"columns": [{"name": "foo"}]} }), R: true }, "tables with differing columns" => { A: %({ "@context": "http://www.w3.org/ns/csvw", "@type": "Table", "url": "http://example.org/table1", "tableSchema": {"columns": [{"name": "foo"}]} }), B: %({ "@context": "http://www.w3.org/ns/csvw", "@type": "Table", "url": "http://example.org/table1", "tableSchema": {"columns": [{"name": "bar"}]} }), R: false }, "tables with different column count" => { A: %({ "@context": "http://www.w3.org/ns/csvw", "@type": "Table", "url": "http://example.org/table1", "tableSchema": {"columns": [{"name": "foo"}, {"name": "bar"}]} }), B: %({ "@context": "http://www.w3.org/ns/csvw", "@type": "Table", "url": "http://example.org/table1", "tableSchema": {"columns": [{"name": "bar"}]} }), R: false }, "tables with matching columns on name/titles" => { A: %({ "@context": "http://www.w3.org/ns/csvw", "@type": "Table", "url": "http://example.org/table1", "tableSchema": {"columns": [{"name": "foo"}]} }), B: %({ "@context": "http://www.w3.org/ns/csvw", "@type": "Table", "url": "http://example.org/table1", "tableSchema": {"columns": [{"titles": "foo"}]} }), R: false }, "tables with mismatch columns on name/titles" => { A: %({ "@context": "http://www.w3.org/ns/csvw", "@type": "Table", "url": "http://example.org/table1", "tableSchema": {"columns": [{"name": "foo"}]} }), B: %({ "@context": "http://www.w3.org/ns/csvw", "@type": "Table", "url": "http://example.org/table1", "tableSchema": {"columns": [{"titles": "bar"}]} }), R: false }, }.each do |name, props| it name do a = described_class.new(::JSON.parse(props[:A]), validate: true) b = described_class.new(::JSON.parse(props[:B])) if props[:R] expect {a.verify_compatible!(b)}.not_to raise_error expect(logger.to_s).not_to match(/ERROR|WARN/) else expect {a.verify_compatible!(b)}.to raise_error(RDF::Tabular::Error) end end end end end