README.rdoc in dwc-archive-0.5.13 vs README.rdoc in dwc-archive-0.5.14

- old
+ new

@@ -23,11 +23,11 @@ dwc.extensions[0].data # summary for an extension # read content of the core data file into memory or used with a block # it returns an array of arrays of data # rows that had a wrong encoding will be collected into an errors array - data, errors = dwc.core.read + data, errors = dwc.core.read # read content using a block, getting back results in sets of 100 rows each results = [] tail_data, tail_errors = dwc.core.read(100) do |data, errors| results << [data, errors] @@ -43,22 +43,38 @@ results << [data, errors] end results << [tail_data, tail_errors] # normalize names in classification collecting together synonyms, canonical names, - # vernacular names and associating paths to taxons in a classification + # vernacular names and associating paths to taxons in a classification # distributed as DwCA file # NOTE: this functionality requires biodiversity gem for ruby 1.8.x and # biodiversity19 gem for ruby 1.9.x result = dwc.normalize_classification + # for finer control over normalization: + cn = DarwinCore::ClassificationNormalizer.new(dwc) + cn.normalize + + # to get a flat hash of nodes with attached vernacular names and synonyms + normalized_data = cn.normalized_data + + # to get a representation of tree organization as a hash + classification_tree = cn.tree + + # to get a list of all name strings used as scientific or vernacular names + all_name_strings = cn.name_strings + + # to get a list of errors generated during the normalization + errors = cn.error_names + DarwinCore.clean_all # remove all expanded archives == Creating a DarwinCore Archive file - + gen = DarwinCore::Generator.new('/tmp/dwc_birches.tar.gz') core = [ ["http://rs.tdwg.org/dwc/terms/taxonID", "http://rs.tdwg.org/dwc/terms/parentNameUsageID", "http://rs.tdwg.org/dwc/terms/scientificName", "http://rs.tdwg.org/dwc/terms/taxonRank"], [1, 0, "Plantae", "kingdom"], @@ -75,11 +91,11 @@ [3, "Weeping Birch"], [3, "Береза плакучая"] ] synonyms = [ - ["http://rs.tdwg.org/dwc/terms/TaxonID", 
"http://rs.tdwg.org/dwc/terms/scientificName", "http://rs.tdwg.org/dwc/terms/taxonomicStatus"], + ["http://rs.tdwg.org/dwc/terms/TaxonID", "http://rs.tdwg.org/dwc/terms/scientificName", "http://rs.tdwg.org/dwc/terms/taxonomicStatus"], [1, "Betila Linnaeus, 1753", 'misspelling'] ] eml = { :id => '1234', @@ -87,11 +103,11 @@ :authors => [ { :first_name => 'John', :last_name => 'Doe', :email => 'jdoe@example.com' }, { :first_name => 'Jane', - :last_name => 'Doe', + :last_name => 'Doe', :email => 'jane@example.com' } ], :abstract => 'test classification', :citation => 'Test classification: Doe John, Doe Jane, Taxonomy, 10, 1, 2010', :url => 'http://example.com' @@ -104,10 +120,10 @@ gen.add_eml_xml(eml) gen.pack == Note on Patches/Pull Requests - + * Fork the project. * Make your feature addition or bug fix. * Add tests for it. This is important so I don't break it in a future version unintentionally. * Commit, do not mess with rakefile, version, or history.