README.rdoc in dwc-archive-0.5.13 vs README.rdoc in dwc-archive-0.5.14
- old
+ new
@@ -23,11 +23,11 @@
dwc.extensions[0].data # summary for an extension
# read the content of the core data file into memory, or process it with a block
# it returns an array of arrays of data
# rows that had a wrong encoding will be collected into the errors array
- data, errors = dwc.core.read
+ data, errors = dwc.core.read
# read content using a block, getting back results in sets of 100 rows each
results = []
tail_data, tail_errors = dwc.core.read(100) do |data, errors|
results << [data, errors]
@@ -43,22 +43,38 @@
results << [data, errors]
end
results << [tail_data, tail_errors]
# normalize names in a classification, collecting together synonyms, canonical names,
- # vernacular names and associating paths to taxons in a classification
+ # vernacular names and associating paths to taxons in a classification
# distributed as a DwCA file
# NOTE: this functionality requires the biodiversity gem for Ruby 1.8.x and
# the biodiversity19 gem for Ruby 1.9.x
result = dwc.normalize_classification
+ # for finer control over normalization:
+ cn = DarwinCore::ClassificationNormalizer.new(dwc)
+ cn.normalize
+
+ # to get a flat hash of nodes with attached vernacular names and synonyms
+ normalized_data = cn.normalized_data
+
+ # to get a representation of tree organization as a hash
+ classification_tree = cn.tree
+
+ # to get a list of all name strings used as scientific or vernacular names
+ all_name_strings = cn.name_strings
+
+ # to get a list of errors generated during normalization
+ errors = cn.error_names
+
DarwinCore.clean_all # remove all expanded archives
== Creating a DarwinCore Archive file
-
+
gen = DarwinCore::Generator.new('/tmp/dwc_birches.tar.gz')
core = [
["http://rs.tdwg.org/dwc/terms/taxonID", "http://rs.tdwg.org/dwc/terms/parentNameUsageID", "http://rs.tdwg.org/dwc/terms/scientificName", "http://rs.tdwg.org/dwc/terms/taxonRank"],
[1, 0, "Plantae", "kingdom"],
@@ -75,11 +91,11 @@
[3, "Weeping Birch"],
[3, "Береза плакучая"]
]
synonyms = [
- ["http://rs.tdwg.org/dwc/terms/TaxonID", "http://rs.tdwg.org/dwc/terms/scientificName", "http://rs.tdwg.org/dwc/terms/taxonomicStatus"],
+ ["http://rs.tdwg.org/dwc/terms/TaxonID", "http://rs.tdwg.org/dwc/terms/scientificName", "http://rs.tdwg.org/dwc/terms/taxonomicStatus"],
[1, "Betila Linnaeus, 1753", 'misspelling']
]
eml = {
:id => '1234',
@@ -87,11 +103,11 @@
:authors => [
{ :first_name => 'John',
:last_name => 'Doe',
:email => 'jdoe@example.com' },
{ :first_name => 'Jane',
- :last_name => 'Doe',
+ :last_name => 'Doe',
:email => 'jane@example.com' }
],
:abstract => 'test classification',
:citation => 'Test classification: Doe John, Doe Jane, Taxonomy, 10, 1, 2010',
:url => 'http://example.com'
@@ -104,10 +120,10 @@
gen.add_eml_xml(eml)
gen.pack
== Note on Patches/Pull Requests
-
+
* Fork the project.
* Make your feature addition or bug fix.
* Add tests for it. This is important so I don't break it in a
future version unintentionally.
* Commit, do not mess with rakefile, version, or history.