lib/commonmeta/utils.rb in commonmeta-ruby-3.0.6 vs lib/commonmeta/utils.rb in commonmeta-ruby-3.0.7

- old
+ new

@@ -1,443 +1,443 @@ # frozen_string_literal: true module Commonmeta module Utils NORMALIZED_LICENSES = { - 'https://creativecommons.org/licenses/by/1.0' => 'https://creativecommons.org/licenses/by/1.0/legalcode', - 'https://creativecommons.org/licenses/by/2.0' => 'https://creativecommons.org/licenses/by/2.0/legalcode', - 'https://creativecommons.org/licenses/by/2.5' => 'https://creativecommons.org/licenses/by/2.5/legalcode', - 'https://creativecommons.org/licenses/by/3.0' => 'https://creativecommons.org/licenses/by/3.0/legalcode', - 'https://creativecommons.org/licenses/by/3.0/us' => 'https://creativecommons.org/licenses/by/3.0/legalcode', - 'https://creativecommons.org/licenses/by/4.0' => 'https://creativecommons.org/licenses/by/4.0/legalcode', - 'https://creativecommons.org/licenses/by-nc/1.0' => 'https://creativecommons.org/licenses/by-nc/1.0/legalcode', - 'https://creativecommons.org/licenses/by-nc/2.0' => 'https://creativecommons.org/licenses/by-nc/2.0/legalcode', - 'https://creativecommons.org/licenses/by-nc/2.5' => 'https://creativecommons.org/licenses/by-nc/2.5/legalcode', - 'https://creativecommons.org/licenses/by-nc/3.0' => 'https://creativecommons.org/licenses/by-nc/3.0/legalcode', - 'https://creativecommons.org/licenses/by-nc/4.0' => 'https://creativecommons.org/licenses/by-nc/4.0/legalcode', - 'https://creativecommons.org/licenses/by-nd-nc/1.0' => 'https://creativecommons.org/licenses/by-nd-nc/1.0/legalcode', - 'https://creativecommons.org/licenses/by-nd-nc/2.0' => 'https://creativecommons.org/licenses/by-nd-nc/2.0/legalcode', - 'https://creativecommons.org/licenses/by-nd-nc/2.5' => 'https://creativecommons.org/licenses/by-nd-nc/2.5/legalcode', - 'https://creativecommons.org/licenses/by-nd-nc/3.0' => 'https://creativecommons.org/licenses/by-nd-nc/3.0/legalcode', - 'https://creativecommons.org/licenses/by-nd-nc/4.0' => 'https://creativecommons.org/licenses/by-nd-nc/4.0/legalcode', - 'https://creativecommons.org/licenses/by-nc-sa/1.0' => 'https://creativecommons.org/licenses/by-nc-sa/1.0/legalcode', - 'https://creativecommons.org/licenses/by-nc-sa/2.0' => 'https://creativecommons.org/licenses/by-nc-sa/2.0/legalcode', - 'https://creativecommons.org/licenses/by-nc-sa/2.5' => 'https://creativecommons.org/licenses/by-nc-sa/2.5/legalcode', - 'https://creativecommons.org/licenses/by-nc-sa/3.0' => 'https://creativecommons.org/licenses/by-nc-sa/3.0/legalcode', - 'https://creativecommons.org/licenses/by-nc-sa/4.0' => 'https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode', - 'https://creativecommons.org/licenses/by-nd/1.0' => 'https://creativecommons.org/licenses/by-nd/1.0/legalcode', - 'https://creativecommons.org/licenses/by-nd/2.0' => 'https://creativecommons.org/licenses/by-nd/2.0/legalcode', - 'https://creativecommons.org/licenses/by-nd/2.5' => 'https://creativecommons.org/licenses/by-nd/2.5/legalcode', - 'https://creativecommons.org/licenses/by-nd/3.0' => 'https://creativecommons.org/licenses/by-nd/3.0/legalcode', - 'https://creativecommons.org/licenses/by-nd/4.0' => 'https://creativecommons.org/licenses/by-nd/2.0/legalcode', - 'https://creativecommons.org/licenses/by-sa/1.0' => 'https://creativecommons.org/licenses/by-sa/1.0/legalcode', - 'https://creativecommons.org/licenses/by-sa/2.0' => 'https://creativecommons.org/licenses/by-sa/2.0/legalcode', - 'https://creativecommons.org/licenses/by-sa/2.5' => 'https://creativecommons.org/licenses/by-sa/2.5/legalcode', - 'https://creativecommons.org/licenses/by-sa/3.0' => 'https://creativecommons.org/licenses/by-sa/3.0/legalcode', - 'https://creativecommons.org/licenses/by-sa/4.0' => 'https://creativecommons.org/licenses/by-sa/4.0/legalcode', - 'https://creativecommons.org/licenses/by-nc-nd/1.0' => 'https://creativecommons.org/licenses/by-nc-nd/1.0/legalcode', - 'https://creativecommons.org/licenses/by-nc-nd/2.0' => 'https://creativecommons.org/licenses/by-nc-nd/2.0/legalcode', - 'https://creativecommons.org/licenses/by-nc-nd/2.5' => 'https://creativecommons.org/licenses/by-nc-nd/2.5/legalcode', - 'https://creativecommons.org/licenses/by-nc-nd/3.0' => 'https://creativecommons.org/licenses/by-nc-nd/3.0/legalcode', - 'https://creativecommons.org/licenses/by-nc-nd/4.0' => 'https://creativecommons.org/licenses/by-nc-nd/4.0/legalcode', - 'https://creativecommons.org/licenses/publicdomain' => 'https://creativecommons.org/licenses/publicdomain/', - 'https://creativecommons.org/publicdomain/zero/1.0' => 'https://creativecommons.org/publicdomain/zero/1.0/legalcode' + "https://creativecommons.org/licenses/by/1.0" => "https://creativecommons.org/licenses/by/1.0/legalcode", + "https://creativecommons.org/licenses/by/2.0" => "https://creativecommons.org/licenses/by/2.0/legalcode", + "https://creativecommons.org/licenses/by/2.5" => "https://creativecommons.org/licenses/by/2.5/legalcode", + "https://creativecommons.org/licenses/by/3.0" => "https://creativecommons.org/licenses/by/3.0/legalcode", + "https://creativecommons.org/licenses/by/3.0/us" => "https://creativecommons.org/licenses/by/3.0/legalcode", + "https://creativecommons.org/licenses/by/4.0" => "https://creativecommons.org/licenses/by/4.0/legalcode", + "https://creativecommons.org/licenses/by-nc/1.0" => "https://creativecommons.org/licenses/by-nc/1.0/legalcode", + "https://creativecommons.org/licenses/by-nc/2.0" => "https://creativecommons.org/licenses/by-nc/2.0/legalcode", + "https://creativecommons.org/licenses/by-nc/2.5" => "https://creativecommons.org/licenses/by-nc/2.5/legalcode", + "https://creativecommons.org/licenses/by-nc/3.0" => "https://creativecommons.org/licenses/by-nc/3.0/legalcode", + "https://creativecommons.org/licenses/by-nc/4.0" => "https://creativecommons.org/licenses/by-nc/4.0/legalcode", + "https://creativecommons.org/licenses/by-nd-nc/1.0" => "https://creativecommons.org/licenses/by-nd-nc/1.0/legalcode", + "https://creativecommons.org/licenses/by-nd-nc/2.0" => "https://creativecommons.org/licenses/by-nd-nc/2.0/legalcode", + "https://creativecommons.org/licenses/by-nd-nc/2.5" => "https://creativecommons.org/licenses/by-nd-nc/2.5/legalcode", + "https://creativecommons.org/licenses/by-nd-nc/3.0" => "https://creativecommons.org/licenses/by-nd-nc/3.0/legalcode", + "https://creativecommons.org/licenses/by-nd-nc/4.0" => "https://creativecommons.org/licenses/by-nd-nc/4.0/legalcode", + "https://creativecommons.org/licenses/by-nc-sa/1.0" => "https://creativecommons.org/licenses/by-nc-sa/1.0/legalcode", + "https://creativecommons.org/licenses/by-nc-sa/2.0" => "https://creativecommons.org/licenses/by-nc-sa/2.0/legalcode", + "https://creativecommons.org/licenses/by-nc-sa/2.5" => "https://creativecommons.org/licenses/by-nc-sa/2.5/legalcode", + "https://creativecommons.org/licenses/by-nc-sa/3.0" => "https://creativecommons.org/licenses/by-nc-sa/3.0/legalcode", + "https://creativecommons.org/licenses/by-nc-sa/4.0" => "https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode", + "https://creativecommons.org/licenses/by-nd/1.0" => "https://creativecommons.org/licenses/by-nd/1.0/legalcode", + "https://creativecommons.org/licenses/by-nd/2.0" => "https://creativecommons.org/licenses/by-nd/2.0/legalcode", + "https://creativecommons.org/licenses/by-nd/2.5" => "https://creativecommons.org/licenses/by-nd/2.5/legalcode", + "https://creativecommons.org/licenses/by-nd/3.0" => "https://creativecommons.org/licenses/by-nd/3.0/legalcode", + "https://creativecommons.org/licenses/by-nd/4.0" => "https://creativecommons.org/licenses/by-nd/2.0/legalcode", + "https://creativecommons.org/licenses/by-sa/1.0" => "https://creativecommons.org/licenses/by-sa/1.0/legalcode", + "https://creativecommons.org/licenses/by-sa/2.0" => "https://creativecommons.org/licenses/by-sa/2.0/legalcode", + "https://creativecommons.org/licenses/by-sa/2.5" => "https://creativecommons.org/licenses/by-sa/2.5/legalcode", + "https://creativecommons.org/licenses/by-sa/3.0" => "https://creativecommons.org/licenses/by-sa/3.0/legalcode", + "https://creativecommons.org/licenses/by-sa/4.0" => "https://creativecommons.org/licenses/by-sa/4.0/legalcode", + "https://creativecommons.org/licenses/by-nc-nd/1.0" => "https://creativecommons.org/licenses/by-nc-nd/1.0/legalcode", + "https://creativecommons.org/licenses/by-nc-nd/2.0" => "https://creativecommons.org/licenses/by-nc-nd/2.0/legalcode", + "https://creativecommons.org/licenses/by-nc-nd/2.5" => "https://creativecommons.org/licenses/by-nc-nd/2.5/legalcode", + "https://creativecommons.org/licenses/by-nc-nd/3.0" => "https://creativecommons.org/licenses/by-nc-nd/3.0/legalcode", + "https://creativecommons.org/licenses/by-nc-nd/4.0" => "https://creativecommons.org/licenses/by-nc-nd/4.0/legalcode", + "https://creativecommons.org/licenses/publicdomain" => "https://creativecommons.org/licenses/publicdomain/", + "https://creativecommons.org/publicdomain/zero/1.0" => "https://creativecommons.org/publicdomain/zero/1.0/legalcode", } # source: https://www.bibtex.com/e/entry-types/ BIB_TO_CM_TRANSLATIONS = { - 'article' => 'JournalArticle', - 'book' => 'Book', - 'booklet' => 'Book', - 'inbook' => 'BookChapter', - 'inproceedings' => 'ProceedingsArticle', - 'manual' => 'Report', - 'mastersthesis' => 'Dissertation', - 'misc' => 'Other', - 'phdthesis' => 'Dissertation', - 'proceedings' => 'Proceedings', - 'techreport' => 'Report', - 'unpublished' => 'Manuscript' + "article" => "JournalArticle", + "book" => "Book", + "booklet" => "Book", + "inbook" => "BookChapter", + "inproceedings" => "ProceedingsArticle", + "manual" => "Report", + "mastersthesis" => "Dissertation", + "misc" => "Other", + "phdthesis" => "Dissertation", + "proceedings" => "Proceedings", + "techreport" => "Report", + "unpublished" => "Manuscript", } CM_TO_BIB_TRANSLATIONS = { - 'Article' => 'article', - 'Book' => 'book', - 'BookChapter' => 'inbook', - 'Dissertation' => 'phdthesis', - 'JournalArticle' => 'article', - 'Manuscript' => 'unpublished', - 'Other' => 'misc', - 'Proceedings' => 'proceedings', - 'ProceedingsArticle' => 'inproceedings', - 'Report' => 'techreport' + "Article" => "article", + "Book" => "book", + "BookChapter" => "inbook", + "Dissertation" => "phdthesis", + "JournalArticle" => "article", + "Manuscript" => "unpublished", + "Other" => "misc", + "Proceedings" => "proceedings", + "ProceedingsArticle" => "inproceedings", + "Report" => "techreport", } # source: https://docs.citationstyles.org/en/stable/specification.html?highlight=book#appendix-iii-types CSL_TO_CM_TRANSLATIONS = { - 'article' => 'Article', - 'article-journal' => 'JournalArticle', - 'article-magazine' => 'Article', - 'article-newspaper' => 'Article', - 'bill' => 'LegalDocument', - 'book' => 'Book', - 'broadcast' => 'Audiovisual', - 'chapter' => 'BookChapter', - 'classic' => 'Book', - 'collection' => 'Collection', - 'dataset' => 'Dataset', - 'document' => 'Document', - 'entry' => 'Entry', - 'entry-dictionary' => 'Entry', - 'entry-encyclopedia' => 'Entry', - 'event' => 'Event', - 'figure' => 'Figure', - 'graphic' => 'Image', - 'hearing' => 'LegalDocument', - 'interview' => 'Document', - 'legal_case' => 'LegalDocument', - 'legislation' => 'LegalDocument', - 'manuscript' => 'Manuscript', - 'map' => 'Map', - 'motion_picture' => 'Audiovisual', - 'musical_score' => 'Document', - 'pamphlet' => 'Document', - 'paper-conference' => 'ProceedingsArticle', - 'patent' => 'Patent', - 'performance' => 'Performance', - 'periodical' => 'Journal', - 'personal_communication' => 'PersonalCommunication', - 'post' => 'Post', - 'post-weblog' => 'Article', - 'regulation' => 'LegalDocument', - 'report' => 'Report', - 'review' => 'Review', - 'review-book' => 'Review', - 'software' => 'Software', - 'song' => 'Audiovisual', - 'speech' => 'Speech', - 'standard' => 'Standard', - 'thesis' => 'Dissertation', - 'treaty' => 'LegalDocument', - 'webpage' => 'WebPage' + "article" => "Article", + "article-journal" => "JournalArticle", + "article-magazine" => "Article", + "article-newspaper" => "Article", + "bill" => "LegalDocument", + "book" => "Book", + "broadcast" => "Audiovisual", + "chapter" => "BookChapter", + "classic" => "Book", + "collection" => "Collection", + "dataset" => "Dataset", + "document" => "Document", + "entry" => "Entry", + "entry-dictionary" => "Entry", + "entry-encyclopedia" => "Entry", + "event" => "Event", + "figure" => "Figure", + "graphic" => "Image", + "hearing" => "LegalDocument", + "interview" => "Document", + "legal_case" => "LegalDocument", + "legislation" => "LegalDocument", + "manuscript" => "Manuscript", + "map" => "Map", + "motion_picture" => "Audiovisual", + "musical_score" => "Document", + "pamphlet" => "Document", + "paper-conference" => "ProceedingsArticle", + "patent" => "Patent", + "performance" => "Performance", + "periodical" => "Journal", + "personal_communication" => "PersonalCommunication", + "post" => "Post", + "post-weblog" => "Article", + "regulation" => "LegalDocument", + "report" => "Report", + "review" => "Review", + "review-book" => "Review", + "software" => "Software", + "song" => "Audiovisual", + "speech" => "Speech", + "standard" => "Standard", + "thesis" => "Dissertation", + "treaty" => "LegalDocument", + "webpage" => "WebPage", } CM_TO_CSL_TRANSLATIONS = { - 'Article' => 'article', - 'JournalArticle' => 'article-journal', - 'Book' => 'book', - 'BookChapter' => 'chapter', - 'Collection' => 'collection', - 'Dataset' => 'dataset', - 'Document' => 'document', - 'Entry' => 'entry', - 'Event' => 'event', - 'Figure' => 'figure', - 'Image' => 'graphic', - 'LegalDocument' => 'legal_case', - 'Manuscript' => 'manuscript', - 'Map' => 'map', - 'Audiovisual' => 'motion_picture', - 'Patent' => 'patent', - 'Performance' => 'performance', - 'Journal' => 'periodical', - 'PersonalCommunication' => 'personal_communication', - 'Post' => 'post', - 'Report' => 'report', - 'Review' => 'review', - 'Software' => 'software', - 'Speech' => 'speech', - 'Standard' => 'standard', - 'Dissertation' => 'thesis', - 'WebPage' => 'webpage' + "Article" => "article", + "JournalArticle" => "article-journal", + "Book" => "book", + "BookChapter" => "chapter", + "Collection" => "collection", + "Dataset" => "dataset", + "Document" => "document", + "Entry" => "entry", + "Event" => "event", + "Figure" => "figure", + "Image" => "graphic", + "LegalDocument" => "legal_case", + "Manuscript" => "manuscript", + "Map" => "map", + "Audiovisual" => "motion_picture", + "Patent" => "patent", + "Performance" => "performance", + "Journal" => "periodical", + "PersonalCommunication" => "personal_communication", + "Post" => "post", + "Report" => "report", + "Review" => "review", + "Software" => "software", + "Speech" => "speech", + "Standard" => "standard", + "Dissertation" => "thesis", + "WebPage" => "webpage", } # source: http://api.crossref.org/types CR_TO_CM_TRANSLATIONS = { - 'BookChapter' => 'BookChapter', - 'BookPart' => 'BookPart', - 'BookSection' => 'BookSection', - 'BookSeries' => 'BookSeries', - 'BookSet' => 'BookSet', - 'BookTrack' => 'BookTrack', - 'Book' => 'Book', - 'Component' => 'Component', - 'Database' => 'Database', - 'Dataset' => 'Dataset', - 'Dissertation' => 'Dissertation', - 'EditedBook' => 'EditedBook', - 'Grant' => 'Grant', - 'JournalArticle' => 'JournalArticle', - 'JournalIssue' => 'JournalIssue', - 'JournalVolume' => 'JournalVolume', - 'Journal' => 'Journal', - 'Monograph' => 'Book', - 'Other' => 'Other', - 'PeerReview' => 'PeerReview', - 'PostedContent' => 'Article', - 'ProceedingsArticle' => 'ProceedingsArticle', - 'ProceedingsSeries' => 'ProceedingsSeries', - 'Proceedings' => 'Proceedings', - 'ReferenceBook' => 'ReferenceBook', - 'ReferenceEntry' => 'Entry', - 'ReportComponent' => 'ReportComponent', - 'ReportSeries' => 'ReportSeries', - 'Report' => 'Report', - 'Standard' => 'Standard' + "BookChapter" => "BookChapter", + "BookPart" => "BookPart", + "BookSection" => "BookSection", + "BookSeries" => "BookSeries", + "BookSet" => "BookSet", + "BookTrack" => "BookTrack", + "Book" => "Book", + "Component" => "Component", + "Database" => "Database", + "Dataset" => "Dataset", + "Dissertation" => "Dissertation", + "EditedBook" => "EditedBook", + "Grant" => "Grant", + "JournalArticle" => "JournalArticle", + "JournalIssue" => "JournalIssue", + "JournalVolume" => "JournalVolume", + "Journal" => "Journal", + "Monograph" => "Book", + "Other" => "Other", + "PeerReview" => "PeerReview", + "PostedContent" => "Article", + "ProceedingsArticle" => "ProceedingsArticle", + "ProceedingsSeries" => "ProceedingsSeries", + "Proceedings" => "Proceedings", + "ReferenceBook" => "ReferenceBook", + "ReferenceEntry" => "Entry", + "ReportComponent" => "ReportComponent", + "ReportSeries" => "ReportSeries", + "Report" => "Report", + "Standard" => "Standard", } CM_TO_CR_TRANSLATIONS = { - 'Article' => 'PostedContent', - 'BookChapter' => 'BookChapter', - 'BookSeries' => 'BookSeries', - 'Book' => 'Book', - 'Component' => 'Component', - 'Dataset' => 'Dataset', - 'Dissertation' => 'Dissertation', - 'Grant' => 'Grant', - 'JournalArticle' => 'JournalArticle', - 'JournalIssue' => 'JournalIssue', - 'JournalVolume' => 'JournalVolume', - 'Journal' => 'Journal', - 'ProceedingsArticle' => 'ProceedingsArticle', - 'ProceedingsSeries' => 'ProceedingsSeries', - 'Proceedings' => 'Proceedings', - 'ReportComponent' => 'ReportComponent', - 'ReportSeries' => 'ReportSeries', - 'Report' => 'Report', - 'PeerReview' => 'PeerReview', - 'Other' => 'Other' + "Article" => "PostedContent", + "BookChapter" => "BookChapter", + "BookSeries" => "BookSeries", + "Book" => "Book", + "Component" => "Component", + "Dataset" => "Dataset", + "Dissertation" => "Dissertation", + "Grant" => "Grant", + "JournalArticle" => "JournalArticle", + "JournalIssue" => "JournalIssue", + "JournalVolume" => "JournalVolume", + "Journal" => "Journal", + "ProceedingsArticle" => "ProceedingsArticle", + "ProceedingsSeries" => "ProceedingsSeries", + "Proceedings" => "Proceedings", + "ReportComponent" => "ReportComponent", + "ReportSeries" => "ReportSeries", + "Report" => "Report", + "PeerReview" => "PeerReview", + "Other" => "Other", } # source: https://github.com/datacite/schema/blob/master/source/meta/kernel-4/include/datacite-resourceType-v4.xsd DC_TO_CM_TRANSLATIONS = { - 'Audiovisual' => 'Audiovisual', - 'BlogPosting' => 'Article', - 'Book' => 'Book', - 'BookChapter' => 'BookChapter', - 'Collection' => 'Collection', - 'ComputationalNotebook' => 'ComputationalNotebook', - 'ConferencePaper' => 'ProceedingsArticle', - 'ConferenceProceeding' => 'Proceedings', - 'DataPaper' => 'JournalArticle', - 'Dataset' => 'Dataset', - 'Dissertation' => 'Dissertation', - 'Event' => 'Event', - 'Image' => 'Image', - 'InteractiveResource' => 'InteractiveResource', - 'Journal' => 'Journal', - 'JournalArticle' => 'JournalArticle', - 'Model' => 'Model', - 'OutputManagementPlan' => 'OutputManagementPlan', - 'PeerReview' => 'PeerReview', - 'PhysicalObject' => 'PhysicalObject', - 'Poster' => 'Speech', - 'Preprint' => 'Article', - 'Report' => 'Report', - 'Service' => 'Service', - 'Software' => 'Software', - 'Sound' => 'Sound', - 'Standard' => 'Standard', - 'Text' => 'Document', - 'Thesis' => 'Dissertation', - 'Workflow' => 'Workflow', - 'Other' => 'Other' + "Audiovisual" => "Audiovisual", + "BlogPosting" => "Article", + "Book" => "Book", + "BookChapter" => "BookChapter", + "Collection" => "Collection", + "ComputationalNotebook" => "ComputationalNotebook", + "ConferencePaper" => "ProceedingsArticle", + "ConferenceProceeding" => "Proceedings", + "DataPaper" => "JournalArticle", + "Dataset" => "Dataset", + "Dissertation" => "Dissertation", + "Event" => "Event", + "Image" => "Image", + "InteractiveResource" => "InteractiveResource", + "Journal" => "Journal", + "JournalArticle" => "JournalArticle", + "Model" => "Model", + "OutputManagementPlan" => "OutputManagementPlan", + "PeerReview" => "PeerReview", + "PhysicalObject" => "PhysicalObject", + "Poster" => "Speech", + "Preprint" => "Article", + "Report" => "Report", + "Service" => "Service", + "Software" => "Software", + "Sound" => "Sound", + "Standard" => "Standard", + "Text" => "Document", + "Thesis" => "Dissertation", + "Workflow" => "Workflow", + "Other" => "Other", } CM_TO_DC_TRANSLATIONS = { - 'Article' => 'Preprint', - 'Audiovisual' => 'Audiovisual', - 'Book' => 'Book', - 'BookChapter' => 'BookChapter', - 'Collection' => 'Collection', - 'Dataset' => 'Dataset', - 'Dissertation' => 'Dissertation', - 'Document' => 'Text', - 'Entry' => 'Text', - 'Event' => 'Event', - 'Figure' => 'Image', - 'Image' => 'Image', - 'JournalArticle' => 'JournalArticle', - 'LegalDocument' => 'Text', - 'Manuscript' => 'Text', - 'Map' => 'Image', - 'Patent' => 'Text', - 'Performance' => 'Audiovisual', - 'PersonalCommunication' => 'Text', - 'Post' => 'Text', - 'ProceedingsArticle' => 'ConferencePaper', - 'Proceedings' => 'ConferenceProceeding', - 'Report' => 'Report', - 'PeerReview' => 'PeerReview', - 'Software' => 'Software', - 'Sound' => 'Sound', - 'Standard' => 'Standard', - 'WebPage' => 'Text' + "Article" => "Preprint", + "Audiovisual" => "Audiovisual", + "Book" => "Book", + "BookChapter" => "BookChapter", + "Collection" => "Collection", + "Dataset" => "Dataset", + "Dissertation" => "Dissertation", + "Document" => "Text", + "Entry" => "Text", + "Event" => "Event", + "Figure" => "Image", + "Image" => "Image", + "JournalArticle" => "JournalArticle", + "LegalDocument" => "Text", + "Manuscript" => "Text", + "Map" => "Image", + "Patent" => "Text", + "Performance" => "Audiovisual", + "PersonalCommunication" => "Text", + "Post" => "Text", + "ProceedingsArticle" => "ConferencePaper", + "Proceedings" => "ConferenceProceeding", + "Report" => "Report", + "PeerReview" => "PeerReview", + "Software" => "Software", + "Sound" => "Sound", + "Standard" => "Standard", + "WebPage" => "Text", } RIS_TO_CM_TRANSLATIONS = { - 'ABST' => 'Text', - 'ADVS' => 'Text', - 'AGGR' => 'Text', - 'ANCIENT' => 'Text', - 'ART' => 'Text', - 'BILL' => 'Text', - 'BLOG' => 'Text', - 'BOOK' => 'Book', - 'CASE' => 'Text', - 'CHAP' => 'BookChapter', - 'CHART' => 'Text', - 'CLSWK' => 'Text', - 'CTLG' => 'Collection', - 'COMP' => 'Software', - 'DATA' => 'Dataset', - 'DBASE' => 'Database', - 'DICT' => 'Dictionary', - 'EBOOK' => 'Book', - 'ECHAP' => 'BookChapter', - 'EDBOOK' => 'Book', - 'EJOUR' => 'JournalArticle', - 'ELEC' => 'Text', - 'ENCYC' => 'Encyclopedia', - 'EQUA' => 'Equation', - 'FIGURE' => 'Image', - 'GEN' => 'CreativeWork', - 'GOVDOC' => 'GovernmentDocument', - 'GRANT' => 'Grant', - 'HEAR' => 'Hearing', - 'ICOMM' => 'Text', - 'INPR' => 'Text', - 'JFULL' => 'JournalArticle', - 'JOUR' => 'JournalArticle', - 'LEGAL' => 'LegalRuleOrRegulation', - 'MANSCPT' => 'Text', - 'MAP' => 'Map', - 'MGZN' => 'MagazineArticle', - 'MPCT' => 'Audiovisual', - 'MULTI' => 'Audiovisual', - 'MUSIC' => 'MusicScore', - 'NEWS' => 'NewspaperArticle', - 'PAMP' => 'Pamphlet', - 'PAT' => 'Patent', - 'PCOMM' => 'PersonalCommunication', - 'RPRT' => 'Report', - 'SER' => 'SerialPublication', - 'SLIDE' => 'Slide', - 'SOUND' => 'SoundRecording', - 'STAND' => 'Standard', - 'THES' => 'Dissertation', - 'UNBILL' => 'UnenactedBill', - 'UNPB' => 'UnpublishedWork', - 'VIDEO' => 'Audiovisual', - 'WEB' => 'WebPage' + "ABST" => "Text", + "ADVS" => "Text", + "AGGR" => "Text", + "ANCIENT" => "Text", + "ART" => "Text", + "BILL" => "Text", + "BLOG" => "Text", + "BOOK" => "Book", + "CASE" => "Text", + "CHAP" => "BookChapter", + "CHART" => "Text", + "CLSWK" => "Text", + "CTLG" => "Collection", + "COMP" => "Software", + "DATA" => "Dataset", + "DBASE" => "Database", + "DICT" => "Dictionary", + "EBOOK" => "Book", + "ECHAP" => "BookChapter", + "EDBOOK" => "Book", + "EJOUR" => "JournalArticle", + "ELEC" => "Text", + "ENCYC" => "Encyclopedia", + "EQUA" => "Equation", + "FIGURE" => "Image", + "GEN" => "CreativeWork", + "GOVDOC" => "GovernmentDocument", + "GRANT" => "Grant", + "HEAR" => "Hearing", + "ICOMM" => "Text", + "INPR" => "Text", + "JFULL" => "JournalArticle", + "JOUR" => "JournalArticle", + "LEGAL" => "LegalRuleOrRegulation", + "MANSCPT" => "Text", + "MAP" => "Map", + "MGZN" => "MagazineArticle", + "MPCT" => "Audiovisual", + "MULTI" => "Audiovisual", + "MUSIC" => "MusicScore", + "NEWS" => "NewspaperArticle", + "PAMP" => "Pamphlet", + "PAT" => "Patent", + "PCOMM" => "PersonalCommunication", + "RPRT" => "Report", + "SER" => "SerialPublication", + "SLIDE" => "Slide", + "SOUND" => "SoundRecording", + "STAND" => "Standard", + "THES" => "Dissertation", + "UNBILL" => "UnenactedBill", + "UNPB" => "UnpublishedWork", + "VIDEO" => "Audiovisual", + "WEB" => "WebPage", } CM_TO_RIS_TRANSLATIONS = { - 'Article' => 'JOUR', - 'Audiovisual' => 'VIDEO', - 'Book' => 'BOOK', - 'BookChapter' => 'CHAP', - 'Collection' => 'CTLG', - 'Dataset' => 'DATA', - 'Dissertation' => 'THES', - 'Document' => 'GEN', - 'Entry' => 'DICT', - 'Event' => 'GEN', - 'Figure' => 'FIGURE', - 'Image' => 'FIGURE', - 'JournalArticle' => 'JOUR', - 'LegalDocument' => 'GEN', - 'Manuscript' => 'GEN', - 'Map' => 'MAP', - 'Patent' => 'PAT', - 'Performance' => 'GEN', - 'PersonalCommunication' => 'PCOMM', - 'Post' => 'GEN', - 'ProceedingsArticle' => 'CPAPER', - 'Proceedings' => 'CONF', - 'Report' => 'RPRT', - 'Review' => 'GEN', - 'Software' => 'COMP', - 'Sound' => 'SOUND', - 'Standard' => 'STAND', - 'WebPage' => 'WEB' + "Article" => "JOUR", + "Audiovisual" => "VIDEO", + "Book" => "BOOK", + "BookChapter" => "CHAP", + "Collection" => "CTLG", + "Dataset" => "DATA", + "Dissertation" => "THES", + "Document" => "GEN", + "Entry" => "DICT", + "Event" => "GEN", + "Figure" => "FIGURE", + "Image" => "FIGURE", + "JournalArticle" => "JOUR", + "LegalDocument" => "GEN", + "Manuscript" => "GEN", + "Map" => "MAP", + "Patent" => "PAT", + "Performance" => "GEN", + "PersonalCommunication" => "PCOMM", + "Post" => "GEN", + "ProceedingsArticle" => "CPAPER", + "Proceedings" => "CONF", + "Report" => "RPRT", + "Review" => "GEN", + "Software" => "COMP", + "Sound" => "SOUND", + "Standard" => "STAND", + "WebPage" => "WEB", } SO_TO_CM_TRANSLATIONS = { - 'Article' => 'Article', - 'BlogPosting' => 'Article', - 'Book' => 'Book', - 'BookChapter' => 'BookChapter', - 'CreativeWork' => 'Other', - 'Dataset' => 'Dataset', - 'Dissertation' => 'Dissertation', - 'NewsArticle' => 'Article', - 'Legislation' => 'LegalDocument', - 'ScholarlyArticle' => 'JournalArticle', - 'SoftwareSourceCode' => 'Software' + "Article" => "Article", + "BlogPosting" => "Article", + "Book" => "Book", + "BookChapter" => "BookChapter", + "CreativeWork" => "Other", + "Dataset" => "Dataset", + "Dissertation" => "Dissertation", + "NewsArticle" => "Article", + "Legislation" => "LegalDocument", + "ScholarlyArticle" => "JournalArticle", + "SoftwareSourceCode" => "Software", } CM_TO_SO_TRANSLATIONS = { - 'Article' => 'Article', - 'Audiovisual' => 'CreativeWork', - 'Book' => 'Book', - 'BookChapter' => 'BookChapter', - 'Collection' => 'CreativeWork', - 'Dataset' => 'Dataset', - 'Dissertation' => 'Dissertation', - 'Document' => 'CreativeWork', - 'Entry' => 'CreativeWork', - 'Event' => 'CreativeWork', - 'Figure' => 'CreativeWork', - 'Image' => 'CreativeWork', - 'JournalArticle' => 'ScholarlyArticle', - 'LegalDocument' => 'Legislation', - 'Software' => 'SoftwareSourceCode' + "Article" => "Article", + "Audiovisual" => "CreativeWork", + "Book" => "Book", + "BookChapter" => "BookChapter", + "Collection" => "CreativeWork", + "Dataset" => "Dataset", + "Dissertation" => "Dissertation", + "Document" => "CreativeWork", + "Entry" => "CreativeWork", + "Event" => "CreativeWork", + "Figure" => "CreativeWork", + "Image" => "CreativeWork", + "JournalArticle" => "ScholarlyArticle", + "LegalDocument" => "Legislation", + "Software" => "SoftwareSourceCode", } CM_TO_JATS_TRANSLATIONS = { - 'Proceedings' => 'working-paper', - 'ReferenceBook' => 'book', - 'JournalIssue' => 'journal', - 'ProceedingsArticle' => 'working-paper', - 'Other' => nil, - 'Dissertation' => nil, - 'Dataset' => 'data', - 'Document' => 'journal', - 'EditedBook' => 'book', - 'JournalArticle' => 'journal', - 'Journal' => 'journal', - 'Report' => 'report', - 'BookSeries' => 'book', - 'ReportSeries' => 'report', - 'BookTrack' => 'book', - 'Standard' => 'standard', - 'BookSection' => 'chapter', - 'BookPart' => 'chapter', - 'Book' => 'book', - 'BookChapter' => 'chapter', - 'StandardSeries' => 'standard', - 'Monograph' => 'book', - 'Component' => nil, - 'ReferenceEntry' => nil, - 'JournalVolume' => 'journal', - 'BookSet' => 'book', - 'Article' => 'journal', - 'Software' => 'software' + "Proceedings" => "working-paper", + "ReferenceBook" => "book", + "JournalIssue" => "journal", + "ProceedingsArticle" => "working-paper", + "Other" => nil, + "Dissertation" => nil, + "Dataset" => "data", + "Document" => "journal", + "EditedBook" => "book", + "JournalArticle" => "journal", + "Journal" => "journal", + "Report" => "report", + "BookSeries" => "book", + "ReportSeries" => "report", + "BookTrack" => "book", + "Standard" => "standard", + "BookSection" => "chapter", + "BookPart" => "chapter", + "Book" => "book", + "BookChapter" => "chapter", + "StandardSeries" => "standard", + "Monograph" => "book", + "Component" => nil, + "ReferenceEntry" => nil, + "JournalVolume" => "journal", + "BookSet" => "book", + "Article" => "journal", + "Software" => "software", } UNKNOWN_INFORMATION = { - ':unac' => 'temporarily inaccessible', - ':unal' => 'unallowed, suppressed intentionally', - ':unap' => 'not applicable, makes no sense', - ':unas' => 'value unassigned (e.g., Untitled)', - ':unav' => 'value unavailable, possibly unknown', - ':unkn' => 'known to be unknown (e.g., Anonymous, Inconnue)', - ':none' => 'never had a value, never will', - ':null' => 'explicitly and meaningfully empty', - ':tba' => 'to be assigned or announced later', - ':etal' => 'too numerous to list (et alia)' + ":unac" => "temporarily inaccessible", + ":unal" => "unallowed, suppressed intentionally", + ":unap" => "not applicable, makes no sense", + ":unas" => "value unassigned (e.g., Untitled)", + ":unav" => "value unavailable, possibly unknown", + ":unkn" => "known to be unknown (e.g., Anonymous, Inconnue)", + ":none" => "never had a value, never will", + ":null" => "explicitly and meaningfully empty", + ":tba" => "to be assigned or announced later", + ":etal" => "too numerous to list (et alia)", } def find_from_format(id: nil, string: nil, ext: nil, filename: nil) if id.present? find_from_format_by_id(id) @@ -446,87 +446,87 @@ elsif string.present? find_from_format_by_string(string) elsif filename.present? find_from_format_by_filename(filename) else - 'datacite' + "datacite" end end def find_from_format_by_id(id) id = normalize_id(id) if %r{\A(?:(http|https):/(/)?(dx\.)?(doi\.org|handle\.stage\.datacite\.org)/)?(doi:)?(10\.\d{4,5}/.+)\z}.match?(id) ra = get_doi_ra(id) %w[DataCite Crossref mEDRA KISTI JaLC OP].include?(ra) ? ra.downcase : nil elsif %r{\A(?:(http|https):/(/)?orcid\.org/)?(\d{4}-\d{4}-\d{4}-\d{3}[0-9X]+)\z}.match?(id) - 'orcid' + "orcid" elsif %r{\A(http|https):/(/)?github\.com/(.+)/package.json\z}.match?(id) - 'npm' + "npm" elsif %r{\A(http|https):/(/)?github\.com/(.+)/codemeta.json\z}.match?(id) - 'codemeta' + "codemeta" elsif %r{\A(http|https):/(/)?github\.com/(.+)/CITATION.cff\z}.match?(id) - 'cff' + "cff" elsif %r{\A(http|https):/(/)?github\.com/(.+)\z}.match?(id) - 'cff' + "cff" else - 'schema_org' + "schema_org" end end def find_from_format_by_filename(filename) - if filename == 'package.json' - 'npm' - elsif filename == 'CITATION.cff' - 'cff' + if filename == "package.json" + "npm" + elsif filename == "CITATION.cff" + "cff" end end def find_from_format_by_ext(string, options = {}) case options[:ext] - when '.bib' - 'bibtex' - when '.ris' - 'ris' - when '.xml', '.json' + when ".bib" + "bibtex" + when ".ris" + "ris" + when ".xml", ".json" find_from_format_by_string(string) end end def find_from_format_by_string(string) begin # try to parse as JSON hsh = MultiJson.load(string).to_h - if hsh.dig('@context') && URI.parse(hsh.dig('@context')).host == 'schema.org' - return 'schema_org' - elsif hsh.dig('schemaVersion').to_s.start_with?('http://datacite.org/schema/kernel') - return 'datacite' - elsif hsh.dig('source') == 'Crossref' - return 'crossref' - elsif hsh.dig('issued', 'date-parts').present? - return 'csl' - elsif URI.parse(hsh.dig('@context')).to_s == 'https://raw.githubusercontent.com/codemeta/codemeta/master/codemeta.jsonld' - return 'codemeta' + if hsh.dig("@context") && URI.parse(hsh.dig("@context")).host == "schema.org" + return "schema_org" + elsif hsh.dig("schemaVersion").to_s.start_with?("http://datacite.org/schema/kernel") + return "datacite" + elsif hsh.dig("source") == "Crossref" + return "crossref" + elsif hsh.dig("issued", "date-parts").present? + return "csl" + elsif URI.parse(hsh.dig("@context")).to_s == "https://raw.githubusercontent.com/codemeta/codemeta/master/codemeta.jsonld" + return "codemeta" end rescue MultiJson::ParseError end begin # try to parse as XML hsh = Hash.from_xml(string) - return 'crossref_xml' if hsh.to_h.dig('crossref_result').present? + return "crossref_xml" if hsh.to_h.dig("crossref_result").present? rescue Nokogiri::XML::SyntaxError end begin # try to parse as YAML hsh = YAML.load(string, permitted_classes: [Date]) - return 'cff' if hsh.is_a?(Hash) && hsh.fetch('cff-version', nil).present? + return "cff" if hsh.is_a?(Hash) && hsh.fetch("cff-version", nil).present? rescue Psych::SyntaxError end - if string.start_with?('TY - ') - 'ris' + if string.start_with?("TY - ") + "ris" elsif BibTeX.parse(string).first - 'bibtex' + "bibtex" end end def orcid_from_url(url) Array(%r{\A:(http|https)://orcid\.org/(.+)}.match(url)).last @@ -536,29 +536,29 @@ "https://orcid.org/#{orcid}" if orcid.present? end def validate_orcid(orcid) orcid = Array(%r{\A(?:(?:http|https)://(?:(?:www|sandbox)?\.)?orcid\.org/)?(\d{4}[[:space:]-]\d{4}[[:space:]-]\d{4}[[:space:]-]\d{3}[0-9X]+)\z}.match(orcid)).last - orcid.gsub(/[[:space:]]/, '-') if orcid.present? + orcid.gsub(/[[:space:]]/, "-") if orcid.present? end def validate_orcid_scheme(orcid_scheme) Array(%r{\A(http|https)://(www\.)?(orcid\.org)}.match(orcid_scheme)).last end def validate_url(str) if %r{\A(?:(http|https)://(dx\.)?doi.org/)?(doi:)?(10\.\d{4,5}/.+)\z}.match?(str) - 'DOI' + "DOI" elsif %r{\A(http|https)://}.match?(str) - 'URL' + "URL" elsif /\A(ISSN|eISSN) (\d{4}-\d{3}[0-9X]+)\z/.match?(str) - 'ISSN' + "ISSN" end end def parse_attributes(element, options = {}) - content = options[:content] || '__content__' + content = options[:content] || "__content__" if element.is_a?(String) && options[:content].nil? CGI.unescapeHTML(element) elsif element.is_a?(Hash) element.fetch(CGI.unescapeHTML(content), nil) @@ -587,19 +587,19 @@ def normalize_url(id, options = {}) return nil unless id.present? # handle info URIs - return id if id.to_s.start_with?('info') + return id if id.to_s.start_with?("info") # check for valid HTTP uri uri = Addressable::URI.parse(id) return nil unless uri && uri.host && %w[http https ftp].include?(uri.scheme) # optionally turn into https URL - uri.scheme = 'https' if options[:https] + uri.scheme = "https" if options[:https] # clean up URL uri.path = PostRank::URI.clean(uri.path) uri.to_s @@ -615,65 +615,65 @@ def normalize_orcid(orcid) orcid = validate_orcid(orcid) return nil unless orcid.present? # turn ORCID ID into URL - 'https://orcid.org/' + Addressable::URI.encode(orcid) + "https://orcid.org/" + Addressable::URI.encode(orcid) end # pick electronic issn if there are multiple # format issn as xxxx-xxxx def normalize_issn(input, options = {}) - content = options[:content] || '__content__' + content = options[:content] || "__content__" issn = if input.blank? - nil - elsif input.is_a?(String) && options[:content].nil? - input - elsif input.is_a?(Hash) - input.fetch(content, nil) - elsif input.is_a?(Array) - a = input.find { |a| a['media_type'] == 'electronic' } || input.first - a.fetch(content, nil) - end + nil + elsif input.is_a?(String) && options[:content].nil? + input + elsif input.is_a?(Hash) + input.fetch(content, nil) + elsif input.is_a?(Array) + a = input.find { |a| a["media_type"] == "electronic" } || input.first + a.fetch(content, nil) + end case issn.to_s.length when 9 issn when 8 - issn[0..3] + '-' + issn[4..7] + issn[0..3] + "-" + issn[4..7] end end # find Creative Commons or OSI license in licenses array, normalize url and name def normalize_licenses(licenses) standard_licenses = Array.wrap(licenses).map do |l| - URI.parse(l['url']) + URI.parse(l["url"]) end.select { |li| li.host && li.host[/(creativecommons.org|opensource.org)$/] } return licenses unless standard_licenses.present? # use HTTPS - uri.scheme = 'https' + uri.scheme = "https" # use host name without subdomain uri.host = Array(/(creativecommons.org|opensource.org)/.match uri.host).last # normalize URLs - if uri.host == 'creativecommons.org' - uri.path = uri.path.split('/')[0..-2].join('/') if uri.path.split('/').last == 'legalcode' - uri.path << '/' unless uri.path.end_with?('/') + if uri.host == "creativecommons.org" + uri.path = uri.path.split("/")[0..-2].join("/") if uri.path.split("/").last == "legalcode" + uri.path << "/" unless uri.path.end_with?("/") else - uri.path = uri.path.gsub(/(-license|\.php|\.html)/, '') + uri.path = uri.path.gsub(/(-license|\.php|\.html)/, "") uri.path = uri.path.sub(/(mit|afl|apl|osl|gpl|ecl)/) { |match| match.upcase } uri.path = uri.path.sub(/(artistic|apache)/) { |match| match.titleize } uri.path = uri.path.sub(/([^0-9-]+)(-)?([1-9])?(\.)?([0-9])?$/) do m = Regexp.last_match text = m[1] if m[3].present? - version = [m[3], m[5].presence || '0'].join('.') - [text, version].join('-') + version = [m[3], m[5].presence || "0"].join(".") + [text, version].join("-") else text end end end @@ -691,171 +691,171 @@ end options[:first] ? a.unwrap : a.presence end def from_datacite(element) - mapping = { 'nameType' => 'type', 'creatorName' => 'name' } + mapping = { "nameType" => "type", "creatorName" => "name" } map_hash_keys(element: element, mapping: mapping) end def to_schema_org(element) - mapping = { 'type' => '@type', 'id' => '@id', 'title' => 'name' } + mapping = { "type" => "@type", "id" => "@id", "title" => "name" } map_hash_keys(element: element, mapping: mapping) end def to_schema_org_container(element, options = {}) return nil unless element.is_a?(Hash) || (element.nil? && options[:container_title].present?) - issn = element['identifier'] if element['identifierType'] == 'ISSN' - id = issn.blank? ? element['identifier'] : nil - name = options[:container_title] || element['title'] - type = id || name ? options[:type] || element['type'] : nil + issn = element["identifier"] if element["identifierType"] == "ISSN" + id = issn.blank? ? element["identifier"] : nil + name = options[:container_title] || element["title"] + type = id || name ? options[:type] || element["type"] : nil - { '@id' => id, '@type' => type, 'name' => name, 'issn' => issn }.compact + { "@id" => id, "@type" => type, "name" => name, "issn" => issn }.compact end def to_schema_org_identifiers(element, _options = {}) Array.wrap(element).map do |ai| { - '@type' => 'PropertyValue', - 'propertyID' => ai['alternateIdentifierType'], - 'value' => ai['alternateIdentifier'] + "@type" => "PropertyValue", + "propertyID" => ai["alternateIdentifierType"], + "value" => ai["alternateIdentifier"], } end.unwrap end def to_schema_org_relation(related_identifiers: nil, relation_type: nil) return nil unless related_identifiers.present? && relation_type.present? - relation_type = if relation_type == 'References' - %w[References Cites - Documents] - else - [relation_type] - end + relation_type = if relation_type == "References" + %w[References Cites + Documents] + else + [relation_type] + end Array.wrap(related_identifiers).select do |ri| - relation_type.include?(ri['relationType']) + relation_type.include?(ri["relationType"]) end.map do |r| - if r['relatedIdentifierType'] == 'ISSN' && r['relationType'] == 'IsPartOf' - { '@type' => 'Periodical', 'issn' => r['relatedIdentifier'] }.compact + if r["relatedIdentifierType"] == "ISSN" && r["relationType"] == "IsPartOf" + { "@type" => "Periodical", "issn" => r["relatedIdentifier"] }.compact else { - '@id' => normalize_id(r['relatedIdentifier']), - '@type' => DC_TO_SO_TRANSLATIONS[r['resourceTypeGeneral']] || 'CreativeWork' + "@id" => normalize_id(r["relatedIdentifier"]), + "@type" => DC_TO_SO_TRANSLATIONS[r["resourceTypeGeneral"]] || "CreativeWork", }.compact end end.unwrap end def to_schema_org_funder(funding_references) return nil unless funding_references.present? Array.wrap(funding_references).map do |fr| { - '@id' => fr['funderIdentifier'], - '@type' => 'Organization', - 'name' => fr['funderName'] + "@id" => fr["funderIdentifier"], + "@type" => "Organization", + "name" => fr["funderName"], }.compact end.unwrap end def to_schema_org_citation(reference) return nil unless reference.present? { - '@type' => 'CreativeWork', - '@id' => reference['doi'] ? normalize_id(reference['doi']) : nil, - 'name' => reference['title'], - 'datePublished' => reference['publicationYear'] + "@type" => "CreativeWork", + "@id" => reference["doi"] ? normalize_id(reference["doi"]) : nil, + "name" => reference["title"], + "datePublished" => reference["publicationYear"], }.compact end def to_schema_org_spatial_coverage(geo_location) return nil unless geo_location.present? Array.wrap(geo_location).each_with_object([]) do |gl, sum| - if gl.fetch('geoLocationPoint', nil) + if gl.fetch("geoLocationPoint", nil) sum << { - '@type' => 'Place', - 'geo' => { - '@type' => 'GeoCoordinates', - 'address' => gl['geoLocationPlace'], - 'latitude' => gl.dig('geoLocationPoint', 'pointLatitude'), - 'longitude' => gl.dig('geoLocationPoint', 'pointLongitude') - } + "@type" => "Place", + "geo" => { + "@type" => "GeoCoordinates", + "address" => gl["geoLocationPlace"], + "latitude" => gl.dig("geoLocationPoint", "pointLatitude"), + "longitude" => gl.dig("geoLocationPoint", "pointLongitude"), + }, }.compact end - if gl.fetch('geoLocationBox', nil) + if gl.fetch("geoLocationBox", nil) sum << { - '@type' => 'Place', - 'geo' => { - '@type' => 'GeoShape', - 'address' => gl['geoLocationPlace'], - 'box' => [gl.dig('geoLocationBox', 'southBoundLatitude'), - gl.dig('geoLocationBox', 'westBoundLongitude'), - gl.dig('geoLocationBox', 'northBoundLatitude'), - gl.dig('geoLocationBox', 'eastBoundLongitude')].compact.join(' ').presence - }.compact + "@type" => "Place", + "geo" => { + "@type" => "GeoShape", + "address" => gl["geoLocationPlace"], + "box" => [gl.dig("geoLocationBox", "southBoundLatitude"), + gl.dig("geoLocationBox", "westBoundLongitude"), + gl.dig("geoLocationBox", "northBoundLatitude"), + gl.dig("geoLocationBox", "eastBoundLongitude")].compact.join(" ").presence, + }.compact, }.compact end - if gl.fetch('geoLocationPolygon', nil) + if gl.fetch("geoLocationPolygon", nil) sum << { - '@type' => 'Place', - 'geo' => { - '@type' => 'GeoShape', - 'address' => gl['geoLocationPlace'], - 'polygon' => Array.wrap(gl.dig('geoLocationPolygon')).map do |glp| + "@type" => "Place", + "geo" => { + "@type" => "GeoShape", + "address" => gl["geoLocationPlace"], + "polygon" => Array.wrap(gl.dig("geoLocationPolygon")).map do |glp| Array.wrap(glp).map do |glpp| - [glpp.dig('polygonPoint', 'pointLongitude'), - glpp.dig('polygonPoint', 'pointLatitude')].compact + [glpp.dig("polygonPoint", "pointLongitude"), + glpp.dig("polygonPoint", "pointLatitude")].compact end.compact - end.compact.presence - } + end.compact.presence, + }, } end - next unless gl.fetch('geoLocationPlace', - nil) && !gl.fetch('geoLocationPoint', - nil) && !gl.fetch('geoLocationBox', + next unless gl.fetch("geoLocationPlace", + nil) && !gl.fetch("geoLocationPoint", + nil) && !gl.fetch("geoLocationBox", nil) && !gl.fetch( - 'geoLocationPolygon', nil - ) + "geoLocationPolygon", nil + ) sum << { - '@type' => 'Place', - 'geo' => { - '@type' => 'GeoCoordinates', - 'address' => gl['geoLocationPlace'] - } + "@type" => "Place", + "geo" => { + "@type" => "GeoCoordinates", + "address" => gl["geoLocationPlace"], + }, }.compact end.unwrap end def from_schema_org(element) - mapping = { '@type' => 'type', '@id' => 'id' } + mapping = { "@type" => "type", "@id" => "id" } map_hash_keys(element: element, mapping: mapping) end def map_hash_keys(element: nil, mapping: nil) Array.wrap(element).map do |a| a.map { |k, v| [mapping.fetch(k, k), v] }.reduce({}) do |hsh, (k, v)| - if k == 'affiliation' && v.is_a?(Array) + if k == "affiliation" && v.is_a?(Array) hsh[k] = v.map do |affiliation| if affiliation.is_a?(Hash) - affiliation.merge('@type' => 'Organization') + affiliation.merge("@type" => "Organization") else affiliation end end hsh - elsif k == 'type' && v.is_a?(String) + elsif k == "type" && v.is_a?(String) hsh[k] = v.capitalize hsh elsif v.is_a?(Hash) hsh[k] = to_schema_org(v) hsh @@ -867,55 +867,55 @@ end.unwrap end def to_identifier(identifier) { - '@type' => 'PropertyValue', - 'propertyID' => identifier['relatedIdentifierType'], - 'value' => identifier['relatedIdentifier'] + "@type" => "PropertyValue", + "propertyID" => identifier["relatedIdentifierType"], + "value" => identifier["relatedIdentifier"], } end def from_csl(element) Array.wrap(element).map do |a| - if a['literal'].present? - a['type'] = 'Organization' - a['name'] = a['literal'] - elsif a['name'].present? - a['type'] = 'Organization' - elsif a['given'].present? || a['family'].present? - a['type'] = 'Person' + if a["literal"].present? + a["type"] = "Organization" + a["name"] = a["literal"] + elsif a["name"].present? + a["type"] = "Organization" + elsif a["given"].present? || a["family"].present? + a["type"] = "Person" end - a['givenName'] = a['given'] - a['familyName'] = a['family'] - a.except('given', 'family', 'literal').compact + a["givenName"] = a["given"] + a["familyName"] = a["family"] + a.except("given", "family", "literal").compact end.unwrap end def to_csl(element) Array.wrap(element).map do |a| - a['family'] = a['familyName'] - a['given'] = a['givenName'] - a['literal'] = a['name'] unless a['familyName'].present? - a.except('nameType', 'type', '@type', 'id', '@id', 'name', 'familyName', 'givenName', - 'affiliation', 'contributorType').compact + a["family"] = a["familyName"] + a["given"] = a["givenName"] + a["literal"] = a["name"] unless a["familyName"].present? + a.except("nameType", "type", "@type", "id", "@id", "name", "familyName", "givenName", + "affiliation", "contributorType").compact end.presence end def to_ris(element) Array.wrap(element).map do |a| - if a['familyName'].present? - [a['familyName'], a['givenName']].join(', ') + if a["familyName"].present? + [a["familyName"], a["givenName"]].join(", ") else - a['name'] + a["name"] end end.unwrap end def sanitize(text, options = {}) options[:tags] ||= Set.new(%w[strong em b i code pre sub sup br]) - content = options[:content] || '__content__' + content = options[:content] || "__content__" custom_scrubber = Commonmeta::WhitelistScrubber.new(options) if text.is_a?(String) # remove excessive internal whitespace with squish Loofah.scrub_fragment(text, custom_scrubber).to_s.squish @@ -928,12 +928,12 @@ end def github_from_url(url) return {} unless %r{\Ahttps://github\.com/(.+)(?:/)?(.+)?(?:/tree/)?(.*)\z}.match?(url) - words = URI.parse(url).path[1..-1].split('/') - path = words.length > 3 ? words[4...words.length].join('/') : nil + words = URI.parse(url).path[1..-1].split("/") + path = words.length > 3 ? words[4...words.length].join("/") : nil { owner: words[0], repo: words[1], release: words[3], path: path }.compact end def github_repo_from_url(url) @@ -968,40 +968,40 @@ end def github_as_codemeta_url(url) github_hash = github_from_url(url) - if github_hash[:path].to_s.end_with?('codemeta.json') + if github_hash[:path].to_s.end_with?("codemeta.json") "https://raw.githubusercontent.com/#{github_hash[:owner]}/#{github_hash[:repo]}/#{github_hash[:release]}/#{github_hash[:path]}" elsif github_hash[:owner].present? "https://raw.githubusercontent.com/#{github_hash[:owner]}/#{github_hash[:repo]}/master/codemeta.json" end end def github_as_cff_url(url) github_hash = github_from_url(url) - if github_hash[:path].to_s.end_with?('CITATION.cff') + if github_hash[:path].to_s.end_with?("CITATION.cff") "https://raw.githubusercontent.com/#{github_hash[:owner]}/#{github_hash[:repo]}/#{github_hash[:release]}/#{github_hash[:path]}" elsif github_hash[:owner].present? "https://raw.githubusercontent.com/#{github_hash[:owner]}/#{github_hash[:repo]}/main/CITATION.cff" end end def get_date_parts(iso8601_time) - return { 'date-parts' => [[]] } if iso8601_time.nil? + return { "date-parts" => [[]] } if iso8601_time.nil? year = iso8601_time[0..3].to_i month = iso8601_time[5..6].to_i day = iso8601_time[8..9].to_i - { 'date-parts' => [[year, month, day].reject { |part| part == 0 }] } + { "date-parts" => [[year, month, day].reject { |part| part == 0 }] } rescue TypeError nil end def get_date_from_date_parts(date_as_parts) - date_parts = date_as_parts.fetch('date-parts', []).first + date_parts = date_as_parts.fetch("date-parts", []).first return nil if date_parts == [nil] year = date_parts[0] month = date_parts[1] day = date_parts[2] @@ -1009,17 +1009,17 @@ rescue NoMethodError # if date_parts is nil nil end def get_date_from_parts(year, month = nil, day = nil) - [year.to_s.rjust(4, '0'), month.to_s.rjust(2, '0'), day.to_s.rjust(2, '0')].reject do |part| - part == '00' - end.join('-') + [year.to_s.rjust(4, "0"), month.to_s.rjust(2, "0"), day.to_s.rjust(2, "0")].reject do |part| + part == "00" + end.join("-") end def get_date_parts_from_parts(year, month = nil, day = nil) - { 'date-parts' => [[year.to_i, month.to_i, day.to_i].reject { |part| part == 0 }] } + { "date-parts" => [[year.to_i, month.to_i, day.to_i].reject { |part| part == 0 }] } end def get_iso8601_date(iso8601_time) return nil if iso8601_time.nil? || iso8601_time.length < 4 @@ -1062,293 +1062,304 @@ end # strip milliseconds if there is a time, as it interferes with edtc parsing # keep dates unchanged def strip_milliseconds(iso8601_time) - return iso8601_time.split(' ').first if iso8601_time.to_s.include? ' ' + return iso8601_time.split(" ").first if iso8601_time.to_s.include? " " - return iso8601_time.split('.').first + 'Z' if iso8601_time.to_s.include? '.' + return iso8601_time.split(".").first + "Z" if iso8601_time.to_s.include? "." iso8601_time end # iso8601 datetime without hyphens and colons, used by Crossref # return nil if invalid def get_datetime_from_time(time) - DateTime.strptime(time.to_s, '%Y%m%d%H%M%S').strftime('%Y-%m-%dT%H:%M:%SZ') + DateTime.strptime(time.to_s, "%Y%m%d%H%M%S").strftime("%Y-%m-%dT%H:%M:%SZ") rescue ArgumentError nil end def get_date(dates, date_type) - dd = Array.wrap(dates).find { |d| d['dateType'] == date_type } || {} - dd.fetch('date', nil) + dd = Array.wrap(dates).find { |d| d["dateType"] == date_type } || {} + dd.fetch("date", nil) end # convert commonmeta dates to DataCite format def get_dates_from_date(date) return nil if date.nil? - mapping = { 'published' => 'issued' } + mapping = { "published" => "issued" } date = map_hash_keys(element: date, mapping: mapping) date.map do |k, v| - { 'date' => v, - 'dateType' => k.capitalize } + { "date" => v, + "dateType" => k.capitalize } end end def get_contributor(contributor, contributor_type) - contributor.select { |c| c['contributorType'] == contributor_type } + contributor.select { |c| c["contributorType"] == contributor_type } end def get_identifier(identifiers, identifier_type) - id = Array.wrap(identifiers).find { |i| i['identifierType'] == identifier_type } || {} - id.fetch('identifier', nil) + id = Array.wrap(identifiers).find { |i| i["identifierType"] == identifier_type } || {} + id.fetch("identifier", nil) end def get_identifier_type(identifier_type) return nil unless identifier_type.present? identifierTypes = { - 'ark' => 'ARK', - 'arxiv' => 'arXiv', - 'bibcode' => 'bibcode', - 'doi' => 'DOI', - 'ean13' => 'EAN13', - 'eissn' => 'EISSN', - 'handle' => 'Handle', - 'igsn' => 'IGSN', - 'isbn' => 'ISBN', - 'issn' => 'ISSN', - 'istc' => 'ISTC', - 'lissn' => 'LISSN', - 'lsid' => 'LSID', - 'pmid' => 'PMID', - 'purl' => 'PURL', - 'upc' => 'UPC', - 'url' => 'URL', - 'urn' => 'URN', - 'md5' => 'md5', - 'minid' => 'minid', - 'dataguid' => 'dataguid' + "ark" => "ARK", + "arxiv" => "arXiv", + "bibcode" => "bibcode", + "doi" => "DOI", + "ean13" => "EAN13", + "eissn" => "EISSN", + "handle" => "Handle", + "igsn" => "IGSN", + "isbn" => "ISBN", + "issn" => "ISSN", + "istc" => "ISTC", + "lissn" => "LISSN", + "lsid" => "LSID", + "pmid" => "PMID", + "purl" => "PURL", + "upc" => "UPC", + "url" => "URL", + "urn" => "URN", + "md5" => "md5", + "minid" => "minid", + "dataguid" => "dataguid", } identifierTypes[identifier_type.downcase] || identifier_type end def get_series_information(str) return {} unless str.present? - str = str.split(',').map(&:strip) + str = str.split(",").map(&:strip) title = str.first volume_issue = str.length > 2 ? str[1].rpartition(/\(([^)]+)\)/) : nil volume = volume_issue.present? ? volume_issue[0].presence || volume_issue[2].presence : nil issue = volume_issue.present? ? volume_issue[1][1...-1].presence : nil pages = str.length > 1 ? str.last : nil - first_page = pages.present? ? pages.split('-').map(&:strip)[0] : nil - last_page = pages.present? ? pages.split('-').map(&:strip)[1] : nil + first_page = pages.present? ? pages.split("-").map(&:strip)[0] : nil + last_page = pages.present? ? pages.split("-").map(&:strip)[1] : nil { - 'title' => title, - 'volume' => volume, - 'issue' => issue, - 'firstPage' => first_page, - 'lastPage' => last_page + "title" => title, + "volume" => volume, + "issue" => issue, + "firstPage" => first_page, + "lastPage" => last_page, }.compact end def jsonlint(json) - return ['No JSON provided'] unless json.present? + return ["No JSON provided"] unless json.present? error_array = [] linter = JsonLint::Linter.new linter.send(:check_data, json, error_array) error_array end def name_to_spdx(name) - spdx = JSON.load(File.read(File.expand_path('../../resources/spdx/licenses.json', - __dir__))).fetch('licenses') + spdx = JSON.load(File.read(File.expand_path("../../resources/spdx/licenses.json", + __dir__))).fetch("licenses") license = spdx.find do |l| - l['name'] == name || l['licenseId'] == name || l['seeAlso'].first == normalize_cc_url(name) + l["name"] == name || l["licenseId"] == name || l["seeAlso"].first == normalize_cc_url(name) end if license - { 'id' => license['licenseId'], 'url' => license['seeAlso'].first }.compact + { "id" => license["licenseId"], "url" => license["seeAlso"].first }.compact else - { 'rights' => name } + { "rights" => name } end end def hsh_to_spdx(hsh) - spdx = JSON.load(File.read(File.expand_path('../../resources/spdx/licenses.json', - __dir__))).fetch('licenses') - hsh['rightsUri'] = hsh.delete('rightsURI') if hsh['rightsUri'].blank? + spdx = JSON.load(File.read(File.expand_path("../../resources/spdx/licenses.json", + __dir__))).fetch("licenses") + hsh["rightsUri"] = hsh.delete("rightsURI") if hsh["rightsUri"].blank? license = spdx.find do |l| - l['licenseId'].casecmp?(hsh['rightsIdentifier']) || l['seeAlso'].first == normalize_cc_url(hsh['rightsUri']) || l['name'] == hsh['rights'] || l['seeAlso'].first == normalize_cc_url(hsh['rights']) + l["licenseId"].casecmp?(hsh["rightsIdentifier"]) || l["seeAlso"].first == normalize_cc_url(hsh["rightsUri"]) || l["name"] == hsh["rights"] || l["seeAlso"].first == normalize_cc_url(hsh["rights"]) end if license - { 'id' => license['licenseId'], 'url' => license['seeAlso'].first }.compact + { "id" => license["licenseId"], "url" => license["seeAlso"].first }.compact else { - 'id' => hsh['rightsIdentifier'].present? ? hsh['rightsIdentifier'].downcase : nil, - 'url' => hsh['rightsURI'] || hsh['rightsUri'] + "id" => hsh["rightsIdentifier"].present? ? hsh["rightsIdentifier"].downcase : nil, + "url" => hsh["rightsURI"] || hsh["rightsUri"], }.compact end end def spdx_to_hsh(hsh) return nil unless hsh.present? && hsh.is_a?(Hash) - spdx = JSON.load(File.read(File.expand_path('../../resources/spdx/licenses.json', - __dir__))).fetch('licenses') + spdx = JSON.load(File.read(File.expand_path("../../resources/spdx/licenses.json", + __dir__))).fetch("licenses") - license = spdx.find { |l| l['licenseId'].casecmp?(hsh['id']) } + license = spdx.find { |l| l["licenseId"].casecmp?(hsh["id"]) } if license [{ - 'rightsIdentifier' => license['licenseId'].downcase, - 'rightsUri' => license['seeAlso'].first, - 'rights' => license['name'], - 'rightsIdentifierScheme' => 'SPDX', - 'schemeUri' => 'https://spdx.org/licenses/' + "rightsIdentifier" => license["licenseId"].downcase, + "rightsUri" => license["seeAlso"].first, + "rights" => license["name"], + "rightsIdentifierScheme" => "SPDX", + "schemeUri" => "https://spdx.org/licenses/", }.compact] else - [{ 'rightsIdentifier' => hsh['id'], 'rightsURI' => hsh['url'] }.compact] + [{ "rightsIdentifier" => hsh["id"], "rightsURI" => hsh["url"] }.compact] end end def name_to_fos(name) # first find subject in Fields of Science (OECD) - fos = JSON.load(File.read(File.expand_path('../../resources/oecd/fos-mappings.json', - __dir__))).fetch('fosFields') + fos = JSON.load(File.read(File.expand_path("../../resources/oecd/fos-mappings.json", + __dir__))).fetch("fosFields") - subject = fos.find { |l| l['fosLabel'] == name || 'FOS: ' + l['fosLabel'] == name } + subject = fos.find { |l| l["fosLabel"] == name || "FOS: " + l["fosLabel"] == name } if subject return [{ - 'subject' => sanitize(name).downcase - }, + "subject" => sanitize(name).downcase, + }, { - 'subject' => 'FOS: ' + subject['fosLabel'], - 'subjectScheme' => 'Fields of Science and Technology (FOS)', - 'schemeUri' => 'http://www.oecd.org/science/inno/38235147.pdf' - }] + "subject" => "FOS: " + subject["fosLabel"], + "subjectScheme" => "Fields of Science and Technology (FOS)", + "schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf", + }] end # if not found, look in Fields of Research (Australian and New Zealand Standard Research Classification) # and map to Fields of Science. Add an extra entry for the latter - fores = JSON.load(File.read(File.expand_path('../../resources/oecd/for-mappings.json', + fores = JSON.load(File.read(File.expand_path("../../resources/oecd/for-mappings.json", __dir__))) - for_fields = fores.fetch('forFields') - for_disciplines = fores.fetch('forDisciplines') + for_fields = fores.fetch("forFields") + for_disciplines = fores.fetch("forDisciplines") - subject = for_fields.find { |l| l['forLabel'] == name } || - for_disciplines.find { |l| l['forLabel'] == name } + subject = for_fields.find { |l| l["forLabel"] == name } || + for_disciplines.find { |l| l["forLabel"] == name } if subject [{ - 'subject' => sanitize(name).downcase + "subject" => sanitize(name).downcase, }, { - 'subject' => 'FOS: ' + subject['fosLabel'], - 'subjectScheme' => 'Fields of Science and Technology (FOS)', - 'schemeUri' => 'http://www.oecd.org/science/inno/38235147.pdf' - }] + "subject" => "FOS: " + subject["fosLabel"], + "subjectScheme" => "Fields of Science and Technology (FOS)", + "schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf", + }] else - [{ 'subject' => sanitize(name).downcase }] + [{ "subject" => sanitize(name).downcase }] end end def hsh_to_fos(hsh) # first find subject in Fields of Science (OECD) - fos = JSON.load(File.read(File.expand_path('../../resources/oecd/fos-mappings.json', - __dir__))).fetch('fosFields') + fos = JSON.load(File.read(File.expand_path("../../resources/oecd/fos-mappings.json", + __dir__))).fetch("fosFields") subject = fos.find do |l| - l['fosLabel'] == hsh['__content__'] || 'FOS: ' + l['fosLabel'] == hsh['__content__'] || l['fosLabel'] == hsh['subject'] + l["fosLabel"] == hsh["__content__"] || "FOS: " + l["fosLabel"] == hsh["__content__"] || l["fosLabel"] == hsh["subject"] end if subject return [{ - 'subject' => sanitize(hsh['__content__'] || hsh['subject']), - 'subjectScheme' => hsh['subjectScheme'], - 'schemeUri' => hsh['schemeURI'] || hsh['schemeUri'], - 'valueUri' => hsh['valueURI'] || hsh['valueUri'], - 'classificationCode' => hsh['classificationCode'], - 'lang' => hsh['lang'] - }.compact, + "subject" => sanitize(hsh["__content__"] || hsh["subject"]), + "subjectScheme" => hsh["subjectScheme"], + "schemeUri" => hsh["schemeURI"] || hsh["schemeUri"], + "valueUri" => hsh["valueURI"] || hsh["valueUri"], + "classificationCode" => hsh["classificationCode"], + "lang" => hsh["lang"], + }.compact, { - 'subject' => 'FOS: ' + subject['fosLabel'], - 'subjectScheme' => 'Fields of Science and Technology (FOS)', - 'schemeUri' => 'http://www.oecd.org/science/inno/38235147.pdf' - }.compact] + "subject" => "FOS: " + subject["fosLabel"], + "subjectScheme" => "Fields of Science and Technology (FOS)", + "schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf", + }.compact] end # if not found, look in Fields of Research (Australian and New Zealand Standard Research Classification) # and map to Fields of Science. Add an extra entry for the latter - fores = JSON.load(File.read(File.expand_path('../../resources/oecd/for-mappings.json', + fores = JSON.load(File.read(File.expand_path("../../resources/oecd/for-mappings.json", __dir__))) - for_fields = fores.fetch('forFields') - for_disciplines = fores.fetch('forDisciplines') + for_fields = fores.fetch("forFields") + for_disciplines = fores.fetch("forDisciplines") # try to extract forId - if hsh['subjectScheme'] == 'FOR' - for_id = hsh['__content__'].to_s.split(' ').first || hsh['subject'].to_s.split(' ').first - for_id = for_id.rjust(6, '0') + if hsh["subjectScheme"] == "FOR" + for_id = hsh["__content__"].to_s.split(" ").first || hsh["subject"].to_s.split(" ").first + for_id = for_id.rjust(6, "0") - subject = for_fields.find { |l| l['forId'] == for_id } || - for_disciplines.find { |l| l['forId'] == for_id[0..3] } + subject = for_fields.find { |l| l["forId"] == for_id } || + for_disciplines.find { |l| l["forId"] == for_id[0..3] } else subject = for_fields.find do |l| - l['forLabel'] == hsh['__content__'] || l['forLabel'] == hsh['subject'] + l["forLabel"] == hsh["__content__"] || l["forLabel"] == hsh["subject"] end || for_disciplines.find do |l| - l['forLabel'] == hsh['__content__'] || l['forLabel'] == hsh['subject'] + l["forLabel"] == hsh["__content__"] || l["forLabel"] == hsh["subject"] end end if subject [{ - 'subject' => sanitize(hsh['__content__'] || hsh['subject']), - 'subjectScheme' => hsh['subjectScheme'], - 'classificationCode' => hsh['classificationCode'], - 'schemeUri' => hsh['schemeURI'] || hsh['schemeUri'], - 'valueUri' => hsh['valueURI'] || hsh['valueUri'], - 'lang' => hsh['lang'] + "subject" => sanitize(hsh["__content__"] || hsh["subject"]), + "subjectScheme" => hsh["subjectScheme"], + "classificationCode" => hsh["classificationCode"], + "schemeUri" => hsh["schemeURI"] || hsh["schemeUri"], + "valueUri" => hsh["valueURI"] || hsh["valueUri"], + "lang" => hsh["lang"], }.compact, { - 'subject' => 'FOS: ' + subject['fosLabel'], - 'subjectScheme' => 'Fields of Science and Technology (FOS)', - 'schemeUri' => 'http://www.oecd.org/science/inno/38235147.pdf' - }] + "subject" => "FOS: " + subject["fosLabel"], + "subjectScheme" => "Fields of Science and Technology (FOS)", + "schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf", + }] else [{ - 'subject' => sanitize(hsh['__content__'] || hsh['subject']), - 'subjectScheme' => hsh['subjectScheme'], - 'classificationCode' => hsh['classificationCode'], - 'schemeUri' => hsh['schemeURI'] || hsh['schemeUri'], - 'valueUri' => hsh['valueURI'] || hsh['valueUri'], - 'lang' => hsh['lang'] + "subject" => sanitize(hsh["__content__"] || hsh["subject"]), + "subjectScheme" => hsh["subjectScheme"], + "classificationCode" => hsh["classificationCode"], + "schemeUri" => hsh["schemeURI"] || hsh["schemeUri"], + "valueUri" => hsh["valueURI"] || hsh["valueUri"], + "lang" => hsh["lang"], }.compact] end end def encode_doi(prefix) # DOI suffix is a generated from a random number, encoded in base32 - # suffix has 8 digits plus two checksum digits. With base32 there are + # suffix has 8 digits plus two checksum digits. With base32 there are # 32 possible digits, so 8 digits gives 32^8 possible combinations - random_int = SecureRandom.random_number(32**7..(32**8) - 1) + random_int = SecureRandom.random_number(32 ** 7..(32 ** 8) - 1) suffix = Base32::URL.encode(random_int, checksum: true) str = "#{suffix[0, 5]}-#{suffix[5, 10]}" "https://doi.org/#{prefix}/#{str}" end def decode_doi(doi) - suffix = doi.split('/', 5).last + suffix = doi.split("/", 5).last Base32::URL.decode(suffix) + end + + def encode_id + # suffix has 5 digits plus two checksum digits. With base32 there are + # 32 possible digits, so 5 digits gives 32^5 possible combinations + random_int = SecureRandom.random_number(32 ** 4..(32 ** 5) - 1) + Base32::URL.encode(random_int, checksum: true) + end + + def decode_id(id) + Base32::URL.decode(id) end end end