exe/stepmod-extract-terms in stepmod-utils-0.3.2 vs exe/stepmod-extract-terms in stepmod-utils-0.3.4

- old
+ new

@@ -15,43 +15,21 @@ class Gem::Specification def this; self; end end require 'bundler/setup' -require 'stepmod/utils/stepmod_definition_converter' -require 'stepmod/utils/bibdata' -require 'stepmod/utils/concept' -require 'ptools' +require 'stepmod/utils/terms_extractor' -ReverseAdoc.config.unknown_tags = :bypass - -# TODO: we may want a command line option to override this in the future -ACCEPTED_STAGES = %w(IS DIS FDIS TS) - -general_concepts = [] -resource_concepts = [] -module_concepts = [] -parsed_bibliography = [] -encountered_terms = {} - stepmod_dir = ARGV.first || Dir.pwd -def log message - puts "[stepmod-utils] #{message}" -end +general_concepts, +resource_concepts, +parsed_bibliography, +part_concepts, +part_resources, +part_modules = Stepmod::Utils::TermsExtractor.call(stepmod_dir) -def term_special_category(bibdata) - case bibdata.part.to_i - when 41,42,43,44,45,46,47,51 - true - when [56..112] - true - else - false - end -end - def part_to_title(bibdata) case bibdata.part.to_i when 41 "Part 41" when 42 @@ -71,229 +49,71 @@ else bibdata.title_en end end -stepmod_path = Pathname.new(stepmod_dir).realpath +def log message + puts "[stepmod-utils] #{message}" +end -# If we are using the stepmod CVS repository, provide the revision number per file -has_cvs = File.which("cvs") -cvs_mode = has_cvs && Dir.exists?(stepmod_path.join('CVS')) +part_concepts.each do |(bibdata, current_part_concepts)| + fn = "03x-stepmod-#{bibdata.part}.adoc" + File.open(fn, 'w') { |file| + file.puts("== #{part_to_title(bibdata)}\n\n") + file.puts(current_part_concepts.map(&:to_mn_adoc).join("\n")) + } + log "INFO: written to: #{fn}" +end -log "INFO: STEPmod directory set to #{stepmod_dir}." -if cvs_mode - log "INFO: STEPmod directory is a CVS repository and will detect revisions." - log "INFO: [CVS] Detecting file revisions can be slow, please be patient!" -else - log "INFO: STEPmod directory is not a CVS repository, skipping revision detection." +part_resources.each do |(bibdata, current_part_resources)| + fn = "04x-stepmod-entities-resources-#{bibdata.part}.adoc" + File.open(fn, 'w') { |file| + file.puts("== #{part_to_title(bibdata)}\n\n") + file.puts(current_part_resources.map(&:to_mn_adoc).join("\n")) + } + log "INFO: written to: #{fn}" end -log "INFO: Detecting paths..." -files = %w( - resource.xml - application_protocol.xml - business_object_model.xml - module.xml - ).inject([]) do |acc, t| +part_modules.each do |(bibdata, part_modules_arm, part_modules_mim)| + fn = "05x-stepmod-entities-modules-#{bibdata.part}.adoc" + File.open(fn, 'w') { |file| + file.puts("") + unless part_modules_arm.empty? + schema_name = part_modules_arm.first.first + concepts = part_modules_arm.first.last - candidate_paths = Dir["#{stepmod_dir}/**/#{t}"] - acc << candidate_paths - -end.flatten.sort.uniq - -max_encountered_refs_indexes = {} - -files.each do |file_path| - file_path = Pathname.new(file_path).realpath - fpath = file_path.relative_path_from(stepmod_path) - - log "INFO: Processing XML file #{fpath}" - current_document = Nokogiri::XML(File.read(file_path)).root - - bibdata = nil - begin - bibdata = Stepmod::Utils::Bibdata.new(document: current_document) - rescue - log "WARNING: Unknown file #{fpath}, skipped" - next - end - - unless ACCEPTED_STAGES.include? bibdata.doctype - log "INFO: skipped #{bibdata.docid} as it is not one of (#{ACCEPTED_STAGES.join(", ")})." - next - end - - if bibdata.part.to_s.empty? - log "FATAL: missing `part` attribute: #{fpath}" - log "INFO: skipped #{bibdata.docid} as it is missing `part` attribute." - next - end - - revision_string = "\n// CVS: revision not detected" - if cvs_mode - # Run `cvs status` to find out version - - log "INFO: Detecting CVS revision..." - Dir.chdir(stepmod_path) do - status = `cvs status #{fpath}` - - unless status.empty? - working_rev = status.split(/\n/).grep(/Working revision:/).first.match(/revision:\s+(.+)$/)[1] - repo_rev = status.split(/\n/).grep(/Repository revision:/).first.match(/revision:\t(.+)\t/)[1] - log "INFO: CVS working rev (#{working_rev}), repo rev (#{repo_rev})" - revision_string = "\n// CVS working rev: (#{working_rev}), repo rev (#{repo_rev})\n" + - "// CVS: revision #{working_rev == repo_rev ? 'up to date' : 'differs'}" - end + # puts "SCHEMA NAME ARM: #{schema_name}" + file.puts("== #{schema_name}\n\n") + file.puts(concepts.map(&:to_mn_adoc).join("\n")) end - end - # read definitions - part_concepts = [] - current_document.xpath('//definition').each do |definition| - index = max_encountered_refs_indexes[bibdata.anchor] || 1 - term_id = definition['id'] - unless term_id.nil? - if encountered_terms[term_id] - log "FATAL: Duplicated term with id: #{term_id}, #{fpath}" - end - encountered_terms[term_id] = true - end + file.puts("") - # Assume that definition is located in clause 3 of the ISO document - # in order. We really don't have a good reference here. - ref_clause = "3.#{index}" + unless part_modules_mim.empty? + schema_name = part_modules_mim.first.first - concept = Stepmod::Utils::Concept.parse( - definition, - reference_anchor: bibdata.anchor, - reference_clause: ref_clause, - file_path: fpath + revision_string - ) - next unless concept - - unless term_special_category(bibdata) - # log "INFO: this part is generic" - general_concepts << concept - else - # log "INFO: this part is special" - part_concepts << concept + # puts "SCHEMA NAME MIM: #{schema_name}" + concepts = part_modules_mim.first.last + file.puts("== #{schema_name}\n\n") + file.puts(concepts.map(&:to_mn_adoc).join("\n")) end - max_encountered_refs_indexes[bibdata.anchor] = index + 1 - parsed_bibliography << bibdata - end - - part_modules = [] - current_document.xpath('//arm/uof').each do |uof_node| - concept = Stepmod::Utils::Concept.parse( - uof_node, - reference_anchor: bibdata.anchor, - reference_clause: nil, - file_path: fpath + revision_string - ) - # puts concept.inspect - - next unless concept - - unless term_special_category(bibdata) - # log "INFO: this part is generic" - module_concepts << concept - else - # log "INFO: this part is special" - part_modules << concept - end - - parsed_bibliography << bibdata - end - - part_resources = [] - # Assumption: every schema is only linked by a single resource_docs document. - current_document.xpath('//schema').each do |schema_node| - schema_name = schema_node['name'] - - Dir["#{stepmod_path}/*/#{schema_name}/descriptions.xml"].each do |description_xml_path| - log "INFO: Processing resources schema #{description_xml_path}" - description_document = Nokogiri::XML(File.read(description_xml_path)).root - description_document.xpath('//ext_description').each do |ext_description| - - concept = Stepmod::Utils::Concept.parse( - ext_description, - reference_anchor: bibdata.anchor, - reference_clause: nil, - file_path: fpath + revision_string - ) - next unless concept - - unless term_special_category(bibdata) - # log "INFO: this part is generic" - resource_concepts << concept - else - # log "INFO: this part is special" - part_resources << concept - end - - parsed_bibliography << bibdata - end - end - end - - log "INFO: Completed processing XML file #{fpath}" - - if part_concepts.empty? - log "INFO: Skipping #{fpath} (#{bibdata.docid}) because it contains no concepts." - next - elsif part_concepts.length < 3 - log "INFO: Skipping #{fpath} (#{bibdata.docid}) because it only has #{part_concepts.length} terms." - - part_concepts.each do |x| - general_concepts << x - end - else - fn = "03x-stepmod-#{bibdata.part}.adoc" - File.open(fn, 'w') { |file| - file.puts("== #{part_to_title(bibdata)}\n\n") - file.puts(part_concepts.map(&:to_mn_adoc).join("\n")) - } - log "INFO: written to: #{fn}" - end - - unless part_resources.empty? - fn = "04x-stepmod-entities-resources-#{bibdata.part}.adoc" - File.open(fn, 'w') { |file| - file.puts("== #{part_to_title(bibdata)}\n\n") - file.puts(part_resources.map(&:to_mn_adoc).join("\n")) - } - log "INFO: written to: #{fn}" - end - - unless part_modules.empty? - fn = "04x-stepmod-entities-modules-#{bibdata.part}.adoc" - File.open(fn, 'w') { |file| - file.puts("== #{part_to_title(bibdata)}\n\n") - file.puts(part_modules.map(&:to_mn_adoc).join("\n")) - } - log "INFO: written to: #{fn}" - end - + } + log "INFO: written to: #{fn}" end File.open('031-stepmod-general.adoc', 'w') { |file| file.puts(general_concepts.map(&:to_mn_adoc).join("\n")) } +log "INFO: written to: 031-stepmod-general.adoc" File.open('041-stepmod-entities-resources.adoc', 'w') { |file| file.puts(resource_concepts.map(&:to_mn_adoc).join("\n")) } +log "INFO: written to: 041-stepmod-entities-resources.adoc" -File.open('051-stepmod-entities-modules.adoc', 'w') { |file| - file.puts(module_concepts.map(&:to_mn_adoc).join("\n")) -} - -log "INFO: written to: 031-stepmod-general.adoc" - File.open('991-generated-bibliography.adoc', 'w') { |file| file.puts(parsed_bibliography.map(&:to_mn_adoc).sort.uniq.join("\n")) } - log "INFO: written to: 991-generated-bibliography.adoc"