exe/stepmod-extract-terms in stepmod-utils-0.3.2 vs exe/stepmod-extract-terms in stepmod-utils-0.3.4
- old
+ new
@@ -15,43 +15,21 @@
class Gem::Specification
def this; self; end
end
require 'bundler/setup'
-require 'stepmod/utils/stepmod_definition_converter'
-require 'stepmod/utils/bibdata'
-require 'stepmod/utils/concept'
-require 'ptools'
+require 'stepmod/utils/terms_extractor'
-ReverseAdoc.config.unknown_tags = :bypass
-
-# TODO: we may want a command line option to override this in the future
-ACCEPTED_STAGES = %w(IS DIS FDIS TS)
-
-general_concepts = []
-resource_concepts = []
-module_concepts = []
-parsed_bibliography = []
-encountered_terms = {}
-
stepmod_dir = ARGV.first || Dir.pwd
-def log message
- puts "[stepmod-utils] #{message}"
-end
+general_concepts,
+resource_concepts,
+parsed_bibliography,
+part_concepts,
+part_resources,
+part_modules = Stepmod::Utils::TermsExtractor.call(stepmod_dir)
-def term_special_category(bibdata)
- case bibdata.part.to_i
- when 41,42,43,44,45,46,47,51
- true
- when [56..112]
- true
- else
- false
- end
-end
-
def part_to_title(bibdata)
case bibdata.part.to_i
when 41
"Part 41"
when 42
@@ -71,229 +49,71 @@
else
bibdata.title_en
end
end
-stepmod_path = Pathname.new(stepmod_dir).realpath
+def log message
+ puts "[stepmod-utils] #{message}"
+end
-# If we are using the stepmod CVS repository, provide the revision number per file
-has_cvs = File.which("cvs")
-cvs_mode = has_cvs && Dir.exists?(stepmod_path.join('CVS'))
+part_concepts.each do |(bibdata, current_part_concepts)|
+ fn = "03x-stepmod-#{bibdata.part}.adoc"
+ File.open(fn, 'w') { |file|
+ file.puts("== #{part_to_title(bibdata)}\n\n")
+ file.puts(current_part_concepts.map(&:to_mn_adoc).join("\n"))
+ }
+ log "INFO: written to: #{fn}"
+end
-log "INFO: STEPmod directory set to #{stepmod_dir}."
-if cvs_mode
- log "INFO: STEPmod directory is a CVS repository and will detect revisions."
- log "INFO: [CVS] Detecting file revisions can be slow, please be patient!"
-else
- log "INFO: STEPmod directory is not a CVS repository, skipping revision detection."
+part_resources.each do |(bibdata, current_part_resources)|
+ fn = "04x-stepmod-entities-resources-#{bibdata.part}.adoc"
+ File.open(fn, 'w') { |file|
+ file.puts("== #{part_to_title(bibdata)}\n\n")
+ file.puts(current_part_resources.map(&:to_mn_adoc).join("\n"))
+ }
+ log "INFO: written to: #{fn}"
end
-log "INFO: Detecting paths..."
-files = %w(
- resource.xml
- application_protocol.xml
- business_object_model.xml
- module.xml
- ).inject([]) do |acc, t|
+part_modules.each do |(bibdata, part_modules_arm, part_modules_mim)|
+ fn = "05x-stepmod-entities-modules-#{bibdata.part}.adoc"
+ File.open(fn, 'w') { |file|
+ file.puts("")
+ unless part_modules_arm.empty?
+ schema_name = part_modules_arm.first.first
+ concepts = part_modules_arm.first.last
- candidate_paths = Dir["#{stepmod_dir}/**/#{t}"]
- acc << candidate_paths
-
-end.flatten.sort.uniq
-
-max_encountered_refs_indexes = {}
-
-files.each do |file_path|
- file_path = Pathname.new(file_path).realpath
- fpath = file_path.relative_path_from(stepmod_path)
-
- log "INFO: Processing XML file #{fpath}"
- current_document = Nokogiri::XML(File.read(file_path)).root
-
- bibdata = nil
- begin
- bibdata = Stepmod::Utils::Bibdata.new(document: current_document)
- rescue
- log "WARNING: Unknown file #{fpath}, skipped"
- next
- end
-
- unless ACCEPTED_STAGES.include? bibdata.doctype
- log "INFO: skipped #{bibdata.docid} as it is not one of (#{ACCEPTED_STAGES.join(", ")})."
- next
- end
-
- if bibdata.part.to_s.empty?
- log "FATAL: missing `part` attribute: #{fpath}"
- log "INFO: skipped #{bibdata.docid} as it is missing `part` attribute."
- next
- end
-
- revision_string = "\n// CVS: revision not detected"
- if cvs_mode
- # Run `cvs status` to find out version
-
- log "INFO: Detecting CVS revision..."
- Dir.chdir(stepmod_path) do
- status = `cvs status #{fpath}`
-
- unless status.empty?
- working_rev = status.split(/\n/).grep(/Working revision:/).first.match(/revision:\s+(.+)$/)[1]
- repo_rev = status.split(/\n/).grep(/Repository revision:/).first.match(/revision:\t(.+)\t/)[1]
- log "INFO: CVS working rev (#{working_rev}), repo rev (#{repo_rev})"
- revision_string = "\n// CVS working rev: (#{working_rev}), repo rev (#{repo_rev})\n" +
- "// CVS: revision #{working_rev == repo_rev ? 'up to date' : 'differs'}"
- end
+ # puts "SCHEMA NAME ARM: #{schema_name}"
+ file.puts("== #{schema_name}\n\n")
+ file.puts(concepts.map(&:to_mn_adoc).join("\n"))
end
- end
- # read definitions
- part_concepts = []
- current_document.xpath('//definition').each do |definition|
- index = max_encountered_refs_indexes[bibdata.anchor] || 1
- term_id = definition['id']
- unless term_id.nil?
- if encountered_terms[term_id]
- log "FATAL: Duplicated term with id: #{term_id}, #{fpath}"
- end
- encountered_terms[term_id] = true
- end
+ file.puts("")
- # Assume that definition is located in clause 3 of the ISO document
- # in order. We really don't have a good reference here.
- ref_clause = "3.#{index}"
+ unless part_modules_mim.empty?
+ schema_name = part_modules_mim.first.first
- concept = Stepmod::Utils::Concept.parse(
- definition,
- reference_anchor: bibdata.anchor,
- reference_clause: ref_clause,
- file_path: fpath + revision_string
- )
- next unless concept
-
- unless term_special_category(bibdata)
- # log "INFO: this part is generic"
- general_concepts << concept
- else
- # log "INFO: this part is special"
- part_concepts << concept
+ # puts "SCHEMA NAME MIM: #{schema_name}"
+ concepts = part_modules_mim.first.last
+ file.puts("== #{schema_name}\n\n")
+ file.puts(concepts.map(&:to_mn_adoc).join("\n"))
end
- max_encountered_refs_indexes[bibdata.anchor] = index + 1
- parsed_bibliography << bibdata
- end
-
- part_modules = []
- current_document.xpath('//arm/uof').each do |uof_node|
- concept = Stepmod::Utils::Concept.parse(
- uof_node,
- reference_anchor: bibdata.anchor,
- reference_clause: nil,
- file_path: fpath + revision_string
- )
- # puts concept.inspect
-
- next unless concept
-
- unless term_special_category(bibdata)
- # log "INFO: this part is generic"
- module_concepts << concept
- else
- # log "INFO: this part is special"
- part_modules << concept
- end
-
- parsed_bibliography << bibdata
- end
-
- part_resources = []
- # Assumption: every schema is only linked by a single resource_docs document.
- current_document.xpath('//schema').each do |schema_node|
- schema_name = schema_node['name']
-
- Dir["#{stepmod_path}/*/#{schema_name}/descriptions.xml"].each do |description_xml_path|
- log "INFO: Processing resources schema #{description_xml_path}"
- description_document = Nokogiri::XML(File.read(description_xml_path)).root
- description_document.xpath('//ext_description').each do |ext_description|
-
- concept = Stepmod::Utils::Concept.parse(
- ext_description,
- reference_anchor: bibdata.anchor,
- reference_clause: nil,
- file_path: fpath + revision_string
- )
- next unless concept
-
- unless term_special_category(bibdata)
- # log "INFO: this part is generic"
- resource_concepts << concept
- else
- # log "INFO: this part is special"
- part_resources << concept
- end
-
- parsed_bibliography << bibdata
- end
- end
- end
-
- log "INFO: Completed processing XML file #{fpath}"
-
- if part_concepts.empty?
- log "INFO: Skipping #{fpath} (#{bibdata.docid}) because it contains no concepts."
- next
- elsif part_concepts.length < 3
- log "INFO: Skipping #{fpath} (#{bibdata.docid}) because it only has #{part_concepts.length} terms."
-
- part_concepts.each do |x|
- general_concepts << x
- end
- else
- fn = "03x-stepmod-#{bibdata.part}.adoc"
- File.open(fn, 'w') { |file|
- file.puts("== #{part_to_title(bibdata)}\n\n")
- file.puts(part_concepts.map(&:to_mn_adoc).join("\n"))
- }
- log "INFO: written to: #{fn}"
- end
-
- unless part_resources.empty?
- fn = "04x-stepmod-entities-resources-#{bibdata.part}.adoc"
- File.open(fn, 'w') { |file|
- file.puts("== #{part_to_title(bibdata)}\n\n")
- file.puts(part_resources.map(&:to_mn_adoc).join("\n"))
- }
- log "INFO: written to: #{fn}"
- end
-
- unless part_modules.empty?
- fn = "04x-stepmod-entities-modules-#{bibdata.part}.adoc"
- File.open(fn, 'w') { |file|
- file.puts("== #{part_to_title(bibdata)}\n\n")
- file.puts(part_modules.map(&:to_mn_adoc).join("\n"))
- }
- log "INFO: written to: #{fn}"
- end
-
+ }
+ log "INFO: written to: #{fn}"
end
File.open('031-stepmod-general.adoc', 'w') { |file|
file.puts(general_concepts.map(&:to_mn_adoc).join("\n"))
}
+log "INFO: written to: 031-stepmod-general.adoc"
File.open('041-stepmod-entities-resources.adoc', 'w') { |file|
file.puts(resource_concepts.map(&:to_mn_adoc).join("\n"))
}
+log "INFO: written to: 041-stepmod-entities-resources.adoc"
-File.open('051-stepmod-entities-modules.adoc', 'w') { |file|
- file.puts(module_concepts.map(&:to_mn_adoc).join("\n"))
-}
-
-log "INFO: written to: 031-stepmod-general.adoc"
-
File.open('991-generated-bibliography.adoc', 'w') { |file|
file.puts(parsed_bibliography.map(&:to_mn_adoc).sort.uniq.join("\n"))
}
-
log "INFO: written to: 991-generated-bibliography.adoc"