require "json"
require "pathname"
require "stepmod/utils/smrl_description_converter"
require "stepmod/utils/smrl_resource_converter"
require "stepmod/utils/converters/express_note"
require "stepmod/utils/converters/express_example"
require "stepmod/utils/converters/express_figure"
require "stepmod/utils/converters/express_table"
require "expressir"
require "expressir/express/parser"
require "pubid-iso"

module Stepmod
  module Utils
    # Annotates a single EXPRESS (.exp) file with text converted from the
    # accompanying descriptions / resource XML files and with bibliographic
    # remarks (published_in, identifier, status, title, ...).
    #
    # Typical use: StepmodFileAnnotator.new(express_file: path).call
    class StepmodFileAnnotator
      attr_reader :express_file, :resource_docs_cache, :stepmod_dir, :schema_name

      # Matches a SCHEMA declaration line that carries no version string,
      # e.g. "SCHEMA action_schema;".
      SCHEMA_VERSION_MATCH_REGEX = /^SCHEMA [0-9a-zA-Z_]+;\s*$/

      # File-name stems (captured from "<stem>.exp") that denote module schemas.
      MODULE_SCHEMA_TYPES = %w[arm mim arm_lf mim_lf].freeze
      # File-name stems that denote business object model schemas.
      BOM_SCHEMA_TYPES = %w[bom DomainModel].freeze
      private_constant :MODULE_SCHEMA_TYPES, :BOM_SCHEMA_TYPES

      # @param express_file [String] path to the exp file needed to annotate
      # @param stepmod_dir [String, nil] root directory that contains
      #   "data/resource_docs"; defaults to the current working directory
      def initialize(express_file:, stepmod_dir: nil)
        @express_file = express_file
        # Resolve the default BEFORE building the cache: the cache lookup
        # joins paths onto this directory and cannot work with nil.
        @stepmod_dir = stepmod_dir || Dir.pwd
        @resource_docs_cache = resource_docs_schemas(@stepmod_dir)
        @added_bibdata = {}
        @images_references = {}

        parsed_name = Expressir::Express::Parser.from_file(express_file)
          .schemas
          .first
          .id
        @schema_name = normalize_schema_name(parsed_name)
      end

      # Needed to fix scheme casing issues, e.g. xxx_LF => xxx_lf
      #
      # @param name [String] schema name as read from the EXPRESS file
      # @return [String] the name with normalized casing
      def normalize_schema_name(name)
        case name.downcase
        # module schemas have first letter capitalized, rest in lowercase
        when /_arm_lf\Z/i, /_mim_lf\Z/i, /_arm\Z/i, /_mim\Z/i
          name.downcase.capitalize
        # resource schemas are all in lowercase
        else
          name.downcase
        end
      end

      # @param schema_content [String] EXPRESS source text
      # @return [Boolean] true when the text contains a SCHEMA declaration
      #   line without a version string
      def is_missing_version(schema_content)
        schema_content.match?(SCHEMA_VERSION_MATCH_REGEX)
      end

      # Builds the SCHEMA declaration line including the version string.
      #
      # Reads @identifier, which is set while the bibliographic data is
      # extracted, so this must be called after #prepend_bibdata has run.
      #
      # @return [String] the replacement SCHEMA line, terminated by a newline
      def build_schema_string_with_version
        # Geometric_tolerance_arm => geometric-tolerance-arm
        name_in_asn1 = @schema_name.downcase.tr("_", "-")

        schema_type, type_number =
          case @schema_name.downcase
          when /_arm\Z/i
            [:module, 1]
          when /_mim\Z/i
            [:module, 2]
          when /_arm_lf\Z/i
            [:module, 3]
          when /_mim_lf\Z/i
            [:module, @module_has_arm_lf ? 4 : 3]
          else
            # any resource schema without version strings
            puts "[annotator-WARNING] this resource schema is missing a version string: #{@schema_name}"
            [:resource, 1]
          end

        # TODO there are schemas with only arm, arm_lf:
        # schemas/modules/reference_schema_for_sysml_mapping/arm_lf.exp
        # TODO there are schemas with only arm, mim, mim_lf:
        # schemas/modules/limited_length_or_area_indicator_assignment/mim_lf.exp

        part = @identifier.part
        edition = @identifier.edition
        schema_or_object = schema_type == :module ? "schema" : "object"

        "SCHEMA #{@schema_name} '{ " \
          "iso standard 10303 part(#{part}) " \
          "version(#{edition}) " \
          "#{schema_or_object}(1) " \
          "#{name_in_asn1}(#{type_number}) " \
          "}';\n"
      end

      # Scans <stepmod_dir>/data/resource_docs/*/resource.xml and maps every
      # schema name declared there to the directory that declares it.
      #
      # @param stepmod_dir [String] root directory of the stepmod data
      # @return [Hash{String => String}] schema name => resource_docs directory name
      def resource_docs_schemas(stepmod_dir)
        pattern = File.join(stepmod_dir, "data", "resource_docs", "*", "resource.xml")

        Dir.glob(pattern).each_with_object({}) do |resource_docs_file, schemas|
          # The glob's single "*" component is the directory holding resource.xml.
          resource_docs_dir = File.basename(File.dirname(resource_docs_file))
          resource_docs = Nokogiri::XML(File.read(resource_docs_file)).root

          resource_docs.xpath("schema").each do |schema|
            schemas[schema["name"]] = resource_docs_dir
          end
        end
      end

      # Produces the annotated EXPRESS text for #express_file.
      #
      # Any StandardError raised along the way is printed (message and
      # backtrace) instead of being propagated; in that case the method
      # returns nil.
      #
      # @return [Hash, nil] { annotated_text: String, images_references: Hash },
      #   or nil when an error was caught
      def call
        match = File.basename(express_file)
          .match('^(arm|mim|bom|arm_lf|mim_lf|DomainModel)\.exp$')
        descriptions_base = match ? "#{match.captures[0]}_descriptions.xml" : "descriptions.xml"
        descriptions_file = File.join(File.dirname(express_file), descriptions_base)

        output_express = File.read(express_file)
        converted_description =
          if File.exist?(descriptions_file)
            convert_descriptions(descriptions_file, output_express)
          else
            ""
          end

        bib_file_name = extract_bib_file_name(match, resource_docs_cache[@schema_name || ""])
        bib_file = if match
                     File.join(File.dirname(express_file), bib_file_name)
                   else
                     resource_docs_file_path(stepmod_dir, bib_file_name)
                   end

        # bib_file will not be present for resources that are not in the
        # resource_docs cache, e.g. hierarchy_schema
        unless bib_file && File.exist?(bib_file)
          raise StandardError, "bib_file for #{schema_name} does not exist: #{bib_file}"
        end

        output_express << prepend_bibdata(
          converted_description,
          bib_file,
          @schema_name,
          match,
        )

        if is_missing_version(output_express)
          # Build the line once: building it may itself print a warning, and
          # the same text is needed for both the log line and the replacement.
          schema_string = build_schema_string_with_version
          puts "[annotator-WARNING] schema (#{@schema_name}) missing version string. "\
               "Adding: `#{schema_string}` to schema."
          # Block form inserts the text literally (no backslash interpretation).
          output_express.gsub!(SCHEMA_VERSION_MATCH_REGEX) { schema_string }
        end

        {
          annotated_text: sanitize(output_express),
          images_references: @images_references,
        }
      rescue StandardError => e
        puts "[ERROR]!!! #{e.message}"
        puts e.backtrace
      end

      private

      # Walks every ext_description element of the descriptions file.
      #
      # Text converted for the base schema and for plain descriptions is
      # accumulated and returned; text converted for an exact linkend found in
      # the resource docs cache is appended directly to output_express.
      #
      # @param descriptions_file [String] path to the descriptions XML file
      # @param output_express [String] EXPRESS text, mutated in place
      # @return [String] the accumulated converted description text
      def convert_descriptions(descriptions_file, output_express)
        descriptions = Nokogiri::XML(File.read(descriptions_file)).root
        converted_description = ""
        processed_images_cache = {}
        added_resource_descriptions = {}

        descriptions.xpath("ext_description").each do |description|
          linkend = description["linkend"]
          # Add base resource from linked path if exists,
          # eg "language_schema.language.wr:WR1" -> "language_schema"
          base_linked = linkend.to_s.split(".").first

          if added_resource_descriptions[base_linked].nil?
            base_resource_doc_dir = resource_docs_cache[base_linked]
            if base_resource_doc_dir
              converted_description << convert_from_resource_file(
                base_resource_doc_dir,
                stepmod_dir,
                base_linked,
                descriptions_file,
              )
            end
            added_resource_descriptions[base_linked] = true
          end

          resource_docs_dir = resource_docs_cache[linkend]

          # Convert content description
          # when a schema description is available from resource.xml and also
          # descriptions.xml, the description from resource.xml is only used.
          # https://github.com/metanorma/annotated-express/issues/32#issuecomment-792609078
          if resource_docs_dir.nil? && !description.text.strip.empty?
            converted_description << convert_from_description_text(
              descriptions_file,
              description,
            )
          end

          add_images_references(
            converted_description,
            resource_docs_cache[base_linked],
            processed_images_cache,
          )

          # Add converted description from exact linked path
          if resource_docs_dir && added_resource_descriptions[linkend].nil?
            output_express << convert_from_resource_file(
              resource_docs_dir,
              stepmod_dir,
              linkend,
              descriptions_file,
            )
            added_resource_descriptions[linkend] = true
          end
        end

        converted_description
      end

      # @param file_content [String]
      # @return [String] copy with "(*)" rewritten to "(`*`)" and ";;" to ";"
      def sanitize(file_content)
        file_content
          .gsub("(*)", "(`*`)")
          .gsub(";;", ";")
      end

      # Records every "image::<path>[]" reference found in the description
      # that has not been seen before.
      #
      # @param description [String] converted text to scan
      # @param schema_base_dir [String, nil] resource_docs directory of the
      #   schema; nothing is recorded when nil
      # @param processed_images_cache [Hash] image path => true, mutated in place
      # @return [Hash] @images_references ("resource_docs/<dir>/<image>" => image)
      def add_images_references(description, schema_base_dir, processed_images_cache)
        return @images_references unless schema_base_dir

        description.scan(/image::(.*?)\[\]/).flatten.each do |referenced_image|
          next if processed_images_cache[referenced_image]

          processed_images_cache[referenced_image] = true
          image_file_path = File.join("resource_docs", schema_base_dir, referenced_image)
          @images_references[image_file_path] = referenced_image
        end

        @images_references
      end

      # Converts one ext_description element, followed by its figures, tables,
      # examples and notes.
      #
      # @param descriptions_file [String] path of the file the element came from
      # @param description [Nokogiri::XML::Element] the ext_description element
      # @return [String] the converted text
      # @raise [StandardError] when the element has no linkend attribute
      def convert_from_description_text(descriptions_file, description)
        linkend = description["linkend"]
        if linkend.nil?
          raise StandardError,
                "[stepmod-file-annotator] ERROR: no linkend for #{descriptions_file}!"
        end

        # The conversion runs with the descriptions file's directory as the
        # working directory.
        Dir.chdir(File.dirname(descriptions_file)) do
          # NOTE(review): the element is handed over as its bare serialization;
          # confirm the converter does not expect an enclosing root element.
          wrapper = "#{description}"
          notes = description.xpath("note")
          examples = description.xpath("example")
          figures = description.xpath("figure")
          tables = description.xpath("table")

          converted_description = <<~DESCRIPTION

            #{Stepmod::Utils::SmrlDescriptionConverter.convert(
              wrapper,
              no_notes_examples: true,
              descriptions_file: descriptions_file,
            )}
          DESCRIPTION

          converter_options = {
            schema_and_entity: linkend,
            descriptions_file: descriptions_file,
          }
          convert_all = lambda do |nodes, converter_class|
            nodes.map { |node| converter_class.new.convert(node, **converter_options) }.join
          end

          converted_figures = convert_all.call(figures, Stepmod::Utils::Converters::ExpressFigure)
          converted_tables = convert_all.call(tables, Stepmod::Utils::Converters::ExpressTable)
          converted_notes = convert_all.call(notes, Stepmod::Utils::Converters::ExpressNote)
          converted_examples = convert_all.call(examples, Stepmod::Utils::Converters::ExpressExample)

          [
            converted_description,
            converted_figures,
            converted_tables,
            converted_examples,
            converted_notes,
          ].join("")
        end
      end

      # Builds the bibliographic remarks for a schema and joins them with the
      # description. Also stores the identifier in @identifier for the schema
      # version string generation.
      #
      # @param description [String] converted description text
      # @param bibdata_file [String] path to the XML file holding the bib data
      # @param schema_and_entity [String] schema name used as remark prefix
      # @param match [MatchData, nil] match of the express file name, nil for
      #   resource schemas
      # @return [String] remarks joined with the description, or only the
      #   description when remarks were already emitted for this schema
      def prepend_bibdata(description, bibdata_file, schema_and_entity, match)
        bib = Nokogiri::XML(File.read(bibdata_file)).root
        bibdata = extract_bib_data(match, bib, schema_and_entity)

        # for schema version string generation
        @identifier = bibdata[:identifier]

        return description.to_s if @added_bibdata[schema_and_entity]

        published_in = <<~PUBLISHED_IN
          (*"#{schema_and_entity}.__published_in"
          #{bibdata[:identifier].to_s(with_edition: true)}
          *)
        PUBLISHED_IN

        identifier = <<~IDENTIFIER if bibdata[:number]
          (*"#{schema_and_entity}.__identifier"
          ISO/TC 184/SC 4/WG 12 N#{bibdata[:number]}
          *)
        IDENTIFIER

        supersedes = <<~SUPERSEDES if bibdata[:supersedes_concept]
          (*"#{schema_and_entity}.__supersedes"
          ISO/TC 184/SC 4/WG 12 N#{bibdata[:supersedes_concept]}
          *)
        SUPERSEDES

        status = <<~STATUS if bibdata[:status]
          (*"#{schema_and_entity}.__status"
          #{bibdata[:status]}
          *)
        STATUS

        title = <<~TITLE if bibdata[:title]
          (*"#{schema_and_entity}.__title"
          #{bibdata[:title]}
          *)
        TITLE

        document = <<~DOCUMENT if bibdata_file
          (*"#{schema_and_entity}.__schema_file"
          #{Pathname(bibdata_file).relative_path_from(Pathname(@stepmod_dir))}
          *)
        DOCUMENT

        @added_bibdata[schema_and_entity] = true

        # Remarks whose source attribute is absent are nil and dropped here.
        [
          published_in,
          identifier,
          supersedes,
          status,
          title,
          description,
          document,
        ].compact.join("\n")
      end

      # @param match [MatchData, nil]
      # @return [Boolean, nil] truthy when the file name denotes a module schema
      def module?(match)
        match && MODULE_SCHEMA_TYPES.include?(match.captures[0])
      end

      # @param match [MatchData, nil]
      # @return [Boolean, nil] truthy when the file name denotes a business
      #   object model schema
      def bom?(match)
        match && BOM_SCHEMA_TYPES.include?(match.captures[0])
      end

      # @param match [MatchData, nil] match of the express file name
      # @param default_file_name [String, nil] value returned when match is nil
      # @return [String] "module.xml", "business_object_model.xml", or the
      #   default ("" when the default is nil)
      def extract_bib_file_name(match, default_file_name = "")
        return default_file_name || "" unless match

        module?(match) ? "module.xml" : "business_object_model.xml"
      end

      # Picks the bib data extraction that fits the schema kind.
      #
      # @param match [MatchData, nil] nil selects the resource extraction
      # @param bib [Nokogiri::XML::Element] root element of the bib file
      # @param schema_and_entity [String] schema name (used for resources)
      # @return [Hash, nil] the bib data hash
      def extract_bib_data(match, bib, schema_and_entity)
        # for schema version string generation
        @identifier = identifier(bib)

        return resource_bib_data(bib, schema_and_entity) unless match

        if module?(match)
          @module_has_arm = !bib.xpath("arm").first.nil?
          @module_has_mim = !bib.xpath("mim").first.nil?
          @module_has_arm_lf = !bib.xpath("arm_lf").first.nil?
          @module_has_mim_lf = !bib.xpath("mim_lf").first.nil?
          puts "[annotator] module has schemas: " \
               "ARM(#{@module_has_arm}) MIM(#{@module_has_mim}) " \
               "ARM_LF(#{@module_has_arm_lf}) MIM_LF(#{@module_has_mim_lf})"

          module_bib_data(bib, match.captures[0])
        elsif bom?(match)
          bom_bib_data(bib)
        end
      end

      # Builds the ISO 10303 identifier from the "part", "publication.year"
      # and "version" attributes of the bib root element.
      #
      # @param bib [Nokogiri::XML::Element]
      # @return [Pubid::Iso::Identifier]
      def identifier(bib)
        part = bib.attributes["part"].value
        year = bib.attributes["publication.year"].value

        # year="tbd" in data/modules/geometric_tolerance/module.xml and
        # probably in some other places as well
        year = "" if year == "tbd"
        edition = bib.attributes["version"].value

        pubid = Pubid::Iso::Identifier.create(
          publisher: "ISO",
          number: 10303,
        )

        pubid.part = part if part && !part.empty?
        pubid.year = year.split("-").first if year && !year.empty?
        pubid.edition = edition if edition && !edition.empty?

        pubid
      end

      # @param bib [Nokogiri::XML::Element] root of a resource.xml
      # @param schema_and_entity [String] name of the schema element to read
      # @return [Hash] bib data for a resource schema
      # @raise [StandardError] when no schema element with that name exists
      def resource_bib_data(bib, schema_and_entity)
        schema = bib.xpath("schema[@name='#{schema_and_entity}']").first
        if schema.nil?
          raise StandardError,
                "schema #{schema_and_entity} not found in resource bib data"
        end

        build_bib_data(
          bib,
          number: schema.attributes["number"],
          supersedes_concept: schema.attributes["number.supersedes"],
        )
      end

      # @param bib [Nokogiri::XML::Element] root of a module.xml
      # @param type [String] schema type, e.g. "arm" or "mim_lf"
      # @return [Hash] bib data for a module schema
      def module_bib_data(bib, type)
        build_bib_data(
          bib,
          number: bib.attributes["wg.number.#{type}"],
          supersedes_concept: bib.attributes["wg.number.#{type}.supersedes"],
        )
      end

      # @param bib [Nokogiri::XML::Element] root of a business_object_model.xml
      # @return [Hash] bib data for a business object model schema
      def bom_bib_data(bib)
        build_bib_data(
          bib,
          number: bib.attributes["wg.number.bom.exp"],
          supersedes_concept: bib.attributes["wg.number.bom.supersedes"],
        )
      end

      # Assembles the bib data hash shared by all schema kinds; only the
      # number attributes differ between them.
      def build_bib_data(bib, number:, supersedes_concept:)
        {
          identifier: identifier(bib),
          edition: bib.attributes["version"],
          number: number,
          supersedes_concept: supersedes_concept,
          status: bib.attributes["status"],
          title: bib.attributes["title"] || bib.attributes["name"],
        }
      end

      # Converts the schema element named `linked` from the resource.xml of
      # the given resource_docs directory.
      #
      # @param resource_docs_dir [String] directory name under data/resource_docs
      # @param stepmod_dir [String] root directory of the stepmod data
      # @param linked [String] schema name to look up
      # @param descriptions_file [String] path whose directory becomes the
      #   working directory during conversion
      # @return [String] converted text, prefixed with a newline
      def convert_from_resource_file(resource_docs_dir, stepmod_dir, linked, descriptions_file)
        resource_docs_file = resource_docs_file_path(stepmod_dir, resource_docs_dir)
        resource_docs = Nokogiri::XML(File.read(resource_docs_file)).root
        schema = resource_docs.xpath("schema[@name='#{linked}']")

        Dir.chdir(File.dirname(descriptions_file)) do
          # NOTE(review): the node set is handed over as its bare serialization;
          # confirm the converter does not expect an enclosing root element.
          wrapper = "#{schema}"

          "\n" + Stepmod::Utils::SmrlResourceConverter.convert(
            wrapper,
            {
              no_notes_examples: false,
              schema_and_entity: linked,
              descriptions_file: descriptions_file,
            },
          )
        end
      end

      # @return [String] <stepmod_dir>/data/resource_docs/<resource_docs_dir>/resource.xml
      def resource_docs_file_path(stepmod_dir, resource_docs_dir)
        File.join(
          stepmod_dir,
          "data/resource_docs",
          resource_docs_dir,
          "resource.xml",
        )
      end
    end
  end
end