lib/utilities/text_cleaner.rb in inspec_tools-2.3.5 vs lib/utilities/text_cleaner.rb in inspec_tools-2.3.6
- old
+ new
@@ -13,49 +13,44 @@
clean_section_header = remove_section_header(controls_data)
clean_whitespace = remove_newline_in_controls(clean_section_header)
clean_special = remove_special(clean_whitespace)
clean_no_space = remove_extra_space(clean_special)
clean_pagenum = remove_pagenum(clean_no_space)
- clean_data = separate_controls(clean_pagenum)
- clean_data
+ separate_controls(clean_pagenum)
end
# Removes everything before and after the controls
#
# Steps, in order: put a line break after every "| P a g e" footer marker
# (any run of extra trailing "e"s is collapsed), strip each line and drop the
# empty ones, delete literal "???" sequences, then return the text matched
# from a line starting with "1.1" up to, but not including, an "Appendix:"
# marker.
#
# @param extracted_data [String] text to search (presumably raw text extracted from a PDF — confirm against caller)
# @return [String] the matched controls text, or "" when the pattern does not match
def isolate_controls_data(extracted_data)
extracted_data = extracted_data.gsub(/\| P a g e+/, "| P a g e\n")
# Trim every line and discard blank ones; presumably so the end-of-line
# anchored lookahead below finds ")" directly at a line end.
- extracted_data = extracted_data.split("\n").map{ |line| line.strip}.reject { |e| e.to_s.empty? }.join("\n")
+ extracted_data = extracted_data.split("\n").map(&:strip).reject { |e| e.to_s.empty? }.join("\n")
extracted_data = extracted_data.gsub('???', '')
# Lazy match: "1.1" at a line start, non-")" characters up to a ")" that ends
# its line, then as few whole lines as possible before "Appendix:".
# Calling to_s on the match result yields "" when nothing matched (nil.to_s).
- controls_data = /^1\.1\s*[^\)]*?(?=\)$)(.*\n)*?(?=\s*Appendix:)/.match(extracted_data).to_s
- controls_data
+ /^1\.1\s*[^)]*?(?=\)$)(.*\n)*?(?=\s*Appendix:)/.match(extracted_data).to_s
end
# Removes all page numbers between the controls
#
# Deletes three page-footer forms, each consisting of a run of 1-3 digits
# followed by a marker: "N|Page", "N | P a g e" and "N | Page".
#
# @param extracted_data [String] text to clean
# @return [String] the text with those footer markers removed
def remove_pagenum(extracted_data)
# First pass: the unspaced "N|Page" form and the letter-spaced "N | P a g e" form.
clean_pagenum = extracted_data.gsub(/(\d{1,3}\|Page|\d{1,3} \| P a g e)/, '').to_s
# Second pass: the "N | Page" form (spaces around the bar, unspaced word).
- clean_pagenum = clean_pagenum.gsub(/(\d{1,3} \| Page)/, '').to_s
- clean_pagenum
+ clean_pagenum.gsub(/(\d{1,3} \| Page)/, '').to_s
end
# Removes section headers for each control
#
# Each match is replaced by "\n\n". A match starts at a whitespace character
# that is not immediately preceded by a bullet ("•"), followed by a newline
# and a line beginning with a single digit and whitespace; it then extends
# over as few lines as possible until a "<digit>.<digit>" sequence follows.
#
# @param extracted_data [String] text to clean
# @return [String] the text with each matched span replaced by "\n\n"
def remove_section_header(extracted_data)
- clean_section_header = extracted_data.gsub(/(?<!•)\s\n\d{1}\s.*(?:.*\n)*?(?=\d\.\d)/, "\n\n").to_s
- clean_section_header
+ extracted_data.gsub(/(?<!•)\s\n\d{1}\s.*(?:.*\n)*?(?=\d\.\d)/, "\n\n").to_s
end
# Removes newlines within a control
#
# Replaces every "whitespace character + newline" pair with a single "\n".
#
# NOTE(review): the negative lookahead `(?!d\.)` tests for a literal letter
# "d" followed by a period, not a digit; `\d\.` was presumably intended —
# confirm before changing, since it alters which newlines are collapsed.
#
# @param extracted_data [String] text to clean
# @return [String] the text after the substitution
def remove_newline_in_controls(extracted_data)
- clean_whitespace = extracted_data.gsub(/\s\n.*?(?!d\.)/, "\n").to_s
- clean_whitespace
+ extracted_data.gsub(/\s\n.*?(?!d\.)/, "\n").to_s
end
# Adds whitespace between different controls
#
# The pattern is a pure lookahead, so nothing is consumed: a newline is
# inserted in front of every control header and the header text is kept.
#
# @param extracted_data [String] text whose control headers should be separated
# @return [String] the text with a newline inserted before each control header
def separate_controls(extracted_data)
  # Header shape: line start, optional whitespace, "<digit>.<digits>", then a
  # title that may wrap onto one more line and ends with ")" at a line end.
  control_header_start = /((?=^\s*?\d\.\d{1,}.*\n?.*?(?<=\)$)))/
  with_breaks = extracted_data.gsub(control_header_start, "\n")
  with_breaks.to_s
end
# Strips unwanted glyphs from the text: first whatever the first pattern
# targets, then every bullet character ("•").
#
# NOTE(review): the character targeted by the first pattern does not render
# in this view (possibly a non-printing or private-use glyph) — confirm it is
# intact in the actual file.
#
# @param extracted_data [String] text to clean
# @return [String] the text with those glyphs removed
def remove_special(extracted_data)
- extracted_data = extracted_data.gsub(/[]/, '')
- extracted_data.gsub(/[•]/, '')
+ extracted_data = extracted_data.gsub(//, '')
+ extracted_data.gsub(/•/, '')
end
def remove_extra_space(extracted_data)
clean_data = extracted_data.gsub(/\n\n\n/, "\n")
clean_data = clean_data.gsub(/\t\n/, "\n")