lib/virastar.rb in virastar-0.0.2 vs lib/virastar.rb in virastar-0.0.3
- old
+ new
@@ -37,14 +37,14 @@
# replace three dots with ellipsis
text.gsub!(/\s*\.{3,}/,'…') if @fix_three_dots
# replace English quotes with their Persian equivalent
- text.gsub!(/(["'`]+)(.+)(\1)/, '«\2»') if @fix_english_quotes
+ text.gsub!(/(["'`]+)(.+?)(\1)/, '«\2»') if @fix_english_quotes
# should convert ه ی to هٔ
- text.gsub!(/(\S)(ه[\s]+ی)(\s)/, '\1هٔ\3') if @fix_hamzeh
+ text.gsub!(/(\S)(ه[\s]+[یي])(\s)/, '\1هٔ\3') if @fix_hamzeh
# remove unnecessary zwnj chars that are succeeded/preceded by a space
text.gsub!(/\s+|\s+/,' ') if @cleanup_zwnj
# should fix spacing for () [] {} “” «»
@@ -78,11 +78,11 @@
end
# put zwnj between word and suffix (*tar *tarin *ha *haye)
# there's a possible bug here: های and تر could be separate nouns rather than suffixes
if @fix_suffix_spacing
- text.gsub!(/\s+(تر(ین)?|ها(ی)?)\s+/,'\1 ')
+ text.gsub!(/\s+(تر(ی(ن)?)?|ها(ی)?)\s+/,'\1 ') # in case you can not read it: \s+(tar(i(n)?)?|ha(ye)?)\s+
end
# -- Aggressive Editing ------------------------------------------
if @aggresive
@@ -98,16 +98,16 @@
end
# ----------------------------------------------------------------
# : ; , . ! ? and their Persian equivalents should have one space after and no space before
if @fix_spacing_for_braces_and_quotes
- text.gsub!(/\s*([:;,؛،.؟!]{1})\s*/, '\1 ')
+ text.gsub!(/[ ]*([:;,؛،.؟!]{1})[ ]*/, '\1 ')
end
# should replace more than one space with just a single one
if @cleanup_spacing
text.gsub!(/[ ]+/,' ')
- text.gsub!(/\s*[\n]+\s*/," \n")
+ #text.gsub!(/\s*[\n]+\s*/," \n")
end
# remove spaces, tabs, and new lines from the beginning and end of file
text.strip! if @cleanup_begin_and_end
\ No newline at end of file