lib/mmfont.rb in mmfont-1.0.0 vs lib/mmfont.rb in mmfont-1.0.1

- old
+ new

@@ -1,123 +1,146 @@ -require "mmfont/version" +#-- +# Copyright (c) 2014 Ye Lin Aung +# Zawgyi<>Unicode converter python module +# Based on rules from Parabaik Myanmar Text Converter +# Copyright (C) 2014 Ngwe Tun (Solveware Solution) +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +#-- +require 'mmfont/version' + # Mmfont module module Mmfont # Your code goes here... 
# Ordered substitution rules used by #uni512zg1.
#
# Each entry is a [pattern, replacement] pair. The rules are applied
# strictly top to bottom, and later rules depend on the output of earlier
# ones (for example the \u103b..\u103e shift near the end), so the order
# must not be changed.
UNI_TO_ZG_RULES = [
  [/\u104e\u1004\u103a\u1038/, "\u104e"],
  [/\u102b\u103a/, "\u105a"],
  [/\u102d\u1036/, "\u108e"],
  [/\u103f/, "\u1086"],

  # \u1037 is replaced by \u1094 or \u1095 depending on what precedes it.
  [/(?<=\u102f)\u1037/, "\u1094"],
  [/(?<=\u102f\u1036)\u1037/, "\u1094"],
  [/(?<=\u1030)\u1037/, "\u1094"],
  [/(?<=\u1030\u1036)\u1037/, "\u1094"],
  [/(?<=\u1014)\u1037/, "\u1094"],
  [/(?<=\u1014[\u103a\u1032])\u1037/, "\u1094"],
  [/(?<=\u103b)\u1037/, "\u1095"],
  [/(?<=\u103b[\u1032\u1036])\u1037/, "\u1095"],
  [/(?<=\u103d)\u1037/, "\u1095"],
  [/(?<=\u103d[\u1032])\u1037/, "\u1095"],

  # \u102f becomes \u1033 after the listed preceding characters.
  [/(?<=[\u103b\u103c\u103d])\u102f/, "\u1033"],
  [/(?<=[\u103b\u103c\u103d][\u102d\u1036])\u102f/, "\u1033"],
  [/(?<=(\u1039[\u1000-\u1021]))\u102f/, "\u1033"],
  [/(?<=(\u1039[\u1000-\u1021])[\u102d\u1036])\u102f/, "\u1033"],
  [/(?<=[\u100a\u100c\u1020\u1025\u1029])\u102f/, "\u1033"],
  [/(?<=[\u100a\u100c\u1020\u1025\u1029][\u102d\u1036])\u102f/, "\u1033"],

  # \u1030 becomes \u1034 after the listed preceding characters.
  [/(?<=[\u103b\u103c])\u1030/, "\u1034"],
  [/(?<=[\u103b\u103c][\u103d])\u1030/, "\u1034"],
  [/(?<=[\u103b\u103c][\u103e])\u1030/, "\u1034"],
  [/(?<=[\u103b\u103c][\u102d\u1036])\u1030/, "\u1034"],
  [/(?<=[\u103b\u103c][\u103d][\u103e])\u1030/, "\u1034"],
  [/(?<=[\u103b\u103c][\u103d][\u102d\u1036])\u1030/, "\u1034"],
  [/(?<=[\u103b\u103c][\u103e][\u102d\u1036])\u1030/, "\u1034"],
  [/(?<=[\u103b\u103c][\u103d][\u103e][\u102d\u1036])\u1030/, "\u1034"],
  [/(?<=(\u1039[\u1000-\u1021]))\u1030/, "\u1034"],
  [/(?<=(\u1039[\u1000-\u1021])[\u102d\u1036])\u1030/, "\u1034"],
  [/(?<=[\u100a\u100c\u1020\u1025\u1029])\u1030/, "\u1034"],
  [/(?<=[\u100a\u100c\u1020\u1025\u1029][\u102d\u1036])\u1030/, "\u1034"],

  [/(?<=\u103c)\u103e/, "\u1087"],
  [/\u1009(?=[\u103a])/, "\u1025"],
  [/\u1009(?=\u1039[\u1000-\u1021])/, "\u1025"],

  # Move \u1031 in front of the consonant cluster it follows. The medial
  # group must accept zero OR MORE signs: with a single optional medial, a
  # cluster carrying two medials (e.g. \u103c\u103d) is never matched and
  # \u1031 is left behind at the end of the syllable.
  [/([\u1000-\u1021\u1029])((?:\u1039[\u1000-\u1021])?)([\u103b-\u103e\u1087]*)\u1031/, "\u1031\\1\\2\\3"],
  # Move \u103c in front of the consonant (and optional stacked consonant).
  [/([\u1000-\u1021\u1029])((?:\u1039[\u1000-\u1021\u1000-\u1021])?)(\u103c)/, "\\3\\1\\2"],

  # \u1004\u103a\u1039 collapses to \u1064, which is then moved behind the
  # following consonant and merged with a trailing \u102d/\u102e/\u1036.
  [/\u1004\u103a\u1039/, "\u1064"],
  [/(\u1064)((?:\u1031)?)((?:\u103c)?)([\u1000-\u1021])\u102d/, "\\2\\3\\4\u108b"],
  [/(\u1064)((?:\u1031)?)((?:\u103c)?)([\u1000-\u1021])\u102e/, "\\2\\3\\4\u108c"],
  [/(\u1064)((?:\u1031)?)((?:\u103c)?)([\u1000-\u1021])\u1036/, "\\2\\3\\4\u108d"],
  [/(\u1064)((?:\u1031)?)((?:\u103c)?)([\u1000-\u1021])/, "\\2\\3\\4\u1064"],

  # Alternate code points for \u100a, \u101b and \u1014 when followed by
  # the listed characters.
  [/\u100a(?=[\u1039\u102f\u1030])/, "\u106b"],
  [/\u101b(?=[\u102f\u1030])/, "\u1090"],
  [/\u1014(?=[\u1039\u103d\u103e\u102f\u1030])/, "\u108f"],

  # \u1039 + consonant pairs map to single code points.
  [/\u1039\u1000/, "\u1060"],
  [/\u1039\u1001/, "\u1061"],
  [/\u1039\u1002/, "\u1062"],
  [/\u1039\u1003/, "\u1063"],
  [/\u1039\u1005/, "\u1065"],
  [/\u1039\u1006/, "\u1066"],
  [/(?<=[\u1001\u1002\u1004\u1005\u1007\u1012\u1013\u108f\u1015\u1016\u1017\u1019\u101d])\u1066/, "\u1067"],
  [/\u1039\u1007/, "\u1068"],
  [/\u1039\u1008/, "\u1069"],
  [/\u1039\u100f/, "\u1070"],
  [/\u1039\u1010/, "\u1071"],
  [/(?<=[\u1001\u1002\u1004\u1005\u1007\u1012\u1013\u108f\u1015\u1016\u1017\u1019\u101d])\u1071/, "\u1072"],
  [/\u1039\u1011/, "\u1073"],
  [/(?<=[\u1001\u1002\u1004\u1005\u1007\u1012\u1013\u108f\u1015\u1016\u1017\u1019\u101d])\u1073/, "\u1074"],
  [/\u1039\u1012/, "\u1075"],
  [/\u1039\u1013/, "\u1076"],
  [/\u1039\u1014/, "\u1077"],
  [/\u1039\u1015/, "\u1078"],
  [/\u1039\u1016/, "\u1079"],
  [/\u1039\u1017/, "\u107a"],
  [/\u1039\u1018/, "\u107b"],
  [/\u1039\u1019/, "\u107c"],
  [/\u1039\u101c/, "\u1085"],
  [/\u100f\u1039\u100d/, "\u1091"],
  [/\u100b\u1039\u100c/, "\u1092"],
  [/\u1039\u100c/, "\u106d"],
  [/\u100b\u1039\u100b/, "\u1097"],
  [/\u1039\u100b/, "\u106c"],
  [/\u100e\u1039\u100d/, "\u106f"],
  [/\u100d\u1039\u100d/, "\u106e"],

  [/\u1009(?=\u103a)/, "\u1025"],
  [/\u1025(?=[\u1039\u102f\u1030])/, "\u106a"],

  # Shift \u103a..\u103e down by one code point. Each rule here consumes
  # the output position freed by the previous one, so order is critical.
  [/\u103a/, "\u1039"],
  [/\u103b\u103d\u103e/, "\u107d\u108a"],
  [/\u103d\u103e/, "\u108a"],
  [/\u103b/, "\u103a"],
  [/\u103c/, "\u103b"],
  [/\u103d/, "\u103c"],
  [/\u103e/, "\u103d"],

  # From here on the patterns match the already-shifted code points.
  [/\u103a(?=[\u103c\u103d\u108a])/, "\u107d"],
  [/(?<=\u100a(?:[\u102d\u102e\u1036\u108b\u108c\u108d\u108e]))\u103d/, "\u1087"],
  [/(?<=\u100a)\u103d/, "\u1087"],
  [/\u103b(?=[\u1000\u1003\u1006\u100f\u1010\u1011\u1018\u101a\u101c\u101e\u101f\u1021])/, "\u107e"],
  [/\u107e([\u1000-\u1021\u108f])(?=[\u102d\u102e\u1036\u108b\u108c\u108d\u108e])/, "\u1080\\1"],
  [/\u107e([\u1000-\u1021\u108f])(?=[\u103c\u108a])/, "\u1082\\1"],
  [/\u103b([\u1000-\u1021\u108f])(?=[\u102d\u102e\u1036\u108b\u108c\u108d\u108e])/, "\u107f\\1"],
  [/\u103b([\u1000-\u1021\u108f])(?=[\u103c\u108a])/, "\u1081\\1"],
  [/(?<=\u1014)\u1037/, "\u1094"],
  [/(?<=\u1014[\u103a\u1032])\u1037/, "\u1094"],
  [/(?<=\u1033)\u1094/, "\u1095"],
  [/(?<=\u1033[\u1036])\u1094/, "\u1095"],
  [/(?<=\u1034)\u1094/, "\u1095"],
  [/(?<=\u1034[\u1036])\u1094/, "\u1095"],
  [/(?<=[\u103c\u103d\u108a])\u1037/, "\u1095"],
  [/(?<=[\u103c\u103d\u108a][\u1032])\u1037/, "\u1095"]
].freeze

# Convert given text from unicode to zawgyi.
#
# Applies every rule in UNI_TO_ZG_RULES, in order, with String#gsub. The
# argument itself is never modified; a new string is returned.
#
# @param input_text [String] text to convert
# @return [String] the converted text
def uni512zg1(input_text)
  UNI_TO_ZG_RULES.reduce(input_text) do |text, (pattern, replacement)|
    text.gsub(pattern, replacement)
  end
end