lib/srx/segment.srx in srx-languagetool-0.3.0 vs lib/srx/segment.srx in srx-languagetool-0.4.0
- old
+ new
@@ -1100,26 +1100,34 @@
<beforebreak>: </beforebreak>
<afterbreak>[—\-–] \p{Lu}</afterbreak>
</rule>
</languagerule>
<languagerule languagerulename="English">
+<rule break="no"><!-- no break between a whitespace or no-break space (U+00A0) character and a line feed that directly follows it -->
+<beforebreak>[\u00A0\s]</beforebreak>
+<afterbreak>\n</afterbreak>
+</rule>
<rule break="no"><!-- Hello (Hi! ) my name is Chris -->
-<beforebreak>[a-zA-Z][!\?]\s</beforebreak>
-<afterbreak>\)\s[a-zA-Z]</afterbreak>
+<beforebreak>[a-zA-Z][!\?][\s\u00A0]</beforebreak>
+<afterbreak>\)[\s\u00A0][a-zA-Z]</afterbreak>
</rule>
<rule break="no">
-<beforebreak>Yahoo!\s</beforebreak>
+<beforebreak>Yahoo![\s\u00A0]</beforebreak>
<afterbreak>\p{Ll}</afterbreak>
</rule>
<rule break="no"><!-- U.S.A (no dot at end) -->
<beforebreak>[A-Z]\.[A-Z]\.</beforebreak>
<afterbreak>[A-Z]\b</afterbreak>
</rule>
<rule break="no"><!-- A.I (no dot at end) -->
<beforebreak>\bA\.</beforebreak>
<afterbreak>I\b</afterbreak>
</rule>
+<rule break="no"><!-- S.I (no dot at end) -->
+<beforebreak>\bS\.</beforebreak>
+<afterbreak>I\b</afterbreak>
+</rule>
<rule break="no"><!-- L.A (no dot at end) -->
<beforebreak>\bL\.</beforebreak>
<afterbreak>A\b</afterbreak>
</rule>
<rule break="no"><!-- U.S (no dot at end) -->
@@ -1133,100 +1141,100 @@
<rule break="no"><!-- Subdomains without "www." (e.g. foo.MyDomain.com)-->
<beforebreak>\b[A-Za-z0-9\-]+\.</beforebreak>
<afterbreak>[A-Za-z0-9\-]+\.(com|net|org|info|de|es|edu|co|eu|nl|io|cn|uk|gov|biz|ca|tk|ru|br|jp|pl|be|dev|co|fr|dk|se)(\.|\b)</afterbreak>
</rule>
<rule break="no"><!-- No. 5 -->
-<beforebreak>\b[nN]o\.\s</beforebreak>
+<beforebreak>\b[nN]o\.[\s\u00A0]</beforebreak>
<afterbreak>\p{N}</afterbreak>
</rule>
<rule break="no"><!-- Ph.D. -->
-<beforebreak>\bP[Hh]\.\s?</beforebreak>
+<beforebreak>\bP[Hh]\.[\s\u00A0]?</beforebreak>
<afterbreak>D\.?</afterbreak>
</rule>
<rule break="no"><!-- min. and similar abbreviations (ed., viz., vol., approx., dept., ...): no break when followed by a non-uppercase character or "I" -->
-<beforebreak>\b([Ee]d|pp|[Vv]iz|i\.?\s*e|[Vvol]|[Rr]col|maj|Lt|[Ff]ig|[Ff]igs|[Vv]iz|[Vv]ols|[Aa]pprox|[Ii]ncl?|[Aa]cc|Pres|[Cc]orp|[Ee]x|[Cc]onn|[Dd]ept|[Mm]in|max|[Gg]ovt|lb|lbf|ft|c\.?\s*f|vs|dia|lbs|\d+-(:?oz|kc|in|h[rp]|ml)|M?sec)\.\s</beforebreak>
+<beforebreak>\b([Ee]d|pp|[Vv]iz|i\.?[\s\u00A0]*e|[Vv]ol|[Rr]col|maj|Lt|[Ff]ig|[Ff]igs|[Vv]iz|[Vv]ols|[Aa]pprox|[Ii]ncl?|[Aa]cc|Pres|[Cc]orp|[Ee]x|[Cc]onn|[Dd]ept|[Mm]in|max|[Gg]ovt|lb|lbf|ft|c\.?[\s\u00A0]*f|vs|dia|lbs|\d+-(?:oz|kc|in|h[rp]|ml)|M?sec)\.[\s\u00A0]</beforebreak><!-- "[Vv]ol" is the word "vol"/"Vol" (was the one-character class [Vvol]); "(?:...)" is a non-capturing group (was "(:?", an optional colon) -->
<afterbreak>[^\p{Lu}]|I</afterbreak>
</rule>
<rule break="no"><!-- hr. -->
-<beforebreak>\b(hr)\.\s</beforebreak>
+<beforebreak>\b(hr)\.[\s\u00A0]</beforebreak>
<afterbreak>[^\p{Lu}]|I</afterbreak>
</rule>
<rule break="no"><!-- Fig. 8 -->
-<beforebreak>\b([Vv]ol|[Ff]ig|[Dd]ef|[Ee]q|[Ll]em|[Pp]rop|[Tt]hm)s?\.\s</beforebreak>
+<beforebreak>\b([Vv]ol|[Ff]ig|[Dd]ef|[Ee]q|[Ll]em|[Pp]rop|[Tt]hm)s?\.[\s\u00A0]</beforebreak>
<afterbreak>\p{N}|[IXV]+</afterbreak>
</rule>
<rule break="no"><!-- Fig. (8) -->
-<beforebreak>\b([Ff]ig|[Dd]ef|[Ee]q|[Ll]em|[Pp]rop|[Tt]hm)s?\.\s</beforebreak>
+<beforebreak>\b([Ff]ig|[Dd]ef|[Ee]q|[Ll]em|[Pp]rop|[Tt]hm)s?\.[\s\u00A0]</beforebreak>
<afterbreak>\(\p{N}\)</afterbreak>
</rule>
<rule break="no"><!-- I'm (...) great! -->
-<beforebreak>(…|\.\.\.)\s?\)\s</beforebreak>
+<beforebreak>(…|\.\.\.)[\s\u00A0]?\)[\s\u00A0]</beforebreak>
<afterbreak>[^\p{P}]</afterbreak>
</rule>
<rule break="no"><!-- I will work with someone (Chris or ...?). -->
-<beforebreak>(…|\.\.\.)\s?\?\)\s</beforebreak>
+<beforebreak>(…|\.\.\.)[\s\u00A0]?\?\)[\s\u00A0]</beforebreak>
<afterbreak>[^\p{P}]</afterbreak>
</rule>
<rule break="no"><!-- e.g. -->
-<beforebreak>\be\.g\.\s</beforebreak>
+<beforebreak>\be\.g\.[\s\u00A0]</beforebreak>
<afterbreak></afterbreak>
</rule>
<rule break="no"><!-- vs. -->
-<beforebreak>\bvs\.\s</beforebreak>
+<beforebreak>\bvs\.[\s\u00A0]</beforebreak>
<afterbreak></afterbreak>
</rule>
<rule break="no"><!-- esp. -->
-<beforebreak>\be[sx]p\.\s</beforebreak>
+<beforebreak>\be[sx]p\.[\s\u00A0]</beforebreak>
<afterbreak></afterbreak>
</rule>
<!--"Etc." can end the sentence, so we check for the uppercase letter after it.-->
<rule break="no"><!-- Etc. -->
-<beforebreak>\b[Ee]tc\.\s</beforebreak>
+<beforebreak>\b[Ee]tc\.[\s\u00A0]</beforebreak>
<afterbreak>[^\p{Lu}]</afterbreak>
</rule>
<rule break="no"><!-- BTW (by the way) -->
-<beforebreak>\b([Bb]tw|BTW)\.\s</beforebreak>
+<beforebreak>\b([Bb]tw|BTW)\.[\s\u00A0]</beforebreak>
<afterbreak></afterbreak>
</rule>
<rule break="no">
-<beforebreak>\bJan\.\s</beforebreak>
+<beforebreak>\bJan\.[\s\u00A0]</beforebreak>
<afterbreak></afterbreak>
</rule>
<rule break="no">
-<beforebreak>\bFeb\.\s</beforebreak>
+<beforebreak>\bFeb\.[\s\u00A0]</beforebreak>
<afterbreak></afterbreak>
</rule>
<rule break="no">
-<beforebreak>\bMar\.\s</beforebreak>
+<beforebreak>\bMar\.[\s\u00A0]</beforebreak>
<afterbreak></afterbreak>
</rule>
<rule break="no">
-<beforebreak>\bApr\.\s</beforebreak>
+<beforebreak>\bApr\.[\s\u00A0]</beforebreak>
<afterbreak></afterbreak>
</rule>
<rule break="no">
-<beforebreak>\bJu[nl]\.\s</beforebreak>
+<beforebreak>\bJu[nl]\.[\s\u00A0]</beforebreak>
<afterbreak></afterbreak>
</rule>
<rule break="no">
-<beforebreak>\bAug\.\s</beforebreak>
+<beforebreak>\bAug\.[\s\u00A0]</beforebreak>
<afterbreak></afterbreak>
</rule>
<rule break="no">
-<beforebreak>\bSept?\.\s</beforebreak>
+<beforebreak>\bSept?\.[\s\u00A0]</beforebreak>
<afterbreak></afterbreak>
</rule>
<rule break="no">
-<beforebreak>\bOct\.\s</beforebreak>
+<beforebreak>\bOct\.[\s\u00A0]</beforebreak>
<afterbreak></afterbreak>
</rule>
<rule break="no">
-<beforebreak>\bNov\.\s</beforebreak>
+<beforebreak>\bNov\.[\s\u00A0]</beforebreak>
<afterbreak></afterbreak>
</rule>
<rule break="no">
-<beforebreak>\bDec\.\s</beforebreak>
+<beforebreak>\bDec\.[\s\u00A0]</beforebreak>
<afterbreak></afterbreak>
</rule>
<rule break="no">
<beforebreak>(?i)FRITZ!</beforebreak>
<afterbreak>(?i)Box</afterbreak>
@@ -1234,99 +1242,99 @@
<rule break="no"><!-- VW model names "ID.3", "ID.Buzz", "ID.Crozz": https://de.wikipedia.org/wiki/VW_ID.3 -->
<beforebreak>ID\.</beforebreak><!-- dot escaped so that only a literal "." after "ID" matches -->
<afterbreak>3|Buzz|Crozz</afterbreak>
</rule>
<rule break="no"><!-- Ph.D. (see rule PH_D) -->
-<beforebreak>\bP[Hh]\.?\s?[Dd]\.\s</beforebreak>
+<beforebreak>\bP[Hh]\.?[\s\u00A0]?[Dd]\.[\s\u00A0]</beforebreak>
<afterbreak></afterbreak>
</rule>
<rule break="no"><!-- "I have a B. Eng. degree" (see rule BACHELOR_ABBR) -->
-<beforebreak>\b(P[hH][dD]|BSc|BEng|BComp|BArch|MSc|MEng|MComp)\.\s</beforebreak>
+<beforebreak>\b(P[hH][dD]|BSc|BEng|BComp|BArch|MSc|MEng|MComp)\.[\s\u00A0]</beforebreak>
<afterbreak></afterbreak>
</rule>
<rule break="no"><!-- "I have a LL.B degree." (see rule PH_D) -->
-<beforebreak>\bLL\.\s?[BM]\.\s</beforebreak>
+<beforebreak>\bLL\.[\s\u00A0]?[BM]\.[\s\u00A0]</beforebreak>
<afterbreak></afterbreak>
</rule>
<rule break="no"><!-- B.Eng. (Bachelor of Engineering) -->
-<beforebreak>\b[BM]\.\s?</beforebreak>
+<beforebreak>\b[BM]\.[\s\u00A0]?</beforebreak>
<afterbreak>Eng\.?</afterbreak>
</rule>
<rule break="no"><!-- LL.B. (Bachelor of Laws) -->
-<beforebreak>\bLL\.\s?</beforebreak>
+<beforebreak>\bLL\.[\s\u00A0]?</beforebreak>
<afterbreak>[BM]\.?</afterbreak>
</rule>
<rule break="no"><!-- B.Sc. (Bachelor of Science) -->
-<beforebreak>\b[BM]\.\s?</beforebreak>
+<beforebreak>\b[BM]\.[\s\u00A0]?</beforebreak>
<afterbreak>Sc\.?</afterbreak>
</rule>
<rule break="no"><!-- B.Comp. (Bachelor of Computing) -->
-<beforebreak>\b[BM]\.\s?</beforebreak>
+<beforebreak>\b[BM]\.[\s\u00A0]?</beforebreak>
<afterbreak>Comp?\.?</afterbreak>
</rule>
<rule break="no"><!-- B.Arch. (Bachelor of Architecture) -->
-<beforebreak>\b[BM]\.\s?</beforebreak>
+<beforebreak>\b[BM]\.[\s\u00A0]?</beforebreak>
<afterbreak>Arch\.?</afterbreak>
</rule>
<rule break="no">
-<beforebreak>\b[BM]\.?\s?(Sc|Eng|Comp|Arch)\.\s</beforebreak>
+<beforebreak>\b[BM]\.?[\s\u00A0]?(Sc|Eng|Comp|Arch)\.[\s\u00A0]</beforebreak>
<afterbreak></afterbreak>
</rule>
<rule break="no">
-<beforebreak>\bet\b\s\bal\.\s</beforebreak>
+<beforebreak>\bet\b[\s\u00A0]\bal\.[\s\u00A0]</beforebreak>
<afterbreak></afterbreak>
</rule>
<rule break="no">
<beforebreak>\b(a(?:bbrev|uth|bl|bsol|bstr|cc|ccus|dv|dvb|dvs|gst|lt|phet|pp|ppos|cc|dj|djs|rch|rt|ttrib)|A(?:bbrev|uth|bd|berd|berdeensh|bol|borig|bp|br|bridg|bridgem|bsol|bst|bstr|cad|cc|ccept|ccomm|ccompl|ccs|cct|ccts|chievem|dd|ddit|ddr|dm|dmin|dmir|dmon|dmonit|dv|dvancem|dvert|dvoc|dvt|dvts|erodynam|eronaut|ff|ffect|fr|gric|lch|lg|lleg|llit|lm|lph|mer|nal|nalyt|nat|nc|necd|ng|ngl|nim|nn|nniv|nnot|nsw|nt|nthrop|nthropol|ntiq|poc|pol|pp|ppl|pplic|rch|rchaeol|rchipel|rchit|rgt|rith|rithm|rrangem|rtic|rtific|rtill|ssemb|ssoc|ssyriol|str|strol|stron|stronaut|tt|ttrib|ustral|uth|utobiog|utobiogr|yrsh|rab)|B(?:acteriol|edford|edfordsh|elg|erks|erksh|erw|erwicksh|ibliogr|iochem|iog|iogr|iol|ks|ord|ot|raz|rit|ucks|uild|ull|ur)|b(?:ef|etw)|c(?:ent|ollect|olloq|ompar|ompl|onc|oncr|omp|onj|ons|onst|ontempt|orresp|pd|ontr)|C(?:ontradict|ontrib|ontrov|onv|onvent|onversat|onvoc|ornw|oron|orr|orresp|ounc|ourtsh|raniol|raniom|rim|rit|rt|rts|ryptogr|rystallogr|umb|umberld|umbld|ycl|ytol|ollect|onn|al|alc|alend|alif|alligr|amb|ambr|ampanol|anad|anterb|artogr|atal|atech|ath|ent|eram|ert|ertif|hamb|har|harac|has|hem|hesh|hr|hron|hronol|hrons|inematogr|irc|lass|lassif|limatol|lin|oll|olloq|om|omb|ombs|omm|ommandm|ommend|ommerc|ommiss|ommonw|ommunic|omp|ompan|ompar|ompend|ompl|ompos|onc|onch|oncl|onf|onfid|onfl|onfut|ongr|ongreg|ongress|onsc|onsecr|onsid|onsol|onstit|onstr|ontemp|ontempl|ontend|ontent|ontin)|d(?:at|em|ial|im|yslog|ef|eriv|erog)|D(?:au|eb|eclar|ed|ef|eliv|emonstr|ep|epred|epredat|erbysh|escr|evel|evonsh|ial|ict|iffic|irect|is|isc|iscipl|iscov|iscrim|iscuss|iss|istemp|istill|istrib|iv|ivers|oc|octr|omest|urh)|e(?:tym|tymol|uphem|xc|ast|llipt|mph|rron)|E(?:val|vang|ven|vid|vol|xalt|xam|xch|xec|xerc|xhib|xped|xper|xplan|xplic|xplor|xpos|tymol|ccl|ccles|col|con|din|dinb|duc|dw|gypt|gyptol|lectr|lectro-magn|lectro-physiol|lem|liz|lizab|mb|mbryol|ncycl|ng|ngin|nglishw|nq|nt|nthus|ntom|ntomol|nzymol|pil|pisc|pist|pit|quip|ss|ssent|stabl|thnol)|f(?:em|req|ut|am|amil)|F(?:ifes
h|ootpr|orfarsh|ortif|ortn|ound|ragm|ratern|riendsh|und|urnit|ab|am|arew)|G(?:ard|astron|az|eo|eog|eogr|eol|eom|eomorphol|er|lac|lasg|los|loss|louc|loucestersh|osp|ram|ynaecol)|g(?:erund|en)|H(?:aematol|ampsh|andbk|ants|eb|en|er|erb|eref|ereford|erefordsh|ertfordsh|ierogl|ist|istol|om|orol|ort|osp|ouseh|ousek|usb|ydraul|ydrol)|hist|I(?:nd|ndustr|nfl|nnoc|norg|nq|nst|ntell|ntellect|nterc|nterl|nternat|nterpr|chth|cthyol|deol|dol|llustr|mag|mpr|naug|nclos|nd|nstr|tal|ntro|ntrod|nv|nvent|nvertebr|nvestig|nvestm|nvoc|rel|mmunol)|i(?:nt|nterj|nterrog|ntr|ntrans|mp|mperf|mpers|mpf|mprop|nstr|nd|ndef|ndic|ndir|nfin|nfl|ron|rreg|mit)|J(?:ahrb|ap|as|rnl|rnls|urisd|urisdict|urispr|ustif|ustific)|joc|K(?:ent|ingd|nowl|pr)|L(?:ab|anc|ancash|ancs|ang|angs|at|d|ds|ect|eechd|eg|eicest|eicester|eicestersh|eics|et|ett|ex|ibr|imnol|incolnsh|incs|ing|inn|it|ithogr|ithol|iturg|ond)|m(?:asc|ed|etaphor|idl|ispr|od)|M(?:ach|ag|agn|an|anagem|anch|anip|anuf|ath|eas|easurem|ech|ed|edit|em|erc|erch|etall|etallif|etallogr|etamorph|etaph|eteorol|eth|etrop|ex|ich|icrobiol|icrosc|il|ilit|in|ineral|isc|iscell|od|onum|orphol|SS|tg|unic|unif|unim|us|yst|yth|ythol)|n(?:once-wd|orth|om)|N(?:arr|arrat|at|aut|av|avig|eighb|erv|eurol|eurosurg|ewc|ewspr|onconf|orf|orthamptonsh|orthants|orthumb|orthumbld|orthumbr|orw|orweg|otts|ucl|umism|on-conf)|o(?:ccas|pp|rig|bj|bl|bs)|O(?:bs|bserv|bstet|bstetr|ccas|ccup|ccurr|ceanogr|ff|ffic|kla|nt|phthalm|phthalmol|ppress|pt|rac|rd|rg|rig|rkn|rnith|rnithol|rthogr|utl|xf|xfordsh|xon|bed|bj)|p(?:ass|erf|ers|ersonif|honet|hr|op|lur|oet|ref|rep|riv|rob|oss|pl|ple|ples|rec|red|redic|ron|ronunc|rop|rov|ropr|seudo-arch|seudo-dial|seudo-Sc|erh|res)|P(?:eriodontol|redict|rerog|sych|sychoanal|sychoanalyt|sychol|sychopathol|ubl|urg|erf|alaeobot|alaeogr|alaeont|alaeontol|araphr|arasitol|arl|arnass|ath|athol|eculat|enins|ers|ersec|erthsh|etrogr|etrol|harm|harmaceut|harmacol|hil|hilad|hilol|hilos|hoen|honol|hotog|hotogr|hrenol|hys|hysiogr|hysiol|ict|oet|ol|olit|olytechn|op|orc|ort|
osth|ostm|ott|ract|ref|reh|rehist|resb|reserv|rim|rinc|rint|robab|robl|roc|rod|rol|rov|rovid|rovinc|rovis|ronunc|rop|ros)|Qld|q(?:uot|uots)|r(?:edupl|eg|epr|het|efash|efl|el)|R(?:adiol|eas|eb|ebell|ec|eclam|ecoll|edempt|ef|efl|efus|efut|eg|egic|egist|egr|el|elig|eminisc|emonstr|enfrewsh|eprod|ept|epub|es|esid|et|etrosp|evol|het|ich|om|oxb|oy|udim|uss)|s(?:ing|outh|pec|tr|ubj|ubjunct|ubord|ubseq|ubst|uff|uperl|yll)|S(?:ubj|uff|ubscr|ubscript|uppl|upplic|uppress|urg|urv|ymmetr|ymp|yst|pan|ask|at|ax|cand|ch|ci|cot|cotl|cript|culpt|eismol|el|elect|er|erm|ess|ettlem|ev|hakes|haks|heph|hetl|hropsh|oc|ociol|om|onn|pec|pecif|pecim|pectrosc|taff|tafford|taffordsh|taffs|tand|tat|tatist|tratigr|truct|tud)|t(?:echn|rans|ransf|ransl)|T(?:ransl|ransubstant|rav|reas|reat|reatm|rib|rig|rigonom|rop|roub|roubl|ypog|ypogr|axon|rans|echn|echnol|el|elecomm|elegr|eleph|eratol|erminol|errestr|est|extbk|heat|heatr|heol|heoret|hermonucl|hes|opogr|rag)|U(?:niv|rin)|u(?:nkn|nstr|lt|su)|U(?:nnat|noffic|tilit)|V(?:ac|aledict)|v(?:ar|arr|ars|bl|bs|ulg)|V(?:eg|enet|ertebr|et|ic|ict|ind|indic|irg|irol|oc|ocab|ol|oy|ulg)|W(?:estm|estmld|estmorld|estmrld|ill|ilts|iltsh|is|isd|kly|ks|onderf|orc|orcestersh|orcs|rit|arwicksh)|west|Y(?:earbk|ng|orks|orksh|rs)|Z(?:eitschr|oogeogr|ool))\.\b</beforebreak>
<afterbreak></afterbreak>
</rule>
<rule break="no"><!-- titles and ranks (Mr., Dr., Prof., Capt., ...): never break after them -->
-<beforebreak>\b(Atty|Sg?t|[SG]en|Ft|Gov|Hon|Prof|Mr?s|Mt|[DMJS]r|Col|Maj|L(ieu)?t|Brig|Capt|Cmdr|Cmnd|Revd?|Rep)\.\s</beforebreak>
+<beforebreak>\b(Atty|Sg?t|[SG]en|Ft|Gov|Hon|Prof|Mr?s|Mt|[DMJS]r|Col|Maj|L(ieu)?t|Brig|Capt|Cmdr|Cmnd|Revd?|Rep)\.[\s\u00A0]</beforebreak><!-- trailing class is whitespace or NBSP, written as one flat class (was the nested "[[\s\u00A0]\u00A0]") -->
<afterbreak></afterbreak>
</rule>
<rule break="no">
-<beforebreak>\b(Atty|Sg?t|[SG]en|Gov|Hon|Prof|Mr?s|[DMJS]r|Col|Maj|L(ieu)?t|Brig|Capt|Cmdr|Cmnd|Revd?|Rep)\.\s[A-Z]\.\s</beforebreak>
+<beforebreak>\b(Atty|Sg?t|[SG]en|Gov|Hon|Prof|Mr?s|[DMJS]r|Col|Maj|L(ieu)?t|Brig|Capt|Cmdr|Cmnd|Revd?|Rep)\.[\s\u00A0][A-Z]\.[\s\u00A0]</beforebreak>
<afterbreak></afterbreak>
</rule>
<rule break="no">
-<beforebreak>\b(Drs|Messrs|Mmes)\.\s</beforebreak>
-<afterbreak>(and\s)|\p{Lu}\p{Ll}+</afterbreak>
+<beforebreak>\b(Drs|Messrs|Mmes)\.[\s\u00A0]</beforebreak>
+<afterbreak>(and[\s\u00A0])|\p{Lu}\p{Ll}+</afterbreak>
</rule>
<rule break="no">
-<beforebreak>\bcf\.\s</beforebreak>
+<beforebreak>\bcf\.[\s\u00A0]</beforebreak>
<afterbreak></afterbreak>
</rule>
<rule break="no">
-<beforebreak>\bI(nc|NC)\.\s</beforebreak>
+<beforebreak>\bI(nc|NC)\.[\s\u00A0]</beforebreak>
<afterbreak></afterbreak>
</rule>
<rule break="no">
-<beforebreak>\bCorp\.\s</beforebreak>
+<beforebreak>\bCorp\.[\s\u00A0]</beforebreak>
<afterbreak></afterbreak>
</rule>
<rule break="no">
-<beforebreak>\bBros\.\s</beforebreak>
+<beforebreak>\bBros\.[\s\u00A0]</beforebreak>
<afterbreak></afterbreak>
</rule>
<rule break="no">
-<beforebreak>\bDist\.\s</beforebreak>
+<beforebreak>\bDist\.[\s\u00A0]</beforebreak>
<afterbreak></afterbreak>
</rule>
<rule break="no">
-<beforebreak>\bCo\.\s</beforebreak>
+<beforebreak>\bCo\.[\s\u00A0]</beforebreak>
<afterbreak></afterbreak>
</rule>
<rule break="no">
-<beforebreak>\bo'clock\s</beforebreak>
+<beforebreak>\bo'clock[\s\u00A0]</beforebreak>
<afterbreak></afterbreak>
</rule>
<rule break="no">
-<beforebreak>\bfo'c'sle\s</beforebreak>
+<beforebreak>\bfo'c'sle[\s\u00A0]</beforebreak>
<afterbreak></afterbreak>
</rule>
<rule break="no">
-<beforebreak>\bLtd\.\s</beforebreak>
+<beforebreak>\bLtd\.[\s\u00A0]</beforebreak>
<afterbreak>\p{Ll}+</afterbreak>
</rule>
<rule break="no">
<beforebreak>[\[\(]*…[\]\)]* </beforebreak>
<afterbreak>\p{Ll}</afterbreak>
@@ -1338,83 +1346,83 @@
<rule break="no"><!-- sentence punctuation + closing bracket, then a lowercase letter: the sentence continues -->
<beforebreak>[\.!?…]+\p{Pe}[ \u00A0]</beforebreak><!-- also accept a no-break space, since the general break rule for this language matches [\s\u00A0] -->
<afterbreak>\p{Ll}</afterbreak>
</rule>
<rule break="no">
-<beforebreak>["”'’]\s*</beforebreak>
-<afterbreak>\s*\p{Ll}</afterbreak>
+<beforebreak>["”'’][\s\u00A0]*</beforebreak>
+<afterbreak>[\s\u00A0]*\p{Ll}</afterbreak>
</rule>
<rule break="no">
-<beforebreak>['"„][\.!?…]['"”]\s</beforebreak>
+<beforebreak>['"„][\.!?…]['"”][\s\u00A0]</beforebreak>
<afterbreak></afterbreak>
</rule>
<rule break="no">
-<beforebreak>\b\p{L}\.\s</beforebreak>
-<afterbreak>\p{L}\.\s</afterbreak>
+<beforebreak>\b\p{L}\.[\s\u00A0]</beforebreak>
+<afterbreak>\p{L}\.[\s\u00A0]</afterbreak>
</rule>
<rule break="no">
<beforebreak>\b\p{L}\.</beforebreak>
<afterbreak>\p{L}\.</afterbreak>
</rule>
<rule break="no"><!-- Jones v. Smith -->
-<beforebreak>\p{Lu}\p{L}+\sv\.\s</beforebreak>
+<beforebreak>\p{Lu}\p{L}+[\s\u00A0]v\.[\s\u00A0]</beforebreak>
<afterbreak>\p{Lu}\p{L}+</afterbreak>
</rule>
<rule break="yes">
-<beforebreak>[^,][\s]\p{L}{2}\.\s</beforebreak>
-<afterbreak>\p{N}+\)\s</afterbreak>
+<beforebreak>[^,][\s\u00A0]\p{L}{2}\.[\s\u00A0]</beforebreak>
+<afterbreak>\p{N}+\)[\s\u00A0]</afterbreak>
</rule>
<rule break="yes">
-<beforebreak>\bOK\.\s</beforebreak>
+<beforebreak>\bOK\.[\s\u00A0]</beforebreak>
<afterbreak>\p{Ll}+</afterbreak>
</rule>
<rule break="no">
-<beforebreak>[\.\s](?!(on|it|of|to|be|by|at|he|we|so|do|if|up|my|me|us|go|am))\p{L}{1,2}\.\s</beforebreak><!-- not 'no'/'in', these could be abbreviations-->
+<beforebreak>[\.\s\u00A0](?!(on|it|of|to|be|by|at|he|we|so|do|if|up|my|me|us|go|am))\p{L}{1,2}\.[\s\u00A0]</beforebreak><!-- not 'no'/'in', these could be abbreviations-->
<afterbreak>[\p{N}\p{Ll}]</afterbreak>
</rule>
<rule break="no"><!-- "..." (optionally wrapped in brackets) followed by anything but an uppercase letter: the sentence continues -->
<beforebreak>[\[\(]*\.\.\.[\]\)]*[ \u00A0]</beforebreak><!-- also accept a no-break space, since the general break rule for this language matches [\s\u00A0] -->
<afterbreak>[^\p{Lu}]</afterbreak>
</rule>
<rule break="no">
-<beforebreak>\b\p{Lu}\.\s\p{Lu}\.\s</beforebreak>
+<beforebreak>\b\p{Lu}\.[\s\u00A0]\p{Lu}\.[\s\u00A0]</beforebreak>
<afterbreak></afterbreak>
</rule>
<rule break="no">
-<beforebreak>\b\p{Lu}\.\p{Lu}\.\s</beforebreak>
+<beforebreak>\b\p{Lu}\.\p{Lu}\.[\s\u00A0]</beforebreak>
<afterbreak></afterbreak>
</rule>
<rule break="no">
-<beforebreak>[^\.]\s[A-Z]\.\s</beforebreak>
+<beforebreak>[^\.][\s\u00A0][A-Z]\.[\s\u00A0]</beforebreak>
<afterbreak></afterbreak>
</rule>
<rule break="no"><!-- Blvd., Ave., Mt./Mts. followed by a lowercase word -->
-<beforebreak>\b(:?Blvd|Ave|Mts?)\.\s</beforebreak>
+<beforebreak>\b(?:Blvd|Ave|Mts?)\.[\s\u00A0]</beforebreak><!-- "(?:...)" is a non-capturing group (was "(:?", which matched an optional colon) -->
<afterbreak>\p{Ll}+</afterbreak>
</rule>
<rule break="no">
-<beforebreak>\b(?:Kan|Ill|M[ai]ss)\.\s</beforebreak>
+<beforebreak>\b(?:Kan|Ill|M[ai]ss)\.[\s\u00A0]</beforebreak>
<afterbreak>\p{Ll}+</afterbreak>
</rule>
<rule break="no">
-<beforebreak>\(\p{Ll}+\.\s</beforebreak>
+<beforebreak>\(\p{Ll}+\.[\s\u00A0]</beforebreak>
<afterbreak></afterbreak>
</rule>
<rule break="no"><!-- i.e. -->
-<beforebreak>i\.e\.\s</beforebreak><!-- "i.e." is never at end of sentence -->
+<beforebreak>i\.e\.[\s\u00A0]</beforebreak><!-- "i.e." is never at end of sentence -->
<afterbreak></afterbreak>
</rule>
<rule break="yes">
-<beforebreak>[\.!?…][\u00BB\u2019\u201D\u203A"'\p{Pe}\u0002¹²³]*\s</beforebreak>
+<beforebreak>[\.!?…][\u00BB\u2019\u201D\u203A"'\p{Pe}\u0002¹²³]*[\s\u00A0]</beforebreak>
<afterbreak></afterbreak>
</rule>
<rule break="yes">
<beforebreak>[\.!?…]['"\u00BB\u2019\u201D\u203A\p{Pe}\u0002]*</beforebreak>
<afterbreak>\p{Lu}[^\p{Lu}]</afterbreak>
</rule>
<rule break="yes">
-<beforebreak>\s\p{L}[\.!?…]\s</beforebreak>
+<beforebreak>[\s\u00A0]\p{L}[\.!?…][\s\u00A0]</beforebreak>
<afterbreak>\p{Lu}\p{Ll}</afterbreak>
</rule>
</languagerule>
<languagerule languagerulename="Romanian">
<rule break="no">
@@ -1509,10 +1517,15 @@
<beforebreak>\s\p{L}[\.!?…]\s</beforebreak>
<afterbreak>\p{Lu}\p{Ll}</afterbreak>
</rule>
</languagerule>
<languagerule languagerulename="Dutch">
+<rule break="no">
+<!-- .Net -->
+<beforebreak>\s[.]</beforebreak>
+<afterbreak>[Nn][Ee][Tt](\b|-)</afterbreak>
+</rule>
<rule break="no"><!-- quoted sentence in sentence -->
<beforebreak>[.?!][’'"]</beforebreak>
<afterbreak> [a-z]</afterbreak>
</rule>
<rule break="no"><!-- URLs without "www."-->
@@ -1727,10 +1740,35 @@
</rule>
<rule break="yes">
<beforebreak>[?!.]\s</beforebreak>
<afterbreak>['"\u00BB\u2019\u201D\u203A\u00AB\p{Pe}\u0002][A-Z][a-z]</afterbreak>
</rule>
+<rule break="no">
+<!-- "E. coli etc. -->
+<beforebreak>"[A-Z][.]\s</beforebreak>
+<afterbreak>[a-z]</afterbreak>
+</rule>
+<rule break="no">
+<!-- Cornelisz. : a capitalised name ending in "sz." followed by a lowercase letter; [a-z]+ keeps the match inside one word (was ".*", which could span arbitrary text) -->
+<beforebreak>[A-Z][a-z]+sz[.]\s</beforebreak>
+<afterbreak>[a-z]</afterbreak>
+</rule>
+<rule break="no">
+<!-- De n. XIV/vagus (nervus) -->
+<beforebreak>De n[.]\s</beforebreak>
+<afterbreak>[a-z]|[XIV]</afterbreak>
+</rule>
+<rule break="no">
+<!-- MOL.E -->
+<beforebreak>[A-Z]{2,5}[.]</beforebreak>
+<afterbreak>[A-Z]</afterbreak>
+</rule>
+<rule break="no">
+<!-- ..." betekent -->
+<beforebreak>\.\.</beforebreak>
+<afterbreak>" [a-z]</afterbreak>
+</rule>
<!-- ##### end of Dutch #### -->
</languagerule>
<languagerule languagerulename="Slovak">
<rule break="no">
<beforebreak>\b(Bc|Mgr|RNDr|PharmDr|PhDr|JUDr|PaedDr|ThDr|Ing|MUDr|MDDr|MVDr|Dr|ThLic|PhD|ArtD|ThDr|Dr|DrSc|CSs|prof)\.\s</beforebreak>
@@ -4554,150 +4592,150 @@
<afterbreak>\p{Lu}\p{Ll}</afterbreak>
</rule>
</languagerule>
<languagerule languagerulename="Catalan">
<rule break="no">
-<beforebreak>Yahoo!\s</beforebreak>
+<beforebreak>Yahoo![\s\u00A0]</beforebreak>
<afterbreak>\p{Ll}</afterbreak>
</rule>
<rule break="yes">
-<beforebreak>\w['’][nNtT]\.\s</beforebreak>
+<beforebreak>\w['’][nNtT]\.[\s\u00A0]</beforebreak>
<afterbreak></afterbreak>
</rule>
<rule break="yes">
-<beforebreak>\.\[\d+\]\s</beforebreak>
+<beforebreak>\.\[\d+\][\s\u00A0]</beforebreak>
<afterbreak></afterbreak>
</rule>
<!-- initials: A. C. Jones. Problem: [...] d'Alfons I. Ell era [...] -->
<rule break="no">
-<beforebreak>\b[A-ZÀÉÈÍÓÒÚ]\.\s</beforebreak>
+<beforebreak>\b[A-ZÀÉÈÍÓÒÚ]\.[\s\u00A0]</beforebreak>
<afterbreak></afterbreak>
</rule>
<!-- Abbreviations that cannot finish sentences-->
<rule break="no">
-<beforebreak>\b(dc|(?iu)(n|Mr|C|Dr|Dra|Dra\. Ma|Sta\. Ma|E|Emm|Emma|Excm|Excma|Hble|I|Il·lm|Il·lma|Il·ltre|Im|Ima|Mgfc|Mgfca|Mn|R|Rev|Sr|Sra|Sres|Srs|St|Sta|a|abr|abs|acad|add|adj|adm|admdor|admdora|admtiu|admtiva|adv|ag|agl|agr|agron|agròn|aj|ajud|al|alim|amb|ampl|ant|ap|apmt|apnt|apr|aprox|apt|arm|arq|arqueol|arquit|assign|assoc|atm|aut|aux|av|b|batx|bda|bibl|bl|bnc|butll|bxs|c|calef|cartogr|cat|catedr|catol|cf|cia|cin|cint|circul|cit|climat|col|col·l|compt|cons|constr|cont|contr|conv|corp|corr|cpl|cpt|cró|ct|cte|ctra|cts|d|dept|derog|des|desp|dg|dip|disp|distr|div|dj|dl|doc|drec|ds|dt|dta|dte|dupl|dv|e|econ|ed|ef|entl|esc|esp|espf|esq|ex|exc|exp|exped|ext|f|fac|fca|febr|fig|figs|fra|gen|gov|gral|i|imp|impr|impt|inc|insp|inst|int|inv|j|jul|jur|jurispr|leg|llic|loc|ltda|làm|merc|mil·l|màx|mín|neg|nov|nre|núm|o|oct|op|p|pàg|pàgs|paq|par|pda|pg|pl|pobl|pol|ppda|ppt|pral|prev|prof|progr|prov|pta|ptes|ptge|pvt|pàg|quadr|quint|r|rbla|ref|reg|rev|secr|serv|sgt|sotsp|subsp|supl|supt|t|tel|telegr|tit|trad|trans|transcr|transf|trav|tripl|trv|tt|tèc|univ|urb|v|var|veg|venc|vid|vig|vocab|vs|x|àt|íd))\.\s</beforebreak>
+<beforebreak>\b(dc|(?iu)(n|Mr|C|Dr|Dra|Dra\. Ma|Sta\. Ma|E|Emm|Emma|Excm|Excma|Hble|I|Il·lm|Il·lma|Il·ltre|Im|Ima|Mgfc|Mgfca|Mn|R|Rev|Sr|Sra|Sres|Srs|St|Sta|a|abr|abs|acad|add|adj|adm|admdor|admdora|admtiu|admtiva|adv|ag|agl|agr|agron|agròn|aj|ajud|al|alim|amb|ampl|ant|ap|apmt|apnt|apr|aprox|apt|arm|arq|arqueol|arquit|assign|assoc|atm|aut|aux|av|b|batx|bda|bibl|bl|bnc|butll|bxs|c|calef|cartogr|cat|catedr|catol|cf|cia|cin|cint|circul|cit|climat|col|col·l|compt|cons|constr|cont|contr|conv|corp|corr|cpl|cpt|cró|ct|cte|ctra|cts|d|dept|derog|des|desp|dg|dip|disp|distr|div|dj|dl|doc|drec|ds|dt|dta|dte|dupl|dv|e|econ|ed|ef|entl|esc|esp|espf|esq|ex|exc|exp|exped|ext|f|fac|fca|febr|fig|figs|fra|gen|gov|gral|i|imp|impr|impt|inc|insp|inst|int|inv|j|jul|jur|jurispr|leg|llic|loc|ltda|làm|merc|mil·l|màx|mín|neg|nov|nre|núm|o|oct|op|p|pàg|pàgs|paq|par|pda|pg|pl|pobl|pol|ppda|ppt|pral|prev|prof|progr|prov|pta|ptes|ptge|pvt|pàg|quadr|quint|r|rbla|ref|reg|rev|secr|serv|sgt|sotsp|subsp|supl|supt|t|tel|telegr|tit|trad|trans|transcr|transf|trav|tripl|trv|tt|tèc|univ|urb|v|var|veg|venc|vid|vig|vocab|vs|x|àt|íd))\.[\s\u00A0]</beforebreak>
<afterbreak></afterbreak>
</rule>
<!-- Abbreviations that can finish sentences -->
<rule break="no">
-<beforebreak>\b(s|ca)\.\s</beforebreak>
+<beforebreak>\b(s|ca)\.[\s\u00A0]</beforebreak>
<afterbreak>[XIV]+\b</afterbreak>
</rule>
<rule break="no">
-<beforebreak>\b(min|m|ca)\.\s</beforebreak>
+<beforebreak>\b(min|m|ca)\.[\s\u00A0]</beforebreak>
<afterbreak>[0-9]+\b</afterbreak>
</rule>
<rule break="no">
-<beforebreak>\b([Cc]ap|[Aa]rts?|pp|[Vv]ol)\.\s</beforebreak>
+<beforebreak>\b([Cc]ap|[Aa]rts?|pp|[Vv]ol)\.[\s\u00A0]</beforebreak>
<afterbreak>[XIV\d]+\b</afterbreak>
</rule>
<rule break="no">
-<beforebreak>\b([Ee]ds?|[Cc]oords?|\d+(r|n|t|è|é|ns|es)|masc|fem|sing|pl|adj|adv|g|kg|m|km|cm|ha|u|h|hrs|s|ss|alt|cant|cast|cert|com|dir|gr|nom|parc|pres|set|Sr|Jr|Admón|Adm|Inc|Co|Hnos|Vda|[VU]d[s]?)\.[\p{Pe}\p{Pf}\p{Pd}"']*\s</beforebreak>
+<beforebreak>\b([Ee]ds?|[Cc]oords?|\d+(r|n|t|è|é|ns|es)|masc|fem|sing|pl|adj|adv|g|kg|m|km|cm|ha|u|h|hrs|s|ss|alt|cant|cast|cert|com|dir|gr|nom|parc|pres|set|Sr|Jr|Admón|Adm|Inc|Co|Hnos|Vda|[VU]d[s]?)\.[\p{Pe}\p{Pf}\p{Pd}"']*[\s\u00A0]</beforebreak>
<afterbreak>[\-¡¿«»"'\u2018\u201C\p{Ps}\u2012\u2013\u2014\u2015\u2053]*\p{Ll}</afterbreak>
</rule>
<!-- Any word in acronyms like U.S.A.F or F. B. I. or C. or c.s.p. or p. e. -->
<rule break="no">
-<beforebreak>\b(\p{L}\.)+[\p{Pe}\p{Pf}\p{Pd}"']*\s</beforebreak>
+<beforebreak>\b(\p{L}\.)+[\p{Pe}\p{Pf}\p{Pd}"']*[\s\u00A0]</beforebreak>
<afterbreak>\p{Ll}</afterbreak>
</rule>
<!-- Any word in acronyms like EE.UU. or BB. DD. -->
<rule break="no">
-<beforebreak>\b([\p{Lu}]{2}\.)+[\p{Pe}\p{Pf}\p{Pd}"']*\s</beforebreak>
+<beforebreak>\b([\p{Lu}]{2}\.)+[\p{Pe}\p{Pf}\p{Pd}"']*[\s\u00A0]</beforebreak>
<afterbreak>\p{Ll}</afterbreak>
</rule>
<rule break="no">
-<beforebreak>\bEE\.\s?</beforebreak>
-<afterbreak>UU</afterbreak>
+<beforebreak>\b\p{Lu}{2}\.[\s\u00A0]?</beforebreak>
+<afterbreak>\p{Lu}{2}</afterbreak>
</rule>
<rule break="no">
-<beforebreak>EE\.\s?UU\.\s?</beforebreak>
+<beforebreak>EE\.[\s\u00A0]?UU\.[\s\u00A0]?</beforebreak>
<afterbreak>\p{Ll}</afterbreak>
</rule>
<!-- max min etc -->
<rule break="no">
-<beforebreak>\b([Ee]tc|m[aáà]x|m[ií]n|aprox|\d+o)\.[\p{Pe}\p{Pf}\p{Pd}"']*\s</beforebreak>
+<beforebreak>\b([Ee]tc|m[aáà]x|m[ií]n|aprox|\d+o)\.[\p{Pe}\p{Pf}\p{Pd}"']*[\s\u00A0]</beforebreak>
<afterbreak>\p{Ll}</afterbreak>
</rule>
<!-- Composed abbrev. -->
<rule break="no">
-<beforebreak>\bet al\.[\p{Pe}\p{Pf}\p{Pd}"']*\s</beforebreak>
+<beforebreak>\bet al\.[\p{Pe}\p{Pf}\p{Pd}"']*[\s\u00A0]</beforebreak>
<afterbreak></afterbreak>
</rule>
<!-- Units -->
<rule break="no">
-<beforebreak>\b([Pp]ta[s]?|K[gm][s]|[mc]?[gmls]|[Hh](rs)?)\.[\p{Pe}\p{Pf}\p{Pd}"']*\s</beforebreak>
+<beforebreak>\b([Pp]ta[s]?|K[gm]s?|[mc]?[gmls]|[Hh](rs)?)\.[\p{Pe}\p{Pf}\p{Pd}"']*[\s\u00A0]</beforebreak><!-- unit abbreviations before a lowercase letter; the plural "s" after Kg/Km is optional, so "Kg." and "Km." match as well as "Kgs."/"Kms." -->
<afterbreak>\p{Ll}</afterbreak>
</rule>
<!-- Ellipsis: ... lowercase -->
<rule break="no">
-<beforebreak>[^\s](\Q...\E|…)\s</beforebreak>
+<beforebreak>[^\s\u00A0](\Q...\E|…)[\s\u00A0]</beforebreak>
<afterbreak>\p{Ll}</afterbreak>
</rule>
<!-- (enum...) -->
<rule break="no">
-<beforebreak>\b(\Q...\E|…)[\p{Pe}»"’”]\s</beforebreak>
+<beforebreak>\b(\Q...\E|…)[\p{Pe}»"’”][\s\u00A0]</beforebreak>
<afterbreak>\p{Ll}</afterbreak>
</rule>
<!-- pero ¡ah! no estaba
<rule break="no">
-<beforebreak>\b¡\p{L}+!\s</beforebreak>
+<beforebreak>\b¡\p{L}+![\s\u00A0]</beforebreak>
<afterbreak>\p{Ll}</afterbreak>
</rule>
-->
<rule break="yes">
-<beforebreak>[\.…][\u00BB\u2019\u201D\u203A"'\u0002]*\s</beforebreak>
+<beforebreak>[\.…][\u00BB\u2019\u201D\u203A"'\u0002]*[\s\u00A0]</beforebreak>
<afterbreak></afterbreak>
</rule>
<rule break="yes">
-<beforebreak>\b[\p{L}'’·\-]+[\p{Pf}\p{Pe}\u00BB\u2019\u201D\u203A"'\u0002]*[\.:!?…]+\s</beforebreak>
+<beforebreak>\b[\p{L}'’·\-]+[\p{Pf}\p{Pe}\u00BB\u2019\u201D\u203A"'\u0002]*[\.:!?…]+[\s\u00A0]</beforebreak>
<afterbreak>[¡¿«»"'\u2018\u201C"\p{Ps}]*\p{Lu}\p{L}*</afterbreak>
</rule>
<!-- paragraphs with opening "»" in dialogs-->
<rule break="yes">
-<beforebreak>[\.:!?…»]+\s</beforebreak>
-<afterbreak>»[^\s\.:!?…]</afterbreak>
+<beforebreak>[\.:!?…»]+[\s\u00A0]</beforebreak>
+<afterbreak>»[^\u00A0\s\.:!?…]</afterbreak>
</rule>
</languagerule>
<languagerule languagerulename="Spanish">
<rule break="no">
-<beforebreak>Yahoo!\s</beforebreak>
+<beforebreak>Yahoo![\s\u00A0]</beforebreak>
<afterbreak>\p{Ll}</afterbreak>
</rule>
<rule break="yes">
-<beforebreak>\.\[\d+\]\s</beforebreak>
+<beforebreak>\.\[\d+\][\s\u00A0]</beforebreak>
<afterbreak></afterbreak>
</rule>
<!-- initials: A. C. Jones. Problem: [...] de Alfons I. Él era [...] -->
<rule break="no">
-<beforebreak>\b[A-ZÀÉÈÍÓÒÚ]\.\s</beforebreak>
+<beforebreak>\b[A-ZÀÉÈÍÓÒÚ]\.[\s\u00A0]</beforebreak>
<afterbreak/>
</rule>
<!-- Ellipsis: ... lowercase -->
<rule break="no">
-<beforebreak>[^\s](\Q...\E|…)\s</beforebreak>
+<beforebreak>[^\s\u00A0](\Q...\E|…)[\s\u00A0]</beforebreak>
<afterbreak>\p{Ll}</afterbreak>
</rule>
<!-- (enum...) -->
<rule break="no">
-<beforebreak>\b(\Q...\E|…)[\p{Pe}»"’”]\s</beforebreak>
+<beforebreak>\b(\Q...\E|…)[\p{Pe}»"’”][\s\u00A0]</beforebreak>
<afterbreak>\p{Ll}</afterbreak>
</rule>
<!-- Abbreviations that can finish sentences -->
<rule break="no">
-<beforebreak>\b(s|ca)\.\s</beforebreak>
+<beforebreak>\b(s|ca)\.[\s\u00A0]</beforebreak>
<afterbreak>[XIV]+\b</afterbreak>
</rule>
<rule break="no">
-<beforebreak>\b(min|m|ca)\.\s</beforebreak>
+<beforebreak>\b(min|m|ca)\.[\s\u00A0]</beforebreak>
<afterbreak>[0-9]+\b</afterbreak>
</rule>
<rule break="no">
-<beforebreak>\b([Cc]ap|[Aa]rts?|pp|[Vv]ol|p|[Pp][aá]gs?|ps)\.\s</beforebreak>
+<beforebreak>\b([Cc]ap|[Aa]rts?|pp|[Vv]ol|p|[Pp][aá]gs?|ps)\.[\s\u00A0]</beforebreak>
<afterbreak>[XIV\d]+\b</afterbreak>
</rule>
<rule break="no">
-<beforebreak>\b(\d+(r|er|n|ero|era|mo|ma|vo|va|no|na|to|ta|do|da|h|hr|gr|grs|o|a)s?|g|kg|m|km|cm|ha|u|h|hrs|H|HR|HRS|s|ss|alt|cant|cast|cert|com|dir|gr|nom|parc|pres|set|Sr|Jr|Admón|Adm|Inc|Co|Hnos|Vda|[VU]d[s]?)\.[\p{Pe}\p{Pf}\p{Pd}"']*\s</beforebreak>
+<beforebreak>\b(\d+(r|er|n|ero|era|mo|ma|vo|va|no|na|to|ta|do|da|h|hr|gr|grs|o|a)s?|g|kg|m|km|cm|ha|u|h|hrs|H|HR|HRS|s|ss|alt|cant|cast|cert|com|dir|gr|nom|parc|pres|set|Sr|Jr|Admón|Adm|Inc|Co|Hnos|Vda|[VU]d[s]?)\.[\p{Pe}\p{Pf}\p{Pd}"']*[\s\u00A0]</beforebreak>
<afterbreak>[\-¡¿«»"'\u2018\u201C\p{Ps}\u2012\u2013\u2014\u2015\u2053]*\p{Ll}</afterbreak>
</rule>
<rule break="no">
<!-- URLs without "www."-->
<beforebreak>\b(https?|ftp|file|chrome|chromium|android|(chrome|moz)\-extension):///?[A-Za-z0-9\-]+\.</beforebreak>
@@ -4708,79 +4746,79 @@
<beforebreak>\b[A-Za-z0-9\-]+\.</beforebreak>
<afterbreak>[A-Za-z0-9\-]+\.(com|net|org|info|de|es|edu|co|eu|nl|io|cn|uk|gov|biz|ca|tk|ru|br|jp|pl)(\.|\b)</afterbreak>
</rule>
<!-- Abbreviations that cannot finish sentences-->
<rule break="no">
-<beforebreak>\b(dc|(?iu)(n|[Aa]yto|Mr|C|Dr|Dra|E|Emm|Emma|Excm|Excma|Hble|I|Il·lm|Il·lma|Il·ltre|Im|Ima|Mgfc|Mgfca|Mn|R|Rev|Sr|Sra|Sres|Sras|Srs|St|Sta|a|abr|abs|acad|add|adj|adm|admdor|admdora|admtiu|admtiva|adv|ag|agl|agr|agron|agròn|aj|ajud|al|alim|amb|ampl|ant|ap|apmt|apnt|apr|aprox|apt|arm|arq|arqueol|arquit|assign|assoc|atm|aut|aux|av|b|batx|bda|bibl|bl|bnc|butll|bxs|c|calef|cartogr|cat|catedr|catol|cf|cia|cin|cint|circul|cit|climat|col|col·l|compt|cons|constr|cont|contr|conv|corp|corr|cpl|cpt|cró|ct|cte|ctra|cts|d|dept|derog|des|desp|dg|dip|disp|distr|div|dj|dl|doc|drec|ds|dt|dta|dte|dupl|dv|e|econ|ed|ef|entl|esc|esp|espf|esq|ex|exc|exp|exped|ext|f|fac|fca|febr|fig|figs|fra|gen|gov|gral|i|imp|impr|impt|inc|insp|inst|int|inv|j|jul|jur|jurispr|leg|llic|loc|ltda|làm|merc|mil·l|màx|mín|neg|nov|nre|núm|o|oct|op|p|pàg|pàgs|paq|par|pda|pg|pl|pobl|pol|ppda|ppt|pral|prev|prof|progr|prov|pta|ptes|ptge|pvt|pàg|quadr|quint|r|rbla|ref|reg|rev|secr|serv|sgt|sotsp|subsp|supl|supt|t|tel|telegr|tit|trad|trans|transcr|transf|trav|tripl|trv|tt|tèc|univ|urb|v|var|veg|venc|vid|vig|vocab|vs|x|àt|íd))\.\s</beforebreak>
+<beforebreak>\b(dc|(?iu)(n|[Aa]yto|Mr|C|Dr|Dra|E|Emm|Emma|Excm|Excma|Hble|I|Il·lm|Il·lma|Il·ltre|Im|Ima|Mgfc|Mgfca|Mn|R|Rev|Sr|Sra|Sres|Sras|Srs|St|Sta|a|abr|abs|acad|add|adj|adm|admdor|admdora|admtiu|admtiva|adv|ag|agl|agr|agron|agròn|aj|ajud|al|alim|amb|ampl|ant|ap|apmt|apnt|apr|aprox|apt|arm|arq|arqueol|arquit|assign|assoc|atm|aut|aux|av|b|batx|bda|bibl|bl|bnc|butll|bxs|c|calef|cartogr|cat|catedr|catol|cf|cia|cin|cint|circul|cit|climat|col|col·l|compt|cons|constr|cont|contr|conv|corp|corr|cpl|cpt|cró|ct|cte|ctra|cts|d|dept|derog|des|desp|dg|dip|disp|distr|div|dj|dl|doc|drec|ds|dt|dta|dte|dupl|dv|e|econ|ed|ef|entl|esc|esp|espf|esq|ex|exc|exp|exped|ext|f|fac|fca|febr|fig|figs|fra|gen|gov|gral|i|imp|impr|impt|inc|insp|inst|int|inv|j|jul|jur|jurispr|leg|llic|loc|ltda|làm|merc|mil·l|màx|mín|neg|nov|nre|núm|o|oct|op|p|pàg|pàgs|paq|par|pda|pg|pl|pobl|pol|ppda|ppt|pral|prev|prof|progr|prov|pta|ptes|ptge|pvt|pàg|quadr|quint|r|rbla|ref|reg|rev|secr|serv|sgt|sotsp|subsp|supl|supt|t|tel|telegr|tit|trad|trans|transcr|transf|trav|tripl|trv|tt|tèc|univ|urb|v|var|veg|venc|vid|vig|vocab|vs|x|àt|íd))\.[\s\u00A0]</beforebreak>
<afterbreak/>
</rule>
<rule break="no">
-<beforebreak>\b([Aa]vda|[Pp][ol]|Pl?za|[Aa]dm|[Dd]pto|Sr|Mr|Srta|ej)\.\s</beforebreak>
+<beforebreak>\b([Aa]vda|[Pp][ol]|Pl?za|[Aa]dm|[Dd]pto|Sr|Mr|Srta|ej)\.[\s\u00A0]</beforebreak>
<afterbreak/>
</rule>
<rule break="no">
-<beforebreak>\b(Dña|Dr[a]?|Sra|Sto|S(ri)?ta|Ldo|Ing|Prof|Excmo|Ilmo|Mgfco|admdor|admdora)\.\s</beforebreak>
+<beforebreak>\b(Dña|Dr[a]?|Sra|Sto|S(ri)?ta|Ldo|Ing|Prof|Excmo|Ilmo|Mgfco|admdor|admdora)\.[\s\u00A0]</beforebreak>
<afterbreak/>
</rule>
<rule break="no">
-<beforebreak>\b([Aa]rt|[Cc]ód|[Ss]ecc|[Tt]ít)\.\s</beforebreak>
+<beforebreak>\b([Aa]rt|[Cc]ód|[Ss]ecc|[Tt]ít)\.[\s\u00A0]</beforebreak>
<afterbreak/>
</rule>
<rule break="no">
-<beforebreak>\b([Ee]d(it)?|[Nn]o|n|[Nn]úm|[Pp]ág|p|c|\d+er)|[V\.]gr\.\s</beforebreak>
+<beforebreak>(?:\b([Ee]d(it)?|[Nn]o|n|[Nn]úm|[Pp]ág|p|c|\d+er)|[V\.]gr)\.[\s\u00A0]</beforebreak><!-- Ed. / Edit. / No. / n. / Núm. / Pág. / p. / c. / 1er. / V.gr.: the outer (?:...) makes the final dot + whitespace apply to every alternative, not only to "gr". NOTE(review): this also stops a split after the plain word "no." - confirm that is intended -->
<afterbreak/>
</rule>
<!-- Abbreviations that can finish sentences -->
<rule break="no">
-<beforebreak>\b([Ee]ds?|[Cc]oords?|grs?|Sr|Jr|Admón|Inc|Co|Hnos|Vda|[VU]d[s]?)\.[\p{Pe}\p{Pf}\p{Pd}"']*\s</beforebreak>
+<beforebreak>\b([Ee]ds?|[Cc]oords?|grs?|Sr|Jr|Admón|Inc|Co|Hnos|Vda|[VU]d[s]?)\.[\p{Pe}\p{Pf}\p{Pd}"']*[\s\u00A0]</beforebreak>
<afterbreak>[\-¡¿«»"'\u2018\u201C\p{Ps}\u2012\u2013\u2014\u2015\u2053]*\p{Ll}</afterbreak>
</rule>
<!-- Any word in acronyms like U.S.A.F or F. B. I. or C. or c.s.p. or p. e. -->
<rule break="no">
-<beforebreak>\b(\p{L}\.)+[\p{Pe}\p{Pf}\p{Pd}"']*\s</beforebreak>
+<beforebreak>\b(\p{L}\.)+[\p{Pe}\p{Pf}\p{Pd}"']*[\s\u00A0]</beforebreak>
<afterbreak>\p{Ll}</afterbreak>
</rule>
<!-- Any word in acronyms like EE.UU. or BB. DD. -->
<rule break="no">
-<beforebreak>\b([\p{Lu}]{2}\.)+[\p{Pe}\p{Pf}\p{Pd}"']*\s</beforebreak>
+<beforebreak>\b([\p{Lu}]{2}\.)+[\p{Pe}\p{Pf}\p{Pd}"']*[\s\u00A0]</beforebreak>
<afterbreak>\p{Ll}</afterbreak>
</rule>
<rule break="no">
-<beforebreak>\bEE\.\s?</beforebreak>
-<afterbreak>UU</afterbreak>
+<beforebreak>\b\p{Lu}{2}\.[\s\u00A0]?</beforebreak><!-- two uppercase letters + dot (optionally followed by one space or NBSP) with two more uppercase letters right after, e.g. the "EE." of "EE. UU." -->
+<afterbreak>\p{Lu}{2}</afterbreak>
</rule>
<rule break="no">
-<beforebreak>EE\.\s?UU\.\s?</beforebreak>
+<beforebreak>EE\.[\s\u00A0]?UU\.[\s\u00A0]?</beforebreak>
<afterbreak>\p{Ll}</afterbreak>
</rule>
<!-- max min etc -->
<rule break="no">
-<beforebreak>\b([Ee]tc|m[aá]x|m[ií]n|aprox|\d+o)\.[\p{Pe}\p{Pf}\p{Pd}"']*\s</beforebreak>
+<beforebreak>\b([Ee]tc|m[aá]x|m[ií]n|aprox|\d+o)\.[\p{Pe}\p{Pf}\p{Pd}"']*[\s\u00A0]</beforebreak>
<afterbreak>\p{Ll}</afterbreak>
</rule>
<!-- Composed abbrev. -->
<rule break="no">
-<beforebreak>\bet al\.[\p{Pe}\p{Pf}\p{Pd}"']*\s</beforebreak>
+<beforebreak>\bet al\.[\p{Pe}\p{Pf}\p{Pd}"']*[\s\u00A0]</beforebreak>
<afterbreak/>
</rule>
<!-- Units -->
<rule break="no">
-<beforebreak>\b([Pp]ta[s]?|K[gm][s]|[mc]?[gmls]|[Hh](rs)?)\.[\p{Pe}\p{Pf}\p{Pd}"']*\s</beforebreak>
+<beforebreak>\b([Pp]ta[s]?|K[gm][s]?|[mc]?[gmls]|[Hh](rs)?)\.[\p{Pe}\p{Pf}\p{Pd}"']*[\s\u00A0]</beforebreak><!-- Pta(s). / Kg(s). / Km(s). / g. m. l. s. mg. cm. ... / H. h. Hrs. hrs.: the plural "s" of Kg/Km is optional, like the one of Pta -->
<afterbreak>\p{Ll}</afterbreak>
</rule>
<rule break="yes">
-<beforebreak>[\.…][\u00BB\u2019\u201D\u203A"'\u0002]*\s</beforebreak>
+<beforebreak>[\.…][\u00BB\u2019\u201D\u203A"'\u0002]*[\s\u00A0]</beforebreak>
<afterbreak></afterbreak>
</rule>
<rule break="yes">
-<beforebreak>\b[\p{L}'’·\-]+[\p{Pf}\p{Pe}\u00BB\u2019\u201D\u203A"'\u0002]*[\.:!?…]+\s</beforebreak>
+<beforebreak>\b[\p{L}'’·\-]+[\p{Pf}\p{Pe}\u00BB\u2019\u201D\u203A"'\u0002]*[\.:!?…]+[\s\u00A0]</beforebreak>
<afterbreak>[¡¿«»"'\u2018\u201C"\p{Ps}]*\p{Lu}\p{L}*</afterbreak>
</rule>
<!-- paragraphs with opening "»" in dialogs-->
<rule break="yes">
-<beforebreak>[\.:!?…»]+\s</beforebreak>
-<afterbreak>»[^\s\.:!?…]</afterbreak>
+<beforebreak>[\.:!?…»]+[\s\u00A0]</beforebreak>
+<afterbreak>»[^\u00A0\s\.:!?…]</afterbreak>
</rule>
</languagerule>
<languagerule languagerulename="German">
<rule break="no"><!-- URLs without "www."-->
<beforebreak>\b(https?|ftp|file|chrome|chromium|android|(chrome|moz)\-extension):///?[A-Za-z0-9\-]+\.</beforebreak>
@@ -4790,21 +4828,21 @@
<beforebreak>\b[A-Za-z0-9\-]+\.</beforebreak>
<afterbreak>[A-Za-z0-9\-]+\.(com|net|org|info|de|es|edu|co|eu|nl|io|cn|uk|gov|biz|ca|tk|ru|br|jp|pl)(\.|\b)</afterbreak>
</rule>
<!--support simple lists in markdown style-->
<rule break="yes">
-<beforebreak>\r?\n\s*[-*]+\s</beforebreak>
+<beforebreak>\r?\n[\u00A0\s]*[-*]+[\u00A0\s]</beforebreak>
<afterbreak></afterbreak>
</rule>
<!-- Split at e.g. "1a. Und ..." -->
<rule break="yes">
-<beforebreak>\d+[a-z]\.\s</beforebreak>
+<beforebreak>\d+[a-z]\.[\u00A0\s]</beforebreak>
<afterbreak>\p{Lu}</afterbreak>
</rule>
<!-- Don't split at e.g. "d. h." -->
<rule break="no">
-<beforebreak>[^-\p{L}'’/]\p{L}[\.!?…]['|"|“|«|\)|\]|\}]?\s</beforebreak>
+<beforebreak>[^-\p{L}'’/]\p{L}[\.!?…]['|"|“|«|\)|\]|\}]?[\u00A0\s]</beforebreak>
<afterbreak></afterbreak>
</rule>
<rule break="no">
<beforebreak>Ust.</beforebreak><!-- needed for German rule UST_ID -->
<afterbreak>Id</afterbreak>
@@ -4824,11 +4862,11 @@
<rule break="no"><!-- https://de.wikipedia.org/wiki/VW_ID.3 -->
<beforebreak>ID.</beforebreak>
<afterbreak>3|Buzz|Crozz</afterbreak>
</rule>
<rule break="no">
-<beforebreak>[1-3]\.\s</beforebreak>
+<beforebreak>[1-3]\.[\u00A0\s]</beforebreak>
<afterbreak>Liga|Bundesliga|Fußball(-B|b)undesliga</afterbreak>
</rule>
<rule break="no">
<beforebreak>\bP[Hh]\.</beforebreak>
<afterbreak>D\.</afterbreak>
@@ -4839,138 +4877,138 @@
<afterbreak></afterbreak>
</rule>
<!-- Don't split after a white-space followed by a single letter followed
by a dot followed by another whitespace. e.g. " p. " -->
<rule break="no">
-<beforebreak>\s\p{L}\.\s</beforebreak>
+<beforebreak>[\u00A0\s]\p{L}\.[\u00A0\s]</beforebreak>
<afterbreak>\p{L}\.</afterbreak>
</rule>
<!-- Don't split at "bla bla... yada yada" -->
<rule break="no">
-<beforebreak>[\[\(]?\.\.\.[\]\)]?\s</beforebreak>
+<beforebreak>[\[\(]?\.\.\.[\]\)]?[\u00A0\s]</beforebreak>
<afterbreak>\p{Ll}</afterbreak>
</rule>
<!-- Don't split [.?!] when they're quoted -->
<rule break="no">
-<beforebreak>['"„][\.!?…]['"“]\s</beforebreak>
+<beforebreak>['"„][\.!?…]['"“][\u00A0\s]</beforebreak>
<afterbreak></afterbreak>
</rule>
<!-- Don't break after quote unless there's a capital letter
e.g.: "That's right!" he said. -->
<rule break="no">
-<beforebreak>["'“]\s</beforebreak>
+<beforebreak>["'“][\u00A0\s]</beforebreak>
<afterbreak>\p{Ll}</afterbreak>
</rule>
<!-- e.g. "Das ist . so." - assume one sentence. -->
<rule break="no">
-<beforebreak>\s([\.!?]{1,3}|…)['|"|“|«|\)|\]|\}]?\s</beforebreak>
+<beforebreak>[\u00A0\s]([\.!?]{1,3}|…)['|"|“|«|\)|\]|\}]?[\u00A0\s]</beforebreak>
<afterbreak></afterbreak>
</rule>
<!-- Numbers, dates e.g. "3.10. datiert" -->
<rule break="no">
-<beforebreak>\b\d+\.\s</beforebreak>
+<beforebreak>\b\d+\.[\u00A0\s]</beforebreak>
<afterbreak>\p{Ll}|\p{Lu}{2,}</afterbreak>
</rule>
<!-- z.B. "Das hier ist ein(!) Satz." -->
<rule break="no">
-<beforebreak>[\(\[][!?]{1,3}[\]\)]\s</beforebreak>
+<beforebreak>[\(\[][!?]{1,3}[\]\)][\u00A0\s]</beforebreak>
<afterbreak></afterbreak>
</rule>
<!-- z.B. "Das hier ist (genau!) ein Satz." -->
<rule break="no">
-<beforebreak>[!?]{1,3}[\)\]]\s</beforebreak>
+<beforebreak>[!?]{1,3}[\)\]][\u00A0\s]</beforebreak>
<afterbreak></afterbreak>
</rule>
<!-- z.B. "bla (...) blubb" -> kein Satzende -->
<rule break="no">
-<beforebreak>[\(\)\[\]]\s</beforebreak>
+<beforebreak>[\(\)\[\]][\u00A0\s]</beforebreak>
<afterbreak></afterbreak>
</rule>
<!-- don't split at cases like "Friedrich II. wird auch..." -->
<rule break="no">
-<beforebreak>[\s ][IVX]+\.\s</beforebreak>
+<beforebreak>[\u00A0\s ][IVX]+\.[\u00A0\s]</beforebreak>
<afterbreak>[^\p{Lu}]+</afterbreak>
</rule>
<!-- don't split at cases like "im 13. oder 14. Jahrhundert" -->
<rule break="no">
-<beforebreak>\d+\.\s</beforebreak>
-<afterbreak>(und|oder|bis)\s</afterbreak>
+<beforebreak>\d+\.[\u00A0\s]</beforebreak>
+<afterbreak>(und|oder|bis)[\u00A0\s]</afterbreak>
</rule>
<!-- einige deutsche Monate, vor denen eine Zahl erscheinen kann,
ohne dass eine Satzgrenze erkannt wird
(z.B. "am 13. Dezember" -> keine Satzgrenze) -->
<rule break="no">
-<beforebreak>\d+\.\s</beforebreak>
+<beforebreak>\d+\.[\u00A0\s]</beforebreak>
<afterbreak>Januar|Jänner|Februar|März|Merz|April|Mai|Ju[ln]i|August|September|Oktober|November|Dezember</afterbreak>
</rule>
<rule break="no">
-<beforebreak>\d+\.\s</beforebreak>
+<beforebreak>\d+\.[\u00A0\s]</beforebreak>
<afterbreak>J[aä]n|Febr?|Mär|Apr|Mai|Ju[nl]|Aug|Sept?|Okt|Nov|Dez</afterbreak>
</rule>
<rule break="no">
-<beforebreak>(Jan|Jän|Febr?|Mär|Apr|Mai|Ju[nl]|Aug|Sept?|Okt|Nov|Dez)\.\s</beforebreak>
+<beforebreak>(Jan|Jän|Febr?|Mär|Apr|Mai|Ju[nl]|Aug|Sept?|Okt|Nov|Dez)\.[\u00A0\s]</beforebreak>
<afterbreak>\d\d(\d\d)?</afterbreak>
</rule>
<!-- ähnliche Fälle außerhalb der Monatsnamen -->
<rule break="no">
-<beforebreak>\d+\.\s</beforebreak>
+<beforebreak>\d+\.[\u00A0\s]</beforebreak>
<afterbreak>Amtsperiode|Breitengrads?|Breitengrades|Jubiläum|Jhd?|Jhdts?|Konferenz|(Jahres|Partei)(-K|k)onferenz|Längengrade?s?|Tags?|Tages|(Jahres|Spiel|Partei|Geburts)tag|(Jahres|Spiel|Partei|Geburts)tages|(Jahres|Spiel|Partei|Geburts)tags|Jahrhunderts?|Jahrtausend|Platz|Platzes|Lebensjahrs?|Lebensjahres|Lochs?|Loches|Grads|Grades|Obergeschoss|Stock(werk)?s?|Etage|Klasse|Runde|Bezirk|Etappe|Staffel|Sinfonie</afterbreak>
</rule>
<!-- English abbreviations - but these work globally for all languages -->
<rule break="no">
-<beforebreak>\b(Mrs?|No|pp|St|no|Sr|Jr|Bros|etc|[Bb]tw|vs|esp|[Ff]ig|Jan|Feb|Mar|Apr|Ju[nl]|Aug|Sept?|O[ck]t|Nov|Dec|PhD|BSc|BEng|BComp|BArch|al|cf|Inc|Ms|MEng|MSc|MComp|Gen|Sen|Prof|Corp|Co|co|Ltd)\.\s</beforebreak>
+<beforebreak>\b(Mrs?|No|pp|St|no|Sr|Jr|Bros|etc|[Bb]tw|vs|esp|[Ff]ig|Jan|Feb|Mar|Apr|Ju[nl]|Aug|Sept?|O[ck]t|Nov|Dec|PhD|BSc|BEng|BComp|BArch|al|cf|Inc|Ms|MEng|MSc|MComp|Gen|Sen|Prof|Corp|Co|co|Ltd)\.[\u00A0\s]</beforebreak>
<afterbreak></afterbreak>
</rule>
<!-- Latin abbreviations - but these work globally for all languages -->
<rule break="no">
-<beforebreak>\b(spp?)\.\s</beforebreak>
+<beforebreak>\b(spp?)\.[\u00A0\s]</beforebreak>
<afterbreak></afterbreak>
</rule>
<!-- German abbreviations -->
<rule break="no">
-<beforebreak>\b(ggü|Mag|mtl|versch|d|Übers|usw|Bzw|bzw|Ab[hkst]|abzgl|Abzw|ahd|Akk|aktual|allg|alltagsspr|altdt|alttest|amerikan|Anh|Ank|Anm|Art|autom|Auftragsnr|Az|Bat|bayr|Bde?|bearb|Bed|Bem|bes|bez|Bez|Bhf|bspw|btto|bw)\.\s</beforebreak>
+<beforebreak>\b(ggü|Mag|mtl|versch|d|Übers|usw|Bzw|bzw|Ab[hkst]|abzgl|bezgl|Abzw|ahd|Akk|aktual|allg|alltagsspr|altdt|alttest|amerikan|Anh|Ank|Anm|Art|autom|Auftragsnr|Az|Bat|bayr|Bde?|bearb|Bed|Bem|bes|bez|Bez|Bhf|bspw|btto|bw)\.[\u00A0\s]</beforebreak>
<afterbreak></afterbreak>
</rule>
<rule break="no">
-<beforebreak>\b(cts?|Ca|ca|chem|chin|Chr|cresc|dat|Dat|desgl|ders|dgl|Dipl|Dir?|Doz?|durchg|durchges|Dr|dt|ebd|Ed|eigtl|Eigtl|Engl|engl|Erg|al|et[cw]|Etw|ev(tl)?|Evtl|exkl|Expl|Exz)\.\s</beforebreak>
+<beforebreak>\b(cts?|Ca|ca|chem|chin|Chr|cresc|dat|Dat|desgl|ders|dgl|Dipl|Dir?|Doz?|durchg|durchges|Dr|dt|ebd|Ed|eigtl|Eigtl|eigl|Eigl|akt|Engl|engl|Erg|al|et[cw]|Etw|ev(tl)?|Evtl|exkl|Expl|Exz)\.[\u00A0\s]</beforebreak>
<afterbreak></afterbreak>
</rule>
<rule break="no">
-<beforebreak>\bDipl\.-[A-Z][a-z]{2,4}\.\s</beforebreak>
+<beforebreak>\bDipl\.-[A-Z][a-z]{2,4}\.[\u00A0\s]</beforebreak>
<afterbreak></afterbreak>
</rule>
<rule break="no">
-<beforebreak>\b(ff|Fa|fachspr|fam|fem|Fem|Fr|franz|frz?|frdl|Frl|Fut|Gd|gebr?|Gebr|geh|geleg|gen|Gen|germ|gesch|ges|get|ggf|Ggf|Ggs|ggT|Gr|[Gg]rds|griech)\.\s</beforebreak>
+<beforebreak>\b(ff|Fa|fachspr|fam|fem|Fem|Fr|franz|frz?|frdl|Frl|Fut|Gd|gebr?|Gebr|geh|geleg|gen|Gen|germ|gesch|ges|get|ggf|Ggf|Ggs|ggT|Gr|[Gg]rds|griech)\.[\u00A0\s]</beforebreak>
<afterbreak></afterbreak>
</rule>
<rule break="no">
-<beforebreak>\b(hebr|hg|hl|Hrsg|Hg|hist|hochd|hochspr|Hptst|Hr|hrsg|Allg|IdNr|ill|inkl|incl|Ind|Inf|Ing|ital|Tr|jap|Jb|Jg|Jhd?|Jhdts?|jmd[mns]?|jur|Kap|kart|kath|kfm|kaufm|Kfm|kgl|Kl|Konj|königl|Krs?|Kto)\.\s</beforebreak>
+<beforebreak>\b(hebr|hg|hl|Hrsg|Hg|hist|hochd|hochspr|Hptst|Hr|hrsg|Allg|IdNr|ill|inkl|incl|Ind|Inf|Ing|ital|Tr|jap|Jb|Jg|Jhd?|Jhdts?|jmd[mns]?|jur|Kap|kart|kath|kfm|kaufm|Kfm|kgl|Kl|Konj|königl|Krs?|Kto)\.[\u00A0\s]</beforebreak>
<afterbreak></afterbreak>
</rule>
<rule break="no">
-<beforebreak>\b(lat|lfd|Lit|lt|Lz|Mask|mask|max|Mrd|mdal|me[dt]|phil|mhd|Mio?|mind?|Mo|mod|nachm|nördlBr|neutr|Nhd|Nom|Nrn?|Num|Obj|od|dgl|offz)\.\s</beforebreak>
+<beforebreak>\b(lat|lfd|Lit|lt|Lz|Mask|mask|max|Mrd|mdal|me[dt]|phil|mhd|Mio?|mind?|Mo|mod|nachm|nördlBr|neutr|Nhd|Nom|Nrn?|Num|Obj|od|dgl|offz)\.[\u00A0\s]</beforebreak>
<afterbreak></afterbreak>
</rule>
<rule break="no">
-<beforebreak>\b(Part|Per[fs]|Pfd|Pl(ur)?|pl|Plusq|Pos|pp|Prä[ps]|Prät|Pro[vf]|rd|reg|resp|Rhld|rit|Sa|südl|Br|se[ln]|Sept|Sing|sign|So|sog|Sp|Std?|stacc|Str|stud|Subst|sva|svw|sZ)\.\s</beforebreak>
+<beforebreak>\b(Part|Per[fs]|Pfd|Pl(ur)?|pl|Plusq|Pos|pp|Prä[ps]|Prät|Pro[vf]|rd|reg|resp|Rhld|rit|Sa|südl|Br|se[ln]|Sept|Sing|sign|So|sog|Sp|Std?|stacc|Str|stud|Subst|sva|svw|sZ)\.[\u00A0\s]</beforebreak>
<afterbreak></afterbreak>
</rule>
<rule break="no">
-<beforebreak>\b(Tel|teilw|Temp|trans|Tsd|übertr|übl|ff|überarb|ugs|univ|unveränd|urspr|USt|UST|USt\-IdNr|sw|vgl|vlt|Vlt|vllt|Vllt|Vgl|Vol|vollst|vorm|Vp|Vs|vs|wesentl|wg|Whg|Hd|Ztr|zus|Zus|zzt?|zzgl|zB|zb|Zz|Zt|zw|Min|Bzgl|bzgl|bezügl|Frhr|ggfs|insb|autom|Mw[sS]t)\.\s</beforebreak>
+<beforebreak>\b(Tel|teilw|Temp|trans|Tsd|übertr|übl|ff|überarb|ugs|univ|unveränd|urspr|USt|UST|USt\-IdNr|sw|vgl|vll|Vll|vlt|Vlt|vllt|Vllt|Vgl|Vol|vollst|vorm|Vp|Vs|vs|wesentl|wg|Whg|Hd|Ztr|zus|Zus|zzt?|zzgl|zB|zb|Zz|Zt|zw|Min|Bzgl|bzgl|bezügl|Frhr|ggfs|insb|autom|Mw[sS]t)\.[\u00A0\s]</beforebreak>
<afterbreak></afterbreak>
</rule>
<!-- Break rules -->
<rule break="yes">
-<beforebreak>[\.!?…][\u0002|'|"|“|«|‹|\)|\]|\}¹²³]?\s+</beforebreak>
+<beforebreak>[\.!?…][\u0002|'|"|“|«|‹|\)|\]|\}¹²³]?[\u00A0\s]+</beforebreak>
<afterbreak></afterbreak>
</rule>
<rule break="yes">
<beforebreak>[\.!?…]['"“\p{Pe}\u00BB\u201D]?</beforebreak>
<afterbreak>\p{Lu}[^\p{Lu}]</afterbreak>
</rule>
<rule break="yes">
-<beforebreak>\s\p{L}[\.!?…]\s</beforebreak>
+<beforebreak>[\u00A0\s]\p{L}[\.!?…][\u00A0\s]</beforebreak>
<afterbreak>\p{Lu}\p{Ll}</afterbreak>
</rule>
<!-- z.B. 2 sentences: “Liebst du mich?” “Ja!” -->
<rule break="yes">
<beforebreak>[\.!?][”“]</beforebreak>
@@ -5182,15 +5220,23 @@
<afterbreak>\p{Lu}\p{Ll}</afterbreak>
</rule>
</languagerule>
<languagerule languagerulename="French">
<rule break="no">
-<beforebreak>Yahoo!\s</beforebreak>
+<beforebreak>[\s\u00A0]</beforebreak><!-- no split between a whitespace/NBSP and a closing or straight quote that follows it -->
+<afterbreak>[»”’"'›]</afterbreak>
+</rule>
+<rule break="yes">
+<beforebreak>[\.!?][\s\u00A0][»”’"'›][\s\u00A0]</beforebreak><!-- end punctuation, space, closing or straight quote, space: split here when an opening quote, a straight quote or an uppercase letter follows -->
+<afterbreak>[«“‘‹"'\p{Lu}]</afterbreak>
+</rule>
+<rule break="no">
+<beforebreak>Yahoo![\s\u00A0]</beforebreak><!-- "Yahoo!" followed by a lowercase letter: the "!" belongs to the name -->
<afterbreak>\p{Ll}</afterbreak>
</rule>
<rule break="yes">
-<beforebreak>\.\[\d+\]\s</beforebreak>
+<beforebreak>\.\[\d+\][\s\u00A0]</beforebreak>
<afterbreak></afterbreak>
</rule>
<rule break="no"><!-- URLs without "www."-->
<beforebreak>\b(https?|ftp|file|chrome|chromium|android|(chrome|moz)\-extension):///?[A-Za-z0-9\-]+\.</beforebreak>
<afterbreak>[A-Za-z0-9\-]+(\.|\b)</afterbreak>
@@ -5204,19 +5250,19 @@
<beforebreak>\b[A-Za-z0-9\-]+\.</beforebreak>
<afterbreak>[A-Za-z]{2,5}(\.|\b)</afterbreak>
</rule>
<!-- French abbreviations -->
<rule break="no">
-<beforebreak>\b((?iu)J\.\-C|art|app|cf|chap|e(nv|tc)|fém|fig|masc|p|sing|suiv|suppl|tél|op)\.\s</beforebreak>
+<beforebreak>\b((?iu)J\.\-C|art|app|cf|chap|e(nv|tc)|fém|fig|masc|p|sing|suiv|suppl|tél|op|ex)\.[\s\u00A0]</beforebreak>
<afterbreak>\p{Ll}</afterbreak>
</rule>
<rule break="no">
-<beforebreak>\b(etc)\.\)\s</beforebreak>
+<beforebreak>\b(etc)\.\)[\s\u00A0]</beforebreak>
<afterbreak></afterbreak>
</rule>
<rule break="no">
-<beforebreak>\b(apr|ave?|boul|Mr?|Mrs|MM?|Mlle)\.\s</beforebreak>
+<beforebreak>\b(apr|ave?|boul|Mr?|Mrs|MM?|Mlle)\.[\s\u00A0]</beforebreak>
<afterbreak></afterbreak>
</rule>
<rule break="no">
<beforebreak>[\[\(]*…[\]\)]* </beforebreak>
<afterbreak>\p{Ll}</afterbreak>
@@ -5228,75 +5274,75 @@
<rule break="no">
<beforebreak>[\.!?…]+\p{Pe} </beforebreak>
<afterbreak>\p{Ll}</afterbreak>
</rule>
<rule break="no">
-<beforebreak>["”'’]\s*</beforebreak>
-<afterbreak>\s*\p{Ll}</afterbreak>
+<beforebreak>["”'’][\s\u00A0]*</beforebreak>
+<afterbreak>[\s\u00A0]*\p{Ll}</afterbreak>
</rule>
<rule break="no">
-<beforebreak>['"„][\.!?…]['"”]\s</beforebreak>
+<beforebreak>['"„][\.!?…]['"”][\s\u00A0]</beforebreak>
<afterbreak></afterbreak>
</rule>
<rule break="no">
-<beforebreak>\b\p{L}\.\s</beforebreak>
-<afterbreak>\p{L}\.\s</afterbreak>
+<beforebreak>\b\p{L}\.[\s\u00A0]</beforebreak>
+<afterbreak>\p{L}\.[\s\u00A0]</afterbreak>
</rule>
<rule break="no">
<beforebreak>\b\p{L}\.</beforebreak>
<afterbreak>\p{L}\.</afterbreak>
</rule>
<rule break="no"><!-- Je suis (...) Chris. -->
-<beforebreak>(…|\.\.\.)\s?\)\s</beforebreak>
+<beforebreak>(…|\.\.\.)[\s\u00A0]?\)[\s\u00A0]</beforebreak>
<afterbreak>[^\p{P}]</afterbreak>
</rule>
<rule break="no"><!-- Je suis (...?) Chris. -->
-<beforebreak>(…|\.\.\.)\s?\?\)\s</beforebreak>
+<beforebreak>(…|\.\.\.)[\s\u00A0]?\?\)[\s\u00A0]</beforebreak>
<afterbreak>[^\p{P}]</afterbreak>
</rule>
<rule break="no"><!-- Jones v. Smith -->
-<beforebreak>\p{Lu}\p{L}+\sv\.\s</beforebreak>
+<beforebreak>\p{Lu}\p{L}+[\s\u00A0]v\.[\s\u00A0]</beforebreak>
<afterbreak>\p{Lu}\p{L}+</afterbreak>
</rule>
<rule break="yes">
-<beforebreak>[^,][\s]\p{L}{2}\.\s</beforebreak>
-<afterbreak>\p{N}+\)\s</afterbreak>
+<beforebreak>[^,][\s\u00A0]\p{L}{2}\.[\s\u00A0]</beforebreak>
+<afterbreak>\p{N}+\)[\s\u00A0]</afterbreak>
</rule>
<rule break="no">
-<beforebreak>[\.\s]\p{L}{1,2}\.\s</beforebreak>
+<beforebreak>[\.\s\u00A0]\p{L}{1,2}\.[\s\u00A0]</beforebreak>
<afterbreak>[\p{N}\p{Ll}]</afterbreak>
</rule>
<rule break="no">
<beforebreak>[\[\(]*\.\.\.[\]\)]* </beforebreak>
<afterbreak>[^\p{Lu}]</afterbreak>
</rule>
<rule break="no">
-<beforebreak>\b\p{Lu}\.\s\p{Lu}\.\s</beforebreak>
+<beforebreak>\b\p{Lu}\.[\s\u00A0]\p{Lu}\.[\s\u00A0]</beforebreak>
<afterbreak></afterbreak>
</rule>
<rule break="no">
-<beforebreak>\b\p{Lu}\.\p{Lu}\.\s</beforebreak>
+<beforebreak>\b\p{Lu}\.\p{Lu}\.[\s\u00A0]</beforebreak>
<afterbreak></afterbreak>
</rule>
<rule break="no">
-<beforebreak>[^\.]\s[A-Z]\.\s</beforebreak>
+<beforebreak>[^\.][\s\u00A0][A-Z]\.[\s\u00A0]</beforebreak>
<afterbreak></afterbreak>
</rule>
<rule break="no">
-<beforebreak>\b(:?Blvd|Ave|Mts?)\.\s</beforebreak>
+<beforebreak>\b(?:Blvd|Ave|Mts?)\.[\s\u00A0]</beforebreak><!-- Blvd. / Ave. / Mt. / Mts. followed by a lowercase word; (?:...) is a plain non-capturing group, as in the Kan/Ill/Mass rule below -->
<afterbreak>\p{Ll}+</afterbreak>
</rule>
<rule break="no">
-<beforebreak>\b(?:Kan|Ill|M[ai]ss)\.\s</beforebreak>
+<beforebreak>\b(?:Kan|Ill|M[ai]ss)\.[\s\u00A0]</beforebreak>
<afterbreak>\p{Ll}+</afterbreak>
</rule>
<rule break="no">
-<beforebreak>\(\p{Ll}+\.\s</beforebreak>
+<beforebreak>\(\p{Ll}+\.[\s\u00A0]</beforebreak>
<afterbreak></afterbreak>
</rule>
<rule break="no"><!-- i.e. -->
-<beforebreak>i\.e\.\s</beforebreak><!-- "i.e." is never at end of sentence -->
+<beforebreak>i\.e\.[\s\u00A0]</beforebreak><!-- "i.e." is never at end of sentence -->
<afterbreak></afterbreak>
</rule>
<rule break="no"><!-- U.S.A (no dot at end) -->
<beforebreak>[A-Z]\.[A-Z]\.</beforebreak>
<afterbreak>[A-Z]\b</afterbreak>
@@ -5308,32 +5354,32 @@
<rule break="no"><!-- U.S (no dot at end) -->
<beforebreak>\bU\.</beforebreak>
<afterbreak>[SK]\b</afterbreak>
</rule>
<rule break="no"><!-- No. 5 -->
-<beforebreak>\b[nN]o\.\s</beforebreak>
+<beforebreak>\b[nN]o\.[\s\u00A0]</beforebreak>
<afterbreak>\p{N}</afterbreak>
</rule>
<rule break="no"><!-- Ph.D. -->
-<beforebreak>\bP[Hh]\.\s?</beforebreak>
+<beforebreak>\bP[Hh]\.[\s\u00A0]?</beforebreak>
<afterbreak>D\.?</afterbreak>
</rule>
<rule break="no"><!-- e.g. -->
-<beforebreak>\be\.g\.\s</beforebreak>
+<beforebreak>\be\.g\.[\s\u00A0]</beforebreak>
<afterbreak></afterbreak>
</rule>
<rule break="no"><!-- vs. -->
-<beforebreak>\bvs\.\s</beforebreak>
+<beforebreak>\bvs\.[\s\u00A0]</beforebreak>
<afterbreak></afterbreak>
</rule>
<!--"Etc." can end the sentence, so we check for the uppercase letter after it.-->
<rule break="no"><!-- Etc. -->
-<beforebreak>\b[Ee]tc\.\s</beforebreak>
+<beforebreak>\b[Ee]tc\.[\s\u00A0]</beforebreak>
<afterbreak>[^\p{Lu}]</afterbreak>
</rule>
<rule break="no"><!-- BTW (by the way) -->
-<beforebreak>\b([Bb]tw|BTW)\.\s</beforebreak>
+<beforebreak>\b([Bb]tw|BTW)\.[\s\u00A0]</beforebreak>
<afterbreak></afterbreak>
</rule>
<rule break="no">
<beforebreak>(?i)FRITZ!</beforebreak>
<afterbreak>(?i)Box</afterbreak>
@@ -5341,76 +5387,76 @@
<rule break="no"><!-- https://de.wikipedia.org/wiki/VW_ID.3 -->
<beforebreak>ID.</beforebreak>
<afterbreak>3|Buzz|Crozz</afterbreak>
</rule>
<rule break="no"><!-- Ph.D. (see rule PH_D) -->
-<beforebreak>\bP[Hh]\.?\s?[Dd]\.\s</beforebreak>
+<beforebreak>\bP[Hh]\.?[\s\u00A0]?[Dd]\.[\s\u00A0]</beforebreak>
<afterbreak></afterbreak>
</rule>
<rule break="no"><!-- "I have a B. Eng. degree" (see rule BACHELOR_ABBR) -->
-<beforebreak>\b(P[hH][dD]|BSc|BEng|BComp|BArch|MSc|MEng|MComp)\.\s</beforebreak>
+<beforebreak>\b(P[hH][dD]|BSc|BEng|BComp|BArch|MSc|MEng|MComp)\.[\s\u00A0]</beforebreak>
<afterbreak></afterbreak>
</rule>
<rule break="no"><!-- "I have a LL.B degree." (see rule PH_D) -->
-<beforebreak>\bLL\.\s?[BM]\.\s</beforebreak>
+<beforebreak>\bLL\.[\s\u00A0]?[BM]\.[\s\u00A0]</beforebreak>
<afterbreak></afterbreak>
</rule>
<rule break="no"><!-- B.Eng. (Bachelor of Engineering) -->
-<beforebreak>\b[BM]\.\s?</beforebreak>
+<beforebreak>\b[BM]\.[\s\u00A0]?</beforebreak>
<afterbreak>Eng\.?</afterbreak>
</rule>
<rule break="no"><!-- LL.B. (Bachelor of Laws) -->
-<beforebreak>\bLL\.\s?</beforebreak>
+<beforebreak>\bLL\.[\s\u00A0]?</beforebreak>
<afterbreak>[BM]\.?</afterbreak>
</rule>
<rule break="no"><!-- B.Sc. (Bachelor of Science) -->
-<beforebreak>\b[BM]\.\s?</beforebreak>
+<beforebreak>\b[BM]\.[\s\u00A0]?</beforebreak>
<afterbreak>Sc\.?</afterbreak>
</rule>
<rule break="no"><!-- B.Comp. (Bachelor of Computing) -->
-<beforebreak>\b[BM]\.\s?</beforebreak>
+<beforebreak>\b[BM]\.[\s\u00A0]?</beforebreak>
<afterbreak>Comp?\.?</afterbreak>
</rule>
<rule break="no"><!-- B.Arch. (Bachelor of Architecture) -->
-<beforebreak>\b[BM]\.\s?</beforebreak>
+<beforebreak>\b[BM]\.[\s\u00A0]?</beforebreak>
<afterbreak>Arch\.?</afterbreak>
</rule>
<rule break="no">
-<beforebreak>\b[BM]\.?\s?(Sc|Eng|Comp|Arch)\.\s</beforebreak>
+<beforebreak>\b[BM]\.?[\s\u00A0]?(Sc|Eng|Comp|Arch)\.[\s\u00A0]</beforebreak>
<afterbreak></afterbreak>
</rule>
<rule break="no">
-<beforebreak>\bI(nc|NC)\.\s</beforebreak>
+<beforebreak>\bI(nc|NC)\.[\s\u00A0]</beforebreak>
<afterbreak></afterbreak>
</rule>
<rule break="no">
-<beforebreak>\bCorp\.\s</beforebreak>
+<beforebreak>\bCorp\.[\s\u00A0]</beforebreak>
<afterbreak></afterbreak>
</rule>
<rule break="no">
-<beforebreak>\bBros\.\s</beforebreak>
+<beforebreak>\bBros\.[\s\u00A0]</beforebreak>
<afterbreak></afterbreak>
</rule>
<rule break="no">
-<beforebreak>\bLtd\.\s</beforebreak>
+<beforebreak>\bLtd\.[\s\u00A0]</beforebreak>
<afterbreak>\p{Ll}+</afterbreak>
</rule>
<rule break="no">
-<beforebreak>\bCo\.\s</beforebreak>
+<beforebreak>\bCo\.[\s\u00A0]</beforebreak>
<afterbreak></afterbreak>
</rule>
<!-- Break rules -->
<rule break="yes">
-<beforebreak>[\.!?…][\u0002|'|"|«|\)|\]|\}¹²³]?\s+</beforebreak>
+<beforebreak>[\.!?…][\u0002|'|"|«|\)|\]|\}¹²³]?[\s\u00A0]+</beforebreak>
<afterbreak></afterbreak>
</rule>
<rule break="yes">
<beforebreak>[\.!?…]['"\p{Pe}\u00BB\u201D]?</beforebreak>
<afterbreak>\p{Lu}[^\p{Lu}]</afterbreak>
</rule>
<rule break="yes">
-<beforebreak>\s\p{L}[\.!?…]\s</beforebreak>
+<beforebreak>[\s\u00A0]\p{L}[\.!?…][\s\u00A0]</beforebreak>
<afterbreak>\p{Lu}\p{Ll}</afterbreak>
</rule>
</languagerule>
<languagerule languagerulename="Ukrainian">
@@ -5554,21 +5600,25 @@
<beforebreak>(\([^)]*|\[[^\]]*|,[\h\v]*)\b(див)\.[\h\v]*</beforebreak>
<afterbreak></afterbreak>
</rule>
<!-- abbreviation with proper noun: проф. Грицько, о. Лісове -->
<rule break="no">
-<beforebreak>\b([Аа]кад|[Пп]роф|[Дд]оц|[Аа]сист|[Рр]еж|[Аа]рх|[Сс]вв?|о|ім|упоряд|чл\.-кор)\.[\h\v]*</beforebreak>
+<beforebreak>\b([Аа]кад|[Пп]роф|[Дд]оц|[Аа]сист|[Рр]еж|[Аа]рх|[Сс]вв?|о|ім|упоряд|чл\.-кор|[Пп]реп)\.[\h\v]*</beforebreak>
<afterbreak>[\h\v]*[А-ЯІЇЄҐA-Z]</afterbreak>
</rule>
-<!-- смерть гр. Болтаровича -->
<rule break="no">
+<beforebreak>\bМан\.[\h\v]*</beforebreak><!-- "Ман." followed (after optional whitespace) by "Сіті"/"сіті" or by text starting with "Юн"/"юн", e.g. "Ман. Сіті" -->
+<afterbreak>[\h\v]*([Сс]іті|[Юю]н)</afterbreak>
+</rule>
+<!-- смерть гр. Болтаровича, but not "9 гр." -->
+<rule break="no">
<beforebreak>[^0-9][\h\v]+[Гг]р\.[\h\v]*</beforebreak>
<afterbreak>[\h\v]*[А-ЯІЇЄҐA-Z]</afterbreak>
</rule>
<!-- арт. - артикул -->
<!-- TODO: арт. - артист -->
<rule break="no">
-<beforebreak>\bарт\.[\h\v]*</beforebreak>
+<beforebreak>\b([Аа]рт|[Мм]ал|[Рр]ис)\.[\h\v]*</beforebreak>
<afterbreak>[\h\v]*[0-9]</afterbreak>
</rule>
<!-- ХІІ р., 3-6 арт. -->
<rule break="no">
<beforebreak>[0-9][\h\v]+арт\.[\h\v]*</beforebreak>