lib/srx/segment.srx in srx-languagetool-0.11.0 vs lib/srx/segment.srx in srx-languagetool-0.12.0
- old
+ new
@@ -3,11 +3,11 @@
<header segmentsubflows="yes" cascade="yes">
<formathandle type="start" include="no"></formathandle>
<formathandle type="end" include="yes"></formathandle>
<formathandle type="isolated" include="no"></formathandle>
<okpsrx:options oneSegmentIncludesAll="no" trimLeadingWhitespaces="no" trimTrailingWhitespaces="no" useJavaRegex="yes" useIcu4JBreakRules="no" treatIsolatedCodesAsWhitespace="no"></okpsrx:options>
-<okpsrx:sample language="nl" useMappedRules="yes">De les is in jul. en aug. op maandagavond.</okpsrx:sample>
+<okpsrx:sample language="nl" useMappedRules="yes">Wat God buiten Christus is. 2.</okpsrx:sample>
<okpsrx:rangeRule></okpsrx:rangeRule>
</header>
<body>
<languagerules>
<languagerule languagerulename="Greek">
@@ -1162,11 +1162,11 @@
<rule break="no">
<beforebreak>\bP[Hh]\.[\s\u00A0]?</beforebreak>
<afterbreak>D\.?</afterbreak>
</rule>
<rule break="no">
-<beforebreak>\b([Aa]vg|[Ee]d|pp|[Vv]iz|i\.?[\s\u00A0]*e|[Vvol]|[Rr]col|maj|Lt|[Ff]ig|[Ff]igs|[Vv]iz|[Vv]ols|[Aa]pprox|[Ii]ncl?|[Aa]cc|Pres|[Cc]orp|[Ee]x|[Cc]onn|[Dd]ept|[Ll]tda|[Mm]in|[Mm]ax|[Gg]ovt|[Rr]etd|Ing|lb|lbf|ft|c\.?[\s\u00A0]*f|vs|dia|lbs|\d+-(:?oz|kc|in|h[rp]|ml)|M?sec)\.[\s\u00A0]</beforebreak>
+<beforebreak>\b([Aa]vg|[Ee]d|pp|[Vv]iz|i\.?[\s\u00A0]*e|[Vvol]|[Rr]col|maj|Lt|[Ff]ig|[Ff]igs|[Vv]iz|[Vv]ols|[Aa]pprox|[Aa]cq|[Ii]ncl?|[Aa]cc|Pres|[Cc]orp|[Ee]x|[Cc]onn|[Dd]ept|[Ll]tda|[Mm]in|[Mm]ax|[Gg]ovt|[Rr]etd|Ing|lb|lbf|ft|c\.?[\s\u00A0]*f|vs|dia|lbs|\d+-(:?oz|kc|in|h[rp]|ml)|M?sec)\.[\s\u00A0]</beforebreak>
<afterbreak>[^\p{Lu}]|I</afterbreak>
</rule>
<rule break="no">
<beforebreak>\b(hr)\.[\s\u00A0]</beforebreak>
<afterbreak>[^\p{Lu}]|I</afterbreak>
@@ -1539,10 +1539,14 @@
<afterbreak>\p{Lu}\p{Ll}</afterbreak>
</rule>
</languagerule>
<languagerule languagerulename="Dutch">
<rule break="yes">
+<beforebreak>[ ]is[.][ ]</beforebreak>
+<afterbreak>[0-9]\.($|[ ])</afterbreak>
+</rule>
+<rule break="yes">
<beforebreak>(^| )O\.\s</beforebreak>
<afterbreak>([A-Z][a-z]{1,3}[ ,:;.!?]|Indië|Wanneer|Kunnen|Sorry)</afterbreak>
</rule>
<rule break="yes">
<beforebreak>\sEd[.]\s</beforebreak>
@@ -1659,10 +1663,14 @@
</rule>
<rule break="yes">
<beforebreak>°C\.\s</beforebreak>
<afterbreak>[A-Z][a-z]</afterbreak>
</rule>
+<rule break="yes">
+<beforebreak>[A-Z]&amp;[A-Z]\.\s</beforebreak>
+<afterbreak>[A-Z][a-z]</afterbreak>
+</rule>
<rule break="no">
<beforebreak>\b([A-Z]|Adr|Chr|Fr|Fred|IJ|Jac|Joh|Ph|St|Th|Tj|v|v\.(\s)?d)\.(\s)?</beforebreak>
<afterbreak>\p{Lu}</afterbreak>
</rule>
<rule break="no">
@@ -4702,11 +4710,11 @@
<rule break="no">
<beforebreak>\b([Cc]ap|[Aa]rts?|pp|[Vv]ol)\.[\s\u00A0]</beforebreak>
<afterbreak>[XIV\d]+\b</afterbreak>
</rule>
<rule break="no">
-<beforebreak>\b([Ee]ds?|[Cc]oords?|\d+(r|n|t|è|é|a|rs|ns|es)|seg|masc|fem|sing|pl|adj|adv|g|kg|m|km|cm|ha|u|h|hrs|s|ss|alt|cant|cast|cert|com|dir|gr|nom|parc|pres|set|Sr|Jr|Admón|Adm|Inc|Co|Hnos|Vda|[VU]d[s]?)\.[\p{Pe}\p{Pf}\p{Pd}"']*[\s\u00A0]</beforebreak>
+<beforebreak>\b(Ltd|[Ee]ds?|[Cc]oords?|\d+(r|n|t|è|é|a|rs|ns|es)|seg|masc|fem|sing|pl|adj|adv|g|kg|m|km|cm|ha|u|h|hrs|s|ss|alt|cant|cast|cert|com|dir|grs?|nom|parc|pres|set|Sr|Jr|Admón|Adm|Inc|Co|Hnos|Vda|[VU]d[s]?)\.[\p{Pe}\p{Pf}\p{Pd}"']*[\s\u00A0]</beforebreak>
<afterbreak>[\-¡¿«»"'\u2018\u201C\p{Ps}\u2012\u2013\u2014\u2015\u2053]*\p{Ll}</afterbreak>
</rule>
<!-- Any word in acronyms like U.S.A.F or F. B. I. or C. or c.s.p. or p. e. -->
<rule break="no">
<beforebreak>\b(\p{L}\.)+[\p{Pe}\p{Pf}\p{Pd}"']*[\s\u00A0]</beforebreak>
@@ -4852,11 +4860,11 @@
<beforebreak>\b([Ee]d(it)?|[Nn]o|n|[Nn]úm|[Pp]ág|p|c|\d+er)|[V\.]gr\.[\s\u00A0]</beforebreak>
<afterbreak></afterbreak>
</rule>
<!-- Abbreviations that can finish sentences -->
<rule break="no">
-<beforebreak>\b([Ee]ds?|[Cc]oords?|grs?|Sr|Jr|Admón|Inc|Co|Hnos|Vda|[VUuv]d[s]?)\.[\p{Pe}\p{Pf}\p{Pd}"']*[\s\u00A0]</beforebreak>
+<beforebreak>\b(Ltd|[Ee]ds?|[Cc]oords?|\d+(r|n|t|è|é|a|rs|ns|es)|seg|masc|fem|sing|pl|adj|adv|g|kg|m|km|cm|ha|u|h|hrs|s|ss|alt|cant|cast|cert|com|dir|grs?|nom|parc|pres|set|Sr|Jr|Admón|Adm|Inc|Co|Hnos|Vda|[VU]d[s]?)\.[\p{Pe}\p{Pf}\p{Pd}"']*[\s\u00A0]</beforebreak>
<afterbreak>[\-¡¿«»"'\u2018\u201C\p{Ps}\u2012\u2013\u2014\u2015\u2053]*\p{Ll}</afterbreak>
</rule>
<!-- Any word in acronyms like U.S.A.F or F. B. I. or C. or c.s.p. or p. e. -->
<rule break="no">
<beforebreak>\b(\p{L}\.)+[\p{Pe}\p{Pf}\p{Pd}"']*[\s\u00A0]</beforebreak>
@@ -5056,10 +5064,10 @@
<beforebreak>\b(spp?)\.[\u00A0\s]{1,2}</beforebreak>
<afterbreak></afterbreak>
</rule>
<!-- German abbreviations -->
<rule break="no">
-<beforebreak>\b(betr|Geb|Stk|ggü|Mag|mtl|[Pp]arl|Bsp|versch|[Dd]iesbzgl|[Dd]bzgl[Ss]tellv|d|Übers|usw|[Bb]zw|Ab[hkst]|[Aa]bzü?gl|\d+-tlg|tlg|ggfls|[Ll]tda|[Ee]inschl|[Vv]mtl|Ev|bezgl|lit|Abzw|[Vv]sl|ahd|Akk|aktual|[Öö]ffentl|prof|allg|alltagsspr|altdt|alttest|amerikan|Anh|Ank|Anm|Art|[Aa]utom|Auftragsnr|Az|Bat|bayr|Bde?|bearb|Bed|Bem|bes|bez|Bez|Bhf|Blvd|bspw|btto|bw|Dtl|Dez|[Jj]gdfr|[Ee]ff)\.[\u00A0\s]{1,2}</beforebreak>
+<beforebreak>\b(betr|Geb|Stk|ggü|Mag|mtl|[Pp]arl|Bsp|versch|[Dd]iesbzgl|[Dd]bzgl[Ss]tellv|d|Übers|usw|[Bb]zw|Ab[hkst]|[Aa]bzü?gl|\d+-tlg|tlg|ggfls|[Ll]tda|[Ee]inschl|[Vv]mtl|Ev|bezgl|lit|Abzw|[Vv]sl|ahd|Akk|aktual|[Öö]ffentl|prof|allg|alltagsspr|altdt|alttest|amerikan|Anh|Ank|Anm|Art|[Aa]utom|Auftragsnr|Az|Bat|bayr|Bde?|bearb|Bed|Bem|bes|bez|Bez|Bhf|Blvd|[Bb]spw|btto|bw|Dtl|[Gg]esetzl|Dez|[Jj]gdfr|[Ee]ff)\.[\u00A0\s]{1,2}</beforebreak>
<afterbreak></afterbreak>
</rule>
<rule break="no">
<beforebreak>\b(cts?|[Cc]a|chem|chin|Chr|cresc|[Dd]at|desgl|ders|dgl|Dipl|Dir?|Doz?|durchg|durchges|Dr|dt|ebd|Ed|[Ee]igt?l|akt|[Ee]ngl|Erg|al|et[cw]|Etw|ev|[Ee]vtl?|[Ee]xkl|Expl|Exz)\.[\u00A0\s]{1,2}</beforebreak>
<afterbreak></afterbreak>