lib/srx/segment.srx in srx-languagetool-0.10.0 vs lib/srx/segment.srx in srx-languagetool-0.11.0
- old
+ new
@@ -3,11 +3,11 @@
<header segmentsubflows="yes" cascade="yes">
<formathandle type="start" include="no"></formathandle>
<formathandle type="end" include="yes"></formathandle>
<formathandle type="isolated" include="no"></formathandle>
<okpsrx:options oneSegmentIncludesAll="no" trimLeadingWhitespaces="no" trimTrailingWhitespaces="no" useJavaRegex="yes" useIcu4JBreakRules="no" treatIsolatedCodesAsWhitespace="no"></okpsrx:options>
-<okpsrx:sample language="nl" useMappedRules="yes">De organisatie Doe! is een rare.</okpsrx:sample>
+<okpsrx:sample language="nl" useMappedRules="yes">De les is in jul. en aug. op maandagavond.</okpsrx:sample>
<okpsrx:rangeRule></okpsrx:rangeRule>
</header>
<body>
<languagerules>
<languagerule languagerulename="Greek">
@@ -1538,10 +1538,18 @@
<beforebreak>\s\p{L}[\.!?…]\s</beforebreak>
<afterbreak>\p{Lu}\p{Ll}</afterbreak>
</rule>
</languagerule>
<languagerule languagerulename="Dutch">
+<rule break="yes">
+<beforebreak>(^| )O\.\s</beforebreak>
+<afterbreak>([A-Z][a-z]{1,3}[ ,:;.!?]|Indië|Wanneer|Kunnen|Sorry)</afterbreak>
+</rule>
+<rule break="yes">
+<beforebreak>\sEd[.]\s</beforebreak>
+<afterbreak>([A-Z][a-z]{1,3}|Wanneer|Misschien|Daarna|Zoals|Zelfs|Bedankt|Zullen|Vooral|Volgens|Vervolgens)(\s|[,:;])</afterbreak>
+</rule>
<rule break="no">
<beforebreak>\b(sp|SP)</beforebreak>
<afterbreak>\.[aA]\b</afterbreak>
</rule>
<rule break="no">
@@ -1643,10 +1651,18 @@
</rule>
<rule break="no">
<beforebreak>\b(prov|pseud|psych|qty|red|ref|resp|soc|st|tab|tel|temp|prof|tk)\.\s</beforebreak>
<afterbreak></afterbreak>
</rule>
+<rule break="yes">
+<beforebreak>\sgraden C\.\s</beforebreak>
+<afterbreak>[A-Z]</afterbreak>
+</rule>
+<rule break="yes">
+<beforebreak>°C\.\s</beforebreak>
+<afterbreak>[A-Z][a-z]</afterbreak>
+</rule>
<rule break="no">
<beforebreak>\b([A-Z]|Adr|Chr|Fr|Fred|IJ|Jac|Joh|Ph|St|Th|Tj|v|v\.(\s)?d)\.(\s)?</beforebreak>
<afterbreak>\p{Lu}</afterbreak>
</rule>
<rule break="no">
@@ -1691,10 +1707,14 @@
</rule>
<rule break="no">
<beforebreak>\b\p{L}\.</beforebreak>
<afterbreak>\p{L}\.</afterbreak>
</rule>
+<rule break="yes">
+<beforebreak>\sik\.\s</beforebreak>
+<afterbreak>ik\s</afterbreak>
+</rule>
<rule break="no">
<beforebreak>[\.\s]\p{L}{1,2}\.\s</beforebreak>
<afterbreak>[\p{N}\p{Ll}]</afterbreak>
</rule>
<rule break="no">
@@ -1744,10 +1764,18 @@
</rule>
<rule break="no">
<beforebreak>\s[A-Z].+!\s</beforebreak>
<afterbreak>[a-z]</afterbreak>
</rule>
+<rule break="no">
+<beforebreak>\b(jan|mrt|mar|jun|jul|aug|sept|okt|sep|spt|nov|dec|.*opp)\.\s</beforebreak>
+<afterbreak>[a-z]</afterbreak>
+</rule>
+<rule break="no">
+<beforebreak>Groen!\s</beforebreak>
+<afterbreak>[a-z]</afterbreak>
+</rule>
<rule break="yes">
<beforebreak>[.!?…][’'"\u00BB\u2019\u201D\u203A\u00AB\p{Pe}\u0002¹²³]*\s</beforebreak>
<afterbreak></afterbreak>
</rule>
<rule break="yes">
@@ -5028,11 +5056,11 @@
<beforebreak>\b(spp?)\.[\u00A0\s]{1,2}</beforebreak>
<afterbreak></afterbreak>
</rule>
<!-- German abbreviations -->
<rule break="no">
-<beforebreak>\b(betr|Geb|Stk|ggü|Mag|mtl|[Pp]arl|Bsp|versch|[Ss]tellv|d|Übers|usw|[Bb]zw|Ab[hkst]|[Aa]bzü?gl|[Ll]tda|[Ee]inschl|[Vv]mtl|Ev|bezgl|Abzw|[Vv]sl|ahd|Akk|aktual|[Öö]ffentl|prof|allg|alltagsspr|altdt|alttest|amerikan|Anh|Ank|Anm|Art|[Aa]utom|Auftragsnr|Az|Bat|bayr|Bde?|bearb|Bed|Bem|bes|bez|Bez|Bhf|bspw|btto|bw|Dtl|Dez)\.[\u00A0\s]{1,2}</beforebreak>
+<beforebreak>\b(betr|Geb|Stk|ggü|Mag|mtl|[Pp]arl|Bsp|versch|[Dd]iesbzgl|[Dd]bzgl|[Ss]tellv|d|Übers|usw|[Bb]zw|Ab[hkst]|[Aa]bzü?gl|\d+-tlg|tlg|ggfls|[Ll]tda|[Ee]inschl|[Vv]mtl|Ev|bezgl|lit|Abzw|[Vv]sl|ahd|Akk|aktual|[Öö]ffentl|prof|allg|alltagsspr|altdt|alttest|amerikan|Anh|Ank|Anm|Art|[Aa]utom|Auftragsnr|Az|Bat|bayr|Bde?|bearb|Bed|Bem|bes|bez|Bez|Bhf|Blvd|bspw|btto|bw|Dtl|Dez|[Jj]gdfr|[Ee]ff)\.[\u00A0\s]{1,2}</beforebreak>
<afterbreak></afterbreak>
</rule>
<rule break="no">
<beforebreak>\b(cts?|[Cc]a|chem|chin|Chr|cresc|[Dd]at|desgl|ders|dgl|Dipl|Dir?|Doz?|durchg|durchges|Dr|dt|ebd|Ed|[Ee]igt?l|akt|[Ee]ngl|Erg|al|et[cw]|Etw|ev|[Ee]vtl?|[Ee]xkl|Expl|Exz)\.[\u00A0\s]{1,2}</beforebreak>
<afterbreak></afterbreak>
@@ -6566,10 +6594,10 @@
<beforebreak>\d+\.\s</beforebreak>
<afterbreak>степен(у)</afterbreak>
</rule>
<!-- German abbreviations -->
<rule break="no">
-<beforebreak>\b(versch|d|Übers|usw|Ab[hkts]|ahd|Akk|aktual|allg|alltagsspr|altdt|alttest|amerikan|Anh|Ank|Anm|Art|Az|Bat|bayr|Bd|Bde|bearb|Bed|Bem|bes|bez|Bez|Bhf|bspw|btto|bw|bzw)\.\s</beforebreak>
+<beforebreak>\b(versch|d|Übers|usw|Ab[hkts]|ahd|Akk|aktual|allg|alltagsspr|altdt|alttest|amerikan|Anh|Ank|Anm|Art|Az|Bat|bayr|Bd|Bde|bearb|Bed|Bem|bes|bez|Bez|Bhf|bspw|btto|bw|bzw|pot)\.\s</beforebreak>
<afterbreak></afterbreak>
</rule>
<!--Српске скраћенице-->
<rule break="no">
<beforebreak>\b(одн|тј)\.\s+</beforebreak>