lib/srx/segment.srx in srx-languagetool-0.10.0 vs lib/srx/segment.srx in srx-languagetool-0.11.0

- old
+ new

@@ -3,11 +3,11 @@ <header segmentsubflows="yes" cascade="yes"> <formathandle type="start" include="no"></formathandle> <formathandle type="end" include="yes"></formathandle> <formathandle type="isolated" include="no"></formathandle> <okpsrx:options oneSegmentIncludesAll="no" trimLeadingWhitespaces="no" trimTrailingWhitespaces="no" useJavaRegex="yes" useIcu4JBreakRules="no" treatIsolatedCodesAsWhitespace="no"></okpsrx:options> -<okpsrx:sample language="nl" useMappedRules="yes">De organisatie Doe! is een rare.</okpsrx:sample> +<okpsrx:sample language="nl" useMappedRules="yes">De les is in jul. en aug. op maandagavond.</okpsrx:sample> <okpsrx:rangeRule></okpsrx:rangeRule> </header> <body> <languagerules> <languagerule languagerulename="Greek"> @@ -1538,10 +1538,18 @@ <beforebreak>\s\p{L}[\.!?…]\s</beforebreak> <afterbreak>\p{Lu}\p{Ll}</afterbreak> </rule> </languagerule> <languagerule languagerulename="Dutch"> +<rule break="yes"><!-- Break after "O." (at start of text or after a space) when followed by a capital plus 1 to 3 lowercase letters and a space or punctuation mark, or by one of the listed words. --> +<beforebreak>(^| )O\.\s</beforebreak> +<afterbreak>([A-Z][a-z]{1,3}[ ,:;.!?]|Indië|Wanneer|Kunnen|Sorry)</afterbreak> +</rule> +<rule break="yes"><!-- Break after "Ed." when followed by a capital plus 1 to 3 lowercase letters, or one of the listed words, and then whitespace or one of , : ; --> +<beforebreak>\sEd[.]\s</beforebreak> +<afterbreak>([A-Z][a-z]{1,3}|Wanneer|Misschien|Daarna|Zoals|Zelfs|Bedankt|Zullen|Vooral|Volgens|Vervolgens)(\s|[,:;])</afterbreak> +</rule> <rule break="no"> <beforebreak>\b(sp|SP)</beforebreak> <afterbreak>\.[aA]\b</afterbreak> </rule> <rule break="no"> @@ -1643,10 +1651,18 @@ </rule> <rule break="no"> <beforebreak>\b(prov|pseud|psych|qty|red|ref|resp|soc|st|tab|tel|temp|prof|tk)\.\s</beforebreak> <afterbreak></afterbreak> </rule> +<rule break="yes"><!-- Break after "graden C." when an ASCII capital letter follows. --> +<beforebreak>\sgraden C\.\s</beforebreak> +<afterbreak>[A-Z]</afterbreak> +</rule> +<rule break="yes"><!-- Break after "°C." when an ASCII capital followed by a lowercase letter comes next. --> +<beforebreak>°C\.\s</beforebreak> +<afterbreak>[A-Z][a-z]</afterbreak> +</rule> <rule break="no"> <beforebreak>\b([A-Z]|Adr|Chr|Fr|Fred|IJ|Jac|Joh|Ph|St|Th|Tj|v|v\.(\s)?d)\.(\s)?</beforebreak> <afterbreak>\p{Lu}</afterbreak> </rule> <rule break="no"> @@ -1691,10 +1707,14 @@ </rule> <rule break="no"> 
<beforebreak>\b\p{L}\.</beforebreak> <afterbreak>\p{L}\.</afterbreak> </rule> +<rule break="yes"><!-- Break between a sentence ending in "ik." and a following "ik ". --> +<beforebreak>\sik\.\s</beforebreak> +<afterbreak>ik\s</afterbreak> +</rule> <rule break="no"> <beforebreak>[\.\s]\p{L}{1,2}\.\s</beforebreak> <afterbreak>[\p{N}\p{Ll}]</afterbreak> </rule> <rule break="no"> @@ -1744,10 +1764,18 @@ </rule> <rule break="no"> <beforebreak>\s[A-Z].+!\s</beforebreak> <afterbreak>[a-z]</afterbreak> </rule> +<rule break="no"><!-- No break after the listed month abbreviations, or after text ending in "opp", when the period is followed by a lowercase ASCII letter. --> +<beforebreak>\b(jan|mrt|mar|jun|jul|aug|sept|okt|sep|spt|nov|dec|.*opp)\.\s</beforebreak> +<afterbreak>[a-z]</afterbreak> +</rule> +<rule break="no"><!-- No break after "Groen!" when a lowercase ASCII letter follows. --> +<beforebreak>Groen!\s</beforebreak> +<afterbreak>[a-z]</afterbreak> +</rule> <rule break="yes"> <beforebreak>[.!?…][’'"\u00BB\u2019\u201D\u203A\u00AB\p{Pe}\u0002¹²³]*\s</beforebreak> <afterbreak></afterbreak> </rule> <rule break="yes"> @@ -5028,11 +5056,11 @@ <beforebreak>\b(spp?)\.[\u00A0\s]{1,2}</beforebreak> <afterbreak></afterbreak> </rule> <!-- German abbreviations --> <rule break="no"> -<beforebreak>\b(betr|Geb|Stk|ggü|Mag|mtl|[Pp]arl|Bsp|versch|[Ss]tellv|d|Übers|usw|[Bb]zw|Ab[hkst]|[Aa]bzü?gl|[Ll]tda|[Ee]inschl|[Vv]mtl|Ev|bezgl|Abzw|[Vv]sl|ahd|Akk|aktual|[Öö]ffentl|prof|allg|alltagsspr|altdt|alttest|amerikan|Anh|Ank|Anm|Art|[Aa]utom|Auftragsnr|Az|Bat|bayr|Bde?|bearb|Bed|Bem|bes|bez|Bez|Bhf|bspw|btto|bw|Dtl|Dez)\.[\u00A0\s]{1,2}</beforebreak> +<beforebreak>\b(betr|Geb|Stk|ggü|Mag|mtl|[Pp]arl|Bsp|versch|[Dd]iesbzgl|[Dd]bzgl|[Ss]tellv|d|Übers|usw|[Bb]zw|Ab[hkst]|[Aa]bzü?gl|\d+-tlg|tlg|ggfls|[Ll]tda|[Ee]inschl|[Vv]mtl|Ev|bezgl|lit|Abzw|[Vv]sl|ahd|Akk|aktual|[Öö]ffentl|prof|allg|alltagsspr|altdt|alttest|amerikan|Anh|Ank|Anm|Art|[Aa]utom|Auftragsnr|Az|Bat|bayr|Bde?|bearb|Bed|Bem|bes|bez|Bez|Bhf|Blvd|bspw|btto|bw|Dtl|Dez|[Jj]gdfr|[Ee]ff)\.[\u00A0\s]{1,2}</beforebreak> <afterbreak></afterbreak> </rule> <rule break="no"> 
<beforebreak>\b(cts?|[Cc]a|chem|chin|Chr|cresc|[Dd]at|desgl|ders|dgl|Dipl|Dir?|Doz?|durchg|durchges|Dr|dt|ebd|Ed|[Ee]igt?l|akt|[Ee]ngl|Erg|al|et[cw]|Etw|ev|[Ee]vtl?|[Ee]xkl|Expl|Exz)\.[\u00A0\s]{1,2}</beforebreak> <afterbreak></afterbreak> @@ -6566,10 +6594,10 @@ <beforebreak>\d+\.\s</beforebreak> <afterbreak>степен(у)</afterbreak> </rule> <!-- German abbreviations --> <rule break="no"> -<beforebreak>\b(versch|d|Übers|usw|Ab[hkts]|ahd|Akk|aktual|allg|alltagsspr|altdt|alttest|amerikan|Anh|Ank|Anm|Art|Az|Bat|bayr|Bd|Bde|bearb|Bed|Bem|bes|bez|Bez|Bhf|bspw|btto|bw|bzw)\.\s</beforebreak> +<beforebreak>\b(versch|d|Übers|usw|Ab[hkts]|ahd|Akk|aktual|allg|alltagsspr|altdt|alttest|amerikan|Anh|Ank|Anm|Art|Az|Bat|bayr|Bd|Bde|bearb|Bed|Bem|bes|bez|Bez|Bhf|bspw|btto|bw|bzw|pot)\.\s</beforebreak> <afterbreak></afterbreak> </rule> <!-- Serbian abbreviations --> <rule break="no"> <beforebreak>\b(одн|тј)\.\s+</beforebreak>