lib/srx/segment.srx in srx-languagetool-0.5.0 vs lib/srx/segment.srx in srx-languagetool-0.6.0

- old
+ new

@@ -1157,11 +1157,11 @@ <rule break="no"><!-- Ph.D. --> <beforebreak>\bP[Hh]\.[\s\u00A0]?</beforebreak> <afterbreak>D\.?</afterbreak> </rule> <rule break="no"><!-- min. --> -<beforebreak>\b([Ee]d|pp|[Vv]iz|i\.?[\s\u00A0]*e|[Vvol]|[Rr]col|maj|Lt|[Ff]ig|[Ff]igs|[Vv]iz|[Vv]ols|[Aa]pprox|[Ii]ncl?|[Aa]cc|Pres|[Cc]orp|[Ee]x|[Cc]onn|[Dd]ept|[Mm]in|max|[Gg]ovt|lb|lbf|ft|c\.?[\s\u00A0]*f|vs|dia|lbs|\d+-(:?oz|kc|in|h[rp]|ml)|M?sec)\.[\s\u00A0]</beforebreak> +<beforebreak>\b([Ee]d|pp|[Vv]iz|i\.?[\s\u00A0]*e|[Vvol]|[Rr]col|maj|Lt|[Ff]ig|[Ff]igs|[Vv]iz|[Vv]ols|[Aa]pprox|[Ii]ncl?|[Aa]cc|Pres|[Cc]orp|[Ee]x|[Cc]onn|[Dd]ept|[Mm]in|[Mm]ax|[Gg]ovt|[Rr]etd|lb|lbf|ft|c\.?[\s\u00A0]*f|vs|dia|lbs|\d+-(:?oz|kc|in|h[rp]|ml)|M?sec)\.[\s\u00A0]</beforebreak> <afterbreak>[^\p{Lu}]|I</afterbreak> </rule> <rule break="no"><!-- hr. --> <beforebreak>\b(hr)\.[\s\u00A0]</beforebreak> <afterbreak>[^\p{Lu}]|I</afterbreak> @@ -1185,13 +1185,17 @@ <rule break="no"><!-- e.g. --> <beforebreak>\be\.g\.[\s\u00A0]</beforebreak> <afterbreak></afterbreak> </rule> <rule break="no"><!-- vs. --> -<beforebreak>\bvs\.[\s\u00A0]</beforebreak> +<beforebreak>\b[Vv]s\.[\s\u00A0]</beforebreak> <afterbreak></afterbreak> </rule> +<rule break="no"><!-- pp. --> +<beforebreak>\b(pp|PP)\.[\s\u00A0]</beforebreak> +<afterbreak></afterbreak> +</rule> <rule break="no"><!-- esp. --> <beforebreak>\be[sx]p\.[\s\u00A0]</beforebreak> <afterbreak></afterbreak> </rule> <!--"Etc." can end the sentence, so we check for the uppercase letter after it.--> @@ -1247,11 +1251,11 @@ <beforebreak>(?i)FRITZ!</beforebreak> <afterbreak>(?i)Box</afterbreak> </rule> <rule break="no"><!-- https://de.wikipedia.org/wiki/VW_ID.3 --> <beforebreak>ID.</beforebreak> -<afterbreak>3|Buzz|Crozz</afterbreak> +<afterbreak>3|4|Buzz|Crozz</afterbreak> </rule> <rule break="no"><!-- Ph.D. (see rule PH_D) --> <beforebreak>\bP[Hh]\.?[\s\u00A0]?[Dd]\.[\s\u00A0]</beforebreak> <afterbreak></afterbreak> </rule> @@ -1548,11 +1552,11 @@ <rule break="no"><!-- Subdomains without "www." (e.g. foo.MyDomain.com)--> <beforebreak>\b[A-Za-z0-9\-]+\.</beforebreak> <afterbreak>[A-Za-z0-9\-]+\.(com|net|org|info|de|es|edu|co|eu|nl|io|cn|uk|gov|biz|ca|tk|ru|br|jp|pl)(\.|\b)</afterbreak> </rule> <rule break="no"> -<beforebreak>\b(Drs|Art|Afr|Am|Ar|Br|Cie|Comp|Dhr|([Pp]rof\.)?[Dd]r|Em|Fa|Kon|Bros)\.\s</beforebreak> +<beforebreak>\b(Drs|Art|Afr|Am|Ar|Br|Cie|Comp|Dhr|([Pp]rof\.)?[Dd]r|Em|Fa|Kon|Bros|Stb)\.\s</beforebreak> <afterbreak></afterbreak> </rule> <rule break="no"> <beforebreak>\b(Mej|Mevr|Mgr|Mw|Ndl|Ned|Nl|No|Prof|Secr|Chr|Jac)\.\s</beforebreak> <afterbreak></afterbreak> @@ -1568,10 +1572,14 @@ <rule break="no"> <beforebreak>\b(al|ald|alg|amb|ambt|anat|antrop|apoth)\.\s</beforebreak> <afterbreak></afterbreak> </rule> <rule break="no"> +<beforebreak>\b(alc|bro|opm|acc)\.\s</beforebreak> +<afterbreak></afterbreak> +</rule> +<rule break="no"> <beforebreak>\b(arch|archeol|art|bc|betr|bez|bibl|bijl|bijv)\.\s</beforebreak> <afterbreak></afterbreak> </rule> <rule break="no"> <beforebreak>\b(bijz|blz|bw|ca|cat|centr|cf|cfr|cmpl)\.\s</beforebreak> @@ -4640,11 +4648,11 @@ <rule break="no"> <beforebreak>\b([Cc]ap|[Aa]rts?|pp|[Vv]ol)\.[\s\u00A0]</beforebreak> <afterbreak>[XIV\d]+\b</afterbreak> </rule> <rule break="no"> -<beforebreak>\b([Ee]ds?|[Cc]oords?|\d+(r|n|t|è|é|ns|es)|masc|fem|sing|pl|adj|adv|g|kg|m|km|cm|ha|u|h|hrs|s|ss|alt|cant|cast|cert|com|dir|gr|nom|parc|pres|set|Sr|Jr|Admón|Adm|Inc|Co|Hnos|Vda|[VU]d[s]?)\.[\p{Pe}\p{Pf}\p{Pd}"']*[\s\u00A0]</beforebreak> +<beforebreak>\b([Ee]ds?|[Cc]oords?|\d+(r|n|t|è|é|a|rs|ns|es)|masc|fem|sing|pl|adj|adv|g|kg|m|km|cm|ha|u|h|hrs|s|ss|alt|cant|cast|cert|com|dir|gr|nom|parc|pres|set|Sr|Jr|Admón|Adm|Inc|Co|Hnos|Vda|[VU]d[s]?)\.[\p{Pe}\p{Pf}\p{Pd}"']*[\s\u00A0]</beforebreak> <afterbreak>[\-¡¿«»"'\u2018\u201C\p{Ps}\u2012\u2013\u2014\u2015\u2053]*\p{Ll}</afterbreak> </rule> <!-- Any word in acronyms like U.S.A.F or F. B. I. or C. or c.s.p. or p. e. --> <rule break="no"> <beforebreak>\b(\p{L}\.)+[\p{Pe}\p{Pf}\p{Pd}"']*[\s\u00A0]</beforebreak> @@ -4711,10 +4719,14 @@ <languagerule languagerulename="Spanish"> <rule break="no"> <beforebreak>Yahoo![\s\u00A0]</beforebreak> <afterbreak>\p{Ll}</afterbreak> </rule> +<rule break="no"> +<beforebreak>40dB.[\s\u00A0]</beforebreak> +<afterbreak>\p{Ll}</afterbreak> +</rule> <rule break="yes"> <beforebreak>\.\[\d+\][\s\u00A0]</beforebreak> <afterbreak></afterbreak> </rule> <!-- initials: A. C. Jones. Problem: [...] de Alfons I. Él era [...] --> @@ -4872,11 +4884,11 @@ <beforebreak>(?i)FRITZ!</beforebreak> <afterbreak>(?i)Box</afterbreak> </rule> <rule break="no"><!-- https://de.wikipedia.org/wiki/VW_ID.3 --> <beforebreak>ID.</beforebreak> -<afterbreak>3|Buzz|Crozz</afterbreak> +<afterbreak>3|4|Buzz|Crozz</afterbreak> </rule> <rule break="no"> <beforebreak>[1-3]\.[\u00A0\s]</beforebreak> <afterbreak>Liga|Bundesliga|Fußball(-B|b)undesliga</afterbreak> </rule> @@ -4976,11 +4988,11 @@ <beforebreak>\b(spp?)\.[\u00A0\s]</beforebreak> <afterbreak></afterbreak> </rule> <!-- German abbreviations --> <rule break="no"> -<beforebreak>\b(ggü|Mag|mtl|versch|d|Übers|usw|Bzw|bzw|Ab[hkst]|abzgl|[Ee]inschl|[Vv]mtl|bezgl|Abzw|[Vv]sl|ahd|Akk|aktual|allg|alltagsspr|altdt|alttest|amerikan|Anh|Ank|Anm|Art|autom|Auftragsnr|Az|Bat|bayr|Bde?|bearb|Bed|Bem|bes|bez|Bez|Bhf|bspw|btto|bw|Dtl|Dez)\.[\u00A0\s]</beforebreak> +<beforebreak>\b(ggü|Mag|mtl|versch|d|Übers|usw|Bzw|bzw|Ab[hkst]|abzgl|[Ee]inschl|[Vv]mtl|Ev|bezgl|Abzw|[Vv]sl|ahd|Akk|aktual|allg|alltagsspr|altdt|alttest|amerikan|Anh|Ank|Anm|Art|autom|Auftragsnr|Az|Bat|bayr|Bde?|bearb|Bed|Bem|bes|bez|Bez|Bhf|bspw|btto|bw|Dtl|Dez)\.[\u00A0\s]</beforebreak> <afterbreak></afterbreak> </rule> <rule break="no"> <beforebreak>\b(cts?|Ca|ca|chem|chin|Chr|cresc|dat|Dat|desgl|ders|dgl|Dipl|Dir?|Doz?|durchg|durchges|Dr|dt|ebd|Ed|eigtl|Eigtl|eigl|Eigl|akt|Engl|engl|Erg|al|et[cw]|Etw|ev(tl)?|Evtl|Evt|evt|exkl|Expl|Exz)\.[\u00A0\s]</beforebreak> <afterbreak></afterbreak> @@ -5397,11 +5409,11 @@ <beforebreak>(?i)FRITZ!</beforebreak> <afterbreak>(?i)Box</afterbreak> </rule> <rule break="no"><!-- https://de.wikipedia.org/wiki/VW_ID.3 --> <beforebreak>ID.</beforebreak> -<afterbreak>3|Buzz|Crozz</afterbreak> +<afterbreak>3|4|Buzz|Crozz</afterbreak> </rule> <rule break="no"><!-- Ph.D. (see rule PH_D) --> <beforebreak>\bP[Hh]\.?[\s\u00A0]?[Dd]\.[\s\u00A0]</beforebreak> <afterbreak></afterbreak> </rule> @@ -5593,12 +5605,12 @@ <rule break="no"> <!-- no break only for дол. США --> <beforebreak>\bдол\.[\h\v]*</beforebreak> <afterbreak>США</afterbreak> </rule> -<!-- п. 10 від 11.10.1933 --> +<!-- п. 10 від 11.10.1933, д. Василь --> <rule break="no"> -<beforebreak>(?&lt;!т\.[\h\v]?)\bп\.[\h\v]*</beforebreak> +<beforebreak>(?&lt;!т\.[\h\v]?)\b[пд]\.[\h\v]*</beforebreak> <afterbreak></afterbreak> </rule> <!-- усталені скорочення, що збігаються з нескороченими словами --> <rule break="no"> <beforebreak>\b(див)\.[\h\v]</beforebreak>