# Recognizes single lines that look like the header of an embedded email:
# "On <date>, <someone> wrote:" style attributions (in several languages),
# "Posted by ..." lines and "Forwarded/Original message" separators.
#
# Every pattern below is collected into EMBEDDED_REGEXES; use
# EmbeddedEmailMatcher.match?(line) to test a line against all of them.
class EmbeddedEmailMatcher
  # "<On> <date> <someone> <wrote>" headers. The negative lookahead stops the
  # lazy middle section from running across a second opening/closing keyword.
  #
  # On Wed, Sep 25, 2013, at 03:57 PM, jorge_castro wrote:
  # On Thursday, June 27, 2013, knwang via Discourse Meta wrote:
  # On Wed, 2015-12-02 at 13:58 +0000, Tom Newsom wrote:
  # On 10/12/15 12:30, Jeff Atwood wrote:
  # ---- On Tue, 22 Dec 2015 14:17:36 +0530 Sam Saffron<info@discourse.org> wrote ----
  # Op 24 aug. 2013 om 16:48 heeft ven88 via Discourse Meta het volgende geschreven:
  # Le 4 janv. 2016 19:03, "Neil Lalonde" a écrit :
  # Dnia 14 lip 2015 o godz. 00:25 Michael Downey napisał(a):
  # Em seg, 27 de jul de 2015 17:13, Neil Lalonde escreveu:
  # El jueves, 21 de noviembre de 2013, codinghorror escribió:
  # At 6/16/2016 08:32 PM, you wrote:
  #
  # `||=` keeps an already-defined constant untouched (e.g. if the file is
  # loaded a second time).
  ON_DATE_SOMEONE_WROTE_REGEXES ||= [
    # Chinese
    /^[[:blank:]<>-]*在 ((?!\b(在|写道)\b)[\s\S])+?写道[[:blank:].:>-]*$/i,
    # Dutch
    /^[[:blank:]<>-]*Op ((?!\b(Op|het\svolgende\sgeschreven|schreef)\b)[\s\S])+?(het\svolgende\sgeschreven|schreef[^:]+)[[:blank:].:>-]*$/i,
    # English
    /^[[:blank:]<>-]*In message ((?!\b(In message|writes)\b)[\s\S])+?writes[[:blank:].:>-]*$/i,
    /^[[:blank:]<>-]*(On|At) ((?!\b(On|wrote|writes|says|said)\b)[\s\S])+?(wrote|writes|says|said)[[:blank:].:>-]*$/i,
    # French
    /^[[:blank:]<>-]*Le ((?!\b(Le|nous\sa\sdit|a\s+écrit)\b)[\s\S])+?(nous\sa\sdit|a\s+écrit)[[:blank:].:>-]*$/i,
    # German
    /^[[:blank:]<>-]*Am ((?!\b(Am|schrieben\sSie)\b)[\s\S])+?schrieben\sSie[[:blank:].:>-]*$/i,
    /^[[:blank:]<>-]*Am ((?!\b(Am|geschrieben)\b)[\s\S])+?(geschrieben|schrieb[^:]+)[[:blank:].:>-]*$/i,
    # Italian
    /^[[:blank:]<>-]*Il ((?!\b(Il|ha\sscritto)\b)[\s\S])+?ha\sscritto[[:blank:].:>-]*$/i,
    # Polish
    /^[[:blank:]<>-]*(Dnia|Dňa) ((?!\b(Dnia|Dňa|napisał)\b)[\s\S])+?napisał(\(a\))?[[:blank:].:>-]*$/i,
    # Portuguese
    /^[[:blank:]<>-]*Em ((?!\b(Em|escreveu)\b)[\s\S])+?escreveu[[:blank:].:>-]*$/i,
    # Spanish
    /^[[:blank:]<>-]*El ((?!\b(El|escribió)\b)[\s\S])+?escribió[[:blank:].:>-]*$/i,
  ].freeze

  # "<On> <date> <wrote> <someone>:" headers, built from [opening, verb] pairs.
  # Each marker is interpolated verbatim into a regex, so regex
  # metacharacters inside a marker must be escaped.
  #
  # Op 10 dec. 2015 18:35 schreef "Arpit Jalan" :
  # Am 18.09.2013 um 16:24 schrieb codinghorror :
  # Den 15. jun. 2016 kl. 20.42 skrev Jeff Atwood :
  # søn. 30. apr. 2017 kl. 00.26 skrev David Taylor :
  ON_DATE_WROTE_SOMEONE_MARKERS = [
    # Dutch
    ["Op", "schreef"],
    # German
    ["Am", "schrieb"],
    # Norwegian
    ["Den", "skrev"],
    # Norwegian (line starts with the abbreviated weekday "søn.").
    # The dot is escaped so it only matches a literal "."; inside a
    # double-quoted string a bare "\." would collapse to ".", i.e. "any char".
    ["søn\\.", "skrev"],
  ].freeze

  ON_DATE_WROTE_SOMEONE_REGEXES = ON_DATE_WROTE_SOMEONE_MARKERS.map do |on, wrote|
    /^[[:blank:]>]*#{on}\s.+\s#{wrote}\s[^:]+:/
  end.freeze

  # "<date with a 4-digit number> [<user marker>] ... <wrote>:" headers, built
  # from [user marker, verb] pairs. An empty user marker means the line only
  # needs the 4-digit number and the verb.
  #
  # суббота, 14 марта 2015 г. пользователь etewiah написал:
  DATE_SOMEONE_WROTE_MARKERS = [
    # Russian
    ["пользователь", "написал"],
    # Polish
    ["", "napisał\\(a\\)"],
    # Ukrainian
    ["", "пише"],
  ].freeze

  DATE_SOMEONE_WROTE_REGEXES = DATE_SOMEONE_WROTE_MARKERS.map do |user, wrote|
    if user.empty?
      /^.*\d{4}.*?(?:(?!#{wrote})[\s\S])*#{wrote}:/
    else
      /^.*\d{4}.*?#{user}.*?(?:(?!#{wrote})[\s\S])*#{wrote}:/
    end
  end.freeze

  # "<someone> <wrote> <on> <date>:" headers.
  #
  # Max Mustermann schrieb am Fr., 28. Apr. 2017 um 11:53 Uhr:
  #
  # `||=` keeps an already-defined constant untouched (e.g. if the file is
  # loaded a second time).
  SOMEONE_WROTE_ON_DATE_REGEXES ||= [
    # English
    /^.+\bwrote\b[[:space:]]+\bon\b.+[^:]+:/,
    # German
    /^.+\bschrieb\b[[:space:]]+\bam\b.+[^:]+:/,
  ].freeze

  # ISO-like date, GMT offset, then a name made of word characters/blanks.
  #
  # 2016-03-03 17:21 GMT+01:00 Some One
  ISO_DATE_SOMEONE_REGEX = /^[[:blank:]>]*20\d\d-\d\d-\d\d \d\d:\d\d GMT\+\d\d:\d\d [\w[:blank:]]+$/

  # A 4-digit number followed, at the end of the line, by an email address in
  # angle brackets and an optional colon.
  #
  # 2015-10-18 0:17 GMT+03:00 Matt Palmer :
  # 2013/10/2 camilohollanda
  # вт, 5 янв. 2016 г. в 23:39, Erlend Sogge Heggen :
  # ср, 1 апр. 2015, 18:29, Denis Didkovsky :
  DATE_SOMEONE_EMAIL_REGEX = /^.*\d{4}.+\s?<[^@<>]+@[^@<>.]+\.[^@<>]+>:?$/

  # "<someone> via <something> <wrote>" headers, built from the verb only.
  #
  # codinghorror via Discourse Meta wrote:
  # codinghorror via Discourse Meta schrieb:
  SOMEONE_VIA_SOMETHING_WROTE_MARKERS = [
    # English
    "wrote",
    # German
    "schrieb",
  ].freeze

  SOMEONE_VIA_SOMETHING_WROTE_REGEXES = SOMEONE_VIA_SOMETHING_WROTE_MARKERS.map do |wrote|
    /^.+ via .+ #{wrote}:?[[:blank:]]*$/
  end.freeze

  # "<someone> <email address> wrote" headers.
  #
  # Some One wrote:
  # Gavin Sinclair (gsinclair@soyabean.com.au) wrote:
  SOMEONE_EMAIL_WROTE_REGEX = /^.+\b[\w.+-]+@[\w.-]+\.\w{2,}\b.+wrote:?$/

  # Posted by mpalmer on 01/21/2016
  POSTED_BY_SOMEONE_ON_DATE_REGEX = /^[[:blank:]>]*Posted by .+ on \d{2}\/\d{2}\/\d{4}$/i

  # Forwarded / original / reply message separators.
  #
  # Begin forwarded message:
  # Reply Message
  # ----- Forwarded Message -----
  # ----- Original Message -----
  # -----Original Message-----
  # *----- Original Message -----*
  # ----- Reply message -----
  # ------------------ 原始邮件 ------------------
  FORWARDED_EMAIL_REGEXES = [
    # English
    /^[[:blank:]>]*Begin forwarded message:/i,
    /^[[:blank:]>*]*-{2,}[[:blank:]]*(Forwarded|Original|Reply) Message[[:blank:]]*-{2,}/i,
    # French
    /^[[:blank:]>]*Début du message transféré :/i,
    /^[[:blank:]>*]*-{2,}[[:blank:]]*Message transféré[[:blank:]]*-{2,}/i,
    # German
    /^[[:blank:]>*]*-{2,}[[:blank:]]*Ursprüngliche Nachricht[[:blank:]]*-{2,}/i,
    # Spanish
    /^[[:blank:]>*]*-{2,}[[:blank:]]*Mensaje original[[:blank:]]*-{2,}/i,
    # Chinese
    /^[[:blank:]>*]*-{2,}[[:blank:]]*原始邮件[[:blank:]]*-{2,}/i,
  ].freeze

  # Flat list of every pattern above, in the order they are tried.
  EMBEDDED_REGEXES = [
    ON_DATE_SOMEONE_WROTE_REGEXES,
    ON_DATE_WROTE_SOMEONE_REGEXES,
    DATE_SOMEONE_WROTE_REGEXES,
    DATE_SOMEONE_EMAIL_REGEX,
    SOMEONE_WROTE_ON_DATE_REGEXES,
    ISO_DATE_SOMEONE_REGEX,
    SOMEONE_VIA_SOMETHING_WROTE_REGEXES,
    SOMEONE_EMAIL_WROTE_REGEX,
    POSTED_BY_SOMEONE_ON_DATE_REGEX,
    FORWARDED_EMAIL_REGEXES,
  ].flatten.freeze

  # Tells whether +line+ matches any of the embedded-email header patterns.
  #
  # @param line [String, nil] the text to test
  # @return [Boolean] true if at least one pattern in EMBEDDED_REGEXES matches
  def self.match?(line)
    # Regexp#match? returns a plain boolean without building MatchData.
    EMBEDDED_REGEXES.any? { |regex| regex.match?(line) }
  end
end