=begin
  Copyright (C) 2005 Jeff Rose
  Copyright (C) 2005 Sam Roberts

  This library is free software; you can redistribute it and/or modify it
  under the same terms as the ruby language itself, see the file COPYING for
  details.
=end

require 'date'
require 'uri'

module Icalendar
  # Parses iCalendar text (from a String-like or IO-like source) into
  # component objects.  Folded content lines are unfolded first, then each
  # content line is split into name / params / value and either opens a
  # nested component, closes the current one, or is stored as a property.
  class Parser < Icalendar::Base
    # 1*(ALPHA / DIGIT / "=")
    NAME = '[-a-z0-9]+'

    # Quoted string: a double quote, any run of characters other than a
    # double quote, and a closing double quote.
    QSTR = '"[^"]*"'

    # Parameter text: any run of characters other than '"', ';', ':' and ','.
    PTEXT = '[^";:,]*'

    # param-value = ptext / quoted-string
    PVALUE = "#{PTEXT}|#{QSTR}"

    # Contentline
    LINE = "(#{NAME})([^:]*)\:(.*)"

    # param = name "=" param-value *("," param-value)
    # The "=" is optional in this pattern; parse_line treats a parameter
    # written without it as a v2.1 style bare type/encoding value.
    PARAM = ";(#{NAME})(=?)((?:#{PVALUE})(?:,#{PVALUE})*)"

    # date = date-fullyear ["-"] date-month ["-"] date-mday
    # date-fullyear = 4 DIGIT
    # date-month = 2 DIGIT
    # date-mday = 2 DIGIT
    DATE = '(\d\d\d\d)-?(\d\d)-?(\d\d)'

    # time = time-hour [":"] time-minute [":"] time-second [time-secfrac] [time-zone]
    # time-hour = 2 DIGIT
    # time-minute = 2 DIGIT
    # time-second = 2 DIGIT
    # time-secfrac = "," 1*DIGIT
    # time-zone = "Z" / time-numzone
    # time-numzone = sign time-hour [":"] time-minute
    TIME = '(\d\d):?(\d\d):?(\d\d)(\.\d+)?(Z|[-+]\d\d:?\d\d)?'

    # Precompiled forms of the patterns above, so parse_line does not rebuild
    # an interpolated regexp for every content line.
    LINE_RE   = Regexp.new(LINE, Regexp::IGNORECASE)
    PARAM_RE  = Regexp.new(PARAM, Regexp::IGNORECASE)
    PVALUE_RE = Regexp.new("(#{PVALUE})")

    # Create a parser for +src+.
    #
    # src:: an object responding to +split+ (e.g. a String), which is unfolded
    #       completely up front, or an object responding to +gets+ (e.g. an
    #       IO), which is read and unfolded lazily to avoid loading large
    #       inputs into memory.
    #
    # Raises ArgumentError when +src+ supports neither protocol.
    def initialize(src)
      @@logger.debug("New Calendar Parser")

      # Setup the parser method hash table
      setup_parsers

      # Exactly one of these is set below; next_line dispatches on @file.
      # This state is per instance, so parsers built from different kinds of
      # source can be used side by side.
      @lines = nil
      @file = nil

      if src.respond_to?(:split)
        # Just do the unfolding work in one shot if it's a whole string.
        @lines = unfold_string(src)
        @index = 0
      elsif src.respond_to?(:gets)
        # Prime a one-line look-ahead; unfolding happens in next_line.
        @file = src
        @prev_line = src.gets
        @prev_line.chomp! unless @prev_line.nil?
      else
        raise ArgumentError, "CalendarParser.new cannot be called with a #{src.class} type!"
      end
    end

    # Return the next unfolded content line, or nil when the input is
    # exhausted.
    def next_line
      @file ? next_line_from_io : next_line_from_lines
    end

    # Parse the calendar into an object representation.
    #
    # Returns an array with one component (as built by parse_component) per
    # BEGIN:VCALENDAR line found; lines outside a calendar are ignored.
    # Raises RuntimeError (via parse_line) on a line that is not a valid
    # content line.
    def parse
      calendars = []

      # Outer loop for Calendar objects
      while (line = next_line)
        fields = parse_line(line)

        # Just iterate through until we find the beginning of a calendar object
        if fields[:name] == "BEGIN" && fields[:value] == "VCALENDAR"
          calendars << parse_component
        end
      end

      calendars
    end

    private

    # Unfold a whole string into an array of content lines: continuation
    # lines (leading space or tab) are appended to the previous line minus
    # their first character, and empty lines are dropped.
    def unfold_string(src)
      unfolded = []

      src.split(/\r?\n/).each do |line|
        if line =~ /^[ \t]/
          # A continuation with nothing before it (first line of the input)
          # is kept as a line of its own instead of raising on nil.
          previous = unfolded.pop || ""
          unfolded << previous + line[1..-1]
        elsif !line.empty?
          unfolded << line
        end
      end

      unfolded
    end

    # next_line for a pre-unfolded string source.
    def next_line_from_lines
      return nil if @index == @lines.size

      line = @lines[@index]
      @index += 1
      line
    end

    # next_line for an IO source: start from the look-ahead line and keep
    # reading until a line that is neither a continuation nor empty is seen;
    # that line becomes the new look-ahead.
    def next_line_from_io
      line = @prev_line
      return nil if line.nil?

      loop do
        following = @file.gets
        following.chomp! unless following.nil?

        if following.nil?
          # End of input: the next call will return nil.
          @prev_line = nil
          break
        elsif following =~ /^[ \t]/
          # Continuation line: append it without its leading whitespace char.
          line << following[1..-1]
        elsif !following.empty?
          @prev_line = following
          break
        end
        # Empty lines are dropped from the input.
      end

      line
    end

    # Parse the body of a single component up to its END line.
    #
    # component:: the object to populate; defaults to a new Calendar, i.e. a
    #             VCALENDAR consisting of properties and nested components
    #             such as VEVENT, VTODO, VJOURNAL, VFREEBUSY and VTIMEZONE.
    #
    # Returns +component+.  Parsing also stops when the input runs out.
    def parse_component(component = Calendar.new)
      while (line = next_line)
        fields = parse_line(line)
        name = fields[:name]

        # Although properties are supposed to come before components, we
        # should be able to handle them in any order...
        if name == "END"
          break
        elsif name == "BEGIN"
          parse_subcomponent(component, fields[:value])
        else
          # If it's not a component then it should be a property.  Just set
          # the properties so that the parser can still parse invalid files...
          set_property(component, name, fields)
        end
      end

      component
    end

    # Parse the nested component announced by a BEGIN:+kind+ line and attach
    # it to +component+.
    def parse_subcomponent(component, kind)
      case kind
      when "VEVENT"
        component.events << parse_component(Event.new)
      when "VTODO"
        component.todos << parse_component(Todo.new)
      when "VJOURNAL"
        component.journals << parse_component(Journal.new)
      when "VFREEBUSY"
        component.freebusys << parse_component(Freebusy.new)
      when "VTIMEZONE"
        component.timezones << parse_component(Timezone.new)
      when "VALARM"
        component.alarms << parse_component(Alarm.new)
      else
        # Unrecognised sub-component (e.g. STANDARD / DAYLIGHT inside a
        # VTIMEZONE).  Its properties are still applied to the enclosing
        # component, but its own END line is consumed here so that it cannot
        # terminate the enclosing component prematurely.
        parse_component(component)
      end
    end

    # Store one parsed property line on +component+, converting the value
    # with the registered per-property parser when there is one.
    def set_property(component, name, fields)
      @@logger.debug("Setting #{name} => #{fields[:value]}")

      # Lookup the property name to see if we have a string to
      # object parser for this property type.
      key = name.upcase
      if @parsers.has_key?(key)
        val = @parsers[key].call(name, fields[:params], fields[:value])
      else
        val = fields[:value]
      end

      if component.multi_property?(key) && component.properties
        val = [val]
        # NOTE(review): non-empty params are stored as the bare params hash
        # while empty ones become [nil]; combining either with += below looks
        # like it would fail for repeated properties -- confirm against the
        # component classes before changing.
        params = fields[:params].empty? ? [nil] : fields[:params]

        if component.properties.has_key?(name)
          component.properties[name] += val
          component.property_params[name] += params
        else
          component.properties[name] = val
          component.property_params[name] = params
        end
      else
        component.properties[name] = val
        component.property_params[name] = fields[:params] unless fields[:params].empty?
      end
    end

    # Split one unfolded content line into its parts.
    #
    # Returns a hash with :name (upcased String), :params (Hash of param name
    # to Array of value Strings) and :value (String).
    # Raises RuntimeError when the line does not match LINE.
    def parse_line(line)
      match = LINE_RE.match(line) # Case insensitive match for a valid line
      raise "Invalid line in calendar string!" unless match

      name = match[1].upcase # Upcased for easier comparison...
      paramslist = match[2]
      value = match[3]

      # Collect the params, if any.
      params = {}
      if paramslist.size > 1
        # v3.0 and v2.1 params
        paramslist.scan(PARAM_RE) do |pname, equals, pvals|
          # v2.1 pvals have no '=' sign, figure out what kind of param it
          # is (either it's a known encoding, or we treat it as a 'type'
          # param).  The bare token itself becomes the value.
          if equals == ""
            pvals = pname
            pname = case pname
                    when /quoted-printable/i, /base64/i then 'encoding'
                    else 'type'
                    end
          end

          params[pname] ||= []

          # PVALUE can match the empty string, so skip empty pieces.
          pvals.scan(PVALUE_RE) do |(pval)|
            params[pname] << pval unless pval.empty?
          end
        end
      end

      {:name => name, :params => params, :value => value}
    end

    ## Following is a collection of parsing functions for various
    ## icalendar property value data types...  First we setup
    ## a hash with property names pointing to methods...
    def setup_parsers
      @parsers = {}

      handlers = {
        # Integer properties
        :parse_integer => %w[PERCENT-COMPLETE PRIORITY REPEAT SEQUENCE],
        # Dates and Times
        :parse_datetime => %w[COMPLETED DTEND DUE DTSTART RECURRENCE-ID
                              EXDATE RDATE CREATED DTSTAMP LAST-MODIFIED],
        # URI's.  ATTACH is a URI by default, and if it's not a valid URI
        # it will be returned as a string which works for binary data,
        # the other possible type.
        :parse_uri => %w[TZURL ATTENDEE ORGANIZER URL ATTACH],
        # GEO
        :parse_geo => %w[GEO]
      }

      handlers.each do |handler, property_names|
        parser = method(handler)
        property_names.each { |property_name| @parsers[property_name] = parser }
      end
    end

    # Booleans
    # Returns false only for the (case-insensitive) value "FALSE".
    # NOTE: It appears that although this is a valid data type
    # there aren't any properties that use it...  Maybe get
    # rid of this in the future.
    def parse_boolean(name, params, value)
      value.upcase != "FALSE"
    end

    # Dates, Date-Times & Times
    # NOTE: invalid dates & times will be returned as strings...
    def parse_datetime(name, params, value)
      DateTime.parse(value)
    rescue StandardError
      # Only ordinary errors are treated as "not a date"; signals and exit
      # requests are allowed to propagate.
      value
    end

    # Durations
    # TODO: Need to figure out the best way to represent durations
    #       so just returning string for now.
    def parse_duration(name, params, value)
      value
    end

    # Floats
    # NOTE: returns 0.0 if it can't parse the value
    def parse_float(name, params, value)
      value.to_f
    end

    # Integers
    # NOTE: returns 0 if it can't parse the value
    def parse_integer(name, params, value)
      value.to_i
    end

    # Periods
    # TODO: Got to figure out how to represent periods also...
    def parse_period(name, params, value)
      value
    end

    # Calendar Address's & URI's
    # NOTE: invalid URI's will be returned as strings...
    def parse_uri(name, params, value)
      URI.parse(value)
    rescue StandardError
      # Only ordinary errors are treated as "not a URI"; signals and exit
      # requests are allowed to propagate.
      value
    end

    # Geographical location (GEO)
    # NOTE: returns an array with two floats, in the order they appear in
    #       the ';'-separated value; if the value does not have exactly two
    #       parts the original string is returned.
    def parse_geo(name, params, value)
      strloc = value.split(';')
      return value if strloc.size != 2

      [strloc[0].to_f, strloc[1].to_f]
    end
  end
end