# -*- encoding : utf-8 -*- require_dependency 'card/chunk' # you could make the case that Card::Chunk should be Card::Content::Chunk, which would make the above unnecessary (but create noise elsewhere) class Card class Content < SimpleDelegator attr_reader :revision, :format, :chunks def initialize content, format_or_card @format = if Card===format_or_card Format.new format_or_card, :format=>nil else format_or_card end unless Array === content content = parse_content content end super content end def card format.card end def to_s case __getobj__ when Array; map(&:to_s)*'' when String; __getobj__ when NilClass; raise "Nil Card::Content" else __getobj__.to_s end end def inspect "<#{__getobj__.class}:#{card}:#{self}>" end def each_chunk return enum_for(:each_chunk) unless block_given? case __getobj__ when Hash; each { |k,v| yield v if Chunk::Abstract===v } when Array; each { |e| yield e if Chunk::Abstract===e } when String; # strings are all parsed in self, so no chunks in a String else Rails.logger.warn "error self is unrecognized type #{self.class} #{self.__getobj__.class}" end end def find_chunks chunk_type each_chunk.select { |chunk| chunk.kind_of?(chunk_type) } end def process_content_object &block each_chunk { |chunk| chunk.process_chunk &block } self end def parse_content content @chunks = [] if String===content position = last_position = 0 prefix_regexp = Chunk.get_prefix_regexp card.chunk_list interval_string = '' while prefix_match = content[position..-1].match( prefix_regexp ) prefix = prefix_match[0] # prefix of matched chunk chunk_start = prefix_match.begin(0) + position # content index of beginning of chunk if prefix_match.begin(0) > 0 # if matched chunk is not beginning of test string interval_string += content[ position..chunk_start-1 ] # hold onto the non-chunk part of the string end chunk_class = Chunk.find_class_by_prefix prefix # get the chunk class from the prefix match, offset = chunk_class.full_match content[chunk_start..-1], prefix # see whether the full chunk actually matches (as opposed to bogus prefix) context_ok = chunk_class.context_ok? content, chunk_start # make sure there aren't contextual reasons for ignoring this chunk position = chunk_start # move scanning position up to beginning of chunk if match # we have a chunk match position += ( match.end(0) - offset.to_i ) # move scanning position up to end of chunk if context_ok # @chunks << interval_string if interval_string.size > 0 # add the nonchunk string to the chunk list @chunks << chunk_class.new( match, self ) # add the chunk to the chunk list interval_string = '' # reset interval string for next go-round last_position = position # note that the end of the chunk was the last place where a chunk was found (so far) end else position += 1 # no match. look at the next character end if !match || !context_ok interval_string += content[chunk_start..position-1] # moving beyond the alleged chunk. append failed string to "nonchunk" string end end end if chunks.any? if last_position < content.size remainder = content[ last_position..-1] # handle any leftover nonchunk string at the end of content @chunks << remainder end chunks else content end end ALLOWED_TAGS = {} %w{ br i b pre cite caption strong em ins sup sub del ol hr ul li p div h1 h2 h3 h4 h5 h6 span table tr td th tbody thead tfoot }.each { |tag| ALLOWED_TAGS[tag] = [] } # allowed attributes ALLOWED_TAGS.merge!( 'a' => ['href', 'title', 'target' ], 'img' => ['src', 'alt', 'title'], 'code' => ['lang'], 'blockquote' => ['cite'] ) if Wagn.config.allow_inline_styles ALLOWED_TAGS['table'] += %w[ cellpadding align border cellspacing ] end ALLOWED_TAGS.each_key {|k| ALLOWED_TAGS[k] << 'class' ALLOWED_TAGS[k] << 'style' if Wagn.config.allow_inline_styles ALLOWED_TAGS[k] } ALLOWED_TAGS ATTR_VALUE_RE = [ /(?<=^')[^']+(?=')/, /(?<=^")[^"]+(?=")/, /\S+/ ] class << self ## Method that cleans the String of HTML tags ## and attributes outside of the allowed list. # this has been hacked for wagn to allow classes if # the class begins with "w-" def clean!( string, tags = ALLOWED_TAGS ) string.gsub( /<(\/*)(\w+)([^>]*)>/ ) do raw = $~ tag = raw[2].downcase if attrs = tags[tag] "<#{raw[1]}#{ attrs.inject([tag]) do |pcs, attr| q='"' rest_value=nil if raw[3] =~ /\b#{attr}\s*=\s*(?=(.))/i rest_value = $' idx = %w{' "}.index($1) and q = $1 re = ATTR_VALUE_RE[ idx || 2 ] if match = rest_value.match(re) rest_value = match[0] if attr == 'class' rest_value = rest_value.split(/\s+/).find_all {|s| s=~/^w-/i}*' ' end end end pcs << "#{attr}=#{q}#{rest_value}#{q}" unless rest_value.blank? pcs end * ' ' }>" else " " end end.gsub(/<\!--.*?-->/, '') end def truncatewords_with_closing_tags(input, words = 25, truncate_string = "...") if input.nil? then return end wordlist = input.to_s.split l = words.to_i - 1 l = 0 if l < 0 wordstring = wordlist.length > l ? wordlist[0..l].join(" ") : input.to_s # nuke partial tags at end of snippet wordstring.gsub!(/(<[^\>]+)$/,'') tags = [] # match tags with or without self closing (ie. ) wordstring.scan(/\<([^\>\s\/]+)[^\>]*?\>/).each { |t| tags.unshift(t[0]) } # match tags with self closing and mark them as closed wordstring.scan(/\<([^\>\s\/]+)[^\>]*?\/\>/).each { |t| if !(x=tags.index(t[0])).nil? then tags.slice!(x) end } # match close tags wordstring.scan(/\<\/([^\>\s\/]+)[^\>]*?\>/).each { |t| if !(x=tags.rindex(t[0])).nil? then tags.slice!(x) end } tags.each {|t| wordstring += "" } wordstring +='...' if wordlist.length > l # wordstring += '...' if wordlist.length > l wordstring.gsub! /<[\/]?br[\s\/]*>/, ' ' ## Also a hack -- get rid of
's -- they make line view ugly. wordstring.gsub! /<[\/]?p[^>]*>/, ' ' ## Also a hack -- get rid of
's -- they make line view ugly. wordstring end end end end