# -*- encoding : utf-8 -*-
# TODO: move Card::Chunk to Card::Content::Chunk...
require_dependency 'card/chunk'

class Card
  # Wraps a piece of card content. The delegate object is either the raw
  # content (when no chunks were found or the content is not a String) or an
  # Array mixing plain Strings and Chunk objects produced by #parse_content.
  class Content < SimpleDelegator
    attr_reader :revision, :format, :chunks, :opts

    # content        - a String to be parsed, an Array (used as is, without
    #                  parsing), or any other object (stored as is).
    # format_or_card - a format object, or a Card (in which case a new Format
    #                  is built for that card).
    # opts           - options Hash; :chunk_list is read by #chunk_list.
    #                  nil is treated as an empty Hash.
    def initialize content, format_or_card, opts={}
      @format =
        if format_or_card.is_a?(Card)
          Format.new format_or_card, format: nil
        else
          format_or_card
        end
      @opts = opts || {}

      content = parse_content content unless Array === content
      super content
    end

    # The card of the format this content is rendered with.
    def card
      format.card
    end

    # The chunk list used for parsing: the :chunk_list option if given,
    # otherwise the format's chunk list.
    def chunk_list
      @opts[:chunk_list] || @format.chunk_list
    end

    # String form of the content; an Array of strings/chunks is concatenated
    # and nil content yields an empty String.
    def to_s
      case __getobj__
      when Array    then map(&:to_s) * ''
      when String   then __getobj__
      when NilClass then '' # raise "Nil Card::Content"
      else               __getobj__.to_s
      end
    end

    def inspect
      "<#{__getobj__.class}:#{card}:#{self}>"
    end

    # Yields every Chunk::Abstract found in the delegate object.
    # Returns an Enumerator when called without a block.
    def each_chunk
      return enum_for(:each_chunk) unless block_given?

      case __getobj__
      when Hash
        each_value { |v| yield v if v.is_a?(Chunk::Abstract) }
      when Array
        each { |e| yield e if e.is_a?(Chunk::Abstract) }
      when String
        # noop. strings are parsed in self, so no chunks in a String
      else
        # A single message argument: the logger's #warn does not take a
        # separate leading argument, and no local `msg` exists here.
        Rails.logger.warn 'error self is unrecognized type' \
                          " #{self.class} #{__getobj__.class}"
      end
    end

    # All chunks that are instances of chunk_type.
    def find_chunks chunk_type
      each_chunk.select { |chunk| chunk.is_a?(chunk_type) }
    end

    # Calls process_chunk (forwarding the given block) on every chunk.
    # Returns self.
    def process_content_object &block
      each_chunk { |chunk| chunk.process_chunk(&block) }
      self
    end

    # Splits a String into plain strings and chunk objects.
    #
    # Resets @chunks. Returns the chunk Array when at least one chunk was
    # found; otherwise returns content unchanged (this includes every
    # non-String content).
    def parse_content content
      @chunks = []

      if content.is_a? String
        position = last_position = 0
        prefix_regexp = Chunk.get_prefix_regexp chunk_list
        interval_string = ''

        while (prefix_match = content[position..-1].match(prefix_regexp))
          # prefix of matched chunk
          prefix = prefix_match[0]
          # content index of beginning of chunk
          chunk_start = prefix_match.begin(0) + position

          if prefix_match.begin(0) > 0
            # matched chunk is not at the beginning of the tested string:
            # hold onto the non-chunk part of the string
            interval_string += content[position..chunk_start - 1]
          end

          # get the chunk class from the prefix
          chunk_class = Chunk.find_class_by_prefix prefix
          # see whether the full chunk actually matches
          # (as opposed to bogus prefix)
          match, offset = chunk_class.full_match content[chunk_start..-1],
                                                 prefix
          # make sure there aren't contextual reasons for ignoring this chunk
          context_ok = chunk_class.context_ok? content, chunk_start
          # move scanning position up to beginning of chunk
          position = chunk_start

          if match
            # we have a chunk match:
            # move scanning position up to end of chunk
            position += (match.end(0) - offset.to_i)
            if context_ok
              # add the nonchunk string to the chunk list
              @chunks << interval_string if interval_string.size > 0
              # add the chunk to the chunk list
              @chunks << chunk_class.new(match, self)
              # reset interval string for next go-round
              interval_string = ''
              # note that the end of the chunk was the last place where a
              # chunk was found (so far)
              last_position = position
            end
          else
            # no match. look at the next character
            position += 1
          end

          if !match || !context_ok
            # moving beyond the alleged chunk.
            # append failed string to "nonchunk" string
            interval_string += content[chunk_start..position - 1]
          end
        end
      end

      if chunks.any?
        if last_position < content.size
          # handle any leftover nonchunk string at the end of content
          remainder = content[last_position..-1]
          @chunks << remainder
        end
        chunks
      else
        content
      end
    end

    # Maps each allowed HTML tag name to the list of attributes allowed on it.
    ALLOWED_TAGS = {}
    %w{
      br i b pre cite caption strong em ins sup sub del ol hr ul li p
      div h1 h2 h3 h4 h5 h6 span table tr td th tbody thead tfoot
    }.each { |tag| ALLOWED_TAGS[tag] = [] }

    # allowed attributes
    ALLOWED_TAGS.merge!(
      'a' => ['href', 'title', 'target'],
      'img' => ['src', 'alt', 'title'],
      'code' => ['lang'],
      'blockquote' => ['cite']
    )

    if Card.config.allow_inline_styles
      ALLOWED_TAGS['table'] += %w[ cellpadding align border cellspacing ]
    end

    # every allowed tag may carry a class (and a style, when configured)
    ALLOWED_TAGS.each_key do |k|
      ALLOWED_TAGS[k] << 'class'
      ALLOWED_TAGS[k] << 'style' if Card.config.allow_inline_styles
    end

    # Patterns extracting an attribute value that is single-quoted,
    # double-quoted or unquoted (in that order).
    ATTR_VALUE_RE = [/(?<=^')[^']+(?=')/, /(?<=^")[^"]+(?=")/, /\S+/]

    class << self
      ## Method that cleans the String of HTML tags
      ## and attributes outside of the allowed list.

      # this has been hacked for card to allow classes if
      # the class begins with "w-"
      #
      # Returns a new String: disallowed tags are replaced by a space,
      # allowed tags are rebuilt with only their allowed attributes, and
      # HTML comments are removed.
      def clean!(string, tags=ALLOWED_TAGS)
        string.gsub(/<(\/*)(\w+)([^>]*)>/) do
          raw = $~
          tag = raw[2].downcase
          if (attrs = tags[tag])
            html_attribs =
              attrs.inject([tag]) do |pcs, attr|
                q = '"'
                rest_value = nil
                if raw[3] =~ /\b#{attr}\s*=\s*(?=(.))/i
                  rest_value = $'
                  # keep the original quote character when there is one
                  (idx = %w{' "}.index($1)) && (q = $1)
                  re = ATTR_VALUE_RE[idx || 2]
                  if (match = rest_value.match(re))
                    rest_value = match[0]
                    if attr == 'class'
                      # only class names beginning with "w-" survive
                      rest_value = rest_value.split(/\s+/).select do |s|
                        s =~ /^w-/i
                      end * ' '
                    end
                  end
                end
                pcs << "#{attr}=#{q}#{rest_value}#{q}" unless rest_value.blank?
                pcs
              end * ' '
            "<#{raw[1]}#{html_attribs}>"
          else
            ' '
          end
        end.gsub(/<\!--.*?-->/, '')
      end

      if Card.config.space_last_in_multispace
        # Variant of clean! that additionally moves a leading space behind
        # the run of characters that follows it.
        # NOTE(review): the repeated character inside the group may originally
        # have been a non-breaking space / "&nbsp;" entity -- confirm.
        def clean_with_space_last! string, tags=ALLOWED_TAGS
          cwo = clean_without_space_last!(string, tags)
          cwo.gsub(/(?:^|\b) ((?: )+)/, '\1 ')
        end
        alias_method_chain :clean!, :space_last
      end

      # Truncates input to a number of whitespace-separated words and closes
      # any HTML tags left open by the truncation.
      #
      # input           - anything responding to to_s; nil returns nil.
      # words           - maximum number of words to keep (at least one word
      #                   is always kept).
      # truncate_string - marker appended when the text was cut off.
      #
      # Returns a new String; the input is never modified. <br> and <p> tags
      # are replaced by spaces.
      def truncatewords_with_closing_tags input, words=25,
                                          truncate_string='...'
        return if input.nil?

        wordlist = input.to_s.split
        l = words.to_i - 1
        l = 0 if l < 0
        wordstring =
          wordlist.length > l ? wordlist[0..l].join(' ') : input.to_s

        # nuke partial tags at end of snippet
        # (non-destructive gsub: wordstring may be the caller's own String)
        wordstring = wordstring.gsub(/(<[^\>]+)$/, '')

        tags = []

        # match tags with or without self closing (ie. <br />)
        wordstring.scan(/\<([^\>\s\/]+)[^\>]*?\>/).each do |t|
          tags.unshift(t[0])
        end

        # match tags with self closing and mark them as closed
        wordstring.scan(/\<([^\>\s\/]+)[^\>]*?\/\>/).each do |t|
          x = tags.index(t[0])
          tags.slice!(x) if x
        end

        # match close tags
        wordstring.scan(/\<\/([^\>\s\/]+)[^\>]*?\>/).each do |t|
          x = tags.rindex(t[0])
          tags.slice!(x) if x
        end

        # close whatever is still open
        wordstring += tags.map { |t| "</#{t}>" }.join

        wordstring += truncate_string.to_s if wordlist.length > l

        # Also a hack -- get rid of <br>'s -- they make line view ugly.
        wordstring = wordstring.gsub(/<[\/]?br[\s\/]*>/, ' ')
        # Also a hack -- get rid of <p>'s -- they make line view ugly.
        wordstring.gsub(/<[\/]?p[^>]*>/, ' ')
      end
    end
  end
end