lib/readability.rb in ruby-readability-0.5.4 vs lib/readability.rb in ruby-readability-0.5.5

- old
+ new

# @@ -129,9 +129,58 @@
def title
  title = @html.css("title").first
  title ? title.text : nil
end

# Look through the @html document for the article's author.
#
# Sources are tried in this order, and the first non-blank value wins:
#   1. <meta name="dc.creator" content="...">
#   2. an element whose class contains "fn" nested inside an element whose
#      class contains "vcard"
#   3. <a rel="author">
#   4. the element with id="author"
# Precedence Information here on the wiki: (TODO attach wiki URL if it is accepted)
#
# Returns the stripped author String, or nil if no author is detected.
# Candidates whose value is empty or whitespace-only are skipped rather than
# returned, so a blank match never hides a later, usable source.
def author
  # Let's grab this author:
  # <meta name="dc.creator" content="Finch - http://www.getfinch.com" />
  @html.xpath('//meta[@name = "dc.creator"]').each do |element|
    # to_s covers a meta tag that has no content attribute at all (nil).
    name = element['content'].to_s.strip
    return name unless name.empty?
  end

  text_queries = [
    # <span class="byline author vcard"><span>By</span><cite class="fn">Austin Fonacier</cite></span>
    # <div class="author">By</div><div class="author vcard"><a class="url fn" href="http://austinlivesinyoapp.com/">Austin Fonacier</a></div>
    '//*[contains(@class, "vcard")]//*[contains(@class, "fn")]',
    # <a rel="author" href="http://dbanksdesign.com">Danny Banks (rel)</a>
    # TODO: strip out the (rel)?
    '//a[@rel = "author"]',
    '//*[@id = "author"]'
  ]

  text_queries.each do |query|
    @html.xpath(query).each do |element|
      # An element's text is a String even when the element is empty, so
      # blankness (not truthiness) is what decides whether to keep looking.
      name = element.text.to_s.strip
      return name unless name.empty?
    end
  end

  nil
end

# Diff context below is truncated in this view; kept verbatim for reference.
# def content(remove_unlikely_candidates = :default)
#   @remove_unlikely_candidates = false if remove_unlikely_candidates == false
#   prepare_candidates
#   article = get_article(@candidates, @best_candidate)