Sha256: 4c92c3b132b888cc7a13e4fce3155d49c010ad2f0a1fb5a35cff84f8cc5b2831
Contents?: true
Size: 1.24 KB
Versions: 4
Compression:
Stored size: 1.24 KB
Contents
require "rubygems"
require "nokogiri"
require "hpricot"
require "parsley"
require "benchmark"
require "pp"

# Benchmark script: extracts the same fields (name, phone, address, reviews)
# from a saved HTML page with three libraries and reports the time each takes.

# Path of the HTML fixture, expected to sit next to this script.
YELP_HTML = File.join(File.dirname(__FILE__), "yelp.html")

# Extracts the business details from an already-parsed document.
#
# doc - a document object that supports `/` with a CSS selector and whose
#       matches respond to `first` / `inner_text` (both Nokogiri.Hpricot and
#       Hpricot documents are passed in by the callers below).
#
# Returns a Hash with the keys "name", "phone", "address" and "reviews",
# where "reviews" is an Array of Hashes with "date", "user_name" and
# "comment". Raises NoMethodError if a selector matches nothing, because
# `first` then yields nil.
def parse(doc)
  {
    "name" => (doc / "h1").first.inner_text,
    "phone" => (doc / "#bizPhone").first.inner_text,
    "address" => (doc / "address").first.inner_text,
    "reviews" => (doc / ".nonfavoriteReview").map do |node|
      {
        "date" => (node / ".ieSucks .smaller").first.inner_text,
        "user_name" => (node / ".reviewer_info a").first.inner_text,
        "comment" => (node / ".review_comment").first.inner_text
      }
    end
  }
end

# Parses the fixture through Nokogiri.Hpricot and extracts the fields.
# The block form of File.open closes the handle after every run, so repeated
# benchmark iterations do not accumulate open file descriptors.
def noko
  File.open(YELP_HTML) { |file| parse Nokogiri.Hpricot(file) }
end

# Parses the fixture with Hpricot and extracts the fields.
# The file handle is closed when the block exits.
def hpri
  File.open(YELP_HTML) { |file| parse Hpricot(file) }
end

# Builds a Parsley parselet describing the same fields and runs it against
# the fixture file; Parsley is given the file path and reads it itself.
def pars
  parselet = Parsley.new({
    "name" => "h1",
    "phone" => "#bizPhone",
    "address" => "address",
    "reviews(.nonfavoriteReview)" => [
      {
        "date" => ".ieSucks .smaller",
        "user_name" => ".reviewer_info a",
        "comment" => ".review_comment"
      }
    ]
  })
  parselet.parse(:file => YELP_HTML)
end

# Each library parses and extracts three times per report.
Benchmark.bm do |x|
  x.report("nokogiri: ") { 3.times { noko } }
  x.report("hpricot: ") { 3.times { hpri } }
  x.report("parsley: ") { 3.times { pars } }
end
Version data entries
4 entries across 4 versions & 4 rubygems