lib/site_checker/parse/page.rb in site_checker-0.3.0 vs lib/site_checker/parse/page.rb in site_checker-0.4.0
- old
+ new
@@ -1,82 +1,82 @@
module SiteChecker
- module Parse
- class Page
- def self.parse(content, ignore_list, root)
- links = []
- page = Nokogiri(content)
+ module Parse
+ class Page
+ def self.parse(content, ignore_list, root)
+ links = []
+ page = Nokogiri(content)
- links.concat(get_links(page, ignore_list, root))
- links.concat(get_images(page, ignore_list, root))
- links.concat(get_anchors(page))
- links.concat(local_pages_which_has_anchor_references(links, root))
+ links.concat(get_links(page, ignore_list, root))
+ links.concat(get_images(page, ignore_list, root))
+ links.concat(get_anchors(page))
+ links.concat(local_pages_which_has_anchor_references(links, root))
- links.uniq
- end
+ links.uniq
+ end
- private
- def self.get_links(page, ignore_list, root)
- links = []
- page.xpath("//a").reject {|a| ignored?(ignore_list, a['href'])}.each do |a|
- if a['href'].match(/(.*)#.+/) && !URI($1).absolute?
- kind = :anchor_ref
- else
- kind = :page
- end
- links << Link.create({:url => a['href'], :kind => kind})
- end
- set_location(links, root)
- end
+ private
+ def self.get_links(page, ignore_list, root)
+ links = []
+ page.xpath("//a").reject {|a| ignored?(ignore_list, a['href'])}.each do |a|
+ if a['href'].match(/(.*)#.+/) && !URI($1).absolute?
+ kind = :anchor_ref
+ else
+ kind = :page
+ end
+ links << Link.create({:url => a['href'], :kind => kind})
+ end
+ set_location(links, root)
+ end
- def self.get_images(page, ignore_list, root)
- links = []
- page.xpath("//img").reject {|img| ignored?(ignore_list, img['src'])}.each do |img|
- links << Link.create({:url => img['src'], :kind => :image})
- end
- set_location(links, root)
- end
+ def self.get_images(page, ignore_list, root)
+ links = []
+ page.xpath("//img").reject {|img| ignored?(ignore_list, img['src'])}.each do |img|
+ links << Link.create({:url => img['src'], :kind => :image})
+ end
+ set_location(links, root)
+ end
- def self.set_location(links, root)
- links.each do |link|
- uri = URI(link.url)
- if uri.to_s.start_with?(root)
- link.problem = "(absolute path)"
- link.location = :local
- else
- if uri.absolute?
- link.location = :remote
- else
- link.location = :local
- end
- end
- end
- end
+ def self.set_location(links, root)
+ links.each do |link|
+ uri = URI(link.url)
+ if uri.to_s.start_with?(root)
+ link.problem = "(absolute path)"
+ link.location = :local
+ else
+ if uri.absolute?
+ link.location = :remote
+ else
+ link.location = :local
+ end
+ end
+ end
+ end
- def self.ignored?(ignore_list, link)
- if link
- ignore_list.include? link
- else
- true
- end
- end
+ def self.ignored?(ignore_list, link)
+ if link
+ ignore_list.include? link
+ else
+ true
+ end
+ end
- def self.get_anchors(page)
- anchors = []
- page.xpath("//a").reject {|a| !a['id']}.each do |a|
- anchors << Link.create({:url => a['id'], :kind => :anchor})
- end
- anchors
- end
+ def self.get_anchors(page)
+ anchors = []
+ page.xpath("//a").reject {|a| !a['id']}.each do |a|
+ anchors << Link.create({:url => a['id'], :kind => :anchor})
+ end
+ anchors
+ end
- def self.local_pages_which_has_anchor_references(links, root)
- new_links = []
- links.find_all {|link| link.anchor_ref?}.each do |link|
- uri = URI(link.url)
- if link.url.match(/(.+)#/)
- new_links << Link.create({:url => $1, :kind => :page})
- end
- end
- set_location(new_links, root)
- end
- end
- end
+ def self.local_pages_which_has_anchor_references(links, root)
+ new_links = []
+ links.find_all {|link| link.anchor_ref?}.each do |link|
+ uri = URI(link.url)
+ if link.url.match(/(.+)#/)
+ new_links << Link.create({:url => $1, :kind => :page})
+ end
+ end
+ set_location(new_links, root)
+ end
+ end
+ end
end
\ No newline at end of file