require 'open3'
class HTMLAcceptanceResult
attr_accessor :resource, :html, :exceptions
def initialize(resource, html, datapath, options={})
@resource = resource
@html = html
@exceptions = ''
@datapath=datapath
@tidyopts = options[:tidy_opts] || "-qi"
valid?
end
# takes a .url.txt and loads the data into this object
def self.load_from_files(datapath)
resource = File.open("#{datapath}.resource.txt", 'r').read
html = File.open("#{datapath}.html.txt", 'r').read
HTMLAcceptanceResult.new(resource, html, datapath)
end
# Validates an html string using html tidy. If there are no warnings or exceptions, or
# there is a previously accepted exception string that matches exactly, valid? returns true
# Line numbers of exceptions are likely to change with any edit, so our validation
# compares the exception strings with the lines and columns removed. Name can be a filename,
# file system path, or url, so long it is uniquely associated with the passed in html.
def valid?
@exceptions = validate
File.delete(data_path("accepted.txt")) if File.exists?(data_path("accepted.txt")) if @exceptions == ''
valid=(@exceptions == '' or accepted?(@exceptions))
save_html_and_exceptions
valid
end
# Saves the exception string for the given url or file path. When next run, if the exception
# string is identical, valid? will return true. Note that #exceptions will still list the
# exception string, though, even if it is an accepted exception string.
def accept!
File.open(data_path("accepted.txt"), 'w') {|f| f.write(@exceptions) }
end
private
# We specifically prefer /usr/bin/tidyby default on *nix as there is another "tidy" programs
# that could end up earlier on the path. On snow leopard, tidy was installed at this location
# for me by default.
def tidy_command
is_windows = (RbConfig::CONFIG['host_os'] =~ /mswin|mingw|cygwin/)
bin=(is_windows or !File.exists?("/usr/bin/tidy")) ? 'tidy' : '/usr/bin/tidy'
"#{bin} #{@tidyopts}"
end
# get the filename for storing a type of data
def data_path(filetype)
"#{@datapath}.#{filetype}"
end
def save_html_and_exceptions
File.open(data_path("exceptions.txt"), 'w') {|f| f.write(@exceptions) }
File.open(data_path("html.txt"), 'w') {|f| f.write(@html) }
File.open(data_path("resource.txt"), 'w') {|f| f.write(@resource) }
end
# have we previously accepted this exact string for this path?
def accepted?(exception_str)
exception_str=filter(exception_str)
File.exists?(data_path('accepted.txt')) ? filter(File.open(data_path('accepted.txt'),"r").read) == exception_str : false
end
# Line numbers of exceptions are likely to change with any minor edit, so our validation
# compares the result strings with the lines and columns removed. This means that
# if the errors change position in the file (up or down b/c you add or remove code),
# accepted exception strings will remain valid.
def filter(str)
str.gsub!(/line [0-9]+ column [0-9]+ - Warning: trimming empty <[a-zA-Z]+>/, '') # the messages about empty are overzealous, and not invalid
str.gsub(/line [0-9]+ column [0-9]+ -/, '')
# /line [0-9]+ column [0-9]+ - / + =~ "line 1 column 1 - Warning: missing declaration"
end
def validate
stdin, stdout, stderr = Open3.popen3(tidy_command)
stdin.puts @html
stdin.close
result=stderr.read
end
end