module W3CValidators
class MarkupValidator < Validator
MARKUP_VALIDATOR_URI = 'http://validator.w3.org/check'
# Create a new instance of the MarkupValidator.
#
# ==== Options
# The +options+ hash allows you to set request parameters (see
# http://validator.w3.org/docs/api.html#requestformat) quickly. Request
# parameters can also be set using set_charset!, set_debug! and set_doctype!.
#
# You can pass in your own validator's URI (i.e.
# MarkupValidator.new(:validator_uri => 'http://localhost/check')).
#
# See Validator#new for proxy server options.
def initialize(options = {})
  # Work on a copy so the hash handed in by the caller is never modified.
  options = options.dup
  # Remove the entry by its *key*. :validator_uri configures this object and
  # must not be forwarded to Validator#initialize with the other options.
  custom_uri = options.delete(:validator_uri)
  # Fall back to the public W3C service when no custom validator is given.
  @validator_uri = URI.parse(custom_uri || MARKUP_VALIDATOR_URI)
  super(options)
end
# Specify the character encoding to use when parsing the document.
#
# When +only_as_fallback+ is +true+, the given encoding will only be
# used as a fallback value, in case the +charset+ is absent or unrecognized.
#
# +charset+ can be a string (e.g. set_charset!('utf-8')) or
# a symbol (e.g. set_charset!(:utf_8)) from the
# W3CValidators::CHARSETS hash.
#
# Has no effect when using validate_uri_quickly.
def set_charset!(charset, only_as_fallback = false)
  encoding = charset
  if Symbol === charset
    # Symbols are shorthand names looked up in CHARSETS; an unknown symbol
    # leaves the current options untouched and the method returns nil.
    return nil unless CHARSETS.key?(charset)
    encoding = CHARSETS[charset]
  end
  @options[:charset] = encoding
  # :fbc tells the validator whether the charset is only a fallback.
  @options[:fbc] = only_as_fallback
end
# Specify the Document Type (+DOCTYPE+) to use when parsing the document.
#
# When +only_as_fallback+ is +true+, the given document type will only be
# used as a fallback value, in case the document's +DOCTYPE+ declaration
# is missing or unrecognized.
#
# +doctype+ can be a string (e.g. set_doctype!('HTML 3.2')) or
# a symbol (e.g. set_doctype!(:html32)) from the
# W3CValidators::DOCTYPES hash.
#
# Has no effect when using validate_uri_quickly.
def set_doctype!(doctype, only_as_fallback = false)
  declaration = doctype
  if Symbol === doctype
    # Symbols are shorthand names looked up in DOCTYPES; an unknown symbol
    # leaves the current options untouched and the method returns nil.
    return nil unless DOCTYPES.key?(doctype)
    declaration = DOCTYPES[doctype]
  end
  @options[:doctype] = declaration
  # :fbd tells the validator whether the doctype is only a fallback.
  @options[:fbd] = only_as_fallback
end
# When set the validator will output some extra debugging information on
# the validated resource (such as HTTP headers) and validation process
# (such as parser used, parse mode, etc.).
#
# Debugging information is stored in the Results +debug_messages+ hash.
# Custom debugging messages can be set with Results#add_debug_message.
#
# Has no effect when using validate_uri_quickly.
def set_debug!(debug = true)
  # Record the flag with the other request options; returns the stored value.
  @options.store(:debug, debug)
end
# Validate the markup of an URI using a +SOAP+ request.
#
# Returns W3CValidators::Results.
def validate_uri(uri)
  # Full (non-quick) validation of the document found at +uri+.
  request = { :uri => uri }
  validate(request, false)
end
# Validate the markup of an URI using a +HEAD+ request.
#
# Returns W3CValidators::Results with an error count, not full error messages.
def validate_uri_quickly(uri)
  # Quick mode: validate is asked for the HEAD-based check.
  request = { :uri => uri }
  validate(request, true)
end
# Validate the markup of a string.
#
# Returns W3CValidators::Results.
def validate_text(text)
  # The markup itself is submitted as the :fragment request parameter.
  request = { :fragment => text }
  validate(request, false)
end
# Validate the markup of a local file.
#
# +file_path+ may be either the fully-expanded path to the file or
# an IO object (like File).
#
# Returns W3CValidators::Results.
def validate_file(file_path)
  # IO-like objects are read directly; anything else is treated as a path
  # and handed to read_local_file.
  src = file_path.respond_to?(:read) ? file_path.read : read_local_file(file_path)
  validate({ :uploaded_file => src, :file_path => file_path }, false)
end
protected
def validate(options, quick = false) # :nodoc:
  options = get_request_options(options)
  @results =
    if quick
      # Quick checks use a HEAD request and are parsed from the response itself.
      head_response = send_request(options, :head)
      parse_head_response(head_response, options[:uri])
    else
      # Remote documents are fetched with GET; everything else is POSTed.
      verb = options.has_key?(:uri) ? :get : :post
      soap_response = send_request(options, verb)
      parse_soap_response(soap_response.body)
    end
end
# Perform sanity checks on request params
def get_request_options(options) # :nodoc:
  # Per-call options win over the instance-wide ones; @options is not modified.
  options = @options.merge(options)
  options[:output] = SOAP_OUTPUT_PARAM

  unless options[:uri] || options[:uploaded_file] || options[:fragment]
    raise ArgumentError, "an uri, uploaded file or fragment is required."
  end

  # The URI is sent as a plain request parameter, so anything that is not
  # already a String (e.g. a URI object) is converted with to_s.
  if options[:uri] && !options[:uri].kind_of?(String)
    options[:uri] = options[:uri].to_s
  end

  # Convert booleans to integers. Integer is used for the type check because
  # Fixnum was deprecated in Ruby 2.4 and removed in Ruby 3.2.
  [:fbc, :fbd, :verbose, :debug, :ss, :outline].each do |flag|
    next unless options.has_key?(flag)
    next if options[flag].kind_of?(Integer)
    options[flag] = options[flag] ? 1 : 0
  end

  options
end
# Parse the SOAP XML response.
#
# +response+ must be the body of the SOAP response (the XML text), not the
# Net::HTTPResponse object itself.
#
# Returns W3CValidators::Results.
def parse_soap_response(response) # :nodoc:
  doc = REXML::Document.new(response)
  response_path = "env:Envelope/env:Body/m:markupvalidationresponse"

  # Top-level result fields; elements missing from the response are skipped.
  result_params = {}
  {:doctype => 'm:doctype', :uri => 'm:uri', :charset => 'm:charset',
   :checked_by => 'm:checkedby', :validity => 'm:validity'}.each do |local_key, remote_key|
    if (val = doc.elements["#{response_path}/#{remote_key}"])
      result_params[local_key] = val.text
    end
  end
  results = Results.new(result_params)

  # Every warning/error becomes a message whose params are keyed by the
  # names of its child elements.
  {:warning => 'm:warnings/m:warninglist/m:warning',
   :error => 'm:errors/m:errorlist/m:error'}.each do |local_type, remote_type|
    doc.elements.each("#{response_path}/#{remote_type}") do |message|
      message_params = {}
      message.each_element_with_text do |el|
        message_params[el.name.to_sym] = el.text
      end
      results.add_message(local_type, message_params)
    end
  end

  # SOAP faults are reported as errors. The text goes under :message, the
  # same key regular messages get from their m:message child element.
  doc.elements.each("env:Envelope/env:Body/env:Fault/env:Reason") do |reason|
    reason.elements.each("env:Text") do |text|
      results.add_message(:error, {:message => text.text})
    end
  end

  doc.elements.each("#{response_path}/m:debug") do |debug|
    results.add_debug_message(debug.attribute('name').value, debug.text)
  end

  results
rescue StandardError => e
  # Only ordinary errors are handed to handle_exception; signals and exit
  # requests propagate untouched.
  handle_exception e
end
# Parse the HEAD response into W3CValidators::Results.
#
# +response+ must be a Net::HTTPResponse.
#
# Returns Results.
def parse_head_response(response, validated_uri = nil) # :nodoc:
  status = response[HEAD_STATUS_HEADER].downcase
  summary = Results.new(:uri => validated_uri, :validity => status == 'valid')
  # A HEAD response only carries a count, so add one empty error per reported
  # problem to keep the error total correct.
  error_count = response[HEAD_ERROR_COUNT_HEADER].to_i
  1.upto(error_count) { summary.add_error }
  summary
end
end
end