require 'nokogiri'
require 'addressable/uri'
require 'sequel'
require 'json'
require 'htmlentities'
require 'rack'
require 'sprockets'
# This gem provides mechanisms that allow ballons (or speech bubbles) to be
# added/removed/edited over the images of an HTML or XHTML document and to be
# persisted. Editing the ballons is made possible by the JavaScript module
# provided by this gem. Persistence is provided by the Ballonizer class.
# The Ballonizer class is basically a wrapper around the database used to
# persist the ballons. It offers methods to process the requests made by
# the client side (through a form created by the JavaScript module), and to
# modify an (X)HTML document by adding the ballons of each image over it.
#
# This class lacks a lot of features like: access to an abstraction of the
# ballons, images and their relationship; control over users who edit the
# ballons; access to the old versions of the ballon set of a image (that
# are stored in the database, but only can be accessed directly by the
# Sequel::Database object). It's a work in progress, be warned to use
# carefully and motivated to contribute.
#
# The JavaScript library used to allow edition in the client side works
# as follows: double click over the image add a ballon, double click over
# a ballon allow edit the text, when the ballon lose the focus it returns
# to the non-edition state, a ballon without text (or only with spaces) it's
# automatically removed when lose focus, drag the ballon change its position
# (restricted to image space), drag ballon by the right-bottom handle
# resize the ballon (also restricted to image space). Any change in the ballons
# make visible a button fixed in the right-top corner of the browser viewport.
# Every time a ballons is changed (or added/removed) the json of a hidden
# form is updated. The button submits this json by POST request to the url
# configured by :form_handler_url setting.
#
# For an image to be 'ballonized' it has to match the
# :img_to_ballonize_css_selector setting. The term 'ballonized' here means:
# to have the ballons added over the image by ballonize_page.
#
# To use this class with your app (Rack-based or not) you need to: create the
# necessary tables in a Sequel::Database object with Ballonizer.create_tables;
# create a Ballonizer instance with the url where you are going to handle the
# ballon change requests and the url where the assets are provided. Handle the
# ballon change requests at that url with process_submit. Call
# instance.ballonize_page over the html documents that can have images to be
# ballonized. Check that the images match the css selector configured in
# :img_to_ballonize_css_selector.
#
# What's explained above is basically the example you can access with
# 'rake example' and is in the examples/ballonizer_app/config.ru file.
# You can reset the database with 'rake db:reset' (and if you pass an argument
# as 'rake db:reset[postgres://user:password@host:port/database_name]'
# you can create the tables in the database already used by your app).
# The tables names are: images, ballons, ballonized_image_versions,
# ballonized_image_ballons.
#
# Changelog:
# v0.5.1:
# * js_load_snippet can take a settings arg too. Fixed ballonize_page to
# use the :form_handler_url from the settings argument.
# v0.5.0:
# * The *_html_links methods can take a settings argument.
# * Fixed bug where passing a new asset path in the ballonize_page
# settings parameter didn't change the asset path that it uses.
# * Asset path settings now are parsed as real URIs (need to have a
# trailing slash if the intent is use as a dir).
# * Updated the rspec version used by the gem (fixed deprecation).
# v0.4.0:
# * Changed the way the Javascript module add containers in the page
# to avoid creating invalid HTML4.0.1/XHTML1.1/HTML5 documents.
# * Now the ballonize_page takes a mime-type argument to decide if
# the page has to be parsed as XML or HTML (trying to be in
# conformance with http://www.w3.org/TR/xhtml-media-types/).
# * The change in the ballon size now change the font-size of the
# ballon text.
# * Database schema change, as consequence of the font-size change,
# the database now stores the font-size. No migration provided for
# databases in the old format, but the font-size field can be null.
# The migration only require adding this column with null value to
# all records (see the create_tables code).
# * Fixed a bug in the Javascript module that gave wrong position and
# size values to all ballons that weren't edited/added before submitting
# (only if the image wasn't loaded before the javascript loading).
#
# @author Henrique Becker
class Ballonizer
# The superclass of any error explicitly raised by the Ballonizer class.
# It inherits from ArgumentError, so a `rescue ArgumentError` also catches it.
class Error < ArgumentError; end
# The class used in exceptions related to an invalid value for a submit.
class SubmitError < Error; end
# @!attribute db
#   @return the database object stored by #initialize (read and written
#     through this accessor by the instance methods).
# @!attribute settings
#   @return [Hash] DEFAULT_SETTINGS merged with the settings hash given to
#     #initialize.
attr_accessor :db, :settings
# @api private Don't use the methods of this module. They are for internal use only.
module Workaround
  # Resolve +relative+ against +base+ using URI joining rules.
  # @param base [String] The base URI (needs a trailing slash to be treated
  #   as a directory).
  # @param relative [String] The URI reference to resolve against +base+.
  # @return [String] The joined URI.
  def self.join_uris(base, relative)
    Addressable::URI.parse(base).join(relative).to_s
  end

  # Freeze +e+ and, when it is an Enumerable, everything it yields
  # (recursively).
  # @param e [Object] The object to freeze in place.
  # @return [Object] The same object, now frozen.
  def self.deep_freeze(e)
    e.each { |v| deep_freeze(v) } if e.is_a?(Enumerable)
    e.freeze
  end

  # Parse +doc+ with the Nokogiri parser that matches +mime_type+.
  # @param doc [String] The (X)HTML document.
  # @param mime_type [String] Has to contain 'text/html' or
  #   'application/xhtml+xml'.
  # @return [Object, nil] The parsed document, or nil when the XHTML parse
  #   raised an error.
  # @raise [Error] If the mime-type matches neither accepted value.
  def self.parse_html_or_xhtml(doc, mime_type)
    # If you parse XHTML as HTML with Nokogiri, and use to_s after, the markup
    # can be messed up, breaking the structural integrity of the xml
    #
    # Example:
    # becomes
    #
    # In the other side if you parse HTML as a XML, and use to_s after, the
    # Nokogiri make empty content tags self-close
    #
    # Example:
    # becomes:
    #
    # What's even worse than the contrary (xml as html)
    case mime_type
    when /text\/html/
      Nokogiri::HTML(doc)
    when /application\/xhtml\+xml/
      # Parse options are bit flags and have to be combined with `|`. The
      # previous `DEFAULT_XML & STRICT & NONET` always evaluated to 0 (STRICT
      # is 0), which silently dropped NONET. STRICT | NONET keeps the parse
      # strict (no RECOVER) and forbids network access.
      options = Nokogiri::XML::ParseOptions::STRICT |
                Nokogiri::XML::ParseOptions::NONET
      begin
        Nokogiri::XML::Document.parse(doc, nil, nil, options)
      rescue StandardError
        # best effort: the caller treats nil as "could not parse"
        nil
      end
    else
      fail Error, "the only mime-types accepted are text/html and" +
                  " application/xhtml+xml, the passed argument was " +
                  "'#{mime_type}'"
    end
  end
end
private_constant :Workaround
# The load paths of the assets inside the gem and the files inside each path,
# in the order they need to be included (the files of the first path need
# to be included before the files of the second path, and the files in the
# same path need to be included in the specified order).
# Each entry is a pair: [load path relative to the gem root, [file names]].
# The whole structure is deep frozen.
# Give preference to the asset(s)_* and *_html_links methods over this constant.
ASSETS = Workaround.deep_freeze([
['vendor/assets/javascripts', [
'jquery-2.0.1.js',
'jquery.json-2.4.min.js',
'jquery-ui-1.10.3.custom.min.js']],
['lib/assets/javascripts', [
'ballonizer.js']],
['vendor/assets/stylesheets', [
'ui-lightness/jquery-ui-1.10.3.custom.min.css']],
['lib/assets/stylesheets', [
'ballonizer.css']]
])
# The default #settings. The hash and each of its values are frozen.
DEFAULT_SETTINGS = {
# The css selector used to define the elements to ballonize.
img_to_ballonize_css_selector: 'img.to_ballonize',
# A url to be used in the client-side action attribute of the form for
# ballon submission. The value will be used in the javascript snippet that
# initializes the ballonizer client javascript allowing ballon edition
# (and consequently creating the form).
form_handler_url: '#',
# Defines if the javascript code that allows edition will be added to the
# page (this doesn't refer to the jquery-* libs and the ballonizer.js, only
# to the snippet to execute when the page is ready).
add_js_for_edition: true,
# A path string used as base for each href of the css stylesheet links
# generated by #css_html_links, and, possibly, added by ballonize_page.
# It is joined as a URI, so it needs a trailing slash to be treated as a
# directory. Example: if you use Ballonizer.assets_app mapped to '/assets'
# then use '/assets/' here. This is used with the :add_required_css setting.
css_asset_path_for_link: nil,
# If the ballonize_page method will add or not the html generated by
# #css_html_links (requires the :css_asset_path_for_link to be defined).
add_required_css: false,
# A path string used as base for each script src generated by
# #js_libs_html_links, and, possibly, added by ballonize_page. It is joined
# as a URI, so it needs a trailing slash to be treated as a directory.
# Example: if you use Ballonizer.assets_app mapped to '/assets' then use
# '/assets/' here. This is used with the
# :add_required_js_libs_for_edition setting.
js_asset_path_for_link: nil,
# If the ballonize_page method will add or not the html generated by
# #js_libs_html_links (requires the :js_asset_path_for_link to be defined).
add_required_js_libs_for_edition: false,
# If true and the database argument doesn't have any of the tables used by
# the class, call create_tables over the database argument. If false, or if
# the database has at least one of the tables, does nothing.
create_tables_if_none: false
}.freeze.each { | _, v| v.freeze }
# Names of the database tables read and written by the class (private
# constant, exposed through Ballonizer.used_tables).
USED_TABLES = [ :images, :ballons, :ballonized_image_ballons,
:ballonized_image_versions].freeze
private_constant :USED_TABLES
# The names (as symbols) of the tables used by instances of the class.
# @return [Array<Symbol>] A frozen array of symbols.
def self.used_tables
USED_TABLES
end
# Build a Ballonizer around a Sequel database (which needs the expected
# tables, see Ballonizer.create_tables) and an optional hash of settings.
# @param db [String, Sequel::Database] A Sequel::Database, or a String that
#   is handed to Sequel::Database.connect. The tables have to be created with
#   Ballonizer.create_tables unless the :create_tables_if_none setting is
#   true.
# @param settings [Hash{Symbol => String}] Optional. Merged over
#   DEFAULT_SETTINGS, where each option is documented.
# @return [Ballonizer] A new ballonizer instance.
# @see Ballonizer.create_tables
def initialize(db, settings = {})
  @settings = DEFAULT_SETTINGS.merge(settings)
  db = Sequel::Database.connect(db) if db.is_a?(String)
  wants_tables = @settings[:create_tables_if_none]
  # only create the schema when none of the used tables exists yet
  if wants_tables && self.class.used_tables.none? { |table| db.table_exists?(table) }
    self.class.create_tables(db)
  end
  @db = db
end
# Convenience method for process_submit_json: extracts the json from the
# 'ballonizer_data' request parameter, validates it and passes it on.
# @param env A env Rack hash.
# @param time [Time] A Time instance to be used in place of Time.now. Optional.
# @return [Ballonizer] The self, to allow chaining.
# @raise [JSON::ParserError, Ballonizer::SubmitError]
# @see process_submit_json
def process_submit(env, time = nil)
  request = Rack::Request.new(env)
  # Rack::Request#[] is deprecated; #params is the supported way to read a
  # request parameter.
  submit_json = request.params['ballonizer_data']
  if submit_json.nil?
    # without this the nil would reach JSON.parse and raise a TypeError
    fail SubmitError, "the request has no 'ballonizer_data' parameter"
  end
  valid_submit_json?(submit_json, true)
  process_submit_json(submit_json, time)
  self
end
# Verify if the json is a valid output from the client counterpart.
# On Ruby versions that still have object tainting: if the argument is valid
# it is untainted, otherwise tainted (unless it's frozen). On Ruby versions
# without tainting this step is skipped.
# If the second argument is true the method will throw exceptions when the
# input is invalid.
# @param submit_json [String] A JSON String.
# @param throw_exceptions [FalseClass,TrueClass] Define behaviour when the
#   input is invalid. If true throw exceptions, otherwise only return false.
#   Default value: false (don't throw exceptions).
# @return [true, false]
# @raise [JSON::ParserError, Ballonizer::SubmitError]
# @see valid_submit_hash?
# @note This is a instance method because, in the future, the validation
#   can depend of instance settings.
def valid_submit_json?(submit_json, throw_exceptions=false)
  unless submit_json.is_a?(String)
    # JSON.parse would raise an (undocumented) TypeError for nil & co.
    fail SubmitError, "the submit is a '#{submit_json.class}' and not a " +
                      "JSON String"
  end
  parsed_submit = JSON.parse(submit_json)
  valid_submit_hash?(parsed_submit, true)
  # taint/untaint don't exist anymore in recent Ruby versions
  if submit_json.respond_to?(:untaint) && !submit_json.frozen?
    submit_json.untaint
  end
  true
rescue JSON::ParserError, SubmitError => e
  if submit_json.respond_to?(:taint) && !submit_json.frozen?
    submit_json.taint
  end
  raise e if throw_exceptions
  false
end
# Act as #valid_submit_json?, but over an already parsed json and without
# (un)tainting anything.
#
# A valid submit is a non-empty Hash that maps absolute image URIs (Strings)
# to Arrays of ballons. Each ballon is a Hash with a non-empty String "text"
# and numeric "top", "left", "width", "height" and "font_size". The first
# four are ratios between 0 and 1 (both inclusive), and the ballon can't
# go beyond the right side or the bottom of the image.
# @param submit_hash [Hash] A parsed JSON.
# @param throw_exceptions [FalseClass,TrueClass] If true raise when the input
#   is invalid, otherwise only return false. Default value: false.
# @return [true, false]
# @raise [Ballonizer::SubmitError]
# @see valid_submit_json?
# @note This is a instance method because, in the future, the validation
#   can depend of instance settings.
def valid_submit_hash?(submit_hash, throw_exceptions=false)
  unless submit_hash.is_a?(Hash)
    fail SubmitError, "the submit request is a '#{submit_hash.class}' and " +
                      "not a Hash"
  end
  if submit_hash.empty?
    fail SubmitError, "the submit request is empty"
  end
  submit_hash.each do |img_src, ballons|
    unless img_src.is_a?(String)
      # TODO: define img_src max lenght?
      fail SubmitError, "the image src is a '#{img_src.class}' and not a String"
    end
    # an unparseable URI is reported as a SubmitError too
    absolute_src = begin
      Addressable::URI.parse(img_src).absolute?
    rescue Addressable::URI::InvalidURIError
      false
    end
    unless absolute_src
      fail SubmitError, "the image src ('#{img_src}') is not an absolute URI"
    end
    unless ballons.is_a?(Array)
      fail SubmitError, "the image with src '#{img_src}' is key of a " +
                        "'#{ballons.class}' and not a Array"
    end
    ballons.each do |ballon|
      unless ballon.is_a?(Hash)
        fail SubmitError, "the ballon is a '#{ballon.class}' and not a Hash"
      end
      text = ballon["text"]
      unless text.is_a?(String)
        fail SubmitError, "the ballon text is a '#{text.class}' and not" +
                          " a String"
      end
      if text.empty?
        fail SubmitError, "the ballon text is empty"
      end
      [:top, :left, :width, :height, :font_size].each do |numeric_attr_name|
        numeric_attr = ballon[numeric_attr_name.to_s]
        # Integer instead of Fixnum: Fixnum was removed from recent Rubies
        unless numeric_attr.is_a?(Integer) || numeric_attr.is_a?(Float)
          fail SubmitError, "the #{numeric_attr_name} " +
                            "(#{numeric_attr}) isn't a Fixnum or " +
                            "Float (is a '#{numeric_attr.class}')"
        end
      end
      [:top, :left, :width, :height].each do |bound_name|
        bound = ballon[bound_name.to_s]
        unless bound >= 0 && bound <= 1
          fail SubmitError, "the #{bound_name} (#{bound}) isn't" +
                            " between 0 and 1 (both inclusive)"
        end
      end
      ballon_end = {
        x: ballon["left"] + ballon["width"],
        y: ballon["top"] + ballon["height"]
      }
      [:x, :y].each do |axis|
        if ballon_end[axis] > 1
          side = { x: "right side", y: "bottom" }[axis]
          fail SubmitError, "the ballon with text #{text} " +
                            "is trespassing the #{side} of the image"
        end
      end
    end
  end
  # if pass everything above return true
  true
rescue SubmitError
  # HACK: "don't use exceptions for flow control", but this is the most DRY
  # way...
  raise if throw_exceptions
  false
end
# Receive a untainted json (assume as validated by #valid_submit_json?)
# and add it to the database.
# @param submit_json [String] A untainted JSON string. Validated with #valid_submit_json?.
# @param time [Time] A Time instance to be used in place of Time.now. Optional.
# @return [Ballonizer] The self, to allow chaining.
# @raise [SecurityError] If the input is tainted (only checked on Ruby
#   versions that still have object tainting).
def process_submit_json(submit_json, time = nil)
  # String#tainted? doesn't exist anymore in recent Ruby versions
  if submit_json.respond_to?(:tainted?) && submit_json.tainted?
    fail SecurityError, 'the input is tainted'
  end
  process_submit_hash(JSON.parse(submit_json), time)
  self
end
# Behave as process_submit_json except that takes a already parsed json (hash)
# and don't check if it's tainted.
#
# Inside a single transaction, for each image of the submit: the image row
# is found (by its normalized src) or inserted, a new version row is
# inserted (1 for a new image, the biggest stored version plus one
# otherwise) and each ballon is found or inserted and linked to that
# image version.
# @param submit_hash [Hash] A JSON hash. Validate with #valid_submit_json?.
# @param time [Time] A Time instance to be used in place of Time.now. Optional.
# @return [Ballonizer] The self, to allow chaining.
def process_submit_hash(submit_hash, time = nil)
  time ||= Time.now
  db.transaction do
    images = db[:images]
    db_ballons = db[:ballons]
    ballonized_image_versions = db[:ballonized_image_versions]
    ballonized_image_ballons = db[:ballonized_image_ballons]
    submit_hash.each do |img_src, ballons|
      img_src = Addressable::URI.parse(img_src).normalize.to_s
      db_image = images.first({img_src: img_src})
      if db_image
        image_id = db_image[:id]
        version = ballonized_image_versions.where({image_id: image_id})
                                           .max(:version) + 1
      else
        image_id = images.insert({img_src: img_src})
        version = 1
      end
      ballonized_image_versions.insert({
        image_id: image_id,
        version: version,
        time: time
      })
      ballons.each do |ballon|
        # NOTE(review): when the hash comes from JSON.parse the keys of
        # `ballon` are Strings; confirm that the Sequel version in use treats
        # String keys as column names in #first.
        db_ballon = db_ballons.first(ballon)
        ballon_id = db_ballon ? db_ballon[:id] : db_ballons.insert(ballon)
        ballonized_image_ballons.insert({
          image_id: image_id,
          version: version,
          ballon_id: ballon_id,
        })
      end
    end
  end
  # the documented return value (the transaction result was leaking before)
  self
end
# Wrap each image to ballonize with a container, add its ballons to the
# container and, possibly, add the css and js libs and snippet for the
# edition initialization. Don't make any change if the page has no images
# to ballonize. If the page can't be parsed (as HTML or X(HT)ML, depending
# of the mime-type) return the page argument without throwing any exceptions.
# Throw an exception if the mime-type doesn't match with html or xhtml.
# @param page [String] The (X)HTML page.
# @param page_url [String] The url of the page to be ballonized, necessary
#   to make absolute the src attribute of img (if it's relative).
# @param mime_type A string that has the substring 'text/html' or
#   'application/xhtml+xml'.
# @param settings [Hash{Symbol => String}] Optional. Hash to be merged with
#   the instance #settings (this argument overrides the #settings ones).
# @return [String] The ballonized page (new string), or the same string,
#   if the parse has failed.
# @raise [Ballonizer::Error] If the mime-type doesn't match either
#   'text/html' or 'application/xhtml+xml'.
def ballonize_page(page, page_url, mime_type, settings = {})
settings = @settings.merge(settings)
# can raise Ballonizer::Error if the mime-type is invalid
parsed_page = Workaround.parse_html_or_xhtml(page, mime_type)
# if can't parse return the page unaltered
if parsed_page.nil?
return page
end
selector = settings[:img_to_ballonize_css_selector]
imgs = parsed_page.css(selector)
unless imgs.empty?
imgs.wrap('')
imgs.each do | img |
img_src = img['src']
# the ballons are stored under the absolute and normalized image url
absolute_normal_src = Addressable::URI.parse(page_url)
.join(img_src)
.normalize.to_s
ballons = last_ballon_set_of_image(absolute_normal_src)
ballons.each do | ballon |
img.add_previous_sibling(self.class.create_ballon_node(ballon))
end
end
# NOTE(review): the code below assumes the document has a <head> with at
# least one child node, and that the *_html_links methods don't return nil
# (they do when the matching asset path setting is missing) -- confirm.
head = parsed_page.at_css('head')
if settings[:add_required_css]
head.children.last.add_next_sibling(css_html_links(settings))
end
if settings[:add_required_js_libs_for_edition]
head.children.last.add_next_sibling(js_libs_html_links(settings))
end
if settings[:add_js_for_edition]
head.children.last.add_next_sibling(js_load_snippet(settings))
end
end
parsed_page.to_s
end
# @api private Don't use this method. It is for internal use only.
# Build the markup of one ballon from its stored data.
# @param ballon_data [Hash] Read with the keys :text, :top, :left, :width,
#   :height and :font_size.
# @return [String] The markup inserted by ballonize_page before the image.
def self.create_ballon_node(ballon_data)
# the text is entity-encoded before being placed in the markup
text = HTMLEntities.new.encode(ballon_data[:text])
style = ''
[:top, :left, :width, :height].each do | sym |
# transform ratio [0,1] to percent [0, 100]
style = style + "#{sym}: #{(ballon_data[sym] * 100)}%;"
end
style = style + "font-size: #{ballon_data[:font_size]}px;"
"#{text}"
end
# @api private
# Don't use this method. It is for internal use only.
# Fetch the ballons linked to the biggest stored version of an image.
# @param img_src [String] The image src, as stored in the images table.
# @return [Array<Hash>] One hash per ballon with the :text, :top, :left,
#   :width, :height and :font_size columns.
# @note This method doesn't distinguish an image in the database without
#   any ballons (removed in the last version, for example) from an image
#   that isn't in the database (both return an empty array).
def last_ballon_set_of_image(img_src)
  image_row = db[:images].first({img_src: img_src})
  return [] unless image_row

  image_id = image_row[:id]
  newest_version = db[:ballonized_image_versions]
                   .where({image_id: image_id})
                   .max(:version)
  # restrict the link table to the newest version of this image and pair
  # each link with its ballon row
  join_conditions = {
    ballonized_image_ballons__version: newest_version,
    ballonized_image_ballons__image_id: image_id,
    ballonized_image_ballons__ballon_id: :ballons__id
  }
  db[:ballonized_image_ballons]
    .join(:ballons, join_conditions)
    .select(:text, :top, :left, :width, :height, :font_size)
    .all
end
# Return a String with the snippet added to the pages to allow edition in them.
# @param settings [Hash{Symbol => String}] Optional. Hash to be merged with
#   the instance #settings (this argument overrides the #settings ones).
# @return [String] The added snippet. Already with the tag around it.
def js_load_snippet(settings = {})
# the merged settings are the ones available to the snippet text below
settings = @settings.merge(settings)
<<-EOF
EOF
end
# Run the create_table operations for the four tables used by the class
# (images, ballons, ballonized_image_versions and ballonized_image_ballons)
# over the Sequel::Database argument.
# @param db [Sequel::Database] The database where the tables are created.
# @return [void]
def self.create_tables(db)
  db.create_table(:images) do
    primary_key :id
    String :img_src, size: 255, unique: true, allow_null: false
  end

  db.create_table(:ballons) do
    primary_key :id
    String :text, size: 255, allow_null: false
    Float :top, allow_null: false
    Float :left, allow_null: false
    Float :width, allow_null: false
    Float :height, allow_null: false
    # font_size accepts null to support databases migrated from old versions
    # (that didn't have this field)
    Float :font_size, allow_null: true
  end

  # one row per saved ballon set of an image
  db.create_table(:ballonized_image_versions) do
    Integer :version
    foreign_key :image_id, :images
    DateTime :time, allow_null: false
    primary_key [:version, :image_id]
  end

  # links each ballon to the image version(s) it belongs to
  db.create_table(:ballonized_image_ballons) do
    Integer :version
    foreign_key :image_id, :images
    foreign_key :ballon_id, :ballons
    foreign_key [:version, :image_id], :ballonized_image_versions
  end
end
# The (X)HTML fragment with the link tags that ballonize_page adds to the
# page when the :add_required_css setting is true (the default is false).
# One link is generated for each logical asset path ending in '.css', joined
# (as a URI) with the :css_asset_path_for_link setting.
# @param settings [Hash{Symbol => String}] Optional. Hash to be merged with
#   the instance #settings (this argument overrides the #settings ones).
# @return [String,NilClass] A String when the :css_asset_path_for_link is
#   defined, nil otherwise.
def css_html_links(settings = {})
  effective = @settings.merge(settings)
  base = effective[:css_asset_path_for_link]
  return nil unless base

  link_template = ''
  stylesheets = self.class.asset_logical_paths.grep(/^.+\.css$/)
  fragments = stylesheets.map do |logical_path|
    link_template.sub('PATH', Workaround.join_uris(base, logical_path))
  end
  fragments.join('')
end
# The (X)HTML fragment with the script tags that ballonize_page adds to the
# page when the :add_required_js_libs_for_edition setting is true (the
# default is false). One tag is generated for each logical asset path ending
# in '.js', joined (as a URI) with the :js_asset_path_for_link setting.
# @param settings [Hash{Symbol => String}] Optional. Hash to be merged with
#   the instance #settings (this argument overrides the #settings ones).
# @return [String,NilClass] A String when the :js_asset_path_for_link is
#   defined, nil otherwise.
def js_libs_html_links(settings = {})
  effective = self.settings.merge(settings)
  base = effective[:js_asset_path_for_link]
  return nil unless base

  link_template = ''
  scripts = self.class.asset_logical_paths.grep(/^.+\.js$/)
  fragments = scripts.map do |logical_path|
    link_template.sub('PATH', Workaround.join_uris(base, logical_path))
  end
  fragments.join('')
end
# List of absolute paths to the directories with the css and js provided by
# the gem. Each ASSETS load path is expanded against the gem root directory
# (the parent of the directory of this file). The result is computed once
# and cached.
# @return [Array] A frozen array of frozen strings.
def self.asset_load_paths
  return @asset_load_paths if @asset_load_paths

  lib_dir = File.dirname(File.realpath(__FILE__))
  gem_root = File.expand_path('../', lib_dir)
  # The strings are frozen too (as documented): the array is cached, so a
  # caller mutating one of them would corrupt every later result.
  load_paths = ASSETS.map do |load_path, _files|
    File.expand_path(load_path, gem_root).freeze
  end
  @asset_load_paths = load_paths.freeze
end
# List of logical paths to the css and js assets (the file names listed in
# ASSETS, in order). The assets_app responds to any request to one of these
# paths. The result is computed once and cached.
# @return [Array] A frozen array of frozen strings.
def self.asset_logical_paths
  @asset_logical_paths ||= ASSETS.flat_map { |_load_path, files| files }.freeze
end
# List of absolute filepaths to the css and js files needed by the client
# counterpart and provided by the gem, in the ASSETS order. For those who
# don't want to use assets_app. The result is computed once and cached.
# @return [Array] A frozen array of frozen strings.
# @see Ballonizer.assets_app
def self.asset_absolute_paths
  return @asset_absolute_paths if @asset_absolute_paths

  lib_dir = File.dirname(File.realpath(__FILE__))
  gem_root = File.expand_path('../', lib_dir)
  absolute_paths = ASSETS.flat_map do |relative_load_path, filenames|
    load_path = File.expand_path(relative_load_path, gem_root)
    # The strings are frozen too (as documented): the array is cached, so a
    # caller mutating one of them would corrupt every later result.
    filenames.map { |filename| File.expand_path(filename, load_path).freeze }
  end
  @asset_absolute_paths = absolute_paths.freeze
end
# A Rack app that provides the gem css and js. The Sprockets::Environment is
# built once (with every asset load path prepended) and each call returns a
# new clone of it. The environment isn't frozen because a frozen one can't be
# used with 'run' in a rack app.
# @return [Sprockets::Environment]
# @see Ballonizer.asset_load_paths
def self.assets_app
  if @assets_app.nil?
    # dont freeze because run don't work in a frozen sprockets env
    @assets_app = Sprockets::Environment.new
    asset_load_paths.each { |load_path| @assets_app.prepend_path(load_path) }
  end
  @assets_app.clone
end
end