require 'set'
require 'ffi-icu' # Text file charset detection
require 'distorted-floor/monkey_business/encoding'
require 'distorted-floor/monkey_business/string' # String#map
require 'distorted-floor/modular_technology/pango'
require 'distorted-floor/modular_technology/ttfunk'
require 'distorted-floor/modular_technology/vips/save'
require 'distorted-floor/checking_you_out'
using ::DistorteD::CHECKING::YOU::OUT
module Cooltrainer; end
module Cooltrainer::DistorteD; end
module Cooltrainer::DistorteD::Molecule; end
module Cooltrainer::DistorteD::Molecule::Text
#TODO: Generate separate images per-size to stop text being blurry from resizing.
include Cooltrainer::DistorteD::Technology::TTFunk
include Cooltrainer::DistorteD::Technology::Pango
include Cooltrainer::DistorteD::Technology::Vips::Save
# Track supported fonts by codepage.
# Avoid renaming these from the original archives / websites.
# Try not to go nuts here bloating the size of our Gem for a
# very niche feature, but I want to ensure good coverage too.
#
# Treat codepage 8859 documents as codepage 1252 to avoid breaking smart-
# quotes and other printable chars in 1252 that are control chars in 8859.
# https://encoding.spec.whatwg.org/#names-and-labels
#
# Numeric key for UTF-8 is codepage 65001 like Win32:
# https://docs.microsoft.com/en-us/windows/win32/intl/code-page-identifiers
#
# Shorthand Symbol key => TTF basename (with file extension) of each bundled font.
# Every Symbol listed in CODEPAGE_FONT must also be a key here, because the
# same Symbol is used to look up both a font's codepage and its filename.
FONT_FILENAME = {
  :anonpro => 'Anonymous Pro.ttf'.freeze,
  :anonpro_b => 'Anonymous Pro B.ttf'.freeze,
  :anonpro_bi => 'Anonymous Pro BI.ttf'.freeze,
  :anonpro_i => 'Anonymous Pro I.ttf'.freeze,
  :lessperfectdosvga => 'LessPerfectDOSVGA.ttf'.freeze,
  # Key must be spelled exactly like the CODEPAGE_FONT entry (`dos`, not `dis`).
  :moreperfectdosvga => 'MorePerfectDOSVGA.ttf'.freeze,
  :perfectdosvgawin => 'Perfect DOS VGA 437 Win.ttf'.freeze,
  :mona => 'mona.ttf'.freeze,
  :perfectdosvga => 'Perfect DOS VGA 437.ttf'.freeze,
  :profont => 'ProFontWindows.ttf'.freeze,
  :profont_b => 'ProFontWindows-Bold.ttf'.freeze,
}
# Certain fonts are more suitable for certain codepages,
# so track each codepage's available fonts…
CODEPAGE_FONT = {
  # Numeric codepage => shorthand font Symbols, in order of preference
  # (the first entry is the one `vips_font` picks).
  65001 => %i[anonpro anonpro_b anonpro_bi anonpro_i],
  1252 => %i[lessperfectdosvga moreperfectdosvga perfectdosvgawin],
  932 => %i[mona],
  850 => %i[profont profont_b],
  437 => %i[perfectdosvga],
}
# TODO: Figure out what to do here. ProFont isn't suitable for many (most?) Encodings,
# but the gem would be way way too big if I tried to include coverage for everything.
# Using system fonts is probably the solution, but I need to be able to get a path to them for VIPS.
#
# Any codepage without an explicit entry above falls back to ProFont.
CODEPAGE_FONT.default = %i[profont profont_b]
# …as well as the inverse, the numeric codepage for each font:
# Built by walking every explicit CODEPAGE_FONT entry (the Hash default is not
# iterated) and recording the owning codepage under each font Symbol.
# Lookups of a Symbol that no codepage lists return the Hash default, an empty Array.
FONT_CODEPAGE = Hash.new([]).tap do |codepage_by_font|
  self::CODEPAGE_FONT.each_pair do |codepage, fonts|
    fonts.each do |font|
      codepage_by_font[font] = codepage
    end
  end
end
# Maps the text/plain and text/x-nfo media types to a Hash of options,
# each holding a single :encoding option whose default is nil.
LOWER_WORLD = ['text/plain', 'text/x-nfo'].each_with_object({}) { |media_type, options_by_type|
  options_by_type[::CHECKING::YOU::OUT::from_ietf_media_type(media_type)] = {
    :encoding => Cooltrainer::Compound.new(:encoding, valid: Encoding, blurb: 'Character encoding used in this document. (default: automatically detect)', default: nil),
  }
}
# The text/plain and text/x-nfo media types map to nil (no options), and every
# key of Vips::Save::OUTER_LIMITS maps to the text-rendering options
# :spacing, :dpi, and :font (the values from Vips::Save are replaced, not merged).
OUTER_LIMITS = {
  ::CHECKING::YOU::OUT::from_ietf_media_type('text/plain') => nil,
  ::CHECKING::YOU::OUT::from_ietf_media_type('text/x-nfo') => nil,
}.merge(
  Cooltrainer::DistorteD::Technology::Vips::Save::OUTER_LIMITS.dup.transform_values { |_save_options|
    {
      :spacing => Cooltrainer::Compound.new(:spacing, blurb: 'Document-wide character spacing style.', valid: Set[:monospace, :proportional]),
      :dpi => Cooltrainer::Compound.new(:dpi, blurb: 'Dots per inch for text rendering.', valid: Integer, default: 144),
      :font => Cooltrainer::Compound.new(:font, blurb: 'Font to use for text rendering.', valid: self::FONT_FILENAME.keys.to_set),
    }
  }
)
# Define one instance method per LOWER_WORLD media type, named by that type's
# `distorted_file_method`. Each generated method takes a destination root and
# a change, and hands the change's first destination path to `copy_file`.
# (A leftover debug `p` of the paths was removed so writing files no longer
# prints to stdout.)
self::LOWER_WORLD.each_key { |t|
  define_method(t.distorted_file_method) { |dest_root, change|
    copy_file(change.paths(dest_root).first)
  }
}
# Return a Pango Markup escaped version of the document.
#
# Every character of the UTF-8 document is escaped individually. When
# `font_spacing` is :monospace the escaped text is additionally wrapped in
# Pango Markup's <tt> (monospace) element; otherwise it is returned bare.
#
# @return [String] Pango Markup for the whole document.
def to_pango
  # https://developer.gnome.org/glib/stable/glib-Simple-XML-Subset-Parser.html#g-markup-escape-text
  escaped = text_file_utf8_content.map { |c| g_markup_escape_char(c) }
  return escaped unless font_spacing == :monospace
  # https://developer.gnome.org/pango/stable/pango-Markup.html
  "<tt>#{escaped}</tt>"
end
protected
# Returns a boolean guess of whether our document uses box-drawing characters of a given Encoding.
#
# "Uses box-drawing characters" means: somewhere in the document there is a run
# of seven consecutive identical codepoints, and that codepoint is listed in
# Encoding::OOBE for the given Encoding.
#
# @param encoding [Encoding] the Encoding to re-interpret the raw bytes as.
# @return [Boolean] false when Encoding::OOBE has no entry for `encoding`.
def oobe?(encoding)
  box_drawing = Encoding::OOBE.fetch(encoding, nil)
  return false if box_drawing.nil?
  # Re-interpret our raw source file's bytes as the given Encoding. This must
  # happen on a copy: String#force_encoding re-tags its receiver in place, and
  # `text_file_content` is memoized and shared with every other reader.
  reinterpreted = text_file_content.dup.force_encoding(encoding)
  # Take the codepoints seven at a time and stop at the first septagram that
  # is one box-drawing character repeated.
  reinterpreted.each_codepoint.each_cons(7).any? { |septagram|
    septagram.uniq.length == 1 && box_drawing.include?(septagram.first)
  }
end
# Memoized raw contents of the file at `path`, read in one shot.
#
# @return [String]
def text_file_content
  return @text_file_content if @text_file_content
  # VIPS makes us provide the text content as a single variable,
  # so we may as well just one-shot File.read() it into memory.
  # https://kunststube.net/encoding/
  @text_file_content = File.read(path)
end
# Memoized UTF-8 version of the document.
#
# When `text_file_encoding` is already UTF-8 the raw content is returned as-is;
# otherwise it is converted to UTF-8, replacing undefined and invalid sequences.
#
# @return [String]
def text_file_utf8_content
  return @text_file_utf8_content if @text_file_utf8_content
  source_encoding = text_file_encoding
  @text_file_utf8_content =
    if source_encoding == Encoding::UTF_8
      text_file_content
    else
      # https://ruby-doc.org/core/Encoding/Converter.html#method-c-new
      converter = Encoding::Converter.new(
        source_encoding,
        Encoding::UTF_8,
        undef: :replace,
        invalid: :replace,
      )
      converter.convert(text_file_content)
    end
end
# Memoized best guess at the source document's character Encoding.
#
# @return [Encoding] the Encoding ICU detected; Encoding::IBM437 when an NFO-type
#   file detected as ISO-8859-1 looks like it uses IBM437 box-drawing characters;
#   or Encoding::UTF_8 when an ArgumentError is raised while detecting.
def text_file_encoding
  # It's not easy or even possible in some cases to tell the "true" codepage
  # we should use for any given text document, but using character detection
  # is worth a shot if the user gave us nothing.
  #
  # The FFI-ICU::CharDet result responds to `name` with the detected charset's
  # name, which Encoding::find turns into a Ruby Encoding.
  @text_file_encoding ||= begin
    detected = Encoding::find(ICU::CharDet.detect(text_file_content).name)
    # Fix files with ASCII/ANSI art (like NFOs) from being detected as ISO-8859-1
    # when they should be IBM437 to display properly.
    #
    # These checks short-circuit in order, so the whole-document scan done by
    # `oobe?` only runs once the two cheap conditions already hold.
    looks_like_ibm437 =
      type_mars.include?(::CHECKING::YOU::OUT::from_ietf_media_type('text/x-nfo')) && # Only certain source file types.
      detected == Encoding::ISO_8859_1 && # Only if ICU detects ISO-8859-1.
      oobe?(Encoding::IBM437) # Does this look like IBM437 based on box-drawing characters?
    looks_like_ibm437 ? Encoding::IBM437 : detected
  rescue ArgumentError
    # Raised by Encoding::find if we give it an unknown Encoding name.
    Encoding::UTF_8
  end
end
# Shorthand Symbol key of the font chosen for this document: the first font
# CODEPAGE_FONT lists for the document Encoding's `code_page` (a nil Encoding
# looks up nil, which yields the CODEPAGE_FONT default).
#
# @return [Symbol]
def vips_font
  codepage = text_file_encoding&.code_page
  CODEPAGE_FONT[codepage].first
end
# Render the document to an image with Vips::Image.text.
#
# @param change [#encoding, #dpi] `encoding` (an Encoding, an Encoding name, or nil)
#   overrides the detected document Encoding when non-nil; `dpi` is passed to
#   Vips after `to_i`.
# @return whatever Vips::Image.text returns.
# @raise [ArgumentError] from Encoding::find when `change.encoding` names an unknown Encoding.
def to_vips_image(change)
  # Load font metadata directly from the file so we don't have to
  # duplicate it here to feed to Vips/Pango.
  #
  # irb(main)> font_meta.name.font_name
  # => ["Perfect DOS VGA 437", "\x00P\x00e\x00r\x00f\x00e\x00c\x00t\x00 \x00D\x00O\x00S\x00 \x00V\x00G\x00A\x00 \x004\x003\x007"]
  # irb(main)> font_meta.name.font_family
  # => ["Perfect DOS VGA 437", "\x00P\x00e\x00r\x00f\x00e\x00c\x00t\x00 \x00D\x00O\x00S\x00 \x00V\x00G\x00A\x00 \x004\x003\x007"]
  # irb(main)> font_meta.name.font_subfamily
  # => ["Regular", "\x00R\x00e\x00g\x00u\x00l\x00a\x00r"]
  # irb(main)> font_meta.name.postscript_name
  # => "PerfectDOSVGA437"
  # irb(main)> font_meta.line_gap
  # => 0

  # It would be gross to pass this through so many methods in this mostly-untouched-since-0.5 code,
  # so just stick these directly into the instance variables used for memoization.
  unless change.encoding.nil?
    # TODO: Turning the String arguments into an Encoding should be a centralized thing
    # of some sort, probably in Cooltrainer::Compound.
    requested = change.encoding.is_a?(Encoding) ? change.encoding : Encoding::find(change.encoding)
    unless requested == @text_file_encoding
      @text_file_encoding = requested
      # The memoized UTF-8 text was converted from the previous Encoding,
      # so drop it and let it be rebuilt from the raw content on next use.
      @text_file_utf8_content = nil
    end
  end

  # https://libvips.github.io/libvips/API/current/libvips-create.html#vips-text
  Vips::Image.text(
    # This string must be well-escaped Pango Markup:
    # https://developer.gnome.org/pango/stable/pango-Markup.html
    # However the official function for escaping text is
    # not implemented in Ruby GLib, so we have to do it ourselves.
    to_pango,
    # String absolute path to TTF
    :fontfile => font_path,
    # It's not enough to just specify the TTF path;
    # we must also specify a font family, subfamily, and size.
    :font => "#{font_name} 16",
    # Space between lines (in Points).
    :spacing => to_ttfunk.line_gap,
    :justify => true, # Requires libvips 8.8
    :dpi => change.dpi&.to_i,
  )
end
# Return the String absolute path to the TTF file
#
# The path is GEM_ROOT/font/<font codepage>/<font filename>.
#
# @return [String]
def font_path
  segments = [
    Cooltrainer::DistorteD::GEM_ROOT, # DistorteD-Floor
    'font'.freeze,
    font_codepage.to_s,
    font_filename,
  ]
  File.join(*segments)
end
# Returns the numeric representation of the codepage
# covered by our font.
#
# Looks up `vips_font` in FONT_CODEPAGE (honoring that Hash's default for
# unknown fonts) and stringifies the result.
#
# @return [String]
def font_codepage
  codepage = FONT_CODEPAGE[vips_font]
  codepage.to_s
end
# Returns the basename (with file extension) of our font.
#
# @return [String, nil] the FONT_FILENAME entry for `vips_font`,
#   or nil when that Symbol has no entry.
def font_filename
  chosen_font = vips_font
  FONT_FILENAME[chosen_font]
end
end # Text