# code:
# * George Moschovitis
# * Anastasios Koutoumanos
# * Elias Karakoulakis
#
# (c) 2004 Navel, all rights reserved.
# $Id: string.rb 106 2004-10-25 11:31:11Z gmosx $

require "uri"

module N;

# = StringUtils
#
# General string utilities collection.
#
# === Design:
#
# Implemented as a module to avoid class pollution. You can
# still use Ruby's advanced features to include the module in
# your class. Passing the object to act upon allows checking
# for nil, which isn't possible if you use self.
#
# === TODO:
#
# - implement a method that returns easy to remember
#   pseudo-random strings
# - add aliases for those methods in Kernel.
#
module StringUtils

  # Character -> greeklish replacement table used by to_greeklish.
  #
  # NOTE(review): the keys look like ISO-8859-7 (Greek) bytes rendered
  # as Latin-1 characters -- confirm the encoding this file is meant to
  # be stored in before relying on the table.
  #
  # The accented capital epsilon entry ("¸") used to be a second,
  # duplicated "Å" key, which left that character unmapped.
  @@map_to_greeklish = {
    "á" => "a", "Á" => "A", "Ü" => "a", "¶" => "A",
    "â" => "b", "Â" => "B",
    "ã" => "g", "Ã" => "G",
    "ä" => "d", "Ä" => "D",
    "å" => "e", "Å" => "E", "Ý" => "e", "¸" => "E",
    "æ" => "z", "Æ" => "Z",
    "ç" => "h", "Ç" => "H", "Þ" => "h", "¹" => "H",
    "è" => "8", "È" => "8",
    "é" => "i", "É" => "I", "ß" => "i", "º" => "I",
    "ê" => "k", "Ê" => "K",
    "ë" => "l", "Ë" => "L",
    "ì" => "m", "Ì" => "M",
    "í" => "n", "Í" => "N",
    "î" => "3", "Î" => "3",
    "ï" => "o", "Ï" => "O", "ü" => "o", "¼" => "O",
    "ð" => "p", "Ð" => "P",
    "ñ" => "r", "Ñ" => "R",
    "ò" => "s", "ó" => "s", "Ó" => "S",
    "ô" => "t", "Ô" => "T",
    "õ" => "y", "Õ" => "Y", "ý" => "y", "¾" => "Y",
    "ö" => "f", "Ö" => "F",
    "÷" => "x", "×" => "X",
    "ø" => "ps", "Ø" => "PS",
    "ù" => "w", "Ù" => "W", "þ" => "w", "¿" => "W"
  }

  # Indexes into a rule pair as consumed by rewrite.
  MATCH = 0
  REWRITE = 1

  # Matches a trailing ".ext"; the extension is capture group 1.
  EXTENSION_REGEX = /\.(\w+)$/

  # Convert the input string to greeklish.
  #
  # === Input:
  # the string to convert
  #
  # === Output:
  # a new string where every character found in the greeklish
  # table is replaced; all other characters are kept as they are.
  # returns nil if the input is nil.
  #
  def self.to_greeklish(input)
    return nil unless input

    # gmosx: also parse new lines (/m lets "." match "\n")
    input.gsub(/./m) { |ch| @@map_to_greeklish.fetch(ch, ch) }
  end

  # Move this in String class?
  #
  # Tests a string for a valid value (non nil, not empty)
  #
  def self.valid?(string)
    !(string.nil? || string.empty?)
  end

  # Returns a short abstract of long strings: the string is cut at
  # the first space found at or after position 'count' - 1 (or at
  # 'count' if there is none) and the ellipsis is appended.
  #
  # === Input:
  # the string to shorten
  # count: the length threshold
  # force_cutoff: break forcibly at 'count' chars
  # ellipsis: the text appended to a shortened string
  #
  # === Output:
  # the shortened string, or the string itself when it is not
  # longer than 'count'.
  # returns nil if the string is nil or count < 2.
  #
  def self.head(string, count = 128, force_cutoff = false, ellipsis = "...")
    return nil unless string
    return nil if count < 2
    return string unless string.size > count

    cut_at = force_cutoff ? count : (string.index(" ", count - 1) || count)
    string.slice(0, cut_at).chomp(" ") + ellipsis
  end

  # Apply a set of rules (regular expression matches) to the
  # string
  #
  # === Requirements:
  # - the rules must be applied in order! So we cannot use a
  #   hash because the ordering is not guaranteed! we use an
  #   array instead.
  #
  # === Input:
  # the string to rewrite
  # the array containing rule-pairs (match, rewrite)
  #
  # === Output:
  # the rewritten string (a copy, the input is not modified)
  # returns nil if the string is nil.
  # raises ArgumentError if the rules are nil.
  #
  def self.rewrite(string, rules)
    return nil unless string

    # gmosx: helps to find bugs
    raise ArgumentError, "the rules parameter is nil" unless rules

    rewritten_string = string.dup
    rules.each do |rule|
      rewritten_string.gsub!(rule[MATCH], rule[REWRITE])
    end
    rewritten_string
  end

  # Enforces a maximum width of a string inside an
  # html container. If the string exceeds this maximum width
  # the string gets wrapped.
  #
  # The string is first split on the separator, then every piece
  # longer than the width is broken into chunks of at most 'width'
  # characters. Empty pieces (leading or repeated separators) are
  # dropped. The separator is matched literally, so it may contain
  # several characters or regular expression metacharacters.
  #
  # === Input:
  # the string to be wrapped
  # the enforced width
  # the separator used for wrapping
  #
  # === Output:
  # the wrapped string
  # returns nil if the string is nil.
  #
  # === Example:
  # text = "1111111111111111111111111"
  # text = N::StringUtils.wrap(text, 10, " ")
  # p text # => "1111111111 1111111111 11111"
  #
  # See the test cases to better understand the behaviour!
  #
  def self.wrap(string, width = 20, separator = " ")
    return nil unless string

    boundary = Regexp.new(Regexp.escape(separator.to_s))
    chunk = /.{1,#{width}}/m

    pieces = string.split(boundary).map { |piece| piece.scan(chunk) }
    pieces.flatten.join(separator)
  end

  # Extracts the extension from a path or filename.
  #
  # === Input:
  # the path to a file (or just a plain filename)
  #
  # === Output:
  # the extension of the filename
  # returns nil if there is no extension
  # the extension is DOWNCASED!
  #
  def self.extension_from_path(path)
    md = EXTENSION_REGEX.match(path)
    md && md[1].downcase
  end

  # Extracts the (file)name from a path or filename.
  #
  # === Input:
  # the path to a file (or just a plain filename)
  #
  # === Output:
  # the name part of the filename (the extension is stripped)
  # returns empty string if input is nil or empty (safer?)
  #
  def self.filename_from_path(path)
    # gmosx: "" is safer, no??? and it is logical too.
    return "" unless path

    # split yields [] for an empty path, so fall back to "" as well.
    path.split(EXTENSION_REGEX).first || ""
  end

  # Extracts the file part from a path or filename.
  #
  # === Input:
  # the path to a file (or just a plain filename)
  #
  # === Output:
  # the file part (the directory is stripped)
  # returns empty string if input is nil or has no file part (safer?)
  #
  def self.file_from_path(path)
    return "" unless path

    # split yields [] for "" and "/", so fall back to "" as well.
    path.split("/").last || ""
  end

  # Extracts the directory part from a path or filename.
  #
  # === Input:
  # the path to a file (or just a plain filename)
  #
  # === Output:
  # the directory part (the filename and the slash before it
  # are stripped)
  # returns empty string if input is nil (safer?)
  #
  def self.directory_from_path(path)
    return "" unless path

    file = self.file_from_path(path)
    # chomp("") would strip trailing newlines, so skip it when there
    # is no file part to remove.
    directory = file.empty? ? path : path.chomp(file)

    # chop drops the trailing "/"
    directory.chop
  end

  # Replace dangerous chars in filenames: spaces become "-",
  # the characters ! ' ( ) are removed and the result is
  # converted to greeklish.
  #
  # === Output:
  # the rationalized filename (a copy, the input is not modified)
  # returns nil if the filename is nil.
  #
  def self.rationalize_filename(filename)
    return nil unless filename

    # gmosx: rationalize a copy!!! (tr and delete return new strings)
    self.to_greeklish(filename.tr(" ", "-").delete("!'()"))
  end

  # Returns a random string. one possible use is
  # password initialization.
  #
  # NOTE(review): the characters are drawn with Kernel#rand, which
  # is not a cryptographically secure generator -- consider a secure
  # source if the result is used as a real credential.
  #
  # === Input:
  # the length of the string
  # a regular expression every single character must match
  #
  # === Output:
  # the random string
  # raises ArgumentError if char_re is not a Regexp or if no
  # single-byte character matches it.
  #
  def self.random(max_length = 8, char_re = /[\w\d]/)
    # gmosx: this is a nice example of input parameter checking.
    # this is NOT a real time called method so we can add this
    # check. Congrats to the author.
    raise ArgumentError, "char_re must be a regular expression!" unless char_re.is_a?(Regexp)

    # Collect the candidates once instead of drawing bytes until one
    # happens to match; this also exposes an unmatchable char_re.
    alphabet = (0..255).map { |code| code.chr }.select { |ch| ch =~ char_re }

    string = ""
    if alphabet.empty? && string.length < max_length
      raise ArgumentError, "char_re does not match any single-byte character"
    end

    string << alphabet[rand(alphabet.size)] while string.length < max_length
    string
  end

  # Converts unicode to ISO8859-7 (unfortunately Ruby is a little poor at i18n)
  # Useful for parsing xmls with libxml.
  #
  # Every codepoint above 127 is shifted down by 0x02d0 and the
  # resulting values are packed as single bytes.
  #
  # === Input:
  # the string to convert
  #
  # === Output:
  # the converted string
  #
  # === TODO:
  # - MORE TEST UNITS !!!!
  #
  def self.unicode_to_iso88597(string)
    codepoints = string.to_s.unpack("U*")
    codepoints.map { |cp| cp > 127 ? cp - 0x02d0 : cp }.pack("C*")
  end

  # Screen an IP address: the part after the last dot of every
  # comma separated address is replaced by "*".
  #
  # gmosx: copied this method from n1, check how it works!
  #
  # === Input:
  # a comma separated list of addresses (or nil)
  #
  # === Output:
  # the screened addresses joined by ", "
  # returns "*.*.*.*" if the address is nil.
  #
  def self.screen_ip_address(address)
    return "*.*.*.*" unless address

    screened = address.split(",").map do |hostip|
      # strip, so "a, b" input does not produce a doubled space
      hostip.strip.gsub(/\.[^\.]*$/, ".*")
    end
    screened.join(", ")
  end

end

end # module