# An Encoding instance represents a character encoding usable in Ruby. It is
# defined as a constant under the Encoding namespace. It has a name and
# optionally, aliases:
#
#     Encoding::ISO_8859_1.name
#     #=> "ISO-8859-1"
#
#     Encoding::ISO_8859_1.names
#     #=> ["ISO-8859-1", "ISO8859-1"]
#
# Ruby methods dealing with encodings return or accept Encoding instances as
# arguments (when a method accepts an Encoding instance as an argument, it can
# be passed an Encoding name or alias instead).
#
#     "some string".encoding
#     #=> #<Encoding:UTF-8>
#
#     string = "some string".encode(Encoding::ISO_8859_1)
#     #=> "some string"
#     string.encoding
#     #=> #<Encoding:ISO-8859-1>
#
#     "some string".encode "ISO-8859-1"
#     #=> "some string"
#
# Encoding::ASCII_8BIT is a special encoding that is usually used for a byte
# string, not a character string. But as the name insists, its characters in the
# range of ASCII are considered as ASCII characters. This is useful when you
# use ASCII-8BIT characters with other ASCII compatible characters.
#
# ## Changing an encoding
#
# The associated Encoding of a String can be changed in two different ways.
#
# First, it is possible to set the Encoding of a string to a new Encoding
# without changing the internal byte representation of the string, with
# String#force_encoding. This is how you can tell Ruby the correct encoding of a
# string.
#
#     string
#     #=> "R\xC3\xA9sum\xC3\xA9"
#     string.encoding
#     #=> #<Encoding:ISO-8859-1>
#     string.force_encoding(Encoding::UTF_8)
#     #=> "R\u00E9sum\u00E9"
#
# Second, it is possible to transcode a string, i.e. translate its internal byte
# representation to another encoding. Its associated encoding is also set to the
# other encoding. See String#encode for the various forms of transcoding, and
# the Encoding::Converter class for additional control over the transcoding
# process.
#
#     string
#     #=> "R\u00E9sum\u00E9"
#     string.encoding
#     #=> #<Encoding:UTF-8>
#     string = string.encode!(Encoding::ISO_8859_1)
#     #=> "R\xE9sum\xE9"
#     string.encoding
#     #=> #<Encoding:ISO-8859-1>
#
# ## Script encoding
#
# All Ruby script code has an associated Encoding which any String literal
# created in the source code will be associated with.
#
# The default script encoding is Encoding::UTF_8 after v2.0, but it can be
# changed by a magic comment on the first line of the source code file (or
# second line, if there is a shebang line on the first). The comment must
# contain the word `coding` or `encoding`, followed by a colon, space and the
# Encoding name or alias:
#
#     # encoding: UTF-8
#
#     "some string".encoding
#     #=> #<Encoding:UTF-8>
#
# The `__ENCODING__` keyword returns the script encoding of the file in which
# the keyword is written:
#
#     # encoding: ISO-8859-1
#
#     __ENCODING__
#     #=> #<Encoding:ISO-8859-1>
#
# `ruby -K` will change the default locale encoding, but this is not
# recommended. Ruby source files should declare their script encoding by a magic
# comment even when they only depend on US-ASCII strings or regular expressions.
#
# ## Locale encoding
#
# The default encoding of the environment. Usually derived from locale.
#
# see Encoding.locale_charmap, Encoding.find('locale')
#
# ## Filesystem encoding
#
# The default encoding of strings from the filesystem of the environment. This
# is used for strings of file names or paths.
#
# see Encoding.find('filesystem')
#
# ## External encoding
#
# Each IO object has an external encoding which indicates the encoding that Ruby
# will use to read its data. By default Ruby sets the external encoding of an IO
# object to the default external encoding. The default external encoding is set
# by locale encoding or the interpreter `-E` option. Encoding.default_external
# returns the current value of the external encoding.
#
#     ENV["LANG"]
#     #=> "UTF-8"
#     Encoding.default_external
#     #=> #<Encoding:UTF-8>
#
#     $ ruby -E ISO-8859-1 -e "p Encoding.default_external"
#     #<Encoding:ISO-8859-1>
#
#     $ LANG=C ruby -e 'p Encoding.default_external'
#     #<Encoding:US-ASCII>
#
# The default external encoding may also be set through
# Encoding.default_external=, but you should not do this as strings created
# before and after the change will have inconsistent encodings. Instead use
# `ruby -E` to invoke ruby with the correct external encoding.
#
# When you know that the actual encoding of the data of an IO object is not the
# default external encoding, you can reset its external encoding with
# IO#set_encoding or set it at IO object creation (see IO.new options).
#
# ## Internal encoding
#
# To process the data of an IO object which has an encoding different from its
# external encoding, you can set its internal encoding. Ruby will use this
# internal encoding to transcode the data when it is read from the IO object.
#
# Conversely, when data is written to the IO object it is transcoded from the
# internal encoding to the external encoding of the IO object.
#
# The internal encoding of an IO object can be set with IO#set_encoding or at IO
# object creation (see IO.new options).
#
# The internal encoding is optional and when not set, the Ruby default internal
# encoding is used. If not explicitly set this default internal encoding is
# `nil` meaning that by default, no transcoding occurs.
#
# The default internal encoding can be set with the interpreter option `-E`.
# Encoding.default_internal returns the current internal encoding.
#
#     $ ruby -e 'p Encoding.default_internal'
#     nil
#
#     $ ruby -E ISO-8859-1:UTF-8 -e "p [Encoding.default_external, \
#       Encoding.default_internal]"
#     [#<Encoding:ISO-8859-1>, #<Encoding:UTF-8>]
#
# The default internal encoding may also be set through
# Encoding.default_internal=, but you should not do this as strings created
# before and after the change will have inconsistent encodings. Instead use
# `ruby -E` to invoke ruby with the correct internal encoding.
#
# ## IO encoding example
#
# In the following example a UTF-8 encoded string "R\u00E9sum\u00E9" is
# transcoded for output to ISO-8859-1 encoding, then read back in and
# transcoded to UTF-8:
#
#     string = "R\u00E9sum\u00E9"
#
#     open("transcoded.txt", "w:ISO-8859-1") do |io|
#       io.write(string)
#     end
#
#     puts "raw text:"
#     p File.binread("transcoded.txt")
#     puts
#
#     open("transcoded.txt", "r:ISO-8859-1:UTF-8") do |io|
#       puts "transcoded text:"
#       p io.read
#     end
#
# While writing the file, the internal encoding is not specified as it is only
# necessary for reading. While reading the file both the internal and external
# encoding must be specified to obtain the correct result.
#
#     $ ruby t.rb
#     raw text:
#     "R\xE9sum\xE9"
#
#     transcoded text:
#     "R\u00E9sum\u00E9"
#
class Encoding < Object
  # Returns the hash of available encoding alias and original encoding name.
  #
  #     Encoding.aliases
  #     #=> {"BINARY"=>"ASCII-8BIT", "ASCII"=>"US-ASCII", "ANSI_X3.4-1968"=>"US-ASCII",
  #           "SJIS"=>"Windows-31J", "eucJP"=>"EUC-JP", "CP932"=>"Windows-31J"}
  #
  def self.aliases: () -> ::Hash[String, String]

  # Checks the compatibility of two objects.
  #
  # If the objects are both strings they are compatible when they are
  # concatenatable. The encoding of the concatenated string will be returned if
  # they are compatible, nil if they are not.
  #
  #     Encoding.compatible?("\xa1".force_encoding("iso-8859-1"), "b")
  #     #=> #<Encoding:ISO-8859-1>
  #
  #     Encoding.compatible?(
  #       "\xa1".force_encoding("iso-8859-1"),
  #       "\xa1\xa1".force_encoding("euc-jp"))
  #     #=> nil
  #
  # If the objects are non-strings their encodings are compatible when they have
  # an encoding and:
  # *   Either encoding is US-ASCII compatible
  # *   One of the encodings is a 7-bit encoding
  #
  def self.compatible?: (untyped obj1, untyped obj2) -> Encoding?

  # Returns default external encoding.
  #
  # The default external encoding is used by default for strings created from the
  # following locations:
  #
  # *   CSV
  # *   File data read from disk
  # *   SDBM
  # *   StringIO
  # *   Zlib::GzipReader
  # *   Zlib::GzipWriter
  # *   String#inspect
  # *   Regexp#inspect
  #
  # While strings created from these locations will have this encoding, the
  # encoding may not be valid. Be sure to check String#valid_encoding?.
  #
  # File data written to disk will be transcoded to the default external encoding
  # when written, if default_internal is not nil.
  #
  # The default external encoding is initialized by the -E option. If -E isn't
  # set, it is initialized to UTF-8 on Windows and the locale on other operating
  # systems.
  #
  def self.default_external: () -> Encoding

  # Sets default external encoding. You should not set Encoding::default_external
  # in ruby code as strings created before changing the value may have a different
  # encoding from strings created after the value was changed. Instead, you should
  # use `ruby -E` to invoke ruby with the correct default_external.
  #
  # See Encoding::default_external for information on how the default external
  # encoding is used.
  #
  def self.default_external=: (String arg0) -> String
                            | (Encoding arg0) -> Encoding

  # Returns default internal encoding. Strings will be transcoded to the default
  # internal encoding in the following places if the default internal encoding is
  # not nil:
  #
  # *   CSV
  # *   Etc.sysconfdir and Etc.systmpdir
  # *   File data read from disk
  # *   File names from Dir
  # *   Integer#chr
  # *   String#inspect and Regexp#inspect
  # *   Strings returned from Readline
  # *   Strings returned from SDBM
  # *   Time#zone
  # *   Values from ENV
  # *   Values in ARGV including $PROGRAM_NAME
  #
  # Additionally String#encode and String#encode! use the default internal
  # encoding if no encoding is given.
  #
  # The script encoding (__ENCODING__), not default_internal, is used as the
  # encoding of created strings.
  #
  # Encoding::default_internal is initialized with -E option or nil otherwise.
  #
  def self.default_internal: () -> Encoding?

  # Sets default internal encoding or removes default internal encoding when
  # passed nil. You should not set Encoding::default_internal in ruby code as
  # strings created before changing the value may have a different encoding from
  # strings created after the change. Instead you should use `ruby -E` to invoke
  # ruby with the correct default_internal.
  #
  # See Encoding::default_internal for information on how the default internal
  # encoding is used.
  #
  def self.default_internal=: (String arg0) -> String?
                            | (Encoding arg0) -> Encoding?
                            | (nil arg0) -> nil

  # Search the encoding with specified *name*. *name* should be a string.
  #
  #     Encoding.find("US-ASCII")  #=> #<Encoding:US-ASCII>
  #
  # Names which this method accepts are encoding names and aliases, including
  # the following special aliases:
  #
  # "external"
  # :   default external encoding
  # "internal"
  # :   default internal encoding
  # "locale"
  # :   locale encoding
  # "filesystem"
  # :   filesystem encoding
  #
  # An ArgumentError is raised when there is no encoding with *name*. Only
  # `Encoding.find("internal")` instead returns nil when there is no encoding
  # named "internal", in other words, when Ruby has no default internal encoding.
  #
  # NOTE(review): the nil result described above is not reflected in the declared
  # return type (`Encoding`, not `Encoding?`) — confirm whether the signature
  # should be widened.
  #
  def self.find: (String | Encoding arg0) -> Encoding

  # Returns the list of loaded encodings.
  #
  #     Encoding.list
  #     #=> [#<Encoding:ASCII-8BIT>, #<Encoding:UTF-8>,
  #           #<Encoding:ISO-2022-JP (dummy)>]
  #
  #     Encoding.find("US-ASCII")
  #     #=> #<Encoding:US-ASCII>
  #
  #     Encoding.list
  #     #=> [#<Encoding:ASCII-8BIT>, #<Encoding:UTF-8>,
  #           #<Encoding:US-ASCII>, #<Encoding:ISO-2022-JP (dummy)>]
  #
  def self.list: () -> ::Array[Encoding]

  # Returns the list of available encoding names.
  #
  #     Encoding.name_list
  #     #=> ["US-ASCII", "ASCII-8BIT", "UTF-8",
  #           "ISO-8859-1", "Shift_JIS", "EUC-JP",
  #           "Windows-31J",
  #           "BINARY", "CP932", "eucJP"]
  #
  def self.name_list: () -> ::Array[String]

  # Returns whether ASCII-compatible or not.
  #
  #     Encoding::UTF_8.ascii_compatible?     #=> true
  #     Encoding::UTF_16BE.ascii_compatible?  #=> false
  #
  def ascii_compatible?: () -> bool

  # Returns true for dummy encodings. A dummy encoding is an encoding for which
  # character handling is not properly implemented. It is used for stateful
  # encodings.
  #
  #     Encoding::ISO_2022_JP.dummy?       #=> true
  #     Encoding::UTF_8.dummy?             #=> false
  #
  def dummy?: () -> bool

  # Returns a string which represents the encoding for programmers.
  #
  #     Encoding::UTF_8.inspect       #=> "#<Encoding:UTF-8>"
  #     Encoding::ISO_2022_JP.inspect #=> "#<Encoding:ISO-2022-JP (dummy)>"
  #
  def inspect: () -> String

  # Returns the name of the encoding.
  #
  #     Encoding::UTF_8.name      #=> "UTF-8"
  #
  def name: () -> String

  # Returns the list of name and aliases of the encoding.
  #
  #     Encoding::WINDOWS_31J.names  #=> ["Windows-31J", "CP932", "csWindows31J", "SJIS", "PCK"]
  #
  def names: () -> ::Array[String]

  # Returns a replicated encoding of *enc* whose name is *name*. The new encoding
  # should have the same byte structure as *enc*. If *name* is used by another
  # encoding, raise ArgumentError.
  #
  def replicate: (String name) -> Encoding

  # Returns the name of the encoding.
  #
  #     Encoding::UTF_8.name      #=> "UTF-8"
  #
  def to_s: () -> String
end

Encoding::ANSI_X3_4_1968: Encoding

Encoding::ASCII: Encoding

Encoding::ASCII_8BIT: Encoding

Encoding::BIG5: Encoding

Encoding::BIG5_HKSCS: Encoding

Encoding::BIG5_HKSCS_2008: Encoding

Encoding::BIG5_UAO: Encoding

Encoding::BINARY: Encoding

Encoding::Big5: Encoding

Encoding::Big5_HKSCS: Encoding

Encoding::Big5_HKSCS_2008: Encoding

Encoding::Big5_UAO: Encoding

Encoding::CESU_8: Encoding

Encoding::CP1250: Encoding

Encoding::CP1251: Encoding

Encoding::CP1252: Encoding

Encoding::CP1253: Encoding

Encoding::CP1254: Encoding

Encoding::CP1255: Encoding

Encoding::CP1256: Encoding

Encoding::CP1257: Encoding

Encoding::CP1258: Encoding

Encoding::CP437: Encoding

Encoding::CP50220: Encoding

Encoding::CP50221: Encoding

Encoding::CP51932: Encoding

Encoding::CP65000: Encoding

Encoding::CP65001: Encoding

Encoding::CP737: Encoding

Encoding::CP775: Encoding

Encoding::CP850: Encoding

Encoding::CP852: Encoding

Encoding::CP855: Encoding

Encoding::CP857: Encoding

Encoding::CP860: Encoding

Encoding::CP861: Encoding

Encoding::CP862: Encoding

Encoding::CP863: Encoding
# Encoding constants CP864 through UTF8_DOCOMO. Every entry is typed as an
# Encoding; several names appear under more than one spelling (for example
# EUC_JP, EUCJP and EucJP).
# NOTE(review): the alternate spellings presumably refer to the same encoding
# object at runtime — confirm against Encoding.aliases.

Encoding::CP864: Encoding

Encoding::CP865: Encoding

Encoding::CP866: Encoding

Encoding::CP869: Encoding

Encoding::CP874: Encoding

Encoding::CP878: Encoding

Encoding::CP932: Encoding

Encoding::CP936: Encoding

Encoding::CP949: Encoding

Encoding::CP950: Encoding

Encoding::CP951: Encoding

Encoding::CSWINDOWS31J: Encoding

Encoding::CsWindows31J: Encoding

Encoding::EBCDIC_CP_US: Encoding

Encoding::EMACS_MULE: Encoding

Encoding::EUCCN: Encoding

Encoding::EUCJP: Encoding

Encoding::EUCJP_MS: Encoding

Encoding::EUCKR: Encoding

Encoding::EUCTW: Encoding

Encoding::EUC_CN: Encoding

Encoding::EUC_JISX0213: Encoding

Encoding::EUC_JIS_2004: Encoding

Encoding::EUC_JP: Encoding

Encoding::EUC_JP_MS: Encoding

Encoding::EUC_KR: Encoding

Encoding::EUC_TW: Encoding

Encoding::Emacs_Mule: Encoding

Encoding::EucCN: Encoding

Encoding::EucJP: Encoding

Encoding::EucJP_ms: Encoding

Encoding::EucKR: Encoding

Encoding::EucTW: Encoding

Encoding::GB12345: Encoding

Encoding::GB18030: Encoding

Encoding::GB1988: Encoding

Encoding::GB2312: Encoding

Encoding::GBK: Encoding

Encoding::IBM037: Encoding

Encoding::IBM437: Encoding

Encoding::IBM737: Encoding

Encoding::IBM775: Encoding

Encoding::IBM850: Encoding

Encoding::IBM852: Encoding

Encoding::IBM855: Encoding

Encoding::IBM857: Encoding

Encoding::IBM860: Encoding

Encoding::IBM861: Encoding

Encoding::IBM862: Encoding

Encoding::IBM863: Encoding

Encoding::IBM864: Encoding

Encoding::IBM865: Encoding

Encoding::IBM866: Encoding

Encoding::IBM869: Encoding

Encoding::ISO2022_JP: Encoding

Encoding::ISO2022_JP2: Encoding

Encoding::ISO8859_1: Encoding

Encoding::ISO8859_10: Encoding

Encoding::ISO8859_11: Encoding

Encoding::ISO8859_13: Encoding

Encoding::ISO8859_14: Encoding

Encoding::ISO8859_15: Encoding

Encoding::ISO8859_16: Encoding

Encoding::ISO8859_2: Encoding

Encoding::ISO8859_3: Encoding

Encoding::ISO8859_4: Encoding

Encoding::ISO8859_5: Encoding

Encoding::ISO8859_6: Encoding

Encoding::ISO8859_7: Encoding

Encoding::ISO8859_8: Encoding

Encoding::ISO8859_9: Encoding

Encoding::ISO_2022_JP: Encoding

Encoding::ISO_2022_JP_2: Encoding

Encoding::ISO_2022_JP_KDDI: Encoding

Encoding::ISO_8859_1: Encoding

Encoding::ISO_8859_10: Encoding

Encoding::ISO_8859_11: Encoding

Encoding::ISO_8859_13: Encoding

Encoding::ISO_8859_14: Encoding

Encoding::ISO_8859_15: Encoding

Encoding::ISO_8859_16: Encoding

Encoding::ISO_8859_2: Encoding

Encoding::ISO_8859_3: Encoding

Encoding::ISO_8859_4: Encoding

Encoding::ISO_8859_5: Encoding

Encoding::ISO_8859_6: Encoding

Encoding::ISO_8859_7: Encoding

Encoding::ISO_8859_8: Encoding

Encoding::ISO_8859_9: Encoding

Encoding::KOI8_R: Encoding

Encoding::KOI8_U: Encoding

Encoding::MACCENTEURO: Encoding

Encoding::MACCROATIAN: Encoding

Encoding::MACCYRILLIC: Encoding

Encoding::MACGREEK: Encoding

Encoding::MACICELAND: Encoding

Encoding::MACJAPAN: Encoding

Encoding::MACJAPANESE: Encoding

Encoding::MACROMAN: Encoding

Encoding::MACROMANIA: Encoding

Encoding::MACTHAI: Encoding

Encoding::MACTURKISH: Encoding

Encoding::MACUKRAINE: Encoding

Encoding::MacCentEuro: Encoding

Encoding::MacCroatian: Encoding

Encoding::MacCyrillic: Encoding

Encoding::MacGreek: Encoding

Encoding::MacIceland: Encoding

Encoding::MacJapan: Encoding

Encoding::MacJapanese: Encoding

Encoding::MacRoman: Encoding

Encoding::MacRomania: Encoding

Encoding::MacThai: Encoding

Encoding::MacTurkish: Encoding

Encoding::MacUkraine: Encoding

Encoding::PCK: Encoding

Encoding::SHIFT_JIS: Encoding

Encoding::SJIS: Encoding

Encoding::SJIS_DOCOMO: Encoding

Encoding::SJIS_DoCoMo: Encoding

Encoding::SJIS_KDDI: Encoding

Encoding::SJIS_SOFTBANK: Encoding

Encoding::SJIS_SoftBank: Encoding

Encoding::STATELESS_ISO_2022_JP: Encoding

Encoding::STATELESS_ISO_2022_JP_KDDI: Encoding

Encoding::Shift_JIS: Encoding

Encoding::Stateless_ISO_2022_JP: Encoding

Encoding::Stateless_ISO_2022_JP_KDDI: Encoding

Encoding::TIS_620: Encoding

Encoding::UCS_2BE: Encoding

Encoding::UCS_4BE: Encoding

Encoding::UCS_4LE: Encoding

Encoding::US_ASCII: Encoding

Encoding::UTF8_DOCOMO: Encoding
Encoding::UTF8_DoCoMo: Encoding

Encoding::UTF8_KDDI: Encoding

Encoding::UTF8_MAC: Encoding

Encoding::UTF8_SOFTBANK: Encoding

Encoding::UTF8_SoftBank: Encoding

Encoding::UTF_16: Encoding

Encoding::UTF_16BE: Encoding

Encoding::UTF_16LE: Encoding

Encoding::UTF_32: Encoding

Encoding::UTF_32BE: Encoding

Encoding::UTF_32LE: Encoding

Encoding::UTF_7: Encoding

Encoding::UTF_8: Encoding

Encoding::UTF_8_HFS: Encoding

Encoding::UTF_8_MAC: Encoding

Encoding::WINDOWS_1250: Encoding

Encoding::WINDOWS_1251: Encoding

Encoding::WINDOWS_1252: Encoding

Encoding::WINDOWS_1253: Encoding

Encoding::WINDOWS_1254: Encoding

Encoding::WINDOWS_1255: Encoding

Encoding::WINDOWS_1256: Encoding

Encoding::WINDOWS_1257: Encoding

Encoding::WINDOWS_1258: Encoding

Encoding::WINDOWS_31J: Encoding

Encoding::WINDOWS_874: Encoding

Encoding::Windows_1250: Encoding

Encoding::Windows_1251: Encoding

Encoding::Windows_1252: Encoding

Encoding::Windows_1253: Encoding

Encoding::Windows_1254: Encoding

Encoding::Windows_1255: Encoding

Encoding::Windows_1256: Encoding

Encoding::Windows_1257: Encoding

Encoding::Windows_1258: Encoding

Encoding::Windows_31J: Encoding

Encoding::Windows_874: Encoding

# Encoding conversion class.
#
class Encoding::Converter < Object
end

# AFTER_OUTPUT
#
# Stop converting after some output is complete but before all of the input was
# consumed. See primitive_convert for an example.
#
Encoding::Converter::AFTER_OUTPUT: Integer

# CRLF_NEWLINE_DECORATOR
#
# Decorator for converting LF to CRLF
#
Encoding::Converter::CRLF_NEWLINE_DECORATOR: Integer

# CR_NEWLINE_DECORATOR
#
# Decorator for converting LF to CR
#
Encoding::Converter::CR_NEWLINE_DECORATOR: Integer

# INVALID_MASK
#
# Mask for invalid byte sequences
#
Encoding::Converter::INVALID_MASK: Integer

# INVALID_REPLACE
#
# Replace invalid byte sequences
#
Encoding::Converter::INVALID_REPLACE: Integer

# PARTIAL_INPUT
#
# Indicates the source may be part of a larger string. See primitive_convert
# for an example.
#
Encoding::Converter::PARTIAL_INPUT: Integer

# UNDEF_HEX_CHARREF
#
# Replace byte sequences that are undefined in the destination encoding with an
# XML hexadecimal character reference. This is valid for XML conversion.
#
Encoding::Converter::UNDEF_HEX_CHARREF: Integer

# UNDEF_MASK
#
# Mask for a valid character in the source encoding but no related character(s)
# in destination encoding.
#
Encoding::Converter::UNDEF_MASK: Integer

# UNDEF_REPLACE
#
# Replace byte sequences that are undefined in the destination encoding.
#
Encoding::Converter::UNDEF_REPLACE: Integer

# UNIVERSAL_NEWLINE_DECORATOR
#
# Decorator for converting CRLF and CR to LF
#
Encoding::Converter::UNIVERSAL_NEWLINE_DECORATOR: Integer

# XML_ATTR_CONTENT_DECORATOR
#
# Escape as XML AttValue
#
Encoding::Converter::XML_ATTR_CONTENT_DECORATOR: Integer

# XML_ATTR_QUOTE_DECORATOR
#
# Escape as XML AttValue
#
# NOTE(review): this description is identical to the one on
# XML_ATTR_CONTENT_DECORATOR; presumably this decorator handles the quotes
# surrounding the attribute value — confirm against the Encoding::Converter docs.
#
Encoding::Converter::XML_ATTR_QUOTE_DECORATOR: Integer

# XML_TEXT_DECORATOR
#
# Escape as XML CharData
#
Encoding::Converter::XML_TEXT_DECORATOR: Integer

# Raised by Encoding and String methods when the source encoding is incompatible
# with the target encoding.
#
class Encoding::CompatibilityError < EncodingError
end

# Raised by transcoding methods when a named encoding does not correspond with a
# known converter.
#
class Encoding::ConverterNotFoundError < EncodingError
end

# Raised by Encoding and String methods when the string being transcoded
# contains a byte invalid for either the source or target encoding.
#
class Encoding::InvalidByteSequenceError < EncodingError
end

# Raised by Encoding and String methods when a transcoding operation fails.
#
class Encoding::UndefinedConversionError < EncodingError
end