/************************************************

  digest.c -

  $Author$
  created at: Fri May 25 08:57:27 JST 2001

  Copyright (C) 1995-2001 Yukihiro Matsumoto
  Copyright (C) 2001-2006 Akinori MUSHA

  $RoughId: digest.c,v 1.16 2001/07/13 15:38:27 knu Exp $
  $Id$

************************************************/

#include "digest.h"

/* Module and class handles; each one is assigned in InitVM_digest(). */
static VALUE rb_mDigest;
static VALUE rb_mDigest_Instance;
static VALUE rb_cDigest_Class;
static VALUE rb_cDigest_Base;

/* Method-name IDs, interned once in Init_digest() and used with rb_funcall(). */
static ID id_reset, id_update, id_finish, id_digest, id_hexdigest, id_digest_length;
/* ID under which an implementation class stores its metadata ivar; obtained from rb_id_metadata(). */
static ID id_metadata;

RUBY_EXTERN void Init_digest_base(void);

/*
 * Document-module: Digest
 *
 * This module provides a framework for message digest libraries.
 *
 * You may want to look at OpenSSL::Digest as it supports more algorithms.
 *
 * A cryptographic hash function is a procedure that takes data and returns a
 * fixed bit string: the hash value, also known as _digest_.  Hash functions
 * are also called one-way functions: it is easy to compute a digest from
 * a message, but it is infeasible to generate a message from a digest.
 *
 * == Examples
 *
 *   require 'digest'
 *
 *   # Compute a complete digest
 *   Digest::SHA256.digest 'message'       #=> "\xABS\n\x13\xE4Y..."
 *
 *   sha256 = Digest::SHA256.new
 *   sha256.digest 'message'               #=> "\xABS\n\x13\xE4Y..."
 *
 *   # Other encoding formats
 *   Digest::SHA256.hexdigest 'message'    #=> "ab530a13e459..."
 *   Digest::SHA256.base64digest 'message' #=> "q1MKE+RZFJgr..."
 *
 *   # Compute digest by chunks
 *   md5 = Digest::MD5.new
 *   md5.update 'message1'
 *   md5 << 'message2'                     # << is an alias for update
 *
 *   md5.hexdigest                         #=> "94af09c09bb9..."
 *
 *   # Compute digest for a file
 *   sha256 = Digest::SHA256.file 'testfile'
 *   sha256.hexdigest
 *
 * Additionally digests can be encoded in "bubble babble" format as a sequence
 * of consonants and vowels which is more recognizable and comparable than a
 * hexadecimal digest.
 *
 *   require 'digest/bubblebabble'
 *
 *   Digest::SHA256.bubblebabble 'message' #=> "xopoh-fedac-fenyh-..."
* * See the bubble babble specification at * http://web.mit.edu/kenta/www/one/bubblebabble/spec/jrtrjwzi/draft-huima-01.txt. * * == Digest algorithms * * Different digest algorithms (or hash functions) are available: * * MD5:: * See RFC 1321 The MD5 Message-Digest Algorithm * RIPEMD-160:: * As Digest::RMD160. * See http://homes.esat.kuleuven.be/~bosselae/ripemd160.html. * SHA1:: * See FIPS 180 Secure Hash Standard. * SHA2 family:: * See FIPS 180 Secure Hash Standard which defines the following algorithms: * * SHA512 * * SHA384 * * SHA256 * * The latest versions of the FIPS publications can be found here: * http://csrc.nist.gov/publications/PubsFIPS.html. */ static VALUE hexencode_str_new(VALUE str_digest) { char *digest; size_t digest_len; size_t i; VALUE str; char *p; static const char hex[] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' }; StringValue(str_digest); digest = RSTRING_PTR(str_digest); digest_len = RSTRING_LEN(str_digest); if (LONG_MAX / 2 < digest_len) { rb_raise(rb_eRuntimeError, "digest string too long"); } str = rb_usascii_str_new(0, digest_len * 2); for (i = 0, p = RSTRING_PTR(str); i < digest_len; i++) { unsigned char byte = digest[i]; p[i + i] = hex[byte >> 4]; p[i + i + 1] = hex[byte & 0x0f]; } RB_GC_GUARD(str_digest); return str; } /* * call-seq: * Digest.hexencode(string) -> hexencoded_string * * Generates a hex-encoded version of a given _string_. */ static VALUE rb_digest_s_hexencode(VALUE klass, VALUE str) { return hexencode_str_new(str); } NORETURN(static void rb_digest_instance_method_unimpl(VALUE self, const char *method)); /* * Document-module: Digest::Instance * * This module provides instance methods for a digest implementation * object to calculate message digest values. 
*/ static void rb_digest_instance_method_unimpl(VALUE self, const char *method) { rb_raise(rb_eRuntimeError, "%s does not implement %s()", rb_obj_classname(self), method); } /* * call-seq: * digest_obj.update(string) -> digest_obj * digest_obj << string -> digest_obj * * Updates the digest using a given _string_ and returns self. * * The update() method and the left-shift operator are overridden by * each implementation subclass. (One should be an alias for the * other) */ static VALUE rb_digest_instance_update(VALUE self, VALUE str) { rb_digest_instance_method_unimpl(self, "update"); UNREACHABLE; } /* * call-seq: * digest_obj.instance_eval { finish } -> digest_obj * * Finishes the digest and returns the resulting hash value. * * This method is overridden by each implementation subclass and often * made private, because some of those subclasses may leave internal * data uninitialized. Do not call this method from outside. Use * #digest!() instead, which ensures that internal data be reset for * security reasons. */ static VALUE rb_digest_instance_finish(VALUE self) { rb_digest_instance_method_unimpl(self, "finish"); UNREACHABLE; } /* * call-seq: * digest_obj.reset -> digest_obj * * Resets the digest to the initial state and returns self. * * This method is overridden by each implementation subclass. */ static VALUE rb_digest_instance_reset(VALUE self) { rb_digest_instance_method_unimpl(self, "reset"); UNREACHABLE; } /* * call-seq: * digest_obj.new -> another_digest_obj * * Returns a new, initialized copy of the digest object. Equivalent * to digest_obj.clone().reset(). */ static VALUE rb_digest_instance_new(VALUE self) { VALUE clone = rb_obj_clone(self); rb_funcall(clone, id_reset, 0); return clone; } /* * call-seq: * digest_obj.digest -> string * digest_obj.digest(string) -> string * * If none is given, returns the resulting hash value of the digest, * keeping the digest's state. 
* * If a _string_ is given, returns the hash value for the given * _string_, resetting the digest to the initial state before and * after the process. */ static VALUE rb_digest_instance_digest(int argc, VALUE *argv, VALUE self) { VALUE str, value; if (rb_scan_args(argc, argv, "01", &str) > 0) { rb_funcall(self, id_reset, 0); rb_funcall(self, id_update, 1, str); value = rb_funcall(self, id_finish, 0); rb_funcall(self, id_reset, 0); } else { value = rb_funcall(rb_obj_clone(self), id_finish, 0); } return value; } /* * call-seq: * digest_obj.digest! -> string * * Returns the resulting hash value and resets the digest to the * initial state. */ static VALUE rb_digest_instance_digest_bang(VALUE self) { VALUE value = rb_funcall(self, id_finish, 0); rb_funcall(self, id_reset, 0); return value; } /* * call-seq: * digest_obj.hexdigest -> string * digest_obj.hexdigest(string) -> string * * If none is given, returns the resulting hash value of the digest in * a hex-encoded form, keeping the digest's state. * * If a _string_ is given, returns the hash value for the given * _string_ in a hex-encoded form, resetting the digest to the initial * state before and after the process. */ static VALUE rb_digest_instance_hexdigest(int argc, VALUE *argv, VALUE self) { VALUE str, value; if (rb_scan_args(argc, argv, "01", &str) > 0) { rb_funcall(self, id_reset, 0); rb_funcall(self, id_update, 1, str); value = rb_funcall(self, id_finish, 0); rb_funcall(self, id_reset, 0); } else { value = rb_funcall(rb_obj_clone(self), id_finish, 0); } return hexencode_str_new(value); } /* * call-seq: * digest_obj.hexdigest! -> string * * Returns the resulting hash value in a hex-encoded form and resets * the digest to the initial state. */ static VALUE rb_digest_instance_hexdigest_bang(VALUE self) { VALUE value = rb_funcall(self, id_finish, 0); rb_funcall(self, id_reset, 0); return hexencode_str_new(value); } /* * call-seq: * digest_obj.to_s -> string * * Returns digest_obj.hexdigest(). 
*/ static VALUE rb_digest_instance_to_s(VALUE self) { return rb_funcall(self, id_hexdigest, 0); } /* * call-seq: * digest_obj.inspect -> string * * Creates a printable version of the digest object. */ static VALUE rb_digest_instance_inspect(VALUE self) { VALUE str; size_t digest_len = 32; /* about this size at least */ const char *cname; cname = rb_obj_classname(self); /* # */ str = rb_str_buf_new(2 + strlen(cname) + 2 + digest_len * 2 + 1); rb_str_buf_cat2(str, "#<"); rb_str_buf_cat2(str, cname); rb_str_buf_cat2(str, ": "); rb_str_buf_append(str, rb_digest_instance_hexdigest(0, 0, self)); rb_str_buf_cat2(str, ">"); return str; } /* * call-seq: * digest_obj == another_digest_obj -> boolean * digest_obj == string -> boolean * * If a string is given, checks whether it is equal to the hex-encoded * hash value of the digest object. If another digest instance is * given, checks whether they have the same hash value. Otherwise * returns false. */ static VALUE rb_digest_instance_equal(VALUE self, VALUE other) { VALUE str1, str2; if (rb_obj_is_kind_of(other, rb_mDigest_Instance) == Qtrue) { str1 = rb_digest_instance_digest(0, 0, self); str2 = rb_digest_instance_digest(0, 0, other); } else { str1 = rb_digest_instance_to_s(self); str2 = rb_check_string_type(other); if (NIL_P(str2)) return Qfalse; } /* never blindly assume that subclass methods return strings */ StringValue(str1); StringValue(str2); if (RSTRING_LEN(str1) == RSTRING_LEN(str2) && rb_str_cmp(str1, str2) == 0) { return Qtrue; } return Qfalse; } /* * call-seq: * digest_obj.digest_length -> integer * * Returns the length of the hash value of the digest. * * This method should be overridden by each implementation subclass. * If not, digest_obj.digest().length() is returned. 
*/ static VALUE rb_digest_instance_digest_length(VALUE self) { /* subclasses really should redefine this method */ VALUE digest = rb_digest_instance_digest(0, 0, self); /* never blindly assume that #digest() returns a string */ StringValue(digest); return LONG2NUM(RSTRING_LEN(digest)); } /* * call-seq: * digest_obj.length -> integer * digest_obj.size -> integer * * Returns digest_obj.digest_length(). */ static VALUE rb_digest_instance_length(VALUE self) { return rb_funcall(self, id_digest_length, 0); } /* * call-seq: * digest_obj.block_length -> integer * * Returns the block length of the digest. * * This method is overridden by each implementation subclass. */ static VALUE rb_digest_instance_block_length(VALUE self) { rb_digest_instance_method_unimpl(self, "block_length"); UNREACHABLE; } /* * Document-class: Digest::Class * * This module stands as a base class for digest implementation * classes. */ /* * call-seq: * Digest::Class.digest(string, *parameters) -> hash_string * * Returns the hash value of a given _string_. This is equivalent to * Digest::Class.new(*parameters).digest(string), where extra * _parameters_, if any, are passed through to the constructor and the * _string_ is passed to #digest(). */ static VALUE rb_digest_class_s_digest(int argc, VALUE *argv, VALUE klass) { VALUE str; volatile VALUE obj; if (argc < 1) { rb_raise(rb_eArgError, "no data given"); } str = *argv++; argc--; StringValue(str); obj = rb_obj_alloc(klass); rb_obj_call_init(obj, argc, argv); return rb_funcall(obj, id_digest, 1, str); } /* * call-seq: * Digest::Class.hexdigest(string[, ...]) -> hash_string * * Returns the hex-encoded hash value of a given _string_. This is * almost equivalent to * Digest.hexencode(Digest::Class.new(*parameters).digest(string)). 
*/
static VALUE
rb_digest_class_s_hexdigest(int argc, VALUE *argv, VALUE klass)
{
    /* forward all arguments to klass.digest, then hex-encode its result */
    VALUE raw = rb_funcallv(klass, id_digest, argc, argv);

    return hexencode_str_new(raw);
}

/* :nodoc: */
static VALUE
rb_digest_class_init(VALUE self)
{
    /* nothing to set up at this level */
    return self;
}

/*
 * Document-class: Digest::Base
 *
 * This abstract class provides a common interface to message digest
 * implementation classes written in C.
 *
 * ==Write a Digest subclass in C
 * Digest::Base provides a common interface to message digest
 * classes written in C. These classes must provide a struct
 * of type rb_digest_metadata_t:
 *  typedef int (*rb_digest_hash_init_func_t)(void *);
 *  typedef void (*rb_digest_hash_update_func_t)(void *, unsigned char *, size_t);
 *  typedef int (*rb_digest_hash_finish_func_t)(void *, unsigned char *);
 *
 *  typedef struct {
 *    int api_version;
 *    size_t digest_len;
 *    size_t block_len;
 *    size_t ctx_size;
 *    rb_digest_hash_init_func_t init_func;
 *    rb_digest_hash_update_func_t update_func;
 *    rb_digest_hash_finish_func_t finish_func;
 *  } rb_digest_metadata_t;
 *
 * This structure must be set as an instance variable named +metadata+
 * (without the +@+ in front of the name).
By example: * static const rb_digest_metadata_t sha1 = { * RUBY_DIGEST_API_VERSION, * SHA1_DIGEST_LENGTH, * SHA1_BLOCK_LENGTH, * sizeof(SHA1_CTX), * (rb_digest_hash_init_func_t)SHA1_Init, * (rb_digest_hash_update_func_t)SHA1_Update, * (rb_digest_hash_finish_func_t)SHA1_Finish, * }; * * * rb_ivar_set(cDigest_SHA1, rb_intern("metadata"), * rb_digest_make_metadata(&sha1)); */ #ifdef DIGEST_USE_RB_EXT_RESOLVE_SYMBOL static const rb_data_type_t metadata_type = { "digest/metadata", {0}, }; RUBY_FUNC_EXPORTED VALUE rb_digest_wrap_metadata(const rb_digest_metadata_t *meta) { return rb_obj_freeze(TypedData_Wrap_Struct(0, &metadata_type, (void *)meta)); } #endif static rb_digest_metadata_t * get_metadata_ptr(VALUE obj) { rb_digest_metadata_t *algo; #ifdef DIGEST_USE_RB_EXT_RESOLVE_SYMBOL if (!rb_typeddata_is_kind_of(obj, &metadata_type)) return 0; algo = RTYPEDDATA_DATA(obj); #else # undef RUBY_UNTYPED_DATA_WARNING # define RUBY_UNTYPED_DATA_WARNING 0 Data_Get_Struct(obj, rb_digest_metadata_t, algo); #endif return algo; } static rb_digest_metadata_t * get_digest_base_metadata(VALUE klass) { VALUE p; VALUE obj; rb_digest_metadata_t *algo; for (p = klass; !NIL_P(p); p = rb_class_superclass(p)) { if (rb_ivar_defined(p, id_metadata)) { obj = rb_ivar_get(p, id_metadata); break; } } if (NIL_P(p)) rb_raise(rb_eRuntimeError, "Digest::Base cannot be directly inherited in Ruby"); algo = get_metadata_ptr(obj); if (!algo) { if (p == klass) rb_raise(rb_eTypeError, "%"PRIsVALUE"::metadata is not initialized properly", klass); else rb_raise(rb_eTypeError, "%"PRIsVALUE"(%"PRIsVALUE")::metadata is not initialized properly", klass, p); } switch (algo->api_version) { case 3: break; /* * put conversion here if possible when API is updated */ default: rb_raise(rb_eRuntimeError, "Incompatible digest API version"); } return algo; } static rb_digest_metadata_t * get_digest_obj_metadata(VALUE obj) { return get_digest_base_metadata(rb_obj_class(obj)); } static const rb_data_type_t digest_type = { 
"digest", {0, RUBY_TYPED_DEFAULT_FREE, 0,}, 0, 0, (RUBY_TYPED_FREE_IMMEDIATELY|RUBY_TYPED_WB_PROTECTED), }; static inline void algo_init(const rb_digest_metadata_t *algo, void *pctx) { if (algo->init_func(pctx) != 1) { rb_raise(rb_eRuntimeError, "Digest initialization failed."); } } static VALUE rb_digest_base_alloc(VALUE klass) { rb_digest_metadata_t *algo; VALUE obj; void *pctx; if (klass == rb_cDigest_Base) { rb_raise(rb_eNotImpError, "Digest::Base is an abstract class"); } algo = get_digest_base_metadata(klass); obj = rb_data_typed_object_zalloc(klass, algo->ctx_size, &digest_type); pctx = RTYPEDDATA_DATA(obj); algo_init(algo, pctx); return obj; } /* :nodoc: */ static VALUE rb_digest_base_copy(VALUE copy, VALUE obj) { rb_digest_metadata_t *algo; void *pctx1, *pctx2; if (copy == obj) return copy; rb_check_frozen(copy); algo = get_digest_obj_metadata(copy); if (algo != get_digest_obj_metadata(obj)) rb_raise(rb_eTypeError, "different algorithms"); TypedData_Get_Struct(obj, void, &digest_type, pctx1); TypedData_Get_Struct(copy, void, &digest_type, pctx2); memcpy(pctx2, pctx1, algo->ctx_size); return copy; } /* * call-seq: digest_base.reset -> digest_base * * Reset the digest to its initial state and return +self+. */ static VALUE rb_digest_base_reset(VALUE self) { rb_digest_metadata_t *algo; void *pctx; algo = get_digest_obj_metadata(self); TypedData_Get_Struct(self, void, &digest_type, pctx); algo_init(algo, pctx); return self; } /* * call-seq: * digest_base.update(string) -> digest_base * digest_base << string -> digest_base * * Update the digest using given _string_ and return +self+. 
*/ static VALUE rb_digest_base_update(VALUE self, VALUE str) { rb_digest_metadata_t *algo; void *pctx; algo = get_digest_obj_metadata(self); TypedData_Get_Struct(self, void, &digest_type, pctx); StringValue(str); algo->update_func(pctx, (unsigned char *)RSTRING_PTR(str), RSTRING_LEN(str)); RB_GC_GUARD(str); return self; } /* :nodoc: */ static VALUE rb_digest_base_finish(VALUE self) { rb_digest_metadata_t *algo; void *pctx; VALUE str; algo = get_digest_obj_metadata(self); TypedData_Get_Struct(self, void, &digest_type, pctx); str = rb_str_new(0, algo->digest_len); algo->finish_func(pctx, (unsigned char *)RSTRING_PTR(str)); /* avoid potential coredump caused by use of a finished context */ algo_init(algo, pctx); return str; } /* * call-seq: digest_base.digest_length -> Integer * * Return the length of the hash value in bytes. */ static VALUE rb_digest_base_digest_length(VALUE self) { rb_digest_metadata_t *algo; algo = get_digest_obj_metadata(self); return SIZET2NUM(algo->digest_len); } /* * call-seq: digest_base.block_length -> Integer * * Return the block length of the digest in bytes. 
*/
static VALUE
rb_digest_base_block_length(VALUE self)
{
    rb_digest_metadata_t *algo;

    /* block_len comes from the metadata found via self's class */
    algo = get_digest_obj_metadata(self);

    return SIZET2NUM(algo->block_len);
}

/*
 * Extension entry point: interns the method-name IDs this file passes
 * to rb_funcall(), fetches the metadata ID from rb_id_metadata(), and
 * then hands over to InitVM(digest).
 */
void
Init_digest(void)
{
/* NOTE(review): presumably drops a macro form of rb_intern so the plain function is called - confirm */
#undef rb_intern
    id_reset = rb_intern("reset");
    id_update = rb_intern("update");
    id_finish = rb_intern("finish");
    id_digest = rb_intern("digest");
    id_hexdigest = rb_intern("hexdigest");
    id_digest_length = rb_intern("digest_length");
    id_metadata = rb_id_metadata();

    /* NOTE(review): InitVM(digest) presumably invokes InitVM_digest() below - confirm */
    InitVM(digest);
}

/*
 * Defines Digest, Digest::Instance, Digest::Class and Digest::Base and
 * registers their methods.  The statements are kept in their original
 * order.
 */
void
InitVM_digest(void)
{
    /*
     * module Digest
     */
    rb_mDigest = rb_define_module("Digest");

#ifdef HAVE_RB_EXT_RACTOR_SAFE
    rb_ext_ractor_safe(true);
#endif

    /* module functions */
    rb_define_module_function(rb_mDigest, "hexencode", rb_digest_s_hexencode, 1);

    /*
     * module Digest::Instance
     */
    rb_mDigest_Instance = rb_define_module_under(rb_mDigest, "Instance");

    /* instance methods that should be overridden */
    rb_define_method(rb_mDigest_Instance, "update", rb_digest_instance_update, 1);
    rb_define_method(rb_mDigest_Instance, "<<", rb_digest_instance_update, 1);
    /* finish is registered as private; callers go through digest/digest! */
    rb_define_private_method(rb_mDigest_Instance, "finish", rb_digest_instance_finish, 0);
    rb_define_method(rb_mDigest_Instance, "reset", rb_digest_instance_reset, 0);
    rb_define_method(rb_mDigest_Instance, "digest_length", rb_digest_instance_digest_length, 0);
    rb_define_method(rb_mDigest_Instance, "block_length", rb_digest_instance_block_length, 0);

    /* instance methods that may be overridden */
    rb_define_method(rb_mDigest_Instance, "==", rb_digest_instance_equal, 1);
    rb_define_method(rb_mDigest_Instance, "inspect", rb_digest_instance_inspect, 0);

    /* instance methods that need not usually be overridden */
    rb_define_method(rb_mDigest_Instance, "new", rb_digest_instance_new, 0);
    /* arity -1: digest and hexdigest take an optional string argument */
    rb_define_method(rb_mDigest_Instance, "digest", rb_digest_instance_digest, -1);
    rb_define_method(rb_mDigest_Instance, "digest!", rb_digest_instance_digest_bang, 0);
    rb_define_method(rb_mDigest_Instance, "hexdigest", rb_digest_instance_hexdigest, -1);
    rb_define_method(rb_mDigest_Instance, "hexdigest!", rb_digest_instance_hexdigest_bang, 0);
    rb_define_method(rb_mDigest_Instance, "to_s", rb_digest_instance_to_s, 0);
    /* length and size share one implementation */
    rb_define_method(rb_mDigest_Instance, "length", rb_digest_instance_length, 0);
    rb_define_method(rb_mDigest_Instance, "size", rb_digest_instance_length, 0);

    /*
     * class Digest::Class
     */
    rb_cDigest_Class = rb_define_class_under(rb_mDigest, "Class", rb_cObject);
    rb_define_method(rb_cDigest_Class, "initialize", rb_digest_class_init, 0);
    rb_include_module(rb_cDigest_Class, rb_mDigest_Instance);

    /* class methods */
    rb_define_singleton_method(rb_cDigest_Class, "digest", rb_digest_class_s_digest, -1);
    rb_define_singleton_method(rb_cDigest_Class, "hexdigest", rb_digest_class_s_hexdigest, -1);

    /* class Digest::Base < Digest::Class */
    rb_cDigest_Base = rb_define_class_under(rb_mDigest, "Base", rb_cDigest_Class);

    rb_define_alloc_func(rb_cDigest_Base, rb_digest_base_alloc);

    /* these replace the Digest::Instance defaults, which only raise */
    rb_define_method(rb_cDigest_Base, "initialize_copy", rb_digest_base_copy, 1);
    rb_define_method(rb_cDigest_Base, "reset", rb_digest_base_reset, 0);
    rb_define_method(rb_cDigest_Base, "update", rb_digest_base_update, 1);
    rb_define_method(rb_cDigest_Base, "<<", rb_digest_base_update, 1);
    rb_define_private_method(rb_cDigest_Base, "finish", rb_digest_base_finish, 0);
    rb_define_method(rb_cDigest_Base, "digest_length", rb_digest_base_digest_length, 0);
    rb_define_method(rb_cDigest_Base, "block_length", rb_digest_base_block_length, 0);
}