/* * console.c -- ruby console library * Author: William Morgan (mailto: wmorgan-ruby-console@masanjin.net) * Copyright: Copyright 2010 William Morgan * License: same terms as Ruby itself */ #include #include #include #include #ifdef HAVE_RUBY_ENCODING_H #include #endif static inline int calc_width(char* string, long strlen, long byte_offset, size_t* num_bytes, size_t* num_cols) { wchar_t wc; size_t width = -1; mbstate_t state; memset(&state, 0, sizeof(state)); *num_bytes = mbrtowc(&wc, string + byte_offset, strlen - byte_offset, &state); if(*num_bytes == (size_t)-2) { rb_raise(rb_eArgError, "malformed string: incomplete multibyte character at position %ld", byte_offset); return -1; } else if(*num_bytes == (size_t)-1) { rb_raise(rb_eArgError, "malformed string: invalid multibyte character at position %ld", byte_offset); return -1; } else if(*num_bytes == 0) { rb_raise(rb_eArgError, "malformed string: NULL byte at position %ld", byte_offset); return -1; } *num_cols = wcwidth(wc); return 0; } /* * call-seq: init_locale! * * Sets the program's current locale from the appropriate environment variables. * (see `man 3 setlocale` for details). * * Equivalent to: * char* old_locale = setlocale(LC_ALL, NULL); * return old_locale; * * in C. * * If you are using Ruby 1.8, you *must* call this at least once before calling * the other methods in this package. Otherwise, using non-ASCII strings will * be considered invalid, and #display_width and #display_slice will raise * ArgumentErrors. * * Ruby 1.9 users do not need to call this, since Ruby 1.9 appears to set the * locale in this manner already. Calling it won't matter, however. * * Returns a string representing the old locale. If you wish to change locales * several times, you can use this value to return to the previous locale. * Otherwise, just ignore it. */ static VALUE init_locale(VALUE v_self) { char* old_locale = setlocale(LC_ALL, NULL); setlocale(LC_ALL, ""); // set ctype locale according to appropriate env vars return rb_str_new2(old_locale); } /* * call-seq: display_width(string) * * Returns the display width of string, that is, the number of * columns that the string will take up when printed to screen. This is * different from both the number of characters and the number of bytes in a * string. * * In Ruby 1.8, the input string is assumed to be in the current locale's * encoding. If it isn't, an ArgumentError will be raised. Be sure to call * init_locale! before calling this method! Otherwise every non-ASCII string * will trigger an ArgumentError. * * In Ruby 1.9, the string will be automatically converted from its encoding * into the current locale's encoding for processing. * * Throws an ArgumentError when it encounters an invalid character. On Ruby * 1.8, this includes any string not in the current locale's encoding. On Ruby * 1.9, this should only occur if Ruby is unable to convert the string from its * encoding into the current locale's encoding. */ static VALUE display_width(VALUE v_self, VALUE v_string) { Check_Type(v_string, T_STRING); #ifdef HAVE_RUBY_ENCODING_H // convert from whatever encoding it's in. // TODO: do i have to use rb_protect to relay any exceptions? v_string = rb_str_encode(v_string, rb_enc_from_encoding(rb_locale_encoding()), 0, Qnil); #endif char* string = RSTRING_PTR(v_string); long display_width = 0; long strlen = RSTRING_LEN(v_string); long offset = 0; while(offset < strlen) { size_t num_bytes, num_cols; int err = calc_width(string, strlen, offset, &num_bytes, &num_cols); if(err) break; display_width += num_cols; offset += num_bytes; } return LONG2NUM(display_width); } static const char* default_pad_string = " "; /* * call-seq: * display_slice(string, start_offset, display_width=1, pad_string=" ") * * Returns a slice of a string, based on display width, rather than character * or bytes. I.e, the start_offset and display_width * offsets index the columns required to display the string, not individual * characters or bytes. * * This is useful if you want to display a part of a string on screen, as you * can pull out a specific portion based on display size. * * Padding: slicing can truncate multi-column characters. If the slice * truncates a character, the string will be padded with * pad_string, on the left side, right side, or both, as * necessary. If pad_string is nil then no padding * will be done. pad_string should be a single-column string for * this to make sense. * * In Ruby 1.8, the input string is assumed to be in the current locale's * encoding. If it isn't, an ArgumentError will be raised. Be sure to call * init_locale! before calling this method! Otherwise every non-ASCII string * will trigger an ArgumentError. * * In Ruby 1.9, the string will be automatically converted from its encoding * into the current locale's encoding. Regardless of the original encoding, the * returned string will be in the current locale's encoding. * * Throws an ArgumentError when it encounters an invalid character. On Ruby * 1.8, this includes any string not in the current locale's encoding. On Ruby * 1.9, this should only occur if Ruby is unable to convert the string from its * encoding into the current locale's encoding. */ static VALUE display_slice(int argc, VALUE *argv, VALUE v_self) { VALUE v_string, v_display_start, v_display_width, v_pad_string; rb_scan_args(argc, argv, "22", &v_string, &v_display_start, &v_display_width, &v_pad_string); Check_Type(v_string, T_STRING); /* try and mimic String#slice's argument handling as much as possible */ int display_start = NUM2INT(v_display_start); if(display_start < 0) display_start = NUM2LONG(display_width(v_self, v_string)) + display_start; // negative start means from the end of the string if(display_start < 0) return Qnil; // but if you go too far, you fail int display_width; if(argc < 3) display_width = 1; // default value just like String#slice (although it makes slightly less sense) else display_width = NUM2INT(v_display_width); if(display_width < 0) return Qnil; // you fail const char* pad_string; if(argc < 4) pad_string = default_pad_string; // only fill in default if unspecified; nil is a valid value else if(v_pad_string == Qnil) pad_string = ""; else pad_string = RSTRING_PTR(v_pad_string); #ifdef HAVE_RUBY_ENCODING_H // TODO: do i have to use rb_protect to relay any exceptions? v_string = rb_str_encode(v_string, rb_enc_from_encoding(rb_locale_encoding()), 0, Qnil); #endif char* string = RSTRING_PTR(v_string); long slen = RSTRING_LEN(v_string); // first, advance the string pointer so that we've seen display_start width characters long current_width = 0; long offset = 0; while((offset < slen) && (current_width < display_start)) { size_t num_bytes, num_cols; int err = calc_width(string, slen, offset, &num_bytes, &num_cols); current_width += num_cols; offset += num_bytes; } /* here's a weird behavior (to me!) of String#slice that we emulate: * if the start point is the string length itself, you get an empty * string back; if the start point is greater than that, you get nil. */ if((current_width < display_start)) return Qnil; /* determine left padding */ const char* pad_left = ""; if((current_width > display_start) && (display_width > 0)) pad_left = pad_string; // now, advance the string_end pointer so that we've seen an additional display_width width characters long end_offset = offset; current_width -= display_start; while((end_offset < slen) && (current_width < display_width)) { size_t num_bytes, num_cols; int err = calc_width(string, slen, end_offset, &num_bytes, &num_cols); if((current_width + num_cols) > (size_t)display_width) break; // have to stop here current_width += num_cols; end_offset += num_bytes; } /* determine right padding */ const char* pad_right = ""; if((current_width < display_width) && (end_offset < slen)) pad_right = pad_string; // finally, construct a new string int bytesize = end_offset - offset; int leftsize = strlen(pad_left); int rightsize = strlen(pad_right); char* new_string = calloc(bytesize + leftsize + rightsize + 1, sizeof(char)); if(leftsize > 0) strcpy(new_string, pad_left); if(bytesize > 0) memcpy(new_string + leftsize, string + offset, bytesize * sizeof(char)); if(rightsize > 0) strcpy(new_string + leftsize + bytesize, pad_right); (new_string + bytesize + leftsize + rightsize)[0] = 0; #ifdef HAVE_RUBY_ENCODING_H return rb_enc_str_new(new_string, bytesize + leftsize + rightsize, rb_enc_get(v_string)); #else return rb_str_new(new_string, bytesize + leftsize + rightsize); #endif } /* * A helper class for console-based programs that need to deal with non-ASCII * code. If you are writing a curses/ncurses program, or otherwise care about * the display width of characters on the screen, this is crucial stuff. * * Provides: * * Console.init_locale!: set the program's locale from the appropriate * environment variables. (Ruby 1.8 programs must call this before calling any * of the other methods. Ruby 1.9 programs can call it or skip it without * effect.) * * Console.display_width: calculates the display width of a string * * Console.display_slice: returns a substring according to display offset * and display width parameters. * */ void Init_console() { VALUE cConsole; cConsole = rb_define_class("Console", rb_cObject); rb_define_module_function(cConsole, "display_width", display_width, 1); rb_define_module_function(cConsole, "display_slice", display_slice, -1); rb_define_module_function(cConsole, "init_locale!", init_locale, 0); }