ext/utf8/utf8.c in utf8-0.1.2 vs ext/utf8/utf8.c in utf8-0.1.3

- old
+ new

@@ -3,11 +3,11 @@
 #define CHECK_LEN if ((size_t)(in-start) >= in_len) return -1;
 /*
  * Scans the current position of the buffer
- * returning the length of this UTF8 character
+ * returning the length of this UTF-8 character
  */
 inline int8_t utf8CharLen(unsigned char *in, size_t in_len) {
   if (in_len > 0) {
     unsigned char curChar, *start;
@@ -58,11 +58,11 @@
   return -1;
 }
 /*
  * Scans the current position of the buffer
- * returning the total number of UTF8 characters found
+ * returning the total number of UTF-8 characters found
  */
 int64_t utf8CharCount(unsigned char *in, size_t in_len) {
   int64_t total = 0, leftOver = in_len;
   int8_t len = 0;
   unsigned char *start = in;
@@ -78,6 +78,28 @@
       total++;
     }
   }
   return total;
+}
+
+/*
+ * Scans the current position of the buffer
+ * returning the codepoint for the UTF-8 character at this position
+ */
+int32_t utf8CharToCodepoint(unsigned char *in, size_t in_len) {
+  int32_t cp, ncp, len;
+
+  len = utf8CharLen(in, in_len);
+  cp = *in++;
+  if (len > 1) {
+    len--;
+    ncp = cp & ((1 << (6 - len)) - 1);
+    while (len--) {
+      cp = *in++;
+      ncp = (ncp << 6) | (cp & ((1 << 6) - 1));
+    }
+    return ncp;
+  } else {
+    return cp;
+  }
 }