Sha256: 9c109d1f6ce868c11e2f884d2dd17a869f0e59f152534eb84bc35841fadb9c80
Contents?: true
Size: 1.65 KB
Versions: 1
Compression:
Stored size: 1.65 KB
Contents
#include <stdio.h>
#include <stdint.h>

/*
 * Classifies a UTF-8 lead byte.
 *
 * Returns the total number of bytes (1-4) the sequence introduced by `lead`
 * is expected to occupy, or -1 if `lead` cannot start a sequence (a stray
 * continuation byte 0x80-0xBF, or 0xF8-0xFF).
 */
static int8_t utf8ExpectedLen(unsigned char lead)
{
  if (lead <= 0x7f) {
    return 1;                 /* 0xxxxxxx: single byte */
  }
  if ((lead >> 5) == 0x06) {
    return 2;                 /* 110xxxxx: two bytes */
  }
  if ((lead >> 4) == 0x0e) {
    return 3;                 /* 1110xxxx: three bytes */
  }
  if ((lead >> 3) == 0x1e) {
    return 4;                 /* 11110xxx: four bytes */
  }
  return -1;
}

/*
 * Scans the current position of the buffer
 * returning the length of this UTF8 character
 *
 * in     - pointer to the first byte of the character to examine
 * in_len - number of readable bytes starting at `in`
 *
 * Returns:
 *   1..4  the byte length of the character at `in`
 *   0     the lead byte and every continuation byte present are valid, but
 *         the buffer ends before the sequence is complete (truncated)
 *   -1    `in` is NULL, `in_len` is 0, the lead byte is invalid, or a
 *         continuation byte does not have the form 10xxxxxx
 *
 * Only the structure of the sequence is checked: overlong encodings,
 * UTF-16 surrogates and lead bytes 0xF5-0xF7 are not rejected.
 *
 * Deliberately not declared `inline`: a plain `inline` definition does not
 * provide an external definition in C99/C11, so calls that are not inlined
 * would fail to link.
 */
int8_t utf8CharLen(unsigned char *in, size_t in_len)
{
  int8_t expected;
  int i;

  if (in == NULL || in_len == 0) {
    return -1;
  }

  expected = utf8ExpectedLen(in[0]);
  if (expected < 0) {
    return -1;
  }

  /* Validate each continuation byte, never reading past in_len. */
  for (i = 1; i < expected; i++) {
    if ((size_t)i >= in_len) {
      return 0;               /* ran out of input mid-sequence */
    }
    if ((in[i] >> 6) != 0x02) {
      return -1;              /* not a 10xxxxxx continuation byte */
    }
  }

  return expected;
}

/*
 * Scans the current position of the buffer
 * returning the total number of UTF8 characters found
 *
 * in     - pointer to the start of the buffer
 * in_len - number of bytes in the buffer
 *
 * Well-formed sequences count as one character each. A byte that
 * utf8CharLen() reports as malformed (-1) or as the start of a truncated
 * sequence (0) is counted as a single character and skipped on its own, so
 * the scan always moves forward, always terminates, and never steps outside
 * the buffer. Returns 0 for a NULL or empty buffer.
 */
size_t utf8CharCount(unsigned char *in, size_t in_len)
{
  size_t total = 0;
  size_t remaining = in_len;
  unsigned char *cursor = in;

  if (in == NULL) {
    return 0;
  }

  while (remaining > 0) {
    int8_t len = utf8CharLen(cursor, remaining);
    /* A positive len is always <= remaining; anything else advances 1 byte. */
    size_t step = (len > 0) ? (size_t)len : 1;

    cursor += step;
    remaining -= step;
    total++;
  }

  return total;
}
Version data entries
1 entry across 1 version & 1 rubygem
Version | Path |
---|---|
utf8-0.1.0 | ext/utf8/utf8.c |