Sha256: 9c109d1f6ce868c11e2f884d2dd17a869f0e59f152534eb84bc35841fadb9c80

Contents?: true

Size: 1.65 KB

Versions: 1

Compression:

Stored size: 1.65 KB

Contents

#include <stdio.h>
#include <stdint.h>

/*
 * Determines the byte length of the UTF-8 character that starts at `in`.
 *
 * in     - pointer to the first byte of the character to inspect
 * in_len - number of readable bytes available starting at `in`
 *
 * Returns:
 *   1..4  the lead byte is recognised and every continuation byte it calls
 *         for is present and has the form 10xxxxxx
 *   0     the buffer ends before the sequence announced by the lead byte is
 *         complete (truncated input)
 *  -1     `in` is NULL, `in_len` is 0, the first byte is not a valid lead
 *         byte pattern, or a continuation byte is malformed
 *
 * Only bit patterns are checked: overlong encodings, UTF-16 surrogate code
 * points and values above U+10FFFF are not rejected here.
 *
 * The function is deliberately not declared `inline`: a plain `inline`
 * definition provides no external symbol under C99 and later, so calls that
 * the compiler chooses not to inline would fail to link.
 */
int8_t utf8CharLen(unsigned char *in, size_t in_len) {
  unsigned char lead;
  int8_t expected, i;

  if (in == NULL || in_len == 0) {
    return -1;
  }

  lead = in[0];

  if (lead <= 0x7f) {
    /* 0xxxxxxx: single byte */
    return 1;
  } else if ((lead >> 5) == 0x06) {
    /* 110xxxxx: two bytes */
    expected = 2;
  } else if ((lead >> 4) == 0x0e) {
    /* 1110xxxx: three bytes */
    expected = 3;
  } else if ((lead >> 3) == 0x1e) {
    /* 11110xxx: four bytes */
    expected = 4;
  } else {
    /* stray continuation byte or unsupported lead byte */
    return -1;
  }

  for (i = 1; i < expected; i++) {
    /* The bounds check comes first so we never read past in_len. */
    if ((size_t)i >= in_len) {
      return 0;
    }
    /* Every trailing byte must look like 10xxxxxx. */
    if ((in[i] >> 6) != 0x02) {
      return -1;
    }
  }

  return expected;
}

/*
 * Counts the UTF-8 characters in the first `in_len` bytes of `in`.
 *
 * in     - pointer to the start of the buffer to scan
 * in_len - number of bytes in the buffer
 *
 * Returns the number of complete, well-formed characters found before the
 * end of the buffer or before the first truncated or invalid sequence,
 * whichever comes first. An empty buffer yields 0.
 *
 * utf8CharLen reports a truncated sequence as 0 and an invalid one as -1.
 * Neither value may be used to advance the cursor: 0 would make no progress
 * and loop forever, and -1 would move the cursor backwards while growing the
 * remaining-byte count. Scanning therefore stops at the first such result.
 */
size_t utf8CharCount(unsigned char *in, size_t in_len) {
  size_t total = 0, leftOver = in_len;
  unsigned char *cur = in;
  int8_t len;

  while (leftOver > 0) {
    len = utf8CharLen(cur, leftOver);
    if (len <= 0 || (size_t)len > leftOver) {
      /* Malformed or truncated input: stop rather than walk out of bounds. */
      break;
    }
    leftOver -= (size_t)len;
    cur += len;
    total++;
  }

  return total;
}

Version data entries

1 entry across 1 version & 1 rubygem

Version Path
utf8-0.1.0 ext/utf8/utf8.c