Sha256: 39810552c20ae1de1e096a83214053cc34dbe7db2fb385b3a7b22649ad7c92cf

Contents?: true

Size: 1.18 KB

Versions: 10

Compression:

Stored size: 1.18 KB

Contents

#ifndef _WORD_H_
#define _WORD_H_

#include <climits>
#include <cstring>

#include "memory.h"

namespace rmmseg
{
    /* Size of the text array declared inside Word:
       at least 1 char (3 bytes+'\0'). */
    const int word_embed_len = 4; /* at least 1 char (3 bytes+'\0') */

    /*
     * A word record: a small fixed header followed by the word's text.
     *
     * The declared size of text is only a minimum.  make_word() sizes
     * each allocation as sizeof(Word) + nbytes + 1 - word_embed_len and
     * writes the terminator at text[nbytes], so the text of a record may
     * run past (or stop short of) the declared array.  Do not reorder or
     * resize the fields without updating make_word().
     */
    struct Word
    {
        /* Stored in an unsigned char, so larger byte counts do not fit. */
        unsigned char   nbytes;   /* number of bytes */
        /* Plain char: whether it is signed is implementation-defined. */
        char            length;   /* number of characters */
        /* Frequency of the word; make_word() caps it at USHRT_MAX. */
        unsigned short  freq;
        /* Text bytes; make_word() terminates them with '\0'. */
        char            text[word_embed_len];
    };

    /**
     * Allocate a Word with pool_alloc() and initialise it from text.
     *
     * text:   the text of the word.  At most nbytes bytes are copied; as
     *         with std::strncpy, copying stops at the first '\0' and the
     *         rest of the destination is zero-filled.
     * length: number of characters (not bytes).
     * freq:   the frequency of the word; clamped to [0, USHRT_MAX] so it
     *         fits the unsigned short field.
     * nbytes: number of bytes of text to store.  Any negative value
     *         (the default is -1) means "measure text with std::strlen".
     *
     * Returns the new Word; its text is always '\0'-terminated.
     *
     * NOTE(review): nbytes and length are stored in char-sized fields,
     * so values that do not fit are narrowed on assignment while the
     * full nbytes bytes are still copied.  Presumably callers only pass
     * short words -- confirm.
     * NOTE(review): the result of pool_alloc() is used unchecked;
     * presumably it never returns a null pointer -- verify in memory.h.
     */
    inline Word *make_word(const char *text, int length=1,
                           int freq=0, int nbytes=-1)
    {
        /* Clamp on both sides: a negative int would otherwise wrap to a
           large value when stored in the unsigned short field. */
        if (freq > USHRT_MAX)
            freq = USHRT_MAX;
        else if (freq < 0)
            freq = 0;

        /* Treat every negative byte count as "unknown"; handing one to
           std::strncpy would convert it to a huge size_t. */
        if (nbytes < 0)
            nbytes = static_cast<int>(std::strlen(text));

        /* Only as much room as this text needs: the header minus the
           embedded array, plus nbytes bytes and the terminator. */
        Word *w = static_cast<Word *>(pool_alloc(sizeof(Word)
                                                 + nbytes+1
                                                 - word_embed_len));
        w->nbytes = static_cast<unsigned char>(nbytes);
        w->length = static_cast<char>(length);
        w->freq = static_cast<unsigned short>(freq);
        std::strncpy(w->text, text, nbytes);
        w->text[nbytes] = '\0';
        return w;
    }
}

#endif /* _WORD_H_ */

Version data entries

10 entries across 10 versions & 4 rubygems

Version Path
rmmseg-cpp-new-0.3.1 ext/rmmseg/word.h
rmmseg-cpp-new-0.3.0 ext/rmmseg/word.h
lijia-rmmseg-cpp-10.2.9.2 ext/rmmseg/word.h
rmmseg-cpp-traditional-2.0.4 ext/rmmseg/word.h
rmmseg-cpp-traditional-2.0.2 ext/rmmseg/word.h
rmmseg-cpp-traditional-2.0.0 ext/rmmseg/word.h
rmmseg-cpp-traditional-1.0.1 ext/rmmseg/word.h
rmmseg-cpp-traditional-1.0.0 ext/rmmseg/word.h
rmmseg-cpp-traditional-0.0.2 ext/rmmseg/word.h
rmmseg-cpp-0.2.9 ext/rmmseg/word.h