acl4のページ0006

(1)

// Short aliases: when a_Version >= 1, each unprefixed name below expands to
// its a_-prefixed counterpart.
#if (a_Version >= 1)
    #define Token0              a_Token0
    #define Token0Table         a_Token0Table
    #define Token0Table_ini     a_Token0Table_ini
    #define Token0_get          a_Token0_get
    #define Token1_get          a_Token1_get
#endif

// Lookup tables used by a_Token0_get; filled in by a_Token0Table_ini.
a_class(a_Token0Table) {
    uint32_t *op[4]; // operator lists, each 0-terminated: op[0] = 4-char operators ... op[3] = 1-char operators.
    unsigned char chrTyp[256]; // character class for each byte value:
        // 0: whitespace.
        // 1: single-character symbol.
        // 2: general symbol character.
        // 3: alphabet/digit.
};

// Tokenizer state: the remaining input range plus a description of the token
// most recently returned by a_Token0_get.
a_class(a_Token0) {
    a_Token0Table *tbl; // character-class and operator tables consulted while scanning.
    const char *s, *s1; // s: next byte to scan (advanced past each token); s1: end of input (exclusive).
    unsigned char cTyp; // class of the last token; a_Token0_get stores 255 when a class-2 character matches no operator.
    uint32_t c; // class 1 and 3: first character of the token; class 2: operator characters packed with the first one in the low byte; 0 if no token.
    intptr_t len; // length in bytes of the last token; 0 if no token was found.
};

/*
 * Parses a space-separated list of operators (1 to 4 characters each) and
 * stores one packed code per operator in op[]: the first character occupies
 * bits 0-7, the second bits 8-15, and so on.  The list is closed with a 0 entry.
 *
 * op: output array; the caller must provide room for every operator in s plus
 *     the terminating 0 (no bound is checked here).
 * s:  NUL-terminated operator list.
 * Returns the number of operators stored (the terminator is not counted).
 * Parsing stops at the first operator longer than 4 characters.
 *
 * Bytes are read as unsigned char so that values >= 0x80 neither end an
 * operator early nor sign-extend into the packed code, and the shifts are done
 * in uint32_t so a byte moved into bits 24-31 cannot overflow a signed int.
 */
a_static int a_Token0Table_iniSub(uint32_t *op, const char *s)
{
    const unsigned char *p = (const unsigned char *) s;
    int n = 0;
    for (;;) {
        while (*p == ' ') p++;
        if (*p == '\0') break;
        // The first byte always belongs to the operator; following bytes are
        // appended while they are above ' ' (space, control chars and NUL end it).
        uint32_t code = p[0];
        int k = 1;
        while (k < 4 && p[k] > ' ') {
            code |= (uint32_t) p[k] << (8 * k);
            k++;
        }
        if (p[k] > ' ') break; // only reachable with k == 4: operator longer than 4 characters.
        op[n++] = code;
        p += k;
    }
    op[n] = 0;
    return n;
}

/*
 * Fills *w with the default tokenizer tables.
 *
 * The operator lists live in function-local static arrays, so every table
 * initialized by this function points at the same four arrays; they are
 * rebuilt with identical contents on each call.
 *
 * Character classes (see chrTyp): every byte not listed below, including 0,
 * stays class 0 and is therefore skipped like whitespace by a_Token0_get.
 *
 * Fixes relative to the earlier version:
 *   - '|' is now class 2; it was missing, so "|", "||" and "|=" in the operator
 *     lists could never be matched and '|' was skipped as whitespace.
 *   - '?' is now a 1-character operator; it was class 2 without any operator
 *     entry, which made a_Token0_get report a zero-length token at every '?'.
 */
a_static void a_Token0Table_ini(a_Token0Table *w)
{
    static uint32_t op4[64], op3[64], op2[64], op1[64];
    a_Token0Table_iniSub(op4, "");
    a_Token0Table_iniSub(op3, ">>= <<= ...");
    a_Token0Table_iniSub(op2, "== != <= >= << >> ++ -- += -= *= /= %= ^= |= && || -> :: // /* ## .. !! %% ~~");
    a_Token0Table_iniSub(op1, "= < > + - * / % ^ & | ! ~ . : # ?");
    int c;
    for (c = 0; c < 256; c++) {
        unsigned char ct = 0; // default: whitespace-like.
        if (c != 0) { // strchr would match the terminator for c == 0, so skip it.
            if (strchr("(){},;[]\"\'", c) != NULL) ct = 1;
            if (strchr("!#%&-=^~:+*<>./?|", c) != NULL) ct = 2;
            if ('0' <= c && c <= '9') ct = 3;
            if ('A' <= c && c <= 'Z') ct = 3;
            if ('a' <= c && c <= 'z') ct = 3;
            if (strchr("_$@`", c) != NULL) ct = 3;
        }
        w->chrTyp[c] = ct;
    }
    // op[k] holds the operators of length 4 - k.
    w->op[0] = op4;
    w->op[1] = op3;
    w->op[2] = op2;
    w->op[3] = op1;
}

/*
 * Scans the next basic token from w->s (bounded by w->s1).
 *
 * Class-0 bytes are skipped first.  Then, depending on the class of the first
 * remaining byte:
 *   1: the token is that single character;
 *   2: the longest operator (4, 3, 2, then 1 characters) found in w->tbl->op;
 *      if none matches, cTyp is set to 255 and the length to 0;
 *   3: the run of consecutive class-3 characters.
 *
 * On return w->cTyp, w->c and w->len describe the token and w->s points just
 * past it.  A zero-length result (end of input, or an unmatched class-2
 * character) leaves w->s on the offending position, so callers must stop on
 * w->len == 0.
 *
 * Returns a pointer to the start of the token (equal to w->s1 at end of input).
 *
 * cTyp is now 0 at end of input (it was read uninitialized before), and
 * operator bytes are packed in uint32_t so a byte >= 0x80 shifted into the top
 * byte no longer overflows a signed int.
 */
a_static char *a_Token0_get(a_Token0 *w)
{
    const char *s = w->s, *s1 = w->s1;
    const unsigned char *chrTyp = w->tbl->chrTyp;
    unsigned char ct = 0;
    intptr_t len = 0, i;
    w->c = 0;

    // Skip whitespace-class bytes.
    while (s < s1 && chrTyp[*(const unsigned char *) s] == 0) s++;
    if (s >= s1) goto fin;
    ct = chrTyp[*(const unsigned char *) s];

    if (ct == 1) {
        len = 1;
        w->c = *s;
        goto fin;
    }
    if (ct == 2) {
        // Pack up to four bytes, never reading at or beyond s1.
        const unsigned char *u = (const unsigned char *) s;
        intptr_t avail = s1 - s;
        uint32_t op = u[0];
        int k;
        if (avail > 1) op |= (uint32_t) u[1] << 8;
        if (avail > 2) op |= (uint32_t) u[2] << 16;
        if (avail > 3) op |= (uint32_t) u[3] << 24;
        // Try the 4-, 3-, 2- and 1-character tables in turn, trimming the
        // candidate to the length under test.
        for (k = 0; k < 4; k++) {
            const uint32_t *opTbl = w->tbl->op[k];
            op &= UINT32_C(0xffffffff) >> (8 * k);
            len = 4 - k;
            w->c = op;
            for (i = 0; opTbl[i] != 0; i++) {
                if (opTbl[i] == op) goto fin;
            }
        }
        ct = 255; len = 0; w->c = 0; // no operator starts with this character.
    }
    if (ct == 3) {
        w->c = *s;
        for (len = 1;;) {
            if (s + len >= s1) goto fin;
            if (chrTyp[((const unsigned char *) s)[len]] == ct) { len++; continue; }
            break;
        }
    }
fin:
    w->cTyp = ct;
    w->len = len;
    w->s = s + len;
    return (char *) s;
}

/*
 * Returns the next token like a_Token0_get, with three refinements:
 *   - a "//" operator starts a comment: scanning resumes at the next newline,
 *     or the input is reported as exhausted when there is none;
 *   - a token starting with " or ' extends through the matching unescaped
 *     quote, or stops before a newline or at the end of input;
 *   - a token starting with a digit extends over digits, letters, '_', '.',
 *     and a '+' or '-' that directly follows 'e' or 'E'.
 *
 * For extended tokens only w->len and w->s are updated; w->c and w->cTyp keep
 * the values a_Token0_get stored.
 *
 * Returns a pointer to the start of the token; at end of input the pointer is
 * >= w->s1 and w->len is 0.
 *
 * The exponent sign used to be accepted only after a lowercase 'e', so "1E+5"
 * was split into three tokens even though uppercase letters are part of a
 * number token.
 */
a_static char *a_Token1_get(a_Token0 *w)
{
    for (;;) {
        const char *s = a_Token0_get(w), *s1 = w->s1, *t;
        uint32_t c = w->c;
        if (s >= s1) return (char *) s;
        if (c == ('/' | '/' << 8)) {
            // Line comment: continue from the newline, which the next scan skips.
            t = memchr(s, '\n', s1 - s);
            if (t != NULL) { w->s = t; continue; }
            w->s = s1; w->len = 0; return (char *) s1;
        }
        if (c == 0x22 || c == 0x27) {
            // Advance until a newline or an unescaped occurrence of the opening quote c.
            t = s + 1;
            for (;;) {
                if (t >= s1) break;
                if (*t == c) { t++; break; }
                if (*t == '\n') break;
                if (*t != '\\') { t++; continue; }
                // Backslash: also consume the escaped character, unless it is a newline.
                t++;
                if (t >= s1) break;
                if (*t == '\n') break;
                t++;
            }
            w->len = t - s;
            w->s = t;
            return (char *) s;
        }
        if ('0' <= c && c <= '9') {
            // Continue after the class-3 run already consumed by a_Token0_get.
            t = s + w->len;
            for (;;) {
                if (t >= s1) break;
                c = *t;
                if (c == '.') { t++; continue; }
                if ('0' <= c && c <= '9') { t++; continue; }
                if ('A' <= c && c <= 'Z') { t++; continue; }
                if ('a' <= c && c <= 'z') { t++; continue; }
                if (c == '_') { t++; continue; }
                // Exponent sign; t[-1] is valid because t > s here.
                if ((c == '+' || c == '-') && (t[-1] == 'e' || t[-1] == 'E')) { t++; continue; }
                break;
            }
            w->len = t - s;
            w->s = t;
            return (char *) s;
        }
        return (char *) s;
    }
}

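(2) Token1_get

Token0_getの上位版。「//」から行末までのコメントを読み飛ばし、引用符(" または ')で始まる文字列・文字定数と、数字で始まる数値(小数点や指数部の符号を含む)をそれぞれ1つのトークンにまとめて返す。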

(3) サンプルプログラム: t0006a.c

#define a_Version 1
#include <acl4.c>

int main(int argc, const char **argv) // Sample: prints the first 20 tokens of the file named by argv[1].
{
    VecChr vc[1]; VecChr_ini(vc);
    VecChr_readFileAll_errChk(vc, argv[1]); // NOTE(review): argv[1] is used without checking argc - confirm the helper copes with a NULL path.
    VecChr_eraseCr(vc);

    Token0Table t0t[1]; Token0Table_ini(t0t);
    Token0 t0[1]; t0->tbl = t0t;
    t0->s = vc->p; t0->s1 = vc->p + vc->n; // scan the whole buffer: [p, p + n).

    int i = 1;
    for (;;) {
        char *t = Token1_get(t0);
        if (t0->len == 0 || i > 20) break; // stop at end of input, on an unrecognized character, or after 20 tokens.
        printf("%6d: %.*s\n", i++, (int) t0->len, t); // the '*' precision must be an int; len is an intptr_t.
    }

    VecChr_din(vc);
    a_malloc_debugList(_arg);
    return 0;
}
>t0006a t0006a.c
     1: #
     2: define
     3: a_Version
     4: 1
     5: #
     6: include
     7: <
     8: acl4
     9: .
    10: c
    11: >
    12: int
    13: main
    14: (
    15: int
    16: argc
    17: ,
    18: const
    19: char
    20: *
t0006a.c(22): malloc_debugList()

(99) 更新履歴


トップ   新規 一覧 単語検索 最終更新   ヘルプ   最終更新のRSS