a4_0014 の編集 - essen-wiki

* acl4のページ0014
-(by [[K]], 2026.03.30)

** (1)
-私はまあまあの頻度でスクリプト言語（インタプリタ型言語）を作るのですが、そのたびに内部コードを実行するための仮想マシンを設計・実装していて、無駄だなあと感じていました。
-一度、自分が納得のいく仮想マシンをきちんと作って、以後はこれを使いまわすべきなのです。
-ということで、a4vmを作りました。ただし、細部を全部決めきることができなかったので、メモリアクセスに関する仕様は含まれていません。だからこれはバージョン0にして、将来にバージョン1や2を作ることにします。

-''[特徴]''
-命令長は intptr_t の4倍にほぼ固定（一部例外はあります）。
-レジスタ数は無制限（Enter命令で使いたい個数を宣言します）。
-レジスタは関数ローカル（関数呼び出しの際にレジスタを使って引数・返値の受け渡しはできません）。
-フラグレジスタ的なものは持たず、比較命令とコンディション判定が融合（この仕様だとC言語での実装は簡単ですし、JITコンパイラも作りやすいです）。
-浮動小数点数演算もサポート。

-''[現時点での成果のまとめ]''
-a4_0014までとa4_t0001を使えば、77行で独自アセンブラソースコードを直接実行できる仮想マシンが作れました（t0014a.c：下記参照）。自作のプリプロセッサを内蔵しているので、プリプロセッサ機能はフルスペックで使えます。
-簡易なウィンドウ描画関数を持っているので、グラフィック表示もできます。
-これら全部を含んでも、実行ファイルは 26.0KB です。
-実行速度は十分に速く、mandelベンチマークでの処理時間はC言語版の6.83倍程度でした。

-''[今後]''
-ここまでできたので、あとはC言語のソースコードをこのa4vmのアセンブラに変換できればいいということになります。
-さらにa4vmのJITコンパイラや、C言語に再変換するやつも整備したいです。

** (2) a4_0014.c

// When a_Version is 2 or higher, make the a_-prefixed names of this file available under short, unprefixed aliases.
#if (a_Version >= 2)
     #define BufFree                     a_BufFree
     #define BufFree_ini                 a_BufFree_ini
     #define BufFree_din                 a_BufFree_din
     #define BufFree_add                 a_BufFree_add
     #define BufFree_malloc              a_BufFree_malloc
     #define BufFree_flush               a_BufFree_flush
     #define double2intptr               a_double2intptr
     #define A4vm_exec0                  a_A4vm_exec0
 #endif
 
 // List of heap blocks that are to be released together.
 // stk is used as a stack of (pointer, size) entries, see a_BufFree_add().
 a_class(a_BufFree) {
     a_VecChr stk[1];
 };
 
 // Initialise the entry stack of w with a_VecChr_ini().
 a_static void a_BufFree_ini(a_BufFree *w)
 {
     a_VecChr_ini(w->stk);
 }
 
 // Tear down the entry stack of w with a_VecChr_din().
 // This does not call a_free() on the recorded blocks; a_BufFree_flush() does that.
 a_static void a_BufFree_din(a_BufFree *w)
 {
     a_VecChr_din(w->stk);
 }
 
 // Record the block p of sz bytes in w so that a_BufFree_flush() can release it later.
 a_static void a_BufFree_add(a_BufFree *w, void *p, intptr_t sz)
 {
     void **entry = a_VecChr_stkAdd(w->stk, 2 * sizeof (void *));
     entry[1] = (void *) sz;   // the size is kept in a pointer-sized slot next to the pointer
     entry[0] = p;
 }
 
 // Allocate sz bytes with a_malloc(), register the block in w and return it.
 a_static void *a_BufFree_malloc(a_BufFree *w, intptr_t sz)
 {
     void *blk = a_malloc(_arg_  sz);
     a_BufFree_add(w, blk, sz);
     return blk;
 }
 
 // Pop the recorded entries, newest first, and pass each (pointer, size) pair to a_free()
 // until the entry stack no longer holds any bytes.
 a_static void a_BufFree_flush(a_BufFree *w)
 {
     a_VecChr *list = w->stk;
     for (;;) {
         if (!(list->n > 0)) break;
         list->n -= 2 * sizeof (void *);
         void **entry = (void **) (list->p + list->n);
         a_free(_arg_  entry[0], (intptr_t) entry[1]);
     }
 }
 
 // Convert by rounding to nearest rather than truncating:
 // 0.5 is added away from zero before the cast.
 a_static intptr_t a_double2intptr(double x)
 {
     double biased = (x >= 0.0) ? x + 0.5 : x - 0.5;
     return (intptr_t) biased;
 }
 
 // Record kept by a_A4vm_exec0() for one active a4vm function frame (one is pushed per enter instruction).
 a_class(a_A4vm_exec0_funcInfo) {
     intptr_t prv;     // vcStk->n before the frame was pushed; restored on leave
     intptr_t reg;     // byte offset inside vcStk of the integer registers
     intptr_t frg;     // byte offset inside vcStk of the floating-point registers
     intptr_t subArg;  // byte offset inside vcStk of the outgoing-argument area
     intptr_t pc;      // return position stored by the call instruction
     intptr_t arg;     // argIdx of the caller, stored by the call instruction
 };
 
 // Interpret the a4vm version-0 bytecode bp[0..bn-1] (intptr_t words, 4 words per basic instruction).
 //   bp, bn     : program words and their count.
 //   argc, argv : argument words; copied to the bottom of the VM stack before execution and
 //                copied back into argv when the outermost function leaves.
 //   returns    : arg[0] after the outermost leave (0x3f), the operand of an abort (0x02),
 //                or -1 when a C-function call (0x3d) declares more than 9 arguments.
 // No bounds checks are made on pc, register numbers, argument indexes or label numbers.
 a_static intptr_t a_A4vm_exec0(intptr_t *bp, intptr_t bn, intptr_t argc, intptr_t *argv)
 {
     a_VecChr vcLbl[1], vcInf[1], vcStk[1]; a_VecChr_ini4(vcLbl, vcInf, vcStk, 0);
     a_VecChr_reserve(vcLbl, 256 * sizeof (intptr_t));
     intptr_t argc1 = (argc + 1) & -2;  // argc rounded up to an even number of words
     a_VecChr_reserve(vcStk, argc1 * sizeof (intptr_t));
     if (argc > 0) { memcpy(vcStk->p, argv, argc * sizeof (intptr_t)); vcStk->n = argc1 * sizeof (intptr_t); }
     // lab starts at the label table, so it is never an indeterminate pointer even when the program has no label.
     intptr_t *lab = (intptr_t *) vcLbl->p, pc, *reg = NULL, *subArg = NULL, *arg = (intptr_t *) vcStk->p, retVal = 0, argIdx = 0;
     double *frg = NULL;
     a_A4vm_exec0_funcInfo *inf;
     // Pass 1: build the label table. The scan steps 4 words at a time and looks for opcode 0x03.
     for (pc = 0; pc < bn; pc += 4) {
         if (bp[pc] == 0x03) {
             a_VecChr_reserve(vcLbl, (bp[pc + 1] + 1) * sizeof (intptr_t));
             lab = (intptr_t *) vcLbl->p;
             lab[bp[pc + 1]] = pc + 4;  // points to the instruction that follows Lbl_T(t).
         }
     }
     // Pass 2: execute. pc already points past the current 4-word group when a case runs.
     for (pc = 0;;) {
         intptr_t op = bp[pc], a = bp[pc + 1], b = bp[pc + 2], c = bp[pc + 3], d; pc += 4;
         switch (op) {
         case 0x00: continue;                 // nop
         case 0x01: pc = lab[a]; continue;    // jump
         case 0x02: retVal = a; goto err;     // abort: return a without copying the arguments back
         case 0x03: continue;                 // label: nothing to do at run time
         // Integer loads and arithmetic; even opcodes take an immediate, odd ones a register.
         case 0x04: reg[a] = b; continue;
         case 0x05: reg[a] = reg[b]; continue;
         case 0x06: reg[a] = reg[b] |  c;      continue;
         case 0x07: reg[a] = reg[b] |  reg[c]; continue;
         case 0x08: reg[a] = reg[b] ^  c;      continue;
         case 0x09: reg[a] = reg[b] ^  reg[c]; continue;
         case 0x0a: reg[a] = reg[b] &  c;      continue;
         case 0x0b: reg[a] = reg[b] &  reg[c]; continue;
         case 0x0c: reg[a] = reg[b] << c;      continue;
         case 0x0d: reg[a] = reg[b] << reg[c]; continue;
         case 0x0e: reg[a] = reg[b] >> c;      continue;
         case 0x0f: reg[a] = reg[b] >> reg[c]; continue;
         case 0x10: reg[a] = reg[b] +  c;      continue;
         case 0x11: reg[a] = reg[b] +  reg[c]; continue;
         case 0x12: reg[a] = reg[b] -  c;      continue;
         case 0x13: reg[a] = reg[b] -  reg[c]; continue;
         case 0x14: reg[a] = reg[b] *  c;      continue;
         case 0x15: reg[a] = reg[b] *  reg[c]; continue;
         case 0x16: reg[a] = reg[b] /  c;      continue;
         case 0x17: reg[a] = reg[b] /  reg[c]; continue;
         case 0x18: reg[a] = reg[b] %  c;      continue;
         case 0x19: reg[a] = reg[b] %  reg[c]; continue;
         case 0x1a: reg[a] = c      -  reg[b]; continue;
         case 0x1b: reg[a] = c      /  reg[b]; continue;
         case 0x1c: reg[a] = c      %  reg[b]; continue;
         // 8-word instruction: the shift count is operand 1 of the second group, which is skipped here.
         case 0x1d: d = bp[pc + 1]; pc += 4; reg[a] = (intptr_t)(((int64_t)reg[b]*(int64_t)reg[c])>>d); continue;
         // cos/sin: the angle is in units of 1/65536 of a full turn, the result is scaled by 65536.
         case 0x1e: reg[a] = a_double2intptr(cos(reg[b] * (2 * 3.14159265358979323 / 65536)) * 65536); continue;
         case 0x1f: reg[a] = a_double2intptr(sin(reg[b] * (2 * 3.14159265358979323 / 65536)) * 65536); continue;
         // Integer comparisons: the result register receives 0 or 1.
         case 0x20: reg[a] = (reg[b] <  c);      continue;
         case 0x21: reg[a] = (reg[b] <  reg[c]); continue;
         case 0x22: reg[a] = (reg[b] >= c);      continue;
         case 0x23: reg[a] = (reg[b] >= reg[c]); continue;
         case 0x24: reg[a] = (reg[b] <= c);      continue;
         case 0x25: reg[a] = (reg[b] <= reg[c]); continue;
         case 0x26: reg[a] = (reg[b] >  c);      continue;
         case 0x27: reg[a] = (reg[b] >  reg[c]); continue;
         case 0x28: reg[a] = (reg[b] == c);      continue;
         case 0x29: reg[a] = (reg[b] == reg[c]); continue;
         case 0x2a: reg[a] = (reg[b] != c);      continue;
         case 0x2b: reg[a] = (reg[b] != reg[c]); continue;
         // Argument slots: an index of 256 or more selects arg[index - 256], a smaller one selects subArg[index].
         case 0x2c: if (b >= 256) { reg[a] = arg[b - 256]; } else { reg[a] = subArg[b]; } continue; // Lod_RA
         case 0x2d: if (a >= 256) { arg[a - 256] = reg[b]; } else { subArg[a] = reg[b]; } continue; // Lod_AR
         case 0x2e: if (a >= 256) { arg[a - 256] = b;      } else { subArg[a] = b;      } continue; // Lod_AI
         // call: save the return position and argIdx in the current frame record;
         // the caller's subArg area becomes the callee's arg area.
         case 0x2f: inf = (a_A4vm_exec0_funcInfo *) (vcInf->p + vcInf->n - sizeof (a_A4vm_exec0_funcInfo)); // call
             inf->pc = pc; inf->arg = argIdx; arg = subArg; argIdx = inf->subArg; pc = lab[a]; continue;
         // Conditional jumps: compare and branch in one instruction, the label number is operand a.
         case 0x30: if (reg[b] <  c)      { pc = lab[a]; } continue;
         case 0x31: if (reg[b] <  reg[c]) { pc = lab[a]; } continue;
         case 0x32: if (reg[b] >= c)      { pc = lab[a]; } continue;
         case 0x33: if (reg[b] >= reg[c]) { pc = lab[a]; } continue;
         case 0x34: if (reg[b] <= c)      { pc = lab[a]; } continue;
         case 0x35: if (reg[b] <= reg[c]) { pc = lab[a]; } continue;
         case 0x36: if (reg[b] >  c)      { pc = lab[a]; } continue;
         case 0x37: if (reg[b] >  reg[c]) { pc = lab[a]; } continue;
         case 0x38: if (reg[b] == c)      { pc = lab[a]; } continue;
         case 0x39: if (reg[b] == reg[c]) { pc = lab[a]; } continue;
         case 0x3a: if (reg[b] != c)      { pc = lab[a]; } continue;
         case 0x3b: if (reg[b] != reg[c]) { pc = lab[a]; } continue;
         // Square root of a value scaled by 65536; the result is scaled by 65536 as well.
         case 0x3c: reg[a] = a_double2intptr(sqrt(reg[b] * (1.0 / 65536)) * 65536); continue;
         // Call the C function whose address is operand a. b gives the argument count (handled in steps of 3),
         // c is the result register, and the argument registers are operands 1..3 of the following groups.
         case 0x3d:
             if (b == 0) { intptr_t (*fn)() = (void *) a; reg[c] = fn(); continue; }
             if (b <= 3) { intptr_t (*fn)(intptr_t, intptr_t, intptr_t) = (void *) a; reg[c] = fn(reg[bp[pc+1]], reg[bp[pc+2]], reg[bp[pc+3]]); pc += 4; continue; }
             if (b <= 6) { intptr_t (*fn)(intptr_t, intptr_t, intptr_t, intptr_t, intptr_t, intptr_t) = (void *) a;
                 reg[c] = fn(reg[bp[pc+1]], reg[bp[pc+2]], reg[bp[pc+3]], reg[bp[pc+5]], reg[bp[pc+6]], reg[bp[pc+7]]); pc += 8; continue; }
             if (b <= 9) { intptr_t (*fn)(intptr_t, intptr_t, intptr_t, intptr_t, intptr_t, intptr_t, intptr_t, intptr_t, intptr_t) = (void *) a;
                 reg[c] = fn(reg[bp[pc+1]], reg[bp[pc+2]], reg[bp[pc+3]], reg[bp[pc+5]], reg[bp[pc+6]], reg[bp[pc+7]], reg[bp[pc+9]], reg[bp[pc+10]], reg[bp[pc+11]]); pc += 12; continue; }
             // More than 9 arguments are not supported. Stop with -1 instead of falling through into the
             // enter handler below, which would build a frame from the function address and argument count.
             retVal = -1; goto err;
         // enter: a = integer registers, b = floating-point registers, c = outgoing-argument words.
         // a and c are rounded up to even; each floating-point register takes 2 words of the frame.
         case 0x3e: // enter
             pc += 4; inf = a_VecChr_stkAdd(vcInf, sizeof (a_A4vm_exec0_funcInfo)); a = (a + 1) & -2; c = (c + 1) & -2;
             inf->prv = vcStk->n; a_VecChr_resizeDiff(_arg_ vcStk, (a + b * 2 + c) * sizeof (intptr_t));
             inf->reg = inf->prv; inf->frg = inf->reg + a * sizeof (intptr_t); inf->subArg = inf->frg + b * 2 * sizeof (intptr_t);
             reg = (intptr_t *) (vcStk->p + inf->reg); frg = (double *) (vcStk->p + inf->frg); subArg = (intptr_t *) (vcStk->p + inf->subArg);
             arg = (intptr_t *) (vcStk->p + argIdx); continue;
         // leave: drop the frame; when it was the outermost one, finish, otherwise restore the caller's
         // pointers from its record (inf-- steps to that record) and resume after the call.
         case 0x3f: // leave/return
             inf = a_VecChr_stkRmv(vcInf, sizeof (a_A4vm_exec0_funcInfo));
             vcStk->n = inf->prv; if (vcInf->n == 0) goto fin;
             inf--; reg = (intptr_t *) (vcStk->p + inf->reg); frg = (double *) (vcStk->p + inf->frg); subArg = (intptr_t *) (vcStk->p + inf->subArg);
             argIdx = inf->arg; arg = (intptr_t *) (vcStk->p + argIdx); pc = inf->pc; continue;
 
         // Floating-point instructions. Immediates are read in place from the operand words of the
         // instruction just fetched: a float from one word (bp[pc - 2] or bp[pc - 1]), a double starting at bp[pc - 2].
         case 0x40: frg[a] = (double) *(float *) &bp[pc - 2]; continue;
         case 0x41: frg[a] = frg[b]; continue;
         case 0x42: frg[a] = (double) reg[b]; continue;
         case 0x43: reg[a] = (intptr_t) frg[b]; continue;
         case 0x44: frg[a] = *(double *) &bp[pc - 2]; continue;
         case 0x45: if (a >= 256) { *(double *) &arg[a - 256] = *(double *) &bp[pc - 2]; } else { *(double *) &subArg[a] = *(double *) &bp[pc - 2]; } continue;// lod_AJ
         case 0x46: frg[a] = (double) *(float *) &bp[pc - 1] - frg[b]; continue;
         case 0x47: frg[a] = (double) *(float *) &bp[pc - 1] / frg[b]; continue;
         case 0x48: frg[a] = frg[b] + (double) *(float *) &bp[pc - 1]; continue;
         case 0x49: frg[a] = frg[b] + frg[c];                          continue;
         case 0x4a: frg[a] = frg[b] - (double) *(float *) &bp[pc - 1]; continue;
         case 0x4b: frg[a] = frg[b] - frg[c];                          continue;
         case 0x4c: frg[a] = frg[b] * (double) *(float *) &bp[pc - 1]; continue;
         case 0x4d: frg[a] = frg[b] * frg[c];                          continue;
         case 0x4e: frg[a] = frg[b] / (double) *(float *) &bp[pc - 1]; continue;
         case 0x4f: frg[a] = frg[b] / frg[c];                          continue;
         case 0x50: frg[a] = fabs(frg[b]);  continue;
         case 0x51: frg[a] = sqrt(frg[b]);  continue;
         case 0x52: frg[a] = exp(frg[b]);   continue;
         case 0x53: frg[a] = log(frg[b]);   continue;
         case 0x54: frg[a] = cos(frg[b]);   continue;
         case 0x55: frg[a] = sin(frg[b]);   continue;
         case 0x56: frg[a] = tan(frg[b]);   continue;
         case 0x57: frg[a] = pow(frg[b], frg[c]); continue;
         // 0x58-0x5d have no handler of their own and share the floor handler of 0x5e.
         case 0x58: case 0x59: case 0x5a: case 0x5b: case 0x5c: case 0x5d:
         case 0x5e: frg[a] = floor(frg[b]); continue;
         case 0x5f: frg[a] = ceil(frg[b]);  continue;
         // Floating-point comparisons: the integer result register receives 0 or 1.
         case 0x60: reg[a] = (frg[b] <  (double) *(float *) &bp[pc - 1]); continue;
         case 0x61: reg[a] = (frg[b] <  frg[c]);                          continue;
         case 0x62: reg[a] = (frg[b] >= (double) *(float *) &bp[pc - 1]); continue;
         case 0x63: reg[a] = (frg[b] >= frg[c]);                          continue;
         case 0x64: reg[a] = (frg[b] <= (double) *(float *) &bp[pc - 1]); continue;
         case 0x65: reg[a] = (frg[b] <= frg[c]);                          continue;
         case 0x66: reg[a] = (frg[b] >  (double) *(float *) &bp[pc - 1]); continue;
         case 0x67: reg[a] = (frg[b] >  frg[c]);                          continue;
         case 0x68: reg[a] = (frg[b] == (double) *(float *) &bp[pc - 1]); continue;
         case 0x69: reg[a] = (frg[b] == frg[c]);                          continue;
         case 0x6a: reg[a] = (frg[b] != (double) *(float *) &bp[pc - 1]); continue;
         case 0x6b: reg[a] = (frg[b] != frg[c]);                          continue;
         case 0x6c: if (b >= 256) { frg[a] = *(double *) &arg[b - 256]; } else { frg[a] = *(double *) &subArg[b]; } continue; // Lod_FA
         case 0x6d: if (a >= 256) { *(double *) &arg[a - 256] = frg[b]; } else { *(double *) &subArg[a] = frg[b]; } continue; // Lod_AF
         // 0x6e and 0x6f have no handler of their own and share the handler of 0x70.
         case 0x6e:
         case 0x6f:
         case 0x70: if (frg[b] <  (double) *(float *) &bp[pc - 1]) { pc = lab[a]; } continue;
         case 0x71: if (frg[b] <  frg[c])                          { pc = lab[a]; } continue;
         case 0x72: if (frg[b] >= (double) *(float *) &bp[pc - 1]) { pc = lab[a]; } continue;
         case 0x73: if (frg[b] >= frg[c])                          { pc = lab[a]; } continue;
         case 0x74: if (frg[b] <= (double) *(float *) &bp[pc - 1]) { pc = lab[a]; } continue;
         case 0x75: if (frg[b] <= frg[c])                          { pc = lab[a]; } continue;
         case 0x76: if (frg[b] >  (double) *(float *) &bp[pc - 1]) { pc = lab[a]; } continue;
         case 0x77: if (frg[b] >  frg[c])                          { pc = lab[a]; } continue;
         case 0x78: if (frg[b] == (double) *(float *) &bp[pc - 1]) { pc = lab[a]; } continue;
         case 0x79: if (frg[b] == frg[c])                          { pc = lab[a]; } continue;
         case 0x7a: if (frg[b] != (double) *(float *) &bp[pc - 1]) { pc = lab[a]; } continue;
         case 0x7b: if (frg[b] != frg[c])                          { pc = lab[a]; } continue;
         // 0x7c-0x7f do nothing; any opcode without a case is skipped as well.
         case 0x7c:
         case 0x7d:
         case 0x7e:
         case 0x7f:
             ;
         }
     }
 fin:
     // Normal end: arg[0] is the return value, and the argument words are handed back to the caller.
     retVal = arg[0];
     memcpy(argv, vcStk->p, argc * sizeof (intptr_t));
 err:
     a_VecChr_din4(vcLbl, vcInf, vcStk, 0);
     return retVal;
 }

** (3) a4vm-asm-v0.h

// a4vm version-0 assembler macros: each macro expands to the comma-separated words of one instruction.
 // A basic instruction is 4 words (opcode, operand 1, operand 2, operand 3); longer instructions append
 // further 4-word groups whose first word is 0x2.
 // Suffix letters name the operand kinds in the order the macro takes them, as a_A4vm_exec0() uses them:
 // R integer register, F floating-point register, I immediate, T label number, A argument slot,
 // J double immediate stored starting at operand 2.
 // Conditional jumps (J.._..T) put the label number into operand 1.
#define Nop()               0x0000, 0, 0, 0
 #define Jmp_T(t)            0x0001, t, 0, 0
 #define Abt_I(i)            0x0002, i, 0, 0
 #define Lbl_T(t)            0x0003, t, 0, 0
 #define Lod_RI(r, i)        0x0004, r, i, 0
 #define Lod_RR(r, s)        0x0005, r, s, 0
 #define Or__RRI(r, s, i)    0x0006, r, s, i
 #define Or__RRR(r, s, t)    0x0007, r, s, t
 #define Xor_RRI(r, s, i)    0x0008, r, s, i
 #define Xor_RRR(r, s, t)    0x0009, r, s, t
 #define And_RRI(r, s, i)    0x000a, r, s, i
 #define And_RRR(r, s, t)    0x000b, r, s, t
 #define Shl_RRI(r, s, i)    0x000c, r, s, i
 #define Shl_RRR(r, s, t)    0x000d, r, s, t
 #define Shr_RRI(r, s, i)    0x000e, r, s, i
 #define Shr_RRR(r, s, t)    0x000f, r, s, t
 #define Add_RRI(r, s, i)    0x0010, r, s, i
 #define Add_RRR(r, s, t)    0x0011, r, s, t
 #define Sub_RRI(r, s, i)    0x0012, r, s, i
 #define Sub_RRR(r, s, t)    0x0013, r, s, t
 #define Mul_RRI(r, s, i)    0x0014, r, s, i
 #define Mul_RRR(r, s, t)    0x0015, r, s, t
 #define Div_RRI(r, s, i)    0x0016, r, s, i
 #define Div_RRR(r, s, t)    0x0017, r, s, t
 #define Mod_RRI(r, s, i)    0x0018, r, s, i
 #define Mod_RRR(r, s, t)    0x0019, r, s, t
 #define Sub_RIR(r, i, s)    0x001a, r, s, i
 #define Div_RIR(r, i, s)    0x001b, r, s, i
 #define Mod_RIR(r, i, s)    0x001c, r, s, i
 #define Mul64Shr_RRRI(r, s, t, i)   0x001d, r, s, t, 0x2, i, 0, 0
 #define Cos16_RR(r, s)      0x001e, r, s, 0
 #define Sin16_RR(r, s)      0x001f, r, s, 0
 #define Clt_RRI(r, s, i)    0x0020, r, s, i
 #define Clt_RRR(r, s, t)    0x0021, r, s, t
 #define Cge_RRI(r, s, i)    0x0022, r, s, i
 #define Cge_RRR(r, s, t)    0x0023, r, s, t
 #define Cle_RRI(r, s, i)    0x0024, r, s, i
 #define Cle_RRR(r, s, t)    0x0025, r, s, t
 #define Cgt_RRI(r, s, i)    0x0026, r, s, i
 #define Cgt_RRR(r, s, t)    0x0027, r, s, t
 #define Ceq_RRI(r, s, i)    0x0028, r, s, i
 #define Ceq_RRR(r, s, t)    0x0029, r, s, t
 #define Cne_RRI(r, s, i)    0x002a, r, s, i
 #define Cne_RRR(r, s, t)    0x002b, r, s, t
 #define Lod_RA(r, a)        0x002c, r, a, 0
 #define Lod_AR(a, r)        0x002d, a, r, 0
 #define Lod_AI(a, i)        0x002e, a, i, 0
 #define Cal_T(t)            0x002f, t, 0, 0
 #define Jlt_RIT(r, i, t)    0x0030, t, r, i
 #define Jlt_RRT(r, s, t)    0x0031, t, r, s
 #define Jge_RIT(r, i, t)    0x0032, t, r, i
 #define Jge_RRT(r, s, t)    0x0033, t, r, s
 #define Jle_RIT(r, i, t)    0x0034, t, r, i
 #define Jle_RRT(r, s, t)    0x0035, t, r, s
 #define Jgt_RIT(r, i, t)    0x0036, t, r, i
 #define Jgt_RRT(r, s, t)    0x0037, t, r, s
 #define Jeq_RIT(r, i, t)    0x0038, t, r, i
 #define Jeq_RRT(r, s, t)    0x0039, t, r, s
 #define Jne_RIT(r, i, t)    0x003a, t, r, i
 #define Jne_RRT(r, s, t)    0x003b, t, r, s
 #define Sqr16_RR(r, s)      0x003c, r, s, 0
 // Cfn_*: call the C function fn; rv is the result register, the remaining parameters are argument registers.
 #define Cfn_IR(         fn, rv)                             0x003d, fn, 0, rv
 #define Cfn_IRRRR(      fn, rv, a, b, c)                    0x003d, fn, 3, rv, 0x2, a, b, c
 #define Cfn_IRRRRRRR(   fn, rv, a, b, c, d, e, f)           0x003d, fn, 6, rv, 0x2, a, b, c, 0x2, d, e, f
 #define Cfn_IRRRRRRRRRR(fn, rv, a, b, c, d, e, f, g, h, i)  0x003d, fn, 9, rv, 0x2, a, b, c, 0x2, d, e, f, 0x2, g, h, i
 // Ent_III(i, j, k): i integer registers, j floating-point registers, k outgoing-argument words.
 // The operands of Lev_III are not read by the leave handler of a_A4vm_exec0().
 #define Ent_III(i, j, k)    0x003e, i, j, k, 0x2, 0, 0, 0
 #define Lev_III(i, j, k)    0x003f, i, j, k, 0x2, 0, 0, 0
 
 // Floating-point instructions (0x0040-0x007f).
 #define Lod_FI(f, i)        0x0040, f, i, 0
 #define Lod_FF(f, g)        0x0041, f, g, 0
 #define Lod_FR(f, r)        0x0042, f, r, 0
 #define Lod_RF(r, f)        0x0043, r, f, 0
 #define Lod_FJ(f, j, k)     0x0044, f, j, k
 #define Lod_AJ(a, j, k)     0x0045, a, j, k
 #define Sub_FIF(f, i, g)    0x0046, f, g, i
 #define Div_FIF(f, i, g)    0x0047, f, g, i
 #define Add_FFI(f, g, i)    0x0048, f, g, i
 #define Add_FFF(f, g, h)    0x0049, f, g, h
 #define Sub_FFI(f, g, i)    0x004a, f, g, i
 #define Sub_FFF(f, g, h)    0x004b, f, g, h
 #define Mul_FFI(f, g, i)    0x004c, f, g, i
 #define Mul_FFF(f, g, h)    0x004d, f, g, h
 #define Div_FFI(f, g, i)    0x004e, f, g, i
 #define Div_FFF(f, g, h)    0x004f, f, g, h
 #define Abs_FF(f, g)        0x0050, f, g, 0
 #define Sqr_FF(f, g)        0x0051, f, g, 0
 #define Exp_FF(f, g)        0x0052, f, g, 0
 #define Log_FF(f, g)        0x0053, f, g, 0
 #define Cos_FF(f, g)        0x0054, f, g, 0
 #define Sin_FF(f, g)        0x0055, f, g, 0
 #define Tan_FF(f, g)        0x0056, f, g, 0
 #define Pow_FF(f, g, h)     0x0057, f, g, h
 // 0058-005d: no macro is defined for these opcodes.
 #define Flr_FF(f, g)        0x005e, f, g, 0
 #define Cel_FF(f, g)        0x005f, f, g, 0
 #define Clt_RFI(r, f, i)    0x0060, r, f, i
 #define Clt_RFF(r, f, g)    0x0061, r, f, g
 #define Cge_RFI(r, f, i)    0x0062, r, f, i
 #define Cge_RFF(r, f, g)    0x0063, r, f, g
 #define Cle_RFI(r, f, i)    0x0064, r, f, i
 #define Cle_RFF(r, f, g)    0x0065, r, f, g
 #define Cgt_RFI(r, f, i)    0x0066, r, f, i
 #define Cgt_RFF(r, f, g)    0x0067, r, f, g
 #define Ceq_RFI(r, f, i)    0x0068, r, f, i
 #define Ceq_RFF(r, f, g)    0x0069, r, f, g
 #define Cne_RFI(r, f, i)    0x006a, r, f, i
 #define Cne_RFF(r, f, g)    0x006b, r, f, g
 #define Lod_FA(f, a)        0x006c, f, a, 0
 #define Lod_AF(a, f)        0x006d, a, f, 0
 // 006e-006f: no macro is defined for these opcodes.
 #define Jlt_FIT(f, i, t)    0x0070, t, f, i
 #define Jlt_FFT(f, g, t)    0x0071, t, f, g
 #define Jge_FIT(f, i, t)    0x0072, t, f, i
 #define Jge_FFT(f, g, t)    0x0073, t, f, g
 #define Jle_FIT(f, i, t)    0x0074, t, f, i
 #define Jle_FFT(f, g, t)    0x0075, t, f, g
 #define Jgt_FIT(f, i, t)    0x0076, t, f, i
 #define Jgt_FFT(f, g, t)    0x0077, t, f, g
 #define Jeq_FIT(f, i, t)    0x0078, t, f, i
 #define Jeq_FFT(f, g, t)    0x0079, t, f, g
 #define Jne_FIT(f, i, t)    0x007a, t, f, i
 #define Jne_FFT(f, g, t)    0x007b, t, f, g
 // 007c-007f: no macro is defined for these opcodes.

** (4) サンプルプログラム: t0014a.c

#define a_Version 2
 #include <inttypes.h>
 #include <acl4.c>
 #include <windows.h>
 #include "a4_t0001.c"
 
 a_Win *win;   // window allocated by openWin(); setPix() writes into it and flushWin() flushes it
 clock_t tm0;  // clock() value taken as the start of timing; printTime() prints the ticks elapsed since then
 
 void openWin(intptr_t xsz, intptr_t ysz)
 {
     win = a_malloc(_arg_  sizeof (a_Win));
     a_Win_ini(_arg_  win, xsz, ysz, "graph", 0x000000);
     tm0 = clock();
 }
 
 void setPix(intptr_t x, intptr_t y, intptr_t c) { win->buf[x + y * win->xsz] = c; }
 // Pass the global window to a_Win_flushAll0().
 void flushWin()
 {
     a_Win_flushAll0(win);
 }
 // Never returns: calls Sleep(1000) in an endless loop.
 void waitInf()
 {
     while (1) {
         Sleep(1000);
     }
 }
 // Print the clock() ticks elapsed since tm0 as "time=<n>".
 // The difference is cast to int so that the argument matches the %d conversion
 // whatever integer type clock_t is on the platform.
 void printTime()
 {
     printf("time=%d\n", (int) (clock() - tm0));
 }
 
 // Output callback for the preprocessor (same as the one written in t0013b.c).
 // A line for which Preprocessor_isDirective() returns non-zero is reported on stderr;
 // every other line is appended to w->dst.
 void myPut(Preprocessor_Put0 *w, VecChr *lin)
 {
     if (Preprocessor_isDirective(lin->p, lin->p + lin->n) == 0) {
         VecChr_puts(w->dst, lin->p, lin->p + lin->n);
         return;
     }
     fprintf(stderr, "[err] %.*s", (int) lin->n, lin->p);
 }
 
 // Assemble and run an a4vm assembly source file.
 //   argv[1]   : source file; it is preprocessed together with a4vm-asm-v0.h.
 //   argv[2..] : integer arguments, stored in a4arg[1..9]; any further ones are ignored.
 // Prints a4arg[0] as "ans=" followed by the elapsed time.
 // Returns 1 when no source file is given, otherwise 0.
 int main(int argc, const char **argv)
 {
     if (argc < 2) return 1;
     Preprocessor pp[1]; Preprocessor_ini(pp);                            // initialise the preprocessor.
     pp->put = (void *) myPut;                                            // replace the output function.
     Preprocessor_SourceFiles_addFile(pp->sfs, argv[1], strlen(argv[1])); // specify the input file.
     Preprocessor_SourceFiles_addFile(pp->sfs, "a4vm-asm-v0.h", 13);      // have this one included first.
     VecChr vc[1]; VecChr_ini(vc); pp->put0->dst = vc;                    // specify the output object.
     Preprocessor_main(pp); Preprocessor_din(pp);                         // run the preprocessor, then release its memory.
 
     // Turn the preprocessed text into bytecode words: one word per comma/semicolon separated item.
     BufFree bf[1]; a_BufFree_ini(bf);
     Token0 t0[1]; Token0_ini1(t0);
     t0->s = vc->p; t0->s1 = vc->p + vc->n;
     Preprocessor_Eval ev[1]; ev->err = 0;
     VecChr bin[1], vcTmp[1]; VecChr_ini4(bin, vcTmp, 0, 0);
     for (;;) {
         char *s = Token1_get(t0), *t;
         intptr_t i, n = t0->len;
         if (n >= 2 && s[0] == 0x22) {
             // String literal: the word is the address of an unescaped copy owned by bf.
             vcTmp->n = 0; VecChr_puts(vcTmp, s + 1, s + n - 1);
             VecChr_convEsc(vcTmp);
             i = (intptr_t) (t = a_BufFree_malloc(bf, vcTmp->n + 1));
             memcpy(t, vcTmp->p, vcTmp->n + 1);
         }
         else if (n == 1 && (*s == ';' || *s == ',')) continue;
         // Names of the C functions a program may call: the word is the function address.
         else if (n == 6 && memcmp(s, "printf", 6)    == 0) { i = (intptr_t) printf; }
         else if (n == 7 && memcmp(s, "openWin", 7)   == 0) { i = (intptr_t) openWin; }
         else if (n == 6 && memcmp(s, "setPix", 6)    == 0) { i = (intptr_t) setPix; }
         else if (n == 8 && memcmp(s, "flushWin", 8)  == 0) { i = (intptr_t) flushWin; }
         else if (n == 7 && memcmp(s, "waitInf", 7)   == 0) { i = (intptr_t) waitInf; }
         else if (n == 9 && memcmp(s, "printTime", 9) == 0) { i = (intptr_t) printTime; }
         else {
             // Anything else is evaluated as an expression, starting again at this token.
             t0->s = s;
             i = Preprocessor_eval(ev, t0, 0x7fff);
             if (ev->err != 0) break;
         }
         VecChr_resizeDiff(_arg_  bin, sizeof (intptr_t)); ((intptr_t *) (bin->p + bin->n))[-1] = i;
         Token1_get(t0);
         if (t0->c != ',' && t0->c != ';') break;
     }
     intptr_t *bp = (intptr_t *) bin->p, bn = bin->n / sizeof (intptr_t);
     // All 10 words are handed to the VM and a4arg[0] is printed, so start from zeros.
     intptr_t a4arg[10] = { 0 }, i;
     // a4arg[0] is the return slot; at most 9 command-line values fit after it.
     for (i = 0; i < argc - 2 && i + 1 < 10; i++)
         a4arg[i + 1] = strtol(argv[2 + i], NULL, 0);
     tm0 = clock(); A4vm_exec0(bp, bn, 10, a4arg); printf("ans=%" PRIdPTR "\n", a4arg[0]); printTime();
     VecChr_din4(vc, bin, vcTmp, 0); BufFree_flush(bf); BufFree_din(bf);
     a_malloc_debugList(_arg);
     return 0;
 }

** (5) サンプルプログラム: t0014b.txt
 #define R00     0
 #define R01     1
 #define L00     0
 #define L01     1
 
 // Recursive Fibonacci: L00(n) returns n when n <= 1, otherwise L00(n-2) + L00(n-1).
 Lbl_T(L00);
   Ent_III(2,0,2);       // enter(registers=2, float registers=0, outgoing-argument words=2).
   Lod_RA(R00,0x101);    // R00 = arg[1]; // arg[1]: first argument.
   Jle_RIT(R00,1,1);     // if (R00 <= 1) goto L01;
   Add_RRI(R00,R00,-2); Lod_AR(1,R00); Cal_T(L00); Lod_RA(R01,0); // R00 -= 2; R01 = L00(R00);
   Add_RRI(R00,R00,+1); Lod_AR(1,R00); Cal_T(L00); Lod_RA(R00,0); // R00 += 1; R00 = L00(R00);
   Add_RRR(R00,R00,R01); // R00 += R01;
 Lbl_T(L01);
   Lod_AR(0x100,R00);    // arg[0] = R00; // arg[0]: return value of the function.
   Lev_III(2,0,2);       // leave/return

// 実行結果.
 >t0014a t0014b.txt 40
 ans=102334155
 time=12210
-再帰でフィボナッチ数を求めるプログラムです。ちゃんと再帰ができているかどうか確認することと、処理速度を確認するために書きました。計算結果はもちろんあっています。
-C言語で同等の処理をやらせたところ、C言語版のほうが23.5倍くらい速かったので、関数呼び出し性能の比較では A4vm_exec0() はC言語版の約1/23.5の速度ということになります。悪くはないですが良くもないです。
-一般にコンパイラとインタプリタの速度比は10倍くらいなので、23.5倍はそれよりも2倍以上悪いと言えますが、a4vmは関数呼び出しコストを抑えることよりも、その他の部分を速くすることに注力しているので、これはまあこんなものかなーという感じです。
-（ちなみに普通はフィボナッチ数を求めるのならこんなアルゴリズムは使いません。だからこの手の処理が遅くても実際の性能の目安にはなりません。）

** (6) サンプルプログラム: t0014c.txt
 #define R00     0
 #define R01     1
 #define L00     0
 #define L01     1
 
 // Recursive factorial: L00(n) returns n when n == 1, otherwise n * L00(n-1).
 Lbl_T(L00);
   Ent_III(2,0,2);
   Lod_RA(R00,0x101);     // R00 = arg[1];
   Jeq_RIT(R00,1,L01);    // if (R00 == 1) goto L01;
   Add_RRI(R01,R00,-1); Lod_AR(1,R01); Cal_T(L00); Lod_RA(R01,0); // R01 = L00(R00 - 1);
   Mul_RRR(R00,R00,R01);  // R00 *= R01;
 Lbl_T(L01);
   Lod_AR(0x100,R00);     // arg[0] = R00;
   Lev_III(2,0,2);        // leave/return

// 実行結果.
 >t0014a t0014c.txt 10
 ans=3628800
 time=0
-再帰で階乗を計算するプログラムです。これも再帰処理がバグってないか調べるために書きました。階乗も本来ならループで計算するところですが、デバッグのために再帰で書いてみました。ちゃんと動いています。

** (7) サンプルプログラム: t0014d.txt
 #define R00     0
 
 // Calls the C function printf with the string whose address is loaded into R00.
   Ent_III(1,0,0);
   Lod_RI(R00,"hello, world\n");     // R00 = address of the string.
   Cfn_IRRRR(printf,R00, R00,0,0);   // R00 = printf(R00, ...); the 2nd and 3rd argument slots name register 0 as well.
   // NOTE(review): the operands (1,0,2) differ from Ent_III(1,0,0) above; the leave handler of a_A4vm_exec0() does not read them.
   Lev_III(1,0,2);

// 実行結果.
 >t0014a t0014d.txt
 hello, world
 ans=0
 time=0
-a4vmでは、C言語の関数を呼び出すことができます。この例ではprintfを呼び出して、「hello, world」を表示しています。

** (8) サンプルプログラム: t0014e.txt
 #define R00     0
 #define R01     1
 #define R02     2
 #define R03     3
 #define R04     4
 #define L00     0
 
 // Prints the integers 0..arg[1] with printf("%d "), followed by a newline.
   Ent_III(5,0,0);
   Lod_RI(R02,"%d ");
   Lod_RI(R03,"\n");
   Lod_RA(R01,0x101); // R01=arg[1]
   Lod_RI(R00,0);     // for (R00=0; R00<=R01; R00++) {
 Lbl_T(L00);
   Cfn_IRRRR(printf,R04, R02,R00,0); // R04=printf("%d ",R00);
   Add_RRI(R00,R00,1);
   Jle_RRT(R00,R01,L00); // }
   Cfn_IRRRR(printf,R04, R03,0,0); // R04=printf("\n");
   Lev_III(5,0,0);

// 実行結果.
 >t0014a t0014e.txt 20
 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
 ans=0
 time=0
-printfができるので、"%d"とかももちろんできます。それを確認するためのプログラムです。

** (9) サンプルプログラム: t0014f.txt
 #define R00     0
 #define R01     1
 #define R02     2
 #define R03     3
 #define L00     0
 #define L01     1
 
 // Opens a 256x256 window and sets every pixel; the value in R01 starts at 0 and grows by 0x100 per pixel.
   Ent_III(4,0,0);
   Lod_RI(R01,256);
   Cfn_IRRRR(openWin,R00, R01,R01,0);
   Lod_RI(R01,0x000000);
   Lod_RI(R03,0); // for (R03=0; R03<256; R03++) {
 Lbl_T(L00);
   Lod_RI(R02,0); // for (R02=0; R02<256; R02++) {
 Lbl_T(L01);
   Cfn_IRRRR(setPix,R00, R02,R03,R01); // setPix(R02,R03,R01);
   Add_RRI(R01,R01,0x100);
   Add_RRI(R02,R02,1);
   Jlt_RIT(R02,256,L01); // }
   Add_RRI(R03,R03,1);
   Jlt_RIT(R03,256,L00); // }
   Cfn_IR(flushWin,R00);
   Cfn_IR(waitInf,R00);
   Lev_III(4,0,0);
-実行結果
--https://essen.osask.jp/files/pic20260331a.png
-t0014a.c には a4_t0001.c が入っているのでウィンドウ描画ができます。

** (10) サンプルプログラム: t0014g.txt
 #define R00     0
 #define R01     1
 #define R02     2
 #define R03     3
 #define L00     0
 #define L01     1
 
 // Opens a 256x256 window and sets pixel (R02,R03) to (R02 ^ R03) * 0x010101.
   Ent_III(4,0,0);
   Lod_RI(R01,256);
   Cfn_IRRRR(openWin,R00, R01,R01,0);
   Lod_RI(R03,0); // for (R03=0; R03<256; R03++) {
 Lbl_T(L00);
   Lod_RI(R02,0); // for (R02=0; R02<256; R02++) {
 Lbl_T(L01);
   Xor_RRR(R01,R02,R03);       // R01 = R02 ^ R03;
   Mul_RRI(R01,R01,0x010101);  // R01 *= 0x010101;
   Cfn_IRRRR(setPix,R00, R02,R03,R01); // setPix(R02,R03,R01);
   Add_RRI(R02,R02,1);
   Jlt_RIT(R02,256,L01); // }
   Add_RRI(R03,R03,1);
   Jlt_RIT(R03,256,L00); // }
   Cfn_IR(flushWin, R00);
   Cfn_IR(waitInf, R00);
   Lev_III(4,0,0);
-実行結果
--https://essen.osask.jp/files/pic20260331b.png
-これも問題なくうまくいきました。

** (11) サンプルプログラム: t0014h.txt
 // Mandelbrot benchmark: 512x384 pixels, 4x4 sample points per pixel, at most 446 iterations per sample,
 // computed with integer registers and Mul64Shr (64-bit multiply followed by a right shift).
 #define x   0 // R00
 #define y   1 // R01
 #define c   2 // R02
 #define sx  3 // R03
 #define sy  4 // R04
 #define cx  5 // R05
 #define cy  6 // R06
 #define zx  7 // R07
 #define zy  8 // R08
 #define xx  9 // R09
 #define yy  10 // R0a
 #define n   11 // R0b
 #define sn  12 // R0c
 #define t0  c
 #define lp0 0 // L0000
 #define lp1 1 // L0001
 #define lp2 2 // L0002
 #define lp3 3 // L0003
 #define lp4 4 // L0004
 #define sk0 5 // L0005
 #define sk1 6 // L0006
 
   Ent_III(13, 0, 0);
   Lod_RI(x, 512); Lod_RI(y, 384); Cfn_IRRRR(openWin,c, x,y,0);
   Lod_RI(y, 0); Lbl_T(lp0); // for (y = 0; y < 384; y++) {
     Lod_RI(x, 0); Lbl_T(lp1); // for (x = 0; x < 512; x++) {
       Lod_RI(sn, 0);
       Lod_RI(sx, 0); Lbl_T(lp2); // for (sx = 0; sx < 4; sx++) {
         Mul_RRI(cx, x, 4); Add_RRR(cx, cx, sx); Mul_RRI(cx, cx, 56); Add_RRI(cx, cx, 4673536);
         Lod_RI(sy, 0); Lbl_T(lp3); // for (sy = 0; sy < 4; sy++) {
           Mul_RRI(cy, y, 4); Add_RRR(cy, cy, sy); Mul_RRI(cy, cy, -56); Add_RRI(cy, cy, -124928);
           Lod_RR(zx, cx); Lod_RR(zy, cy);
           Lod_RI(n, 1); Lbl_T(lp4); // for (n = 1; n < 447; n++) {
             Mul64Shr_RRRI(xx, zx, zx, 24);
             Mul64Shr_RRRI(yy, zy, zy, 24);
             Add_RRR(t0, xx, yy); Jgt_RIT(t0, 0x4000000, sk0); // if (xx + yy > 0x4000000) break;
             Mul64Shr_RRRI(zy, zy, zx, 23);
             Add_RRR(zx, xx, cx); Sub_RRR(zx, zx, yy);
             Add_RRR(zy, zy, cy);
             Add_RRI(n, n, 1); Jlt_RIT(n, 447, lp4);
 Lbl_T(sk0);
           Add_RRR(sn, sn, n);
           Add_RRI(sy, sy, 1); Jlt_RIT(sy, 4, lp3);
         Add_RRI(sx, sx, 1); Jlt_RIT(sx, 4, lp2);
       Shr_RRI(sn, sn, 4); // sn = sum of the 16 iteration counts / 16
       Mul_RRI(c, sn, 256);
       Jlt_RIT(sn, 256, sk1); // if (sn >= 256) {
         Lod_RI(c, 0);
         Jge_RIT(sn, 447, sk1); // if (sn < 447) {
           Add_RRI(c, sn, 0xfe01);
 Lbl_T(sk1);
       Mul_RRI(c, c, 256);
       Cfn_IRRRR(setPix,c, x,y,c);
       Add_RRI(x, x, 1); Jlt_RIT(x, 512, lp1);
     Add_RRI(y, y, 1); Jlt_RIT(y, 384, lp0);
   Cfn_IR(printTime,c);
   Cfn_IR(flushWin,c);
   Cfn_IR(waitInf,c);
   Lev_III(13, 0, 0);
-実行結果
--https://essen.osask.jp/files/pic20260331c.png
-私にとっては定番のmandelプログラムです。C言語で書いたバージョンと比較して、6.83倍の処理時間でした。これはインタプリタとしては高性能だと思います。やったね！

** (99) 更新履歴
-2026.03.30(月) 初版