/* http://kurtstephens.com/pub/tail_call_interp/tail_call_interp.c The Scheme programming language requires proper implementation of tail calls, because all looping in Scheme is implemented as function calls -- there is a very fundamental relationship between tail-calls and iteration. Many Scheme interpreters implement proper tail-recursion by forcing everything to "happen" within a tightly looping byte-code VM. The VM loops on op-codes and manipulates a stack (or two). Tail-calls typically reuse the current activation record with new arguments. Oaklisp, a pure OO Scheme interpreter, uses two stacks, one for values (call arguments and locals) and one for activation records, and a tightly tuned bytecode VM loop. Use of two stacks, allows more efficent control flow in tail-calls and continuations. To embed a stack-based interpreter in C, calling out to C functions from within interpreted code and calling into interpreter functions from C need to be allowed without too much gnashing of teeth; we cannot force the developer or extender of the interpreter to put all activity in a byte-code loop. Thus extending a properly tail-recursive bytecode VM with C call-ins and call-outs can be cumbersome. However, we want to support a byte-code execution environment in our interpreter, otherwise it's not much of an interpreter. We want C code to execute in its environment and byte-code to execute in its environement while allowing the two execution enviroments to commnunicate efficiently and as seamlessly as possible, because the interpreter will spend most of its time executing interpreted code and we want the core of the interpreter to be written and extendable in C. Since C (almost always) uses the same stack for activation records and values, we will have some problems getting proper tail-recursion out of C code (although there is some support in GCC for tail-recursion as long as there are no pointers to stack locals). However we can do something about tail-calls occuring at the boundaries of C and interpreted code. The example puts the tail-call loop at all call sites in the C code of the interpreter, thus allowing efficient tail-calls to and from C code, while still allowing the interpreter to be developed as a somewhat normal C program. It supports the intention to iterate using tail-calls in the interpreter as an explicit iteration on activation records at the call sites in the interpreter's implementation. It allows different bytecode VMs to be embedded whereever and however the extender of the interpreter wants without subverting proper tail-calling. The example is a simple interpreter that has only integer values and no interpreted function name lookups. The interpreted function names are hard-coded in C to keep this example (somewhat) short. A more complete implementation of this technique is in an embeddable pure OO Scheme implementation, based on Oaklisp, named http://kurtstephens.com/pub/ll. */ #include #include typedef int value; /* Could be a reference with type tags in the lower bits */ /* Definition of primitive extension functions. * Note: they all return "int", not "value", values are * *always* kept on the value stack. The return int value * is used to instruct the caller to loop on tail-calls. */ typedef int (*Primitive)(void); /* Defintion of the interpreters function primitive. */ typedef struct Function { Primitive primitive; unsigned char *bytecode; } Function; typedef struct ActivationRecord { Function *func; int argc; value *argv; } ActivationRecord; /***************************************** ** Explicit interpreter stacks. **/ /* Note: they dont need to be very large here because * we will be doing proper tail-recursion in this example. */ ActivationRecord __ar_stack[16]; ActivationRecord *__ar_sp = __ar_stack + (sizeof(__ar_stack)/sizeof(__ar_stack[0])) - 1; value __val_stack[16]; value *__val_sp = __val_stack + (sizeof(__val_stack)/sizeof(__val_stack[0])) - 1; /***************************************** ** Interpreter stack and call sub-primitives **/ #define VAL_POP() (*(__val_sp ++)) #define VAL_PUSH(X) (*(-- __val_sp) = (X)) #define VAL_PEEK (*__val_sp) #define AR_POP() (__ar_sp ++) #define AR_PUSH() (-- __ar_sp) #define ARGV_END() (__ar_sp->argv + __ar_sp->argc) #define VAL_RESTORE() (__val_sp = ARGV_END()) /***************************************** ** Interpreter call/return/tail-call protocol **/ /* Argument/Value stack protocol */ /* Push arguments in reverse order. */ #define A0() (void) #define A1(_1) ARG_VAL_PUSH(_1) #define A2(_1,_2) (ARG_VAL_PUSH(_2),A1(_1)) #define A3(_1,_2,_3) (ARG_VAL_PUSH(_3),A2(_1,_2)) #define A4(_1,_2,_3,_4) (ARG_VAL_PUSH(_4),A3(_1,_2,_3)) /* Prevent side-effect ambiguity in: *(-- _val_sp) = *(__val_sp ++) which occurs in expansion of: CALL(foo, 1, A1(CALL(bar, 0, A0()))) */ __inline__ void ARG_VAL_PUSH(value x) { VAL_PUSH(x); } /* Standard function call protocol */ /* Call FUNC with ARGS * Note: This expands to a valid C expression, not a C statement. */ #define CALL(FUNC,ARGC,ARGS) \ ( \ /* Push args on value stack in reverse order. */ \ ARGS, \ /* Create new activation record. */ \ AR_PUSH(), \ /* Init the new activation record. */ \ (__ar_sp->func = (FUNC)), \ /* Top of value stack is beginning of the new AR's argument list. */ \ (__ar_sp->argv = __val_sp), \ (__ar_sp->argc = (ARGC)), \ /* Iterate on tail-calls at this call site. */ \ INVOKE(), \ /* Pop the current AR. */ \ AR_POP(), \ /* "return" the top stack value back to C code. */ \ VAL_POP() \ ) /* Return value protocol */ #define RETURN(X) \ do { \ /* Evaluate X in the context of the current value stack "frame" */ \ value __rtn_tmp = (X); \ /* Restore the value stack "frame" */ \ VAL_RESTORE(); \ /* Put the return value on the stack */ \ VAL_PUSH(__rtn_tmp); \ fprintf(stderr, " RETURN(%d) (@%p)\n", VAL_PEEK, (void*) __val_sp); \ /* Instruct caller's CALL() site to stop looping on tail-calls. */ \ return 0; \ } while(0) /* Tail function call protocol */ /* Tail-call FUNC with ARGS */ #define TAIL_CALL(FUNC,ARGC,ARGS) \ do { \ /* Evaluate tail-call arguments in the context of the current value stack "frame" */ \ ARGS; \ /* Replace current value stack "frame" with tail-call arguments */ \ { \ value *__src = __val_sp + (ARGC); \ value *__dst = ARGV_END(); \ int __cnt = (ARGC); \ while ( __cnt -- ) { \ *(-- __dst) = *(-- __src); \ } \ } \ /* Reuse the current AR */ \ __ar_sp->func = (FUNC); \ /* Point value stack frame to the new value stack "frame" */ \ __ar_sp->argv = __val_sp = ARGV_END() - (ARGC); \ __ar_sp->argc = (ARGC); \ __print_val_stack(); \ /* Instruct caller's call() site to invoke our activation record! */ \ INVOKE_TAIL_CALL(); \ return 1; \ } while(0) #define INVOKE_PRIM() (__invoke_print(), __ar_sp->func->primitive()) #define INVOKE_TAIL_CALL() (__invoke_print()) __inline__ void __invoke() { while ( INVOKE_PRIM() ) /* On the pending activation records */ ; } #define INVOKE() __invoke() /* Argument access */ #define ARG (__ar_sp->argv) /***************************************** ** Debugging support. **/ void __print_val_stack() { value *x = __val_sp; fprintf(stderr, " val_sp = %p: ", (void*) __val_sp); while ( x < __ar_sp->argv ) { fprintf(stderr, "%d ", *(x ++)); } fprintf(stderr, "( "); while ( x < ARGV_END() ) { fprintf(stderr, "%d ", *(x ++)); } fprintf(stderr, ")\n"); } void __invoke_print() { fprintf(stderr, " INVOKE(func = %p, argc = %d, argv = %p /* {", (void*) __ar_sp->func, __ar_sp->argc, __ar_sp->argv); { int i; for ( i = 0; i < __ar_sp->argc; ++ i ) { fprintf(stderr, "%s%d", (i ? ", " : ""), __ar_sp->argv[i]); } } fprintf(stderr, "} */)\n"); { void *c_sp = (void*) &c_sp; printf(" C sp = %p, ar_sp = %p\n", (void*) c_sp, (void*) __ar_sp); } } /***************************************** ** Example usage of protocol. **/ /* The obligatory "print object" primitive in C */ int print_prim(); Function print = { &print_prim }; int print_prim() { printf("PRINT: %d\n", ARG[0]); RETURN(ARG[0]); } /* Two mutually tail-call recursive functions as primitives in C. */ int a_prim(); Function a = { &a_prim }; /* Still a Function, but no bytecode */ int b_prim(); Function b = { &b_prim }; extern Function c; /* A bytecode function */ int a_prim() { printf("a(%d)\n", ARG[0]); CALL(&print, 1, A1(ARG[0])); CALL(&c, 2, A2(1, 2)); TAIL_CALL(&b, 2, A2(ARG[0], 2)); } int b_prim() { printf("b(%d, %d))\n", ARG[0], ARG[1]); CALL(&print, 1, A1(ARG[0])); TAIL_CALL(&a, 1, A1( CALL(&c, 2, A2(ARG[0], ARG[1])) ) ); } /* Here's a simple C function that is written in a classical C style. */ value add(value a, value b) { return a + b; } /* Here's a simple embedded byte-code VM sub-primitive */ int bytecode_prim() { const unsigned char *pc = __ar_sp->func->bytecode; fprintf(stderr, " BC: Starting '%s'\n", pc); while ( *pc ) { #if 0 fprintf(stderr, " BC: top of stack: %d (@%p)\n", VAL_PEEK, __val_sp); fprintf(stderr, " BC: opcode %c\n", *pc); #endif switch (*(pc ++)) { /* "print" opcode */ case 'p': printf("BC: %d\n", VAL_PEEK); break; /* Read argument opcodes */ case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': { VAL_PUSH(ARG[pc[-1] - '0']); } break; /* Addition */ case '+': { /* * Call-out to native C code from VM. * Note the naive value stack motion here. */ value a = VAL_POP(); value b = VAL_POP(); VAL_PUSH(add(a, b)); } break; /* Subtraction */ case '-': { /* Locally implemented opcode */ value tmp = VAL_POP(); VAL_PEEK = tmp - VAL_PEEK; } break; /* ADD MORE MICROCODE HERE */ default: { fprintf(stderr, " BC: bad opcode (%d)\n", pc[-1]); abort(); } break; } } /* The return value is the value on the top of the stack */ fprintf(stderr, " BC: RETURN: %d\n", VAL_PEEK); RETURN(VAL_PEEK); } /* Simple bytecode interpreted function: ARG[0] + ARG[1]; ("10+") Print and return the result. ("p") */ Function c = { &bytecode_prim, "10+p" }; int main(int argc, char **argv) { CALL(&a, 1, A1(1)); /* This should "loop" forever */ abort(); /* This should never happen */ return 0; }