/* ^Forth: Traditional Benchmarks File: gfbench.hf Deriver: david.n.williams@umich.edu License: Public Domain (We're guessing that includes the original benchmark codes.) Starting date: March 16, 2001 Version 0.7.0: March 24, 2001 Version 0.7.1: August 29, 2001 Version 0.7.2: October 26, 2002 Version 0.7.3: November 6, 2002 Version 0.7.4: November 17, 2002 Version 0.7.5: November 22, 2002 This is a ^Forth version of the traditional benchmark suite used in Gforth. ^Forth is a dialect of Forth rearranged for automatic translation to C source. The current translator produces code for a pfe loadable module. The main optimizations are: * Word call overhead in the pfe virtual machine, and function call overhead as well, are eliminated for words defined as ^Forth primitives, which emit in-line C source. * Nonprimitive ^Forth words used in ^Forth words can be treated by gcc as leaf functions. * ^Forth code that combines ^Forth primitives gets some extra benefit from gcc optimization. * When available, the loop parameters for the currently innermost loop are kept in registers. * When available, the top entries in the data and floating point stacks are kept in registers (TOS and FTOS). The word CMATRIX-MAIN2 below perhaps best illustrates how ^Forth allows Forth and C to be mixed in the same definition. For examples of ^Forth primitives, see the file kernel.hf. The point here is to compare run times for Forth code compiled with normal colon definitions (benchmarked in the usual way), ^Forth code automatically translated to C, and Forth code translated to C by a human. */ BEGIN-EXTERNAL {" /* No extra include's. */ "} REQUIRES hfkernel.hf \ debug ext-def: .s "dot_s" END-EXTERNAL MAKE-INDEX hfbench "^Forth Benchmarks Word Set" /* *** siev.fs *** */ \ : SECS TIME&DATE SWAP 60 * + SWAP 3600 * + NIP NIP NIP ; \ CREATE FLAGS 8190 ALLOT 8190 ccreate-allot: flags "flags" 8190 constant: #flags "sharp_flags" \ for primes up to 16,384 \ FLAGS 8190 + CONSTANT EFLAG variable: eflag "eflag" def: primes "primes" ( -- #primes ) flags #flags one fill zero three ( #p odd) eflag @ flags DO ( #p odd) i c@ ( flag) IF ( - prime) dup i + ( - i+prime) dup eflag @ < IF ( - prime i+prime) eflag @ swap DO ( - prime) zero i c! ( - prime) dup +LOOP ELSE ( - prime i+prime) drop THEN ( #p prime) swap 1+ swap THEN ( #p odd) two + LOOP ( #p odd) drop ;def /* Printing version of the above, for debugging. */ def: .primes "dot_primes" ( -- #primes ) flags #flags + eflag ! flags #flags one fill zero three ( #p odd) eflag @ flags DO ( #p odd) i c@ ( flag) IF ( - prime) dup . dup i + ( - i+prime) dup eflag @ < IF ( - prime i+prime) eflag @ swap DO ( - prime) zero i c! ( - prime) dup +LOOP ELSE ( - prime i+prime) drop THEN ( #p prime) swap 1+ swap THEN ( #p odd) two + LOOP ( #p odd) drop ;def /* This is intended to be a fairly direct translation of PRIMES above. */ def: cprimes "c_primes" ( -- #primes ) c{ int primes = 0, odd = 3; char *p = (char *) &flags, *q = (char *) eflag, *r; memset (p, 1, sharp_flags); while (p < q){ if (*p){ /* odd is prime */ /* HF_PUSH (odd); }c . c{ */ /* print primes for debug */ r = p + odd; if (r < q){ while (r < q){ *r = 0; r += odd;}} primes += 1;} odd += 2; p++;} HF_PUSH (primes); }c ;def /* Printing version of the above, for debugging. */ def: .cprimes "dot_c_primes" ( -- #primes ) flags #flags + eflag ! c{ { int primes = 0, odd = 3; char *p = (char *) &flags, *q = (char *) eflag, *r; memset (p, 1, sharp_flags); while (p < q){ if (*p){ /* odd is prime */ HF_PUSH (odd); }c . c{ /* print primes for debug */ r = p + odd; if (r < q){ while (r < q){ *r = 0; r += odd;}} primes += 1;} odd += 2; p++;} HF_PUSH (primes); } }c ;def 1000 constant: #iters "sharp_iters" \ 1 constant: #iters "sharp_iters" \ debug def: benchmark "benchmark" zero #iters zero DO primes nip LOOP ;def def: cbenchmark "c_benchmark" zero #iters zero DO cprimes nip LOOP ;def \ SECS BENCHMARK . SECS SWAP - CR . .( secs) def: sieve-main "sieve_main" flags #flags + eflag ! benchmark ( . ) drop ;def def: csieve-main "c_sieve_main" flags #flags + eflag ! cbenchmark ( . ) drop ;def /* *** bubble.fs *** */ \ .( Loading Bubble Sort benchmark...) cr \ A classical benchmark of an O(n**2) algorithm; Bubble sort \ \ Part of the programs gathered by John Hennessy for the MIPS \ RISC project at Stanford. Translated to forth by Marty Fraeman \ Johns Hopkins University/Applied Physics Laboratory. \ MM forth2c doesn't have it ! \ : mybounds over + swap ; \ We use BOUNDS, ^Forth has it. No measureable effect with pfe. \ 1 cells Constant cell \ present in ^Forth variable: seed "seed" ( -- addr) \ : initiate-seed ( -- ) 74755 seed ! ; \ : random ( -- n ) seed @ 1309 * 13849 + 65535 and dup seed ! ; 74755 constant: seed0 "seed_zero" 1309 constant: 1309 "thirteen_zero_nine" 13849 constant: 13849 "thirteen_eight_forty_nine" 65535 constant: 65535 "sixty_four_k_minus_one" def: initiate-seed "initiate_seed" ( -- ) seed0 seed ! ;def def: random "random" ( -- n ) seed @ 1309 * 13849 + 65535 and dup seed ! ;def 6000 constant: elements "elements" \ 10 constant: elements "elements" \ debug \ align create list elements cells allot host[ 6000 cells ]host create-allot: list "list" def: initiate-list "initiate_list" ( -- ) list elements cells + list DO random i ! cell +LOOP ;def def: dump-list "dump_list" ( -- ) list elements cells + list DO i @ . cell +LOOP cr ;def def: verify-list "verify_list" ( -- ) list elements 1- cells bounds DO i 2@ > abort" bubble-sort: not sorted " cell +LOOP ;def def: bubble "bubble" ( -- ) \ ." bubbling..." cr \ one \ (dnw) This was a bug which had no time effect. elements one DO list elements i - cells bounds DO i 2@ > if i 2@ swap i 2! then cell +LOOP LOOP ;def def: cbubble "c_bubble" ( -- ) c{ int i = 1; p4cell *p, *q, s0, s1; while (i < elements){ p = (p4cell *) &list; q = p + elements - i; while (p < q){ s0 = p[0]; s1 = p[1]; if (s0 < s1){ p[0] = s1; p[1] = s0;} p++;} i++;} }c ;def def: bubble-sort "bubble_sort" ( -- ) initiate-seed initiate-list bubble verify-list ;def def: cbubble-sort "c_bubble_sort" ( -- ) initiate-seed initiate-list cbubble verify-list ;def def: bubble-with-flag "bubble_with_flag" ( -- ) one elements one DO -one list elements i - cells bounds DO i 2@ > if i 2@ swap i 2! drop zero then cell +LOOP if leave then LOOP ;def def: bubble-sort-with-flag "bubble_sort_with_flag" ( -- ) initiate-seed initiate-list bubble-with-flag verify-list ;def def: bubble-main "bubble_main" ( -- ) bubble-sort \ bubble-sort-with-flag ;def def: cbubble-main "c_bubble_main" ( -- ) cbubble-sort \ cbubble-sort-with-flag \ not implemented ;def /* *** matrix.fs *** */ \ .( Loading Matrix Multiplication benchmark...) cr \ NOTE: This version needs 0.5MB data space \ A classical benchmark of an O(n**3) algorithm; Matrix Multiplication \ \ Part of the programs gathered by John Hennessy for the MIPS \ RISC project at Stanford. Translated to forth by Marty Fraeman, \ Johns Hopkins University/Applied Physics Laboratory. \ MM forth2c doesn't have it ! \ : mybounds over + swap ; \ We use BOUNDS, ^Forth has it. No measureable effect with pfe. /* Already defined above. */ \ variable seed \ : initiate-seed ( -- ) 74755 seed ! ; \ : random ( -- n ) seed @ 1309 * 13849 + 65535 and dup seed ! ; host[ \ calculated constants in EXTENSIONS vocabulary 200 constant row-size \ 10 constant row-size \ debug row-size cells constant row-byte-size row-size row-size * constant mat-size mat-size cells constant mat-byte-size ]host \ definitions in TRANSLATE vocabulary host[ row-size ]host constant: row-size "row_size" host[ row-byte-size ]host constant: row-byte-size "row_byte_size" host[ mat-size ]host constant: mat-size "mat_size" host[ mat-byte-size ]host constant: mat-byte-size "mat_byte_size" \ align create ima mat-byte-size allot \ align create imb mat-byte-size allot \ align create imr mat-byte-size allot host[ mat-byte-size ]host create-allot: ima "ima" host[ mat-byte-size ]host create-allot: imb "imb" host[ mat-byte-size ]host create-allot: imr "imr" 120 constant: 120 "one_hundred_twenty" 60 constant: 60 "sixty" def: initiate-matrix "initiate_matrix" ( m[row-size][row-size] -- ) mat-byte-size bounds DO random ( r) dup 120 / 120 * - 60 - ( r-[r/120]*120-60) i ! cell +LOOP ;def def: cinitiate-matrix "c_initiate_matrix" ( m[row-size][row-size] -- ) c{ p4cell *q, *p = (p4cell *)TOS; p4ucell r = seed; HF_DROP1; q = p + mat_size; while (p < q){ r = (r * 1309 + 13849) & 65535; *p++ = r - (r / 120) * 120 - 60;} seed = r; }c ;def def: innerproduct "innerproduct" ( a[row][*] b[*][column] -- int) zero ( col row sum) row-size zero DO ( sum) >r over @ over @ ( col row colval rowval) * r> + ( sum) >r ( col row) swap cell+ swap row-byte-size + r> ( col row sum) LOOP >r 2drop r> ( sum) ;def def: cinnerproduct "c_innerproduct" ( a[row][*] b[*][column] -- int) c{ p4cell *row = (p4cell *) TOS; p4cell *col = (p4cell *) SP[1]; p4cell i = 0, sum = 0; SP++; while (i++ < row_size){ sum += *row * *col++; row += row_size;} TOS = sum; }c ;def def: matrix-main "matrix_main" ( -- ) initiate-seed ima initiate-matrix imb initiate-matrix imr ima mat-byte-size bounds DO imb row-byte-size bounds DO j ( left.row) i ( right.col) innerproduct over ! cell+ cell +LOOP row-size cells +LOOP drop ;def def: cmatrix-main1 "c_matrix_main_one" ( -- ) initiate-seed ima cinitiate-matrix imb cinitiate-matrix imr ima mat-byte-size bounds DO imb row-byte-size bounds DO j i cinnerproduct over ! cell+ cell +LOOP row-size cells +LOOP drop ;def /* This gives a run time not measureably different from the above in our system. */ def: cmatrix-main2 "c_matrix_main_two" ( -- ) c{ p4cell *row = ima, *col = imb, *result = imr; p4cell *row_lim = row + mat_size; p4cell *col_lim = col + row_size; }c initiate-seed ima cinitiate-matrix imb cinitiate-matrix c{ while (row < row_lim){ /* rows in left matrix */ col = imb; while (col < col_lim){ /* cols in right matrix */ HF_FILL_TOS SP -= 2; (p4cell *)SP[1] = row; (p4cell *) TOS = col++; }c cinnerproduct c{ *result++ = TOS; SP++;} row += row_size;} }c ;def def: dump-a-matrix "dump_a_matrix" ( -- ) ima mat-byte-size bounds DO i @ . cell +LOOP ;def def: dump-b-matrix "dump_b_matrix" ( -- ) imb mat-byte-size bounds DO i @ . cell +LOOP ;def def: dump-result-matrix "dump_result_matrix" ( -- ) imr mat-byte-size bounds DO i @ . cell +LOOP ;def /* *** fib.fs *** */ def: fib "fib" ( n -- f[n+1] ) dup two < IF drop one ELSE dup 1- RECURSE swap two - RECURSE + THEN ;def def: cfib "c_fib" ( n -- f[n+1] ) c{ int n = TOS, f; if (n < 2){ TOS = 1; }else{ TOS = n - 1; hf_c_fib_ (); f = TOS; TOS = n - 2; hf_c_fib_ (); TOS += f;} }c ;def 34 constant: 34 "thirty_four" def: fib-main "fib_main" 34 fib drop ;def def: cfib-main "c_fib_main" 34 cfib drop ;def