From 304cb9570a866b679448e9e5eae8a7359409b0d6 Mon Sep 17 00:00:00 2001 From: Pavel Panchekha Date: Mon, 27 Oct 2025 14:55:34 -0600 Subject: [PATCH 1/3] Precision optimization machinery --- eval/optimal.rkt | 89 ++++++++++++++++++++++++++++++++++++++++++++++ infra/optimize.rkt | 76 +++++++++++++++++++++++++++++++++++++++ repl.rkt | 23 ++++++++++++ 3 files changed, 188 insertions(+) create mode 100644 eval/optimal.rkt create mode 100644 infra/optimize.rkt diff --git a/eval/optimal.rkt b/eval/optimal.rkt new file mode 100644 index 0000000..f102de6 --- /dev/null +++ b/eval/optimal.rkt @@ -0,0 +1,89 @@ +#lang racket + +(require "../ops/all.rkt" + "machine.rkt" + "run.rkt" + "main.rkt") + +(provide rival-machine-test-precision + rival-machine-search-precision + rival-machine-find-optimal-precisions) + +; Test if a machine succeeds at a given point with a specific precision vector +; Returns #t if the evaluation succeeds (good? and done?), #f otherwise +(define (rival-machine-test-precision machine pt prec-vec) + ; Load point into registers + (define ival-pt + (for/vector #:length (vector-length pt) + ([x (in-vector pt)]) + (ival x))) + (rival-machine-load machine ival-pt) + + ; Set custom precision vector + (set-rival-machine-iteration! machine 1) ; Don't use initial precision vector + (vector-copy! (rival-machine-precisions machine) 0 prec-vec) + (vector-copy! (rival-machine-repeats machine) 0 (rival-machine-initial-repeats machine)) + (rival-machine-run machine (rival-machine-default-hint machine)) + + ; Check result + (define-values (good? done? bad? stuck? fvec) (rival-machine-return machine)) + (and good? done?)) + +; Binary search for the lowest precision at index idx that makes the machine succeed +; Returns the minimum precision in [min-prec, max-prec] where evaluation succeeds, +; or #f if even max-prec fails +(define (rival-machine-search-precision machine pt prec-vec idx) + (define test-vec (vector-copy prec-vec)) + (define max-prec (vector-ref test-vec idx)) + + ; Check if max-prec works at all + (unless (rival-machine-test-precision machine pt test-vec) + (error 'rival-machine-search-precision "max-prec does not succeed")) + + ; Binary search for minimum + (let loop ([lo 2] + [hi max-prec]) + (if (>= lo hi) + hi + (let* ([mid (quotient (+ lo hi) 2)]) + (vector-set! test-vec idx mid) + (if (rival-machine-test-precision machine pt test-vec) + (loop lo mid) + (loop (+ mid 1) hi)))))) + +; Run thunk n times and return the minimum time +(define (time-min thunk #:min [n 5] #:sum [m 10]) + (thunk) ; Discard warm-up run + (apply min + (for/list ([i (in-range n)]) + (define start (current-inexact-milliseconds)) + (for ([i (in-range m)]) + (thunk)) + (/ (- (current-inexact-milliseconds) start) m)))) + +; Find optimal precisions for a machine at a given point +(define (rival-machine-find-optimal-precisions machine pt) + ; Extract the precision assignment, assuming no slack + (define out (rival-apply machine pt)) + (set-rival-machine-iteration! machine 1) ; Don't use initial precision vector + (rival-machine-adjust machine (rival-machine-default-hint machine)) + (define max-precs (vector-copy (rival-machine-precisions machine))) + + (cond + [(rival-machine-test-precision machine pt max-precs) + ; Timed run with rival-apply (full evaluation), take min of 5 runs + (define final-time (time-min (lambda () (rival-machine-test-precision machine pt max-precs)))) + + ; Start with max precisions + (define optimal-precs (vector-copy max-precs)) + (define n-instrs (vector-length (rival-machine-instructions machine))) + (for ([idx (in-range (- n-instrs 1) -1 -1)]) + (vector-set! optimal-precs idx (rival-machine-search-precision machine pt optimal-precs idx))) + + ; Time run with optimal precisions + (define optimal-time (time-min (lambda () (rival-machine-test-precision machine pt optimal-precs)))) + + ; Return both ratios and precision vectors + (list optimal-precs optimal-time max-precs final-time)] + [else + #f])) diff --git a/infra/optimize.rkt b/infra/optimize.rkt new file mode 100644 index 0000000..043ee78 --- /dev/null +++ b/infra/optimize.rkt @@ -0,0 +1,76 @@ +#lang racket + +(require json + math/bigfloat) +(require "main.rkt" + "eval/machine.rkt" + "eval/optimal.rkt" + "utils.rkt") + +(define (read-from-string s) + (read (open-input-string s))) + +(define (analyze-program rec bench-id min-speedup output-port) + (define exprs (map read-from-string (hash-ref rec 'exprs))) + (define vars (map read-from-string (hash-ref rec 'vars))) + (match-define `(bool flonum ...) (map read-from-string (hash-ref rec 'discs))) + (define discs (cons boolean-discretization (map (const flonum-discretization) (cdr exprs)))) + + (define machine + (parameterize ([*rival-max-precision* 32256]) + (rival-compile exprs vars discs))) + + (define results + (filter identity + (for/list ([pt* (in-list (hash-ref rec 'points))] + [pt-id (in-naturals)]) + (match-define (list pt _sollya-exs _sollya-status _sollya-apply-time) pt*) + (define pt-vec + (parameterize ([bf-precision 53]) + (list->vector (map bf pt)))) + (define result (rival-machine-find-optimal-precisions machine pt-vec)) + (match result + [(list optimal-precs optimal-time cur-precs cur-time) + (list pt-id pt optimal-time cur-time)] + [#f + (eprintf "; Benchmark ~a point ~a, failure to optimize\n" bench-id pt-id) + #f])))) + + (define (bad-pt? rec) + (match-define (list pt-id pt opt-time cur-time) rec) + (and (> cur-time (* min-speedup opt-time)) + (> (- cur-time opt-time) .001))) ; At least 1 us of speedup! + + (define dt (* 1000 (- (apply + (map fourth results)) (apply + (map third results))))) + (define valid-results (sort (filter bad-pt? results) > #:key fourth)) ; Sort by cur-time + (eprintf "; Benchmark ~a, total ~aµs available, ~a bad points\n" + bench-id (~r dt #:precision '(= 1)) (length valid-results)) + (unless (empty? valid-results) + (fprintf output-port "; Benchmark ~a, total ~aµs available\n" bench-id (~r dt #:precision '(= 1))) + (fprintf output-port + "(define (b~a ~a)\n ~a)\n" + bench-id + (string-join (map ~s vars) " ") + (string-join (map ~s exprs) " ")) + + (for ([result (in-list valid-results)] + [n (in-range 10)]) ; At most 10 + (match-define (list pt-id pt opt-time cur-time) result) + (fprintf output-port "(optimize b~a ~a)\n" bench-id (string-join (map ~a pt) " "))) + (fprintf output-port "\n"))) + +(module+ main + (require racket/cmdline) + (define min-speedup (make-parameter 1.2)) + (command-line + #:once-each [("--min") n "Minimum speedup to report" (min-speedup (string->number n))] + #:args ([points-file "infra/points.json"] + [output-file "optimaize.rival"]) + (printf "Analyzing points bad precision assignment (min speedup: ~a)...\n\n" (min-speedup)) + (call-with-output-file output-file #:exists 'replace + (λ (out-port) + (call-with-input-file points-file + (λ (input) + (for ([rec (in-port read-json input)] + [bench-id (in-naturals)]) + (analyze-program rec bench-id (min-speedup) out-port)))))))) diff --git a/repl.rkt b/repl.rkt index 56932c8..4951fb0 100644 --- a/repl.rkt +++ b/repl.rkt @@ -10,6 +10,7 @@ profile) (require "eval/main.rkt" "eval/machine.rkt" + "eval/optimal.rkt" "utils.rkt") (provide repl-main repl-profile) @@ -220,6 +221,27 @@ (when print? (write-explain machine) (printf "\nTotal: ~aµs\n" (~r (* (- end start) 1000) #:precision '(= 1))))] + [`(optimize ,name ,(? (disjoin real? boolean?) vals) ...) + (define machine (repl-get-machine repl name)) + (check-args! name machine vals) + (define result + (parameterize ([bf-precision (repl-precision-bits repl)]) + (rival-machine-find-optimal-precisions machine (list->vector (map ->bf vals))))) + (when print? + (match-define (list optimal-precs optimal-time cur-precs cur-time) result) + (printf "~a optimal ~aµs faster (~a×)\n" + name + (~r (* 1000 (- cur-time optimal-time)) #:precision '(= 1)) + (if (zero? optimal-time) "∞" (~r (/ cur-time optimal-time) #:precision '(= 3)))) + (define ivec (rival-machine-instructions machine)) + (for ([instr (in-vector ivec)] + [final (in-vector cur-precs)] + [optimal (in-vector optimal-precs)]) + (define instr-name (normalize-function-name (~a (object-name (car instr))))) + (printf "~a ~a ~a\n" (~a instr-name #:width 20 #:align 'left) + (~a final #:width 6 #:align 'right) + (~a optimal #:width 6 #:align 'right))) + (newline))] [(or '(help) 'help) (displayln "This is the Rival REPL, a demo of the Rival real evaluator.") (newline) @@ -229,6 +251,7 @@ (displayln " (eval ...) Evaluate a named function") (displayln " (explain ...) Show profile for evaluating a named function") + (displayln " (optimize ...) Find optimal precisions and report speedup") (newline) (displayln "A closed expression can always be used in place of a named function.")] [_ (printf "Unknown command ~a; use help for command list\n" cmd)]))) From a7fdb0e0ad8406168aba737f2e4025320d3d4e33 Mon Sep 17 00:00:00 2001 From: Pavel Panchekha Date: Mon, 27 Oct 2025 15:05:25 -0600 Subject: [PATCH 2/3] fmt --- eval/optimal.rkt | 6 +++--- infra/optimize.rkt | 51 ++++++++++++++++++++++++---------------------- repl.rkt | 7 +++++-- 3 files changed, 35 insertions(+), 29 deletions(-) diff --git a/eval/optimal.rkt b/eval/optimal.rkt index f102de6..275a732 100644 --- a/eval/optimal.rkt +++ b/eval/optimal.rkt @@ -81,9 +81,9 @@ (vector-set! optimal-precs idx (rival-machine-search-precision machine pt optimal-precs idx))) ; Time run with optimal precisions - (define optimal-time (time-min (lambda () (rival-machine-test-precision machine pt optimal-precs)))) + (define optimal-time + (time-min (lambda () (rival-machine-test-precision machine pt optimal-precs)))) ; Return both ratios and precision vectors (list optimal-precs optimal-time max-precs final-time)] - [else - #f])) + [else #f])) diff --git a/infra/optimize.rkt b/infra/optimize.rkt index 043ee78..9253114 100644 --- a/infra/optimize.rkt +++ b/infra/optimize.rkt @@ -21,20 +21,20 @@ (rival-compile exprs vars discs))) (define results - (filter identity - (for/list ([pt* (in-list (hash-ref rec 'points))] - [pt-id (in-naturals)]) - (match-define (list pt _sollya-exs _sollya-status _sollya-apply-time) pt*) - (define pt-vec - (parameterize ([bf-precision 53]) - (list->vector (map bf pt)))) - (define result (rival-machine-find-optimal-precisions machine pt-vec)) - (match result - [(list optimal-precs optimal-time cur-precs cur-time) - (list pt-id pt optimal-time cur-time)] - [#f - (eprintf "; Benchmark ~a point ~a, failure to optimize\n" bench-id pt-id) - #f])))) + (filter + identity + (for/list ([pt* (in-list (hash-ref rec 'points))] + [pt-id (in-naturals)]) + (match-define (list pt _sollya-exs _sollya-status _sollya-apply-time) pt*) + (define pt-vec + (parameterize ([bf-precision 53]) + (list->vector (map bf pt)))) + (define result (rival-machine-find-optimal-precisions machine pt-vec)) + (match result + [(list optimal-precs optimal-time cur-precs cur-time) (list pt-id pt optimal-time cur-time)] + [#f + (eprintf "; Benchmark ~a point ~a, failure to optimize\n" bench-id pt-id) + #f])))) (define (bad-pt? rec) (match-define (list pt-id pt opt-time cur-time) rec) @@ -44,7 +44,9 @@ (define dt (* 1000 (- (apply + (map fourth results)) (apply + (map third results))))) (define valid-results (sort (filter bad-pt? results) > #:key fourth)) ; Sort by cur-time (eprintf "; Benchmark ~a, total ~aµs available, ~a bad points\n" - bench-id (~r dt #:precision '(= 1)) (length valid-results)) + bench-id + (~r dt #:precision '(= 1)) + (length valid-results)) (unless (empty? valid-results) (fprintf output-port "; Benchmark ~a, total ~aµs available\n" bench-id (~r dt #:precision '(= 1))) (fprintf output-port @@ -64,13 +66,14 @@ (define min-speedup (make-parameter 1.2)) (command-line #:once-each [("--min") n "Minimum speedup to report" (min-speedup (string->number n))] - #:args ([points-file "infra/points.json"] - [output-file "optimaize.rival"]) + #:args ([points-file "infra/points.json"] [output-file "optimaize.rival"]) (printf "Analyzing points bad precision assignment (min speedup: ~a)...\n\n" (min-speedup)) - (call-with-output-file output-file #:exists 'replace - (λ (out-port) - (call-with-input-file points-file - (λ (input) - (for ([rec (in-port read-json input)] - [bench-id (in-naturals)]) - (analyze-program rec bench-id (min-speedup) out-port)))))))) + (call-with-output-file + output-file + #:exists 'replace + (λ (out-port) + (call-with-input-file points-file + (λ (input) + (for ([rec (in-port read-json input)] + [bench-id (in-naturals)]) + (analyze-program rec bench-id (min-speedup) out-port)))))))) diff --git a/repl.rkt b/repl.rkt index 4951fb0..5f6fafe 100644 --- a/repl.rkt +++ b/repl.rkt @@ -232,13 +232,16 @@ (printf "~a optimal ~aµs faster (~a×)\n" name (~r (* 1000 (- cur-time optimal-time)) #:precision '(= 1)) - (if (zero? optimal-time) "∞" (~r (/ cur-time optimal-time) #:precision '(= 3)))) + (if (zero? optimal-time) + "∞" + (~r (/ cur-time optimal-time) #:precision '(= 3)))) (define ivec (rival-machine-instructions machine)) (for ([instr (in-vector ivec)] [final (in-vector cur-precs)] [optimal (in-vector optimal-precs)]) (define instr-name (normalize-function-name (~a (object-name (car instr))))) - (printf "~a ~a ~a\n" (~a instr-name #:width 20 #:align 'left) + (printf "~a ~a ~a\n" + (~a instr-name #:width 20 #:align 'left) (~a final #:width 6 #:align 'right) (~a optimal #:width 6 #:align 'right))) (newline))] From c1aed79b34f047bfea3f5ad19553d3b60923cef9 Mon Sep 17 00:00:00 2001 From: Pavel Panchekha Date: Mon, 27 Oct 2025 19:48:30 -0600 Subject: [PATCH 3/3] Woops --- infra/optimize.rkt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/infra/optimize.rkt b/infra/optimize.rkt index 9253114..d0c6d30 100644 --- a/infra/optimize.rkt +++ b/infra/optimize.rkt @@ -2,10 +2,10 @@ (require json math/bigfloat) -(require "main.rkt" - "eval/machine.rkt" - "eval/optimal.rkt" - "utils.rkt") +(require "../main.rkt" + "../eval/machine.rkt" + "../eval/optimal.rkt" + "../utils.rkt") (define (read-from-string s) (read (open-input-string s)))