-
Notifications
You must be signed in to change notification settings - Fork 30
/
kernel-itc.mu4
481 lines (366 loc) · 15.9 KB
/
kernel-itc.mu4
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
| This file is part of muforth: https://muforth.dev/
|
| Copyright 2002-2024 David Frech. (Read the LICENSE for details.)
loading ARM v6-M Forth kernel (ITC)
| Yes, you guessed it: The guts of Forth for ARMv6-M processors!
|
| Because this is a true RISC architecture, doing threaded code doesn't
| seem to make a lot of sense. If you include NEXT inline in each code word -
| the speediest approach - you really need post-increment addressing.
| Otherwise NEXT can take up a lot of space!
|
| On ARMv6-M, NEXT for ITC - indirect-threaded code - is three instructions,
| and NEXT for DTC - direct-threaded code - is two.
|
| However! There are several advantages to doing ITC. One is that the
| system is dead simple, and it's easy to write "user-level" code that mucks
| about with the internals of words, since everything has a uniform
| structure.
|
| It's also cache-efficient, mostly separating data - pointers and
| variables - from code. In a native Forth these often end up next to each
| other in memory, making the caches' jobs more difficult. Since Forth is
| so simple, it's very likely that the entire native code implemenation will
| fit into the I-cache, making it potentially very fast. We'll see about
| this!
( Make sure disassembler uses the right register names.)
forth-regs
| ------------------------------------------------------------------------
| Macros defining common VM operations
| ------------------------------------------------------------------------
( XXX this is a hack until we have a proper smart jump and call.)
assembler
: j asm{{ b }} ;
: c asm{{ bl }} ;
forth
( XXX Macros or subroutines?)
( Data stack macros.)
| NOTE: The registers are pushed and popped in "register index" order:
| lower indices end up at lower addresses. The order that they are
| mentioned in the macro call is irrelevant! So be careful.
assembler
: dpush1 ( r1) 1 regs>mask asm{{ push }} ;
: dpush2 ( r1 r2) 2 regs>mask asm{{ push }} ;
: dpush3 ( r1 r2 r3) 3 regs>mask asm{{ push }} ;
: dpush4 ( r1 r2 r3 r4) 4 regs>mask asm{{ push }} ;
: dpop1 ( r1) 1 regs>mask asm{{ pop }} ;
: dpop2 ( r1 r2) 2 regs>mask asm{{ pop }} ;
: dpop3 ( r1 r2 r3) 3 regs>mask asm{{ pop }} ;
: dpop4 ( r1 r2 r3 r4) 4 regs>mask asm{{ pop }} ;
( Return stack macros.)
( These are pushed so that r1 is the *top* of the R stack.)
: rpush1 ( r1) push asm{{ 1 cells rp rp subs
0 rp \f pop str }} ;
: rpush2 ( r2 r1) 2push asm{{ 2 cells rp rp subs
0 rp \f pop str
1 cells rp \f pop str }} ;
( NOTE: The above warning about order is relevant here too!)
( These are popped in register index order - lower to higher.)
: rpop1 ( r1) 1 regs>mask asm{{ rp ldm }} ;
: rpop2 ( r1 r2) 2 regs>mask asm{{ rp ldm }} ;
forth
| ------------------------------------------------------------------------
| The kernel begins here!
| ------------------------------------------------------------------------
( Compile NEXT inline.)
assembler
: dispatch asm{{ { x } w ldm x bx }} ;
: next asm{{ { w } ip ldm dispatch }} ;
forth
__meta
meta: constant new , ;code label doconst
implements target-do-const
0 w w ldr ( fetch constant into w)
( fall thru) ;c
meta: create new ;code label dovar
implements target-do-var
label wpush
top dpush1 w top mov ( move pfa to top)
next ;c
label dodoes
top dpush1 w top mov ( move pfa to top)
lr w mov ( lr is parent ip)
1 w w subs ( clear thumb bit)
( fall thru) ;c
meta: : new ] ;code label docolon
implements target-do-colon
ip rpush1 w ip mov ( pfa is new ip) next ;c
( Make a nameless colon word.)
meta: -: here docolon 1+ ( thumb!) , ] ;
code* (unnest) [r]
label unnest ip rpop1 begin next ;c
codes nope
definer: does> <;code> asm{{ dodoes bl }} ] ;
code* (lit) [r] { w } ip ldm ( fetch literal into w) wpush j ;c
code execute ( cfa) top w mov begin top dpop1 dispatch ;c
code @execute ( 'cfa) 0 top w ldr again ;c
code* (branch) [r]
label branch 0 ip ip ldr ( follow branch) next ;c
code* (0branch) [r] top top tst top dpop1 branch 0!= until
( fall thru) ;c
label skip 4 ip ip adds next ( skip branch address) ;c
code* (?0branch) [r] top top tst skip 0= until top dpop1 branch j ;c
code* (=0branch) [r] top top tst skip 0= until branch j ;c
( Fast version, using loop register)
code* (for) [r] ( u)
ix rpush1 top ix mov top dpop1 next ;c
code* (next) [r]
1 ix ix subs branch 0= until ix rpop1 skip j ;c
| Do-loop frame looks like this:
|
| +------------------+
| | saved ix reg |
| +------------------+
| | limit |<--- rp
| +------------------+
|
| Current index is in ix register; current "user-visible" index is
| calculated as index + limit.
code* (do) [r] ( limit start)
top w mov ( w = start)
{ top x } pop ( x = limit)
ix x rpush2 ( push ix reg and limit onto R)
x w ix subs ( index = start - limit)
next ;c
| Increment index. If it overflows to zero, restore ix register, pop stack
| frame, skip backwards jump, and continue. If non-zero, simply take the
| backwards jump.
code* (loop) [r]
1 ix ix adds branch 0= until
label undo
ix w rpop2 ( restore ix from R stack and pop do-loop frame)
skip j ;c
( To leave a do-loop early and return to caller.)
code undo^ [r]
ix w rpop2 ( restore ix from R stack and pop do-loop frame)
unnest j ;c
| Add incr to index. If the sign of index has changed, we've crossed the
| threshold, so restore index, pop frame, and skip jump. Otherwise, take
| the backwards jump.
code* (+loop) [r] ( incr)
ix x mov ( save ix *before* adding incr)
top ix ix adds top dpop1 ix x x eors undo 0>= until
branch j ;c
( Push current loop index. User-visible index = index + limit)
code i [r] ( - index)
0 rp w ldr ix w w adds wpush j ;c
| Allocate buffer space _before_ defining the constant that pushes the
| buffer's address. This way we can define buffers in ram as well as in
| flash!
meta: buffer ( #bytes)
h @ push ram align here swap aligned allot pop h ! constant ;
meta: variable cell buffer ; ( A variable is a cell-sized buffer!)
meta: 2variable cell 2* buffer ;
( Basic unary ops.)
code invert top top mvns next ;c
code negate top top negs next ;c
code 2* 1 top top lsls next ;c ( also: top top top add)
code 2/ 1 top top asrs next ;c
code u2/ 1 top top lsrs next ;c
code << ( n shamt - n') x dpop1 top x x lsls begin begin x top mov next ;c
code >> ( n shamt - n') x dpop1 top x x asrs again ;c
code u>> ( n shamt - n') x dpop1 top x x lsrs again ;c
( Basic binary ops.)
code - ( x t - x-t) x dpop1 top x top subs next ;c
code + ( x t - x+t) x dpop1 x top top adds next ;c
code * ( x t - x*t) x dpop1 x top top muls next ;c ( 32x32 -> 32 signed multiply)
code and ( x t - x&t) x dpop1 begin x top top ands next ;c
code or ( x t - x|t) x dpop1 x top top orrs next ;c
code xor ( x t - x^t) x dpop1 x top top eors next ;c
code bic ( x t - x&~t) x dpop1 top top mvns again ;c
( Stack ops.)
code dup [r] ( t - t t) top dpush1 next ;c
code drop [r] ( x t - x) top dpop1 next ;c
code nip [r] ( x t - t) cell sp sp add next ;c
code over [r] ( w t - w t w) 0 sp w ldr wpush j ;c
code swap [r] ( w t - t w) 0 sp w ldr 0 sp top str
begin w top mov next ;c
code rot [r] ( w x t - x t w) 0 sp x ldr cell sp w ldr
0 sp top str cell sp x str again ;c
code tuck [r] ( w t - t w t) 0 sp w ldr 0 sp top str w dpush1 next ;c
( Make a copy of the nth stack item, where 0 nth is dup)
code nth [r] ( w_n .. w_0 t - w_n .. w_0 w_t)
2 top top lsls sp top top add 0 top top ldr next ;c
: 2dup over over ; [r]
: -rot rot rot ; [r]
( Return stack ops.)
code >r [r] ( w) top rpush1 label poptop top dpop1 next ;c
code r> [r] ( - w) w rpop1 wpush j ;c
code r@ [r] ( - w) 0 rp w ldr wpush j ;c
( Memory access.)
code @ ( a - w) 0 top top ldr next ;c
code h@ ( a - uh) 0 top top ldrh next ;c ( Unsigned!)
code c@ ( a - ub) 0 top top ldrb next ;c ( Unsigned!)
code ! ( w a) w dpop1 begin begin begin
0 top w str poptop j ;c
code h! ( h a) w dpop1 0 top w strh poptop j ;c
code c! ( b a) w dpop1 0 top w strb poptop j ;c
( Generic, non-atomic read-modify-write ops: add, bit set, and bit clear.)
code +! ( x a) x dpop1 0 top w ldr x w w adds again ;c
code set! ( x a) x dpop1 0 top w ldr x w w orrs again ;c
code clr! ( x a) x dpop1 0 top w ldr x w w bics again ;c
code @+ ( a - w a+4) { w } top ldm w dpush1 next ;c
code !+ ( w a - a+4) w dpop1 { w } top stm next ;c
code c@+ ( a - w a+1) 0 top w ldrb w dpush1
begin 1 top top adds next ;c
code c!+ ( w a - a+1) w dpop1 0 top w strb again ;c
( Comparison and flag operators.)
code 0< ( n - f) 31 top top asrs next ;c
| These are a bit tricky, esp since borrow is ~carry. The idea is: get the
| inverse of the flag value we want into carry, then subtract top from
| itself - yielding zero - minus borrow, or -1 for true, 0 for false. It's
| efficient but non-obvious.
code 0= ( n - f)
1 top top subs ( ~Z -> C)
label makeflag
top top top sbcs next ;c
code u< ( x t - x u< t)
x dpop1 top x cmp ( ~uless -> C) makeflag j ;c
| Unlike with u<, it's hard to get the flag value we want into carry, so
| let's just use a conditional jump.
code < ( x t - x < t)
0 w movs ( false)
x dpop1 top x cmp < if w w mvns ( true) then
w top mov next ;c
( Another useful compare operator - equality!)
: = xor 0= ;
( Small constants.)
-2 constant -2 [r]
-1 constant -1 [r]
0 constant 0 [r]
1 constant 1 [r]
2 constant 2 [r]
| Incrementers by small constants. Shared code means they take up very
| little space!
meta: incr constant ;code 0 w w ldr w top top adds next ;c
1 incr 1+ [r]
2 incr 2+ [r]
-1 incr 1- [r]
-2 incr 2- [r]
cell constant cell [r]
cell incr cell+ [r]
cell \f negate incr cell- [r]
code cells [r] 2 top top lsls next ;c
code cell/ [r] 2 top top asrs next ;c
.ifdef notyet ( unconverted RISC-V code)
| Double numbers. Useful, among other things, for computing with RISC-V's
| 64-bit timers and counters.
| Double-length math register use.
|
| Enter with d1 and d2 on the stack. Registers are first loaded as follows:
|
| top = d2hi
| w = d2lo
| x = d1hi
| y = d1lo
code d+ ( d1 d2 - d1+d2)
0 sp w lw ( d2lo) cell sp x lw ( d1hi) 2 cells sp y lw ( d1lo)
y w w add ( sumlo) w y y sltu ( y+w < y => carry)
x top top add ( sumhi) top y top add ( +carry)
label wput-pop2
2 cells sp w sw ( lo) 2 cells sp sp addi ( pop)
next ;c
code d- ( d1 d2 - d1-d2)
0 sp w lw ( d2lo) cell sp x lw ( d1hi) 2 cells sp y lw ( d1lo)
y w w sub ( difflo) y w y sltu ( y-w > y => borrow)
x top top sub ( diffhi) top y top sub ( -borrow)
wput-pop2 j ;c
code dnegate ( d - -d)
0 sp w lw ( dlo) w y snez ( will borrow only if w != 0) w w neg
0 sp w sw top top neg top y top sub ( -borrow) next ;c
| I've wracked my brain and I can't come up with a way to do 64-bit
| compares without using a branch. Here is the basic idea:
|
| Compare the high-order words. If they are _equal_, return as the result
| the _unsigned_ comparison of the low-order words.
|
| Otherwise, ignore the low-order words and:
| For ud< return the _unsigned_ comparison of the high-order words
| For d< return the _signed_ comparison of the high-order words
| Compare high-order words; if equal, compare low-order, pop stack, push
| flag, and run NEXT. If not equal, pop stack and return to caller.
label dless-common
cell sp x lw ( d1hi) x top = if ( high-order words equal: compare low)
0 sp w lw ( d2lo)
2 cells sp y lw ( d1lo)
3 cells sp sp addi ( pop) y w top sltu ( d1 d2 u<) makeflag j
then
3 cells sp sp addi 0 w jr ;c
code d< ( d1 d2 - f)
dless-common w jal x top top slt makeflag j ;c
code ud< ( d1 d2 - f)
dless-common w jal x top top sltu makeflag j ;c
| Host words to make it easier to deal with double numbers on the target.
|
| >d converts a host-side 64-bit number into two 32-bit target numbers
| d> goes the other way: target -> host
| d. prints a target double number as a signed 64-bit number
| ud. prints a target double number as an unsigned 64-bit number
meta: >d ( host - tdlo tdhi) dup "ffff_ffff and ( lo) swap 32 u>> ;
meta: d> ( tdlo tdhi - host) 32 << swap "ffff_ffff and + ;
meta: ud. ( tdlo tdhi) d> u. ;
meta: d. ( tdlo tdhi) d> \f . ;
( Once we have double numbers, these come in handy.)
: 2dup ( a b - a b a b) over over ; [r]
: 2swap ( a b c d - c d a b) rot >r rot r> ; [r]
: 2over ( a b c d - a b c d a b) 3 nth 3 nth ; [r]
: 2tuck ( a b c d - c d a b c d) 2swap 2over ; [r]
.then ( notyet)
{ h @ } ( save region)
ram
( For debugging and interactive execution.)
| The host can push things onto host stack; they get copied to target
| stack, registers popped, words execute, re-push, copy back to host...
| Much easier than stuffing things into register slots on stack frame!
.ifdef picoboot.exec
| Before executing any Forth code, we need to save any ARM ABI callee-saved
| registers. All of the Forth VM regs are callee-saved, by design. So we
| push these when we save the lr, and restore them when returning to
| PICOBOOT.
implements continue-forth __asm
{ lr top ip rp ix } push sp y mov
pico-sp w lit
0 w x ldr ( load forth sp)
0 w y str ( save picoboot sp)
x sp mov { top ip rp ix } pop dsb isb next ;c
code bug [r]
{ top ip rp ix } push sp x mov
pico-sp w lit
0 w y ldr ( restore picoboot sp)
0 w x str ( save forth sp)
y sp mov { pc top ip rp ix } pop ;c
pool,
.else
implements continue-forth __asm
cpsid ( disable interrupts)
{ top ip rp ix } pop dsb isb next ;c
code bug [r]
{ top ip rp ix } push 0 bkpt ;c
.then
align
implements trampoline ( x0 .. xn - y0 .. ym)
] execute begin bug again [
__host
h ! ( restore region)
forth
| Show some indication of whether a word is still executing. If IP is
| anything other than two cells past trampoline, we've hit "bug" somewhere
| other than the trampoline. Show this by annotating IP with a "*".
: executing? ( ip - f) [ \m trampoline @ 2 \m cells + #] - ;
: .ip
tip @ dup .h32 executing? if ." * " ^ then ( done) 2sp ;
-: ( forth version of .regs)
cr ." IX SP RP IP"
( 00000000 00000000 00000000 00000000 )
cr tix .tr tsp .tr trp .tr .ip ;
is .regs
__meta
| comment ~~examples~~
| variable inc
| : lala do i bug drop inc @ +loop ;
|
| ( to demonstrate scripting target execution from the host)
| meta: grog ( start n) 0 do \t 1+ remote loop ;
| ( try: 44 10 grog)
|
| ~~examples~~