Presentation is loading. Please wait.

Presentation is loading. Please wait.

嵌入式處理器架構與程式設計 王建民 中央研究院 資訊所 2008 年 7 月. 2 Contents Introduction Computer Architecture ARM Architecture Development Tools  GNU Development Tools ARM Instruction.

Similar presentations


Presentation on theme: "嵌入式處理器架構與程式設計 王建民 中央研究院 資訊所 2008 年 7 月. 2 Contents Introduction Computer Architecture ARM Architecture Development Tools  GNU Development Tools ARM Instruction."— Presentation transcript:

1 嵌入式處理器架構與程式設計 王建民 中央研究院 資訊所 2008 年 7 月

2 2 Contents Introduction Computer Architecture ARM Architecture Development Tools  GNU Development Tools ARM Instruction Set ARM Assembly Language ARM Assembly Programming  GNU ARM ToolChain Interrupts and Monitor

3 Lecture 8 ARM Assembly Programming

4 4 Outline Assembly Programming Assembly-C Interface Peephole Optimization

5 5 Example #4: String Length #include <stdio.h> extern int mystrlen(char *s); int main() { char s[20] = "Hello, World!\n"; printf("The length of the string is %d\n", mystrlen(s)); } int mystrlen(char *s1) { char *s2; s2 = s1; while (*s2 != 0) { s2++; } return (s2-s1); }

6 6 Example #4: Pseudo Code int mystrlen(char *s1) { char *s2; s2 = s1; while (*s2 != 0) { s2++; } return (s2-s1); } mystrlen: s2 = s1 start_loop: if (*s2 == 0) goto end_loop s2 = s2 + 1 goto start_loop end_loop: return (s2-s1)

7 7 Example #4: Storage Assignment mystrlen: s2 = s1 start_loop: if (*s2 == 0) goto end_loop s2 = s2 + 1 goto start_loop end_loop: return (s2-s1) mystrlen: r4 = r0 start_loop: r5 = *r4 if (r5 == 0) goto end_loop r4 = r4 + 1 goto start_loop end_loop: return (r4-r0)

8 8 Example #4: Final Assembly Code mystrlen: r4 = r0 start_loop: r5 = *r4 if (r5 == 0) goto end_loop r4 = r4 + 1 goto start_loop end_loop: return (r4-r0) .text .align 2 .global mystrlen mystrlen: mov r4, r0 start_loop: ldrb r5, [r4] cmp r5, #0 beq end_loop add r4, r4, #1 b start_loop end_loop: sub r0, r4, r0 mov pc, lr

9 9 Example #5: Summation #include <stdio.h> extern int mysum(int n, int *array); int main() { int a[5] = {1, 3, 5, 7, 9}; printf("The summation of the array is %d\n", mysum(5,a)); } int mysum(int n, int *array) { int i, sum; sum = 0; for (i = 0; i < n; i++) { sum += array[i]; } return sum; }

10 10 Example #5: Pseudo Code int mysum(int n, int *array) { int i, sum; sum = 0; for (i = 0; i < n; i++) { sum += array[i]; } return sum; } mysum: sum = 0 i = 0 start_loop: if (i >= n) goto end_loop sum = sum + array[i] i = i + 1 goto start_loop end_loop: return sum

11 11 Example #5: Storage Assignment mysum: sum = 0 i = 0 start_loop: if (i >= n) goto end_loop sum = sum + array[i] i = i + 1 goto start_loop end_loop: return sum mysum: r5 = 0 r4 = 0 start_loop: if (r4 >= r0) goto end_loop r6 = r1[r4] r5 = r5 + r6 r4 = r4 + 1 goto start_loop end_loop: return r5

12 12 Example #5: Final Assembly Code mysum: r5 = 0 r4 = 0 start_loop: if (r4 >= r0) goto end_loop r6 = r1[r4] r5 = r5 + r6 r4 = r4 + 1 goto start_loop end_loop: return r5 .text .align 2 .global mysum mysum: mov r5, #0 mov r4, #0 start_loop: cmp r4, r0 bge end_loop ldr r6, [r1,r4,LSL#2] add r5, r5, r6 add r4, r4, #1 b start_loop end_loop: mov r0, r5 mov pc, lr

13 13 Example #6: Bubble Sort 1 #include <stdio.h> extern void bubble(int n, int *a); int main() { int i; int a[5] = {9, 7, 5, 3, 1}; bubble(5, a); printf("The sorted array:\n"); for (i = 0; i < 5; i++) { printf("a[%d] = %d\n", i, a[i]); } }

14 14 Example #6: Bubble Sort 2 void sort2(int *a, int *b) { int tmp; if (*b < *a) { tmp = *a; *a = *b; *b = tmp; } } void bubble(int n, int *a) { int i, j; for (i = 0; i < n-1; i++) { for (j = 0; j < n-1-i; j++) { sort2(&a[j], &a[j+1]); } } }

15 15 Example #6: Pseudo Code void bubble(int n, int *a) { int i, j; for (i = 0; i < n-1; i++) { for (j = 0; j < n-1-i; j++) { sort2(&a[j], &a[j+1]); } } } bubble: i = 0 start_outer: if (i >= n-1) goto end_outer j = 0 start_inner: if (j >= n-1-i) goto end_inner sort2(&a[j],&a[j+1]) j = j + 1 goto start_inner end_inner: i = i + 1 goto start_outer end_outer: return

16 16 Example #6: Storage Assignment bubble: i = 0 start_outer: if (i >= n-1) goto end_outer j = 0 start_inner: if (j >= n-1-i) goto end_inner sort2(&a[j],&a[j+1]) j = j + 1 goto start_inner end_inner: i = i + 1 goto start_outer end_outer: return bubble: r2 = 0 start_outer: r4 = r0 - 1 if (r2 >= r4) goto end_outer r3 = 0 start_inner: r5 = r4 - r2 if (r3 >= r5) goto end_inner sort2(r1+r3*4,r1+r3*4+4) r3 = r3 + 1 goto start_inner end_inner: r2 = r2 + 1 goto start_outer end_outer: return

17 17 Example #6: Assembly Code? bubble: r2 = 0 start_outer: r4 = r0 - 1 if (r2 >= r4) goto end_outer r3 = 0 start_inner: r5 = r4 - r2 if (r3 >= r5) goto end_inner sort2(r1+r3*4,r1+r3*4+4) r3 = r3 + 1 goto start_inner end_inner: r2 = r2 + 1 goto start_outer end_outer: return bubble: mov r2, #0 start_outer: sub r4, r0, #1 cmp r2, r4 bge end_outer mov r3, #0 start_inner: sub r5, r4, r2 cmp r3, r5 bge end_inner add r0, r1, r3, LSL #2 add r1, r0, #4 bl sort2 add r3, r3, #1 b start_inner end_inner: add r2, r2, #1 b start_outer end_outer: mov pc, lr

18 18 Example #6: Final Assembly Code bubble: mov r2, #0 start_outer: sub r4, r0, #1 cmp r2, r4 bge end_outer mov r3, #0 start_inner: sub r5, r4, r2 cmp r3, r5 bge end_inner add r0, r1, r3, LSL #2 add r1, r0, #4 bl sort2 add r3, r3, #1 b start_inner end_inner: add r2, r2, #1 b start_outer end_outer: mov pc, lr bubble: mov r2, #0 start_outer: sub r4, r0, #1 cmp r2, r4 bge end_outer mov r3, #0 start_inner: sub r5, r4, r2 cmp r3, r5 bge end_inner stmfd sp!,{r0-r3,lr} add r0, r1, r3, LSL #2 add r1, r0, #4 bl sort2 ldmfd sp!, {r0-r3,lr} add r3, r3, #1 b start_inner end_inner: add r2, r2, #1 b start_outer end_outer: mov pc, lr

19 19 Outline Assembly Programming Assembly-C Interface Peephole Optimization

20 20 Generating Assembly Code from C In this course, we will be using the GNU ARM ToolChain. To compile a C program to assembly code: arm-elf-gcc -S filename.c When you compile a .c file, you get a .s file. This .s file contains the assembly language code. When assembled, this code can potentially be linked and loaded as an executable. To display information from an object file: arm-elf-objdump -S -r filename

21 21 Example #7: A Simple Program int a, b; int main() { a = 3; b = 4; } /* end main() */ .file "example4.c" .text .align 2 .global main .type main, %function main: mov ip, sp stmfd sp!, {fp, ip, lr, pc} sub fp, ip, #4 ldr r2, .L3 mov r3, #3 str r3, [r2, #0] ldr r2, .L3+4 mov r3, #4 str r3, [r2, #0] ldmfd sp, {fp, sp, pc} .L4: .align 2 .L3: .word a .word b .size main, .-main .comm a,4,4 .comm b,4,4 .ident "GCC: (GNU) 4.0.0" Notes: the .comm directives declare storage for a and b; the loader will put the addresses of a and b in the .word locations at .L3.

22 22 Example #7: Object File example1.o: file format elf32-littlearm Disassembly of section .text: 00000000 <main>: 0: e1a0c00d mov ip, sp 4: e92dd800 stmdb sp!, {fp, ip, lr, pc} 8: e24cb004 sub fp, ip, #4 ; 0x4 c: e59f2014 ldr r2, [pc, #20] ; 28 <main+0x28> 10: e3a03003 mov r3, #3 ; 0x3 14: e5823000 str r3, [r2] 18: e59f200c ldr r2, [pc, #12] ; 2c <main+0x2c> 1c: e3a03004 mov r3, #4 ; 0x4 20: e5823000 str r3, [r2] 24: e89da800 ldmia sp, {fp, sp, pc} ... 28: R_ARM_ABS32 a 2c: R_ARM_ABS32 b

23 23 Example #7: Executable File 00008208 <main>: 8208: e1a0c00d mov ip, sp 820c: e92dd800 stmdb sp!, {fp, ip, lr, pc} 8210: e24cb004 sub fp, ip, #4 ; 0x4 8214: e59f2014 ldr r2, [pc, #20] ; 8230 <main+0x28> 8218: e3a03003 mov r3, #3 ; 0x3 821c: e5823000 str r3, [r2] 8220: e59f200c ldr r2, [pc, #12] ; 8234 <main+0x2c> 8224: e3a03004 mov r3, #4 ; 0x4 8228: e5823000 str r3, [r2] 822c: e89da800 ldmia sp, {fp, sp, pc} 8230: 0000adc4 andeq sl, r0, r4, asr #27 8234: 0000adc0 andeq sl, r0, r0, asr #27

24 24 Example #8: Calling A Function int tmp; void swap(int a, int b); int main() { int a, b; a = 3; b = 4; swap(a, b); } /* end main() */ void swap(int a, int b) { tmp = a; a = b; b = tmp; } /* end swap() */

25 25 Example #8: Assembly Listing main: mov ip, sp stmfd sp!, {fp, ip, lr, pc} sub fp, ip, #4 sub sp, sp, #8 mov r3, #3 str r3, [fp, #-20] mov r3, #4 str r3, [fp, #-16] ldr r0, [fp, #-20] ldr r1, [fp, #-16] bl swap sub sp, fp, #12 ldmfd sp, {fp, sp, pc} swap: mov ip, sp stmfd sp!, {fp, ip, lr, pc} sub fp, ip, #4 sub sp, sp, #8 str r0, [fp, #-16] str r1, [fp, #-20] ldr r2, .L5 ldr r3, [fp, #-16] str r3, [r2, #0] ldr r3, [fp, #-20] str r3, [fp, #-16] ldr r3, .L5 ldr r3, [r3, #0] str r3, [fp, #-20] sub sp, fp, #12 ldmfd sp, {fp, sp, pc} .L6: .align 2 .L5: .word tmp .comm tmp,4,4

26 26 Example #9: Manipulating Pointers int tmp; int *pa, *pb; void swap(int a, int b); int main() { int a, b; pa = &a; pb = &b; *pa = 3; *pb = 4; swap(*pa, *pb); } /* end main() */ void swap(int a, int b) { tmp = a; a = b; b = tmp; } /* end swap() */

27 27 Example #9: Assembly Listing main: mov ip, sp stmfd sp!, {fp, ip, lr, pc} sub fp, ip, #4 sub sp, sp, #8 ldr r2, .L3 sub r3, fp, #16 str r3, [r2, #0] ldr r2, .L3+4 sub r3, fp, #20 str r3, [r2, #0] ldr r3, .L3 ldr r2, [r3, #0] mov r3, #3 str r3, [r2, #0] ldr r3, .L3+4 ldr r2, [r3, #0] mov r3, #4 str r3, [r2, #0] ldr r3, .L3 ldr r3, [r3, #0] ldr r2, [r3, #0] ldr r3, .L3+4 ldr r3, [r3, #0] mov r0, r2 mov r1, r3 bl swap sub sp, fp, #12 ldmfd sp, {fp, sp, pc} .L4: .align 2 .L3: .word pa .word pb

28 28 Example #10: Dealing with struct typedef struct testStruct { unsigned int a; unsigned int b; char c; } testStruct; testStruct *ptest; int main() { ptest->a = 4; ptest->b = 10; ptest->c = 'A'; } /* end main() */ main: mov ip, sp stmfd sp!, {fp, ip, lr, pc} sub fp, ip, #4 ldr r3, .L3 ldr r2, [r3, #0] mov r3, #4 str r3, [r2, #0] ldr r3, .L3 ldr r2, [r3, #0] mov r3, #10 str r3, [r2, #4] ldr r3, .L3 ldr r2, [r3, #0] mov r3, #65 strb r3, [r2, #8] ldmfd sp, {fp, sp, pc} .L4: .align 2 .L3: .word ptest

29 29 Example #11: Passing Arguments int tmp; void test(int a, int b, int c, int d, int *e); int main() { int a, b, c, d, e; a = 3; b = 4; c = 5; d = 6; e = 7; test(a, b, c, d, &e); } /* end main() */ void test(int a, int b, int c, int d, int *e) { tmp = a; a = b; b = tmp; c = b; b = d; *e = d; } /* end test() */

30 30 Example #11: Assembly Listing 1 main: mov ip, sp stmfd sp!, {fp, ip, lr, pc} sub fp, ip, #4 sub sp, sp, #24 mov r3, #3 str r3, [fp, #-28] mov r3, #4 str r3, [fp, #-24] mov r3, #5 str r3, [fp, #-20] mov r3, #6 str r3, [fp, #-16] mov r3, #7 str r3, [fp, #-32] sub r3, fp, #32 str r3, [sp, #0] ldr r0, [fp, #-28] ldr r1, [fp, #-24] ldr r2, [fp, #-20] ldr r3, [fp, #-16] bl test sub sp, fp, #12 ldmfd sp, {fp, sp, pc}

31 31 Example #11: Assembly Listing 2 test: mov ip, sp stmfd sp!, {fp, ip, lr, pc} sub fp, ip, #4 sub sp, sp, #16 str r0, [fp, #-16] str r1, [fp, #-20] str r2, [fp, #-24] str r3, [fp, #-28] ldr r2, .L5 ldr r3, [fp, #-16] str r3, [r2, #0] ldr r3, [fp, #-20] str r3, [fp, #-16] ldr r3, .L5 ldr r3, [r3, #0] str r3, [fp, #-20] ldr r3, [fp, #-20] str r3, [fp, #-24] ldr r3, [fp, #-28] str r3, [fp, #-20] ldr r2, [fp, #4] ldr r3, [fp, #-28] str r3, [r2, #0] sub sp, fp, #12 ldmfd sp, {fp, sp, pc} .L6: .align 2 .L5: .word tmp

32 32 Interfacing C and Assembly ARM has developed a standard called the "ARM Procedure Call Standard" (APCS) which defines: constraints on the use of registers; stack conventions; format of a stack backtrace data structure; argument passing and result return; support for ARM shared library mechanism. Compiler-generated code conforms to the APCS. It's just a standard - not an architectural requirement. Cannot avoid standard when interfacing C and assembly code. Can avoid standard when just writing assembly code or when writing assembly code that isn't called by C code.

33 33 Register Names and Use Register # / APCS Name / APCS Role: R0 a1 argument 1; R1 a2 argument 2; R2 a3 argument 3; R3 a4 argument 4; R4..R8 v1..v5 register variables; R9 sb/v6 static base/register variable; R10 sl/v7 stack limit/register variable; R11 fp frame pointer; R12 ip scratch reg/new-sb in inter-link-unit calls; R13 sp low end of current stack frame; R14 lr link address/scratch register; R15 pc program counter

34 34 How Does STM Work on Memory ? STM sp!, {r0-r15} The ARM processor uses a bit-vector to represent each register to be saved. The architecture places the lowest number register into the lowest address. Default STM == STMDB == STMFD pc lr sp SP before address 0x90 0x8c 0x88 0x84 0x80 0x7c 0x78 0x74 0x70 0x6c 0x68 0x64 0x60 0x5c 0x58 0x54 0x50 ip fp v7 v6 v5 v4 v3 v2 v1 a4 a3 a2 a1 SP after

35 35 Passing and Returning Structures Structures are usually passed in registers (and overflow onto the stack when necessary). When a function returns a struct, a pointer to where the struct result is to be placed is passed in a1 (first argument). Example: struct s f(int x); -- is compiled as -- void f(struct s *result, int x);

36 36 Example #12: Passing Structures typedef struct two_ch_struct{ char ch1; char ch2; } two_ch; two_ch max(two_ch a, two_ch b){ return((a.ch1 > b.ch1)?a:b); } /* end max() */ max: mov ip, sp stmfd sp!, {fp, ip, lr, pc} sub fp, ip, #4 sub sp, sp, #12 str r0, [fp, #-24] str r1, [fp, #-16] str r2, [fp, #-20] ldrb r2, [fp, #-16] ldrb r3, [fp, #-20] cmp r2, r3 bls .L2 ldr r3, [fp, #-16] ldr r2, [fp, #-24] str r3, [r2, #0] b .L1 .L2: ldr r3, [fp, #-20] ldr r2, [fp, #-24] str r3, [r2, #0] .L1: ldr r0, [fp, #-24] sub sp, fp, #12 ldmfd sp, {fp, sp, pc}

37 37 The Frame Pointer Frame pointer (fp) points to the top of stack for function. By using the frame pointer and storing it at the same offset for every function call, it creates a singly-linked list of activation records. foo: mov ip,sp stmfd sp!,{a1-a3,fp,ip,lr,pc} sub fp,ip,#4 sub fp,fp,#12 ldmfd fp,{fp,sp,pc} pc lr ip fp address 0x90 0x8c 0x88 0x84 0x80 0x7c 0x78 0x74 0x70 fp a3 a2 a1 ip sp

38 38 Backtrace The fp register points to the stack backtrace structure for the currently executing function. The saved fp value is (zero or) a pointer to a stack backtrace structure created by the function which called the current function. The saved fp value in this structure is a pointer to the stack backtrace structure for the function that called the function that called the current function; and so on back until the first function.

39 39 Creating the "Backtrace" Structure MOV ip, sp STMFD sp!,{a1-a4,v1-v7,fp,ip,sp,lr,pc} SUB fp, ip, #4 … sub fp, fp, #16 LDMFD fp, {fp,sp,sb,pc} SP before address 0x90 0x8c 0x88 0x84 0x80 0x7c 0x78 0x74 0x70 0x6c 0x68 0x64 0x60 0x5c 0x58 0x54 0x50 SP current FP after (saved) pc (saved) lr (saved) sp (saved) ip (saved) fp v7 v6 v5 v4 v3 v2 v1 a4 a3 a2 a1 IP current

40 40 Example Backtrace (saved) pc (saved) lr ( saved) sp (saved) ip (saved) fp v7 v6 v5 v4 v3 v2 v1 a4 a3 a2 a1 (saved) pc (saved) lr ( saved) sp (saved) ip (saved) fp v7 v6 v5 v4 v3 v2 v1 a4 a3 a2 a1 (saved) pc (saved) lr ( saved) sp (saved) ip (saved) fp v7 v6 v5 v4 v3 v2 v1 a4 a3 a2 a1 fp bar ’s frame foo ’s frame main ’s frame

41 41 Exercise #1 Write an assembly subroutine that implements the quicksort algorithm to sort a list of unsigned integer values. The first entry in the list is the list’s length. void quickSort(unsigned int *list); InputOutput list:0x x xA356A1010x xE235C2030x x7A35B310 0x xA356A101 0x xE235C203

42 42 Exercise #2 Write an assembly subroutine that deletes an item from an ordered list of unsigned values if it is present in the list. The first entry in the list is the list’s length. void removeItem(unsigned int item, unsigned int *list); Input Output item:0x7A35B310 list:0x x x x x7A35B3100xA356A101 0xA356A1010xE235C203 0xE235C203

43 43 Outline Assembly Programming Assembly-C Interface Peephole Optimization

44 44 Peephole Optimization Final pass over generated code: Examine a few consecutive instructions: 2 to 4 See if an obvious replacement is possible: store/load pairs MOV %eax => mema MOV mema => %eax Can eliminate the second instruction without needing any global knowledge of mema Use algebraic identities Special-case individual instructions

45 45 Algebraic Identities Worth recognizing single instructions with a constant operand: A * 2 = A + A A * 1 = A A * 0 = 0 A / 1 = A More delicate with floating-point

46 46 Is this ever helpful? Why would anyone write X * 1? Why bother to correct such obvious junk code? In fact one might write #define MAX_TASKS 1... a = b * MAX_TASKS; Also, seemingly redundant code can be produced by other optimizations. This is an important effect.

47 47 Replace Multiply by Shift A := A * 4; Can be replaced by 2-bit left shift (signed/unsigned). But must worry about overflow if language does. A := A / 4; If unsigned, can replace with shift right. But shift right arithmetic is a well-known problem. Language may allow it anyway (traditional C).

48 48 Addition Chains for Multiplication If multiply is very slow (or on a machine with no multiply instruction like the original SPARC), decomposing a constant operand into sum of powers of two can be effective: X * 125 = x * 128 – x * 4 + x Two shifts, one subtract and one add, which may be faster than one multiply Note similarity with efficient exponentiation method

49 49 The Right Shift Problem Arithmetic Right shift: Shift right and use sign bit to fill most significant bits. Example: -5 SAR 1 = 11111011 SAR 1 = 11111101, which is -3, not -2. In most languages -5/2 = -2. Prior to C99, implementations were allowed to truncate towards or away from zero if either operand was negative.

50 50 Folding Jumps to Jumps A jump to an unconditional jump can copy the target address: JNE lab1 ... lab1: JMP lab2 Can be replaced by: JNE lab2 As a result, lab1 may become dead (unreferenced).

51 51 Jump to Return A jump to a return can be replaced by a return: JMP lab1 ... lab1: RET Can be replaced by: RET lab1 may become dead code.

52 52 Tail Recursion Elimination 1 A subprogram is tail-recursive if the last computation is a call to itself: function last (lis : list_type) return list_type is begin if lis.next = null then return lis; else return last (lis.next); end if; end; Recursive call can be replaced with lis := lis.next; goto start; -- added label

53 53 Tail Recursion Elimination 2 Saves time: an assignment and jump is faster than a call with one parameter Saves stack space: converts linear stack usage to constant usage. In languages with no loops, this may be a required optimization: specified in Scheme standard.

54 54 Tail Recursion Elimination 3 Consider the sequence on the x86: CALL func RET CALL pushes return point on stack, RET in body of func removes it, RET in caller returns Can generate instead: JMP func Now RET in func returns to original caller, because single return address on stack

55 55 The REALIA COBOL Compiler 1 Full compiler for Standard COBOL, targeted to the IBM PC. Now distributed by Computer Associates Runs in 150K bytes, but must be able to handle very large programs that run on mainframes

56 56 The REALIA COBOL Compiler 2 No global optimization possible: multiple linear passes over code, no global data structures, no flow graph. Multiple peephole optimizations, compiler iterates until code is stable. Each pass scans code backwards to minimize address recomputations.

57 57 Typical COBOL Code Process-Balance. if Balance is negative then perform Send-Bill else perform Record-Credit end-if. Send-Bill.... Record-Credit....

58 58 Simple Assembly Pb: cmp balance, 0 jnl L1 call Sb jmp L2 -- jump to return L1: call Rc L2: ret Sb: … ret Rc: … ret

59 59 Fold Jump to Return Statement Pb: cmp balance, 0 jnl L1 call Sb -- tail recursion ret -- folded L1: call Rc -- tail recursion L2: ret Sb: … ret Rc: … ret

60 60 Eliminate Tail Recursion Pb: cmp balance, 0 jnl L1 -- jump to unconditional jump jmp Sb ret L1: jmp Rc -- will become useless L2: ret Sb: … ret Rc: … ret

61 61 Corresponding Assembly Pb: cmp balance, 0 jnl Rc -- folded jmp Sb ret -- unreachable L1: jmp Rc -- unreachable L2: ret -- unreachable Sb: … ret Rc: … ret

62 62 Remove Dead Code Pb: cmp balance, 0 jnl Rc jmp Sb -- jump to next instruction Sb: … ret Rc: … ret

63 63 Final Code Pb: cmp balance, 0 jnl Rc Sb: … ret Rc: … ret Final code as efficient as inlining. All transformations are local. Each optimization may yield further optimization opportunities. Iterate till no further change.

64 64 Arcane Tricks Consider typical maximum computation if A >= B then C := A; else C := B; end if; For simplicity assume all unsigned, and all in registers

65 65 Eliminating Max Jump on x86 Simple-minded assembly code: CMP A, B JNAE L1 MOV A=>C JMP L2 L1: MOV B=>C L2: One jump in either case

66 66 Computing Max without Jumps Architecture-specific trick: use subtract with borrow instruction and carry flag CMP A, B ; CF=1 if B > A, CF=0 if A >= B SBB %eax, %eax ; all 1's if B > A, all 0's if A >= B MOV %eax, C NOT C ; all 0's if B > A, all 1's if A >= B AND B => %eax ; B if B>A, 0 if A>=B AND A => C ; 0 if B>A, A if A>=B OR %eax => C ; B if B>A, A if A>=B More instructions, but NO JUMPS. Supercompiler: exhaustive search of instruction patterns to uncover similar tricks


Download ppt "嵌入式處理器架構與程式設計 王建民 中央研究院 資訊所 2008 年 7 月. 2 Contents Introduction Computer Architecture ARM Architecture Development Tools  GNU Development Tools ARM Instruction."

Similar presentations


Ads by Google