Presentation is loading. Please wait.

Presentation is loading. Please wait.

Instructor: Erol Sahin

Similar presentations


Presentation on theme: "Instructor: Erol Sahin"— Presentation transcript:

1 Instructor: Erol Sahin
Machine Programming – Branching CENG331: Introduction to Computer Systems 6th Lecture Instructor: Erol Sahin Acknowledgement: Most of the slides are adapted from the ones prepared by R.E. Bryant, D.R. O’Hallaron of Carnegie-Mellon Univ.

2 Conditional Branch Example
absdiff: pushl %ebp movl %esp, %ebp movl 8(%ebp), %edx movl 12(%ebp), %eax cmpl %eax, %edx jle .L7 subl %eax, %edx movl %edx, %eax .L8: leave ret .L7: subl %edx, %eax jmp .L8 int absdiff(int x, int y) { int result; if (x > y) { result = x-y; } else { result = y-x; } return result; Setup Body1 Finish Body2

3 Conditional Branch Example (Cont.)
int goto_ad(int x, int y) { int result; if (x <= y) goto Else; result = x-y; Exit: return result; Else: result = y-x; goto Exit; } absdiff: pushl %ebp movl %esp, %ebp movl 8(%ebp), %edx movl 12(%ebp), %eax cmpl %eax, %edx jle .L7 subl %eax, %edx movl %edx, %eax .L8: leave ret .L7: subl %edx, %eax jmp .L8 C allows “goto” as means of transferring control Closer to machine-level programming style Generally considered bad coding style

4 Conditional Branch Example (Cont.)
int goto_ad(int x, int y) { int result; if (x <= y) goto Else; result = x-y; Exit: return result; Else: result = y-x; goto Exit; } absdiff: pushl %ebp movl %esp, %ebp movl 8(%ebp), %edx movl 12(%ebp), %eax cmpl %eax, %edx jle .L7 subl %eax, %edx movl %edx, %eax .L8: leave ret .L7: subl %edx, %eax jmp .L8

5 Conditional Branch Example (Cont.)
int goto_ad(int x, int y) { int result; if (x <= y) goto Else; result = x-y; Exit: return result; Else: result = y-x; goto Exit; } absdiff: pushl %ebp movl %esp, %ebp movl 8(%ebp), %edx movl 12(%ebp), %eax cmpl %eax, %edx jle .L7 subl %eax, %edx movl %edx, %eax .L8: leave ret .L7: subl %edx, %eax jmp .L8

6 Conditional Branch Example (Cont.)
int goto_ad(int x, int y) { int result; if (x <= y) goto Else; result = x-y; Exit: return result; Else: result = y-x; goto Exit; } absdiff: pushl %ebp movl %esp, %ebp movl 8(%ebp), %edx movl 12(%ebp), %eax cmpl %eax, %edx jle .L7 subl %eax, %edx movl %edx, %eax .L8: leave ret .L7: subl %edx, %eax jmp .L8

7 Conditional Branch Example (Cont.)
int goto_ad(int x, int y) { int result; if (x <= y) goto Else; result = x-y; Exit: return result; Else: result = y-x; goto Exit; } absdiff: pushl %ebp movl %esp, %ebp movl 8(%ebp), %edx movl 12(%ebp), %eax cmpl %eax, %edx jle .L7 subl %eax, %edx movl %edx, %eax .L8: leave ret .L7: subl %edx, %eax jmp .L8

8 General Conditional Expression Translation
C Code val = Test ? Then-Expr : Else-Expr; val = x>y ? x-y : y-x; Test is an expression returning an integer: = 0 interpreted as false, ≠ 0 interpreted as true Create separate code regions for then & else expressions Execute appropriate one Goto Version nt = !Test; if (nt) goto Else; val = Then-Expr; Done: . . . Else: val = Else-Expr; goto Done;

9 Conditionals: x86-64 Will disappear Blackboard? int absdiff(
int x, int y) { int result; if (x > y) { result = x-y; } else { result = y-x; } return result; absdiff: # x in %edi, y in %esi movl %edi, %eax # eax = x movl %esi, %edx # edx = y subl %esi, %eax # eax = x-y subl %edi, %edx # edx = y-x cmpl %esi, %edi # x:y cmovle %edx, %eax # eax=edx if <= ret Will disappear Blackboard?

10 Conditionals: x86-64 Conditional move instruction cmovC src, dest
int absdiff( int x, int y) { int result; if (x > y) { result = x-y; } else { result = y-x; } return result; absdiff: # x in %edi, y in %esi movl %edi, %eax # eax = x movl %esi, %edx # edx = y subl %esi, %eax # eax = x-y subl %edi, %edx # edx = y-x cmpl %esi, %edi # x:y cmovle %edx, %eax # eax=edx if <= ret Conditional move instruction cmovC src, dest Move value from src to dest if condition C holds More efficient than conditional branching (simple control flow) But overhead: both branches are evaluated

11 General Form with Conditional Move
C Code val = Test ? Then-Expr : Else-Expr; Conditional Move Version val1 = Then-Expr; val2 = Else-Expr; val1 = val2 if !Test; Both values get computed Overwrite then-value with else-value if condition doesn’t hold Don’t use when: Then or else expression has side effects Then and else expressions are too expensive

12 “Do-While” Loop Example
C Code Goto Version int fact_do(int x) { int result = 1; do { result *= x; x = x-1; } while (x > 1); return result; } int fact_goto(int x) { int result = 1; loop: result *= x; x = x-1; if (x > 1) goto loop; return result; } Use backward branch to continue looping Only take branch when “while” condition holds

13 “Do-While” Loop Compilation
Registers: %edx x %eax result Goto Version Assembly int fact_goto(int x) { int result = 1; loop: result *= x; x = x-1; if (x > 1) goto loop; return result; } fact_goto: pushl %ebp # Setup movl %esp,%ebp # Setup movl $1,%eax # eax = 1 movl 8(%ebp),%edx # edx = x .L11: imull %edx,%eax # result *= x decl %edx # x-- cmpl $1,%edx # Compare x : 1 jg .L11 # if > goto loop movl %ebp,%esp # Finish popl %ebp # Finish ret # Finish Will disappear Blackboard?

14 “Do-While” Loop Compilation
Registers: %edx x %eax result Goto Version Assembly int fact_goto(int x) { int result = 1; loop: result *= x; x = x-1; if (x > 1) goto loop; return result; } fact_goto: pushl %ebp # Setup movl %esp,%ebp # Setup movl $1,%eax # eax = 1 movl 8(%ebp),%edx # edx = x .L11: imull %edx,%eax # result *= x decl %edx # x-- cmpl $1,%edx # Compare x : 1 jg .L11 # if > goto loop movl %ebp,%esp # Finish popl %ebp # Finish ret # Finish

15 General “Do-While” Translation
C Code Goto Version do Body while (Test); loop: Body if (Test) goto loop Body: Test returns integer: = 0 interpreted as false, ≠ 0 interpreted as true { Statement1; Statement2; … Statementn; }

16 “While” Loop Example C Code Goto Version #1
int fact_while(int x) { int result = 1; while (x > 1) { result *= x; x = x-1; }; return result; } int fact_while_goto(int x) { int result = 1; loop: if (!(x > 1)) goto done; result *= x; x = x-1; goto loop; done: return result; } Is this code equivalent to the do-while version? Must jump out of loop if test fails

17 Alternative “While” Loop Translation
C Code Goto Version #2 int fact_while(int x) { int result = 1; while (x > 1) { result *= x; x = x-1; }; return result; } int fact_while_goto2(int x) { int result = 1; if (!(x > 1)) goto done; loop: result *= x; x = x-1; if (x > 1) goto loop; done: return result; } Historically used by GCC Uses same inner loop as do-while version Guards loop entry with extra test

18 General “While” Translation
While version while (Test) Body Do-While Version Goto Version if (!Test) goto done; do Body while(Test); done: if (!Test) goto done; loop: Body if (Test) goto loop; done:

19 New Style “While” Loop Translation
C Code Goto Version int fact_while(int x) { int result = 1; while (x > 1) { result *= x; x = x-1; }; return result; } int fact_while_goto3(int x) { int result = 1; goto middle; loop: result *= x; x = x-1; middle: if (x > 1) goto loop; return result; } Recent technique for GCC Both IA32 & x86-64 First iteration jumps over body computation within loop

20 Jump-to-Middle While Translation
C Code Avoids duplicating test code Unconditional goto incurs no performance penalty for loops compiled in similar fashion while (Test) Body Goto Version Goto (Previous) Version goto middle; loop: Body middle: if (Test) goto loop; if (!Test) goto done; loop: Body if (Test) goto loop; done:

21 Jump-to-Middle Example
int fact_while(int x) { int result = 1; while (x > 1) { result *= x; x--; }; return result; } # x in %edx, result in %eax jmp .L34 # goto Middle .L35: # Loop: imull %edx, %eax # result *= x decl %edx # x-- .L34: # Middle: cmpl $1, %edx # x:1 jg .L35 # if >, goto Loop

22 “For” Loop Example: Square-and-Multiply
/* Compute x raised to nonnegative power p */ int ipwr_for(int x, unsigned p) { int result; for (result = 1; p != 0; p = p>>1) { if (p & 0x1) result *= x; x = x*x; } return result; Algorithm Exploit bit representation: $p = p_0 + 2p_1 + 2^2 p_2 + \dots + 2^{n-1} p_{n-1}$ Gives: $x^p = z_0 \cdot z_1^{2} \cdot (z_2^{2})^{2} \cdots (\dots((z_{n-1}^{2})^{2})\dots)^{2}$, where the last factor is squared $n-1$ times, $z_i = 1$ when $p_i = 0$ and $z_i = x$ when $p_i = 1$ Complexity $O(\log p)$ Example $3^{10} = 3^2 \cdot 3^8 = 3^2 \cdot ((3^2)^2)^2$

23 ipwr Computation before iteration result x=3 p=10
/* Compute x raised to nonnegative power p */ int ipwr_for(int x, unsigned p) { int result; for (result = 1; p != 0; p = p>>1) { if (p & 0x1) result *= x; x = x*x; } return result; Trace for x=3, p=10 (values before each iteration):
before iteration | result | x | p
1 | 1 | 3 | 10 = $1010_2$
2 | 1 | 9 | 5 = $101_2$
3 | 9 | 81 | 2 = $10_2$
4 | 9 | 6561 | 1 = $1_2$
5 | 59049 | 43046721 | 0

24 “For” Loop Example int result;
for (result = 1; p != 0; p = p>>1) { if (p & 0x1) result *= x; x = x*x; } General Form for (Init; Test; Update) Body Test Init Update Body p != 0 result = 1 p = p >> 1 { if (p & 0x1) result *= x; x = x*x; }

25 “For” “While” “Do-While”
For Version While Version for (Init; Test; Update ) Body Init; while (Test ) { Body Update ; } Goto Version Init; if (!Test) goto done; loop: Body Update ; if (Test) goto loop; done: Do-While Version Init; if (!Test) goto done; do { Body Update ; } while (Test) done:

26 For-Loop: Compilation #1
For Version for (result = 1; p != 0; p = p>>1) { if (p & 0x1) result *= x; x = x*x; } for (Init; Test; Update ) Body Goto Version Init; if (!Test) goto done; loop: Body Update ; if (Test) goto loop; done: result = 1; if (p == 0) goto done; loop: if (p & 0x1) result *= x; x = x*x; p = p >> 1; if (p != 0) goto loop; done:

27 “For” “While” (Jump-to-Middle)
For Version for (Init; Test; Update ) Body Goto Version Init; goto middle; loop: Body Update ; middle: if (Test) goto loop; done: While Version Init; while (Test ) { Body Update ; }

28 For-Loop: Compilation #2
for (result = 1; p != 0; p = p>>1) { if (p & 0x1) result *= x; x = x*x; } For Version for (Init; Test; Update ) Body Goto Version result = 1; goto middle; loop: if (p & 0x1) result *= x; x = x*x; p = p >> 1; middle: if (p != 0) goto loop; done: Init; goto middle; loop: Body Update ; middle: if (Test) goto loop; done:

29 Implementing Loops IA32 x86-64 Why the difference
All loops translated into form based on “do-while” x86-64 Also make use of “jump to middle” Why the difference IA32 compiler developed for machine where all operations costly x86-64 compiler developed for machine where unconditional branches incur (almost) no overhead

30 Switch Statement Example
long switch_eg (long x, long y, long z) { long w = 1; switch(x) { case 1: w = y*z; break; case 2: w = y/z; /* Fall Through */ case 3: w += z; break; case 5: case 6: w -= z; break; default: w = 2; } return w; Switch Statement Example Multiple case labels Here: 5, 6 Fall through cases Here: 2 Missing cases Here: 4

31 Jump Table Structure Jump Targets Switch Form Jump Table Code Block
Targ0: switch(x) { case val_0: Block 0 case val_1: Block 1 • • • case val_n-1: Block n–1 } jtab: Targ0 Targ1 Targ2 Code Block 1 Targ1: Code Block 2 Targ2: Targn-1 Approximate Translation target = JTab[x]; goto *target; Code Block n–1 Targn-1:

32 Switch Statement Example (IA32)
long switch_eg(long x, long y, long z) { long w = 1; switch(x) { . . . } return w; Setup: switch_eg: pushl %ebp # Setup movl %esp, %ebp # Setup pushl %ebx # Setup movl $1, %ebx # w = 1 movl 8(%ebp), %edx # edx = x movl 16(%ebp), %ecx # ecx = z cmpl $6, %edx # x:6 ja .L61 # if > goto default jmp *.L62(,%edx,4) # goto JTab[x] Will disappear Blackboard?

33 Switch Statement Example (IA32)
long switch_eg(long x, long y, long z) { long w = 1; switch(x) { . . . } return w; Jump table .section .rodata .align 4 .L62: .long .L61 # x = 0 .long .L56 # x = 1 .long .L57 # x = 2 .long .L58 # x = 3 .long .L61 # x = 4 .long .L60 # x = 5 .long .L60 # x = 6 Setup: switch_eg: pushl %ebp # Setup movl %esp, %ebp # Setup pushl %ebx # Setup movl $1, %ebx # w = 1 movl 8(%ebp), %edx # edx = x movl 16(%ebp), %ecx # ecx = z cmpl $6, %edx # x:6 ja .L61 # if > goto default jmp *.L62(,%edx,4) # goto JTab[x] Indirect jump

34 Assembly Setup Explanation
Table Structure Each target requires 4 bytes Base address at .L62 Jumping Direct: jmp .L61 Jump target is denoted by label .L61 Indirect: jmp *.L62(,%edx,4) Start of jump table: .L62 Must scale by factor of 4 (labels are 32 bits = 4 bytes on IA32) Fetch target from effective address .L62 + edx*4 Only for 0 ≤ x ≤ 6 Jump table .section .rodata .align 4 .L62: .long .L61 # x = 0 .long .L56 # x = 1 .long .L57 # x = 2 .long .L58 # x = 3 .long .L61 # x = 4 .long .L60 # x = 5 .long .L60 # x = 6

35 Jump Table Jump table .section .rodata .align 4 .L62:
.long .L61 # x = 0 .long .L56 # x = 1 .long .L57 # x = 2 .long .L58 # x = 3 .long .L61 # x = 4 .long .L60 # x = 5 .long .L60 # x = 6 switch(x) { case 1: // .L56 w = y*z; break; case 2: // .L57 w = y/z; /* Fall Through */ case 3: // .L58 w += z; break; case 5: case 6: // .L60 w -= z; break; default: // .L61 w = 2; }

36 Code Blocks (Partial) switch(x) { . . . case 2: // .L57 w = y/z;
/* Fall Through */ case 3: // .L58 w += z; break; default: // .L61 w = 2; } .L61: // Default case movl $2, %ebx # w = 2 movl %ebx, %eax # Return w popl %ebx leave ret .L57: // Case 2: movl 12(%ebp), %eax # y cltd # Div prep idivl %ecx # y/z movl %eax, %ebx # w = y/z # Fall through .L58: // Case 3: addl %ecx, %ebx # w+= z movl %ebx, %eax # Return w

37 Code Blocks (Rest) switch(x) { case 1: // .L56 w = y*z; break; . . .
w -= z; } .L60: // Cases 5&6: subl %ecx, %ebx # w –= z movl %ebx, %eax # Return w popl %ebx leave ret .L56: // Case 1: movl 12(%ebp), %ebx # w = y imull %ecx, %ebx # w*= z

38 x86-64 Switch Implementation
Same general idea, adapted to 64-bit code Table entries 64 bits (pointers) Cases use revised code .section .rodata .align 8 .L62: .quad .L55 # x = 0 .quad .L50 # x = 1 .quad .L51 # x = 2 .quad .L52 # x = 3 .quad .L55 # x = 4 .quad .L54 # x = 5 .quad .L54 # x = 6 Jump Table switch(x) { case 1: // .L50 w = y*z; break; . . . } .L50: // Case 1: movq %rsi, %r8 # w = y imulq %rdx, %r8 # w *= z movq %r8, %rax # Return w ret

39 IA32 Object Code Setup Assembly Code Disassembled Object Code
Label .L61 becomes address 0x8048630 Label .L62 becomes address 0x80488dc Assembly Code switch_eg: . . . ja .L61 # if > goto default jmp *.L62(,%edx,4) # goto JTab[x] Disassembled Object Code <switch_eg>: . . . 8048622: 77 0c ja 8048630 8048624: ff 24 95 dc 88 04 08 jmp *0x80488dc(,%edx,4)

40 IA32 Object Code (cont.) Jump Table
Doesn’t show up in disassembled code Can inspect using GDB gdb asm-cntl (gdb) x/7xw 0x80488dc Examine 7 hexadecimal format “words” (4-bytes each) Use command “help x” to get format documentation 0x80488dc: 0x08048630 0x08048650 0x0804863a 0x08048642 0x08048630 0x08048649 0x08048649

41 Disassembled Targets 8048630: bb 02 00 00 00 mov $0x2,%ebx
8048635: 89 d8 mov %ebx,%eax 8048637: 5b pop %ebx 8048638: c9 leave 8048639: c3 ret 804863a: 8b 45 0c mov 0xc(%ebp),%eax 804863d: 99 cltd 804863e: f7 f9 idiv %ecx 8048640: 89 c3 mov %eax,%ebx 8048642: 01 cb add %ecx,%ebx 8048644: 89 d8 mov %ebx,%eax 8048646: 5b pop %ebx 8048647: c9 leave 8048648: c3 ret 8048649: 29 cb sub %ecx,%ebx 804864b: 89 d8 mov %ebx,%eax 804864d: 5b pop %ebx 804864e: c9 leave 804864f: c3 ret 8048650: 8b 5d 0c mov 0xc(%ebp),%ebx 8048653: 0f af d9 imul %ecx,%ebx 8048656: 89 d8 mov %ebx,%eax 8048658: 5b pop %ebx 8048659: c9 leave 804865a: c3 ret

42 Matching Disassembled Targets
: bb mov : d mov : b pop : c leave : c ret 804863a: b 45 0c mov 804863d: cltd 804863e: f7 f idiv : c mov : cb add : d mov : b pop : c leave : c ret : cb sub 804864b: d mov 804864d: b pop 804864e: c leave 804864f: c ret : b 5d 0c mov : f af d imul : d mov : b pop : c leave 804865a: c ret 0x 0x 0x a 0x 0x

43 x86-64 Object Code Setup Assembly Code Disassembled Object Code
Label .L61 becomes address 0x Label .L62 becomes address 0x Assembly Code switch_eg: . . . ja .L55 # if > goto default jmp *.L56(,%rdi,8) # goto JTab[x] Disassembled Object Code <switch_eg>: . . . 40070d: ja 40070f: ff 24 fd jmpq *0x400990(,%rdi,8)

44 x86-64 Object Code (cont.) Jump Table Can inspect using GDB
gdb asm-cntl (gdb) x/7xg 0x400990 Examine 7 hexadecimal format “giant words” (8-bytes each) Use command “help x” to get format documentation 0x400990: 0x 0x 0x 0x b 0x

45 Sparse Switch Example Not practical to use jump table
Would require 1000 entries Obvious translation into if-then-else would have max. of 9 tests /* Return x/111 if x is multiple && <= 999. -1 otherwise */ int div111(int x) { switch(x) { case 0: return 0; case 111: return 1; case 222: return 2; case 333: return 3; case 444: return 4; case 555: return 5; case 666: return 6; case 777: return 7; case 888: return 8; case 999: return 9; default: return -1; }

46 Sparse Switch Code (IA32)
Compares x to possible case values Jumps different places depending on outcomes movl 8(%ebp),%eax # get x cmpl $444,%eax # x:444 je L8 jg L16 cmpl $111,%eax # x:111 je L5 jg L17 testl %eax,%eax # x:0 je L4 jmp L14 . . . . . . L5: movl $1,%eax jmp L19 L6: movl $2,%eax L7: movl $3,%eax L8: movl $4,%eax

47 Sparse Switch Code Structure
444 < > = 4 111 777 > < > < = = 1 7 222 555 888 > = = = = -1 2 5 8 333 666 999 = = = -1 3 -1 6 -1 9 Organizes cases as binary tree Logarithmic performance

48 Summarizing C Control Assembler Control Standard Techniques
if-then-else do-while while, for switch Assembler Control Conditional jump Conditional move Indirect jump Compiler Must generate assembly code to implement more complex control Standard Techniques IA32 loops converted to do-while form x86-64 loops use jump-to-middle Large switch statements use jump tables Sparse switch statements may use decision trees (not shown) Conditions in CISC CISC machines generally have condition code registers

49 Instructor: Erol Sahin
Machine Programming – Procedures and IA32 Stack CENG331: Introduction to Computer Systems 7th Lecture Instructor: Erol Sahin Acknowledgement: Most of the slides are adapted from the ones prepared by R.E. Bryant, D.R. O’Hallaron of Carnegie-Mellon Univ.

50 IA32 Stack Stack “Bottom”
Region of memory managed with stack discipline Grows toward lower addresses Register %esp contains lowest stack address = address of “top” element Increasing Addresses Stack Grows Down Stack Pointer: %esp Stack “Top”

51 IA32 Stack: Push Stack “Bottom” pushl Src Stack Pointer: %esp
Fetch operand at Src Decrement %esp by 4 Write operand at address given by %esp Increasing Addresses Stack Grows Down -4 Stack Pointer: %esp Stack “Top”

52 IA32 Stack: Pop Stack “Bottom” popl Dest Stack Pointer: %esp
Read operand at address %esp Increment %esp by 4 Write operand to Dest Increasing Addresses Stack Grows Down +4 Stack Pointer: %esp Stack “Top”

53 Procedure Control Flow
Use stack to support procedure call and return Procedure call: call label Push return address on stack Jump to label Return address: Address of instruction beyond call Example from disassembly 804854e: e8 3d 06 00 00 call 8048b90 <main> 8048553: 50 pushl %eax Return address = 0x8048553 Procedure return: ret Pop address from stack Jump to address

54 Procedure Call Example
804854e: e8 3d call b90 <main> : pushl %eax call b90 0x110 0x110 0x10c 0x10c 0x108 123 0x108 123 0x104 0x %esp 0x108 %esp 0x104 0x108 %eip 0x804854e %eip 0x8048b90 0x804854e %eip: program counter

55 Procedure Return Example
: c ret ret 0x110 0x110 0x10c 0x10c 0x108 123 0x108 123 0x104 0x 0x %esp 0x104 %esp 0x108 0x104 %eip 0x %eip 0x 0x %eip: program counter

56 Stack-Based Languages
Languages that support recursion e.g., C, Pascal, Java Code must be “Reentrant” Multiple simultaneous instantiations of single procedure Need some place to store state of each instantiation Arguments Local variables Return pointer Stack discipline State for given procedure needed for limited time From when called to when return Callee returns before caller does Stack allocated in Frames state for single procedure instantiation

57 Call Chain Example Example Call Chain yoo(…) { • who(); } yoo who(…) {
• • • amI(); } who amI(…) { amI(); } amI amI amI amI Procedure amI is recursive

58 Stack Frames Previous Frame Contents Frame for proc Management
Local variables Return information Temporary space Management Space allocated when enter procedure “Set-up” code Deallocated when return “Finish” code Frame Pointer: %ebp Frame for proc Stack Pointer: %esp Stack “Top”

59 Example Stack yoo(…) { • who(); } yoo %ebp yoo who %esp amI amI amI

60 Example Stack who(…) { • • • amI(); } yoo yoo who %ebp who amI amI
%esp amI amI

61 Example Stack amI(…) { • amI(); } yoo yoo who who amI amI %ebp amI amI
%esp amI

62 Example Stack amI(…) { • amI(); } yoo yoo who who amI amI amI amI amI
%ebp amI %esp

63 Example Stack amI(…) { • amI(); } yoo yoo who who amI amI amI amI amI
%ebp amI %esp

64 Example Stack amI(…) { • amI(); } yoo yoo who who amI amI amI amI amI
%ebp amI %esp

65 Example Stack amI(…) { • amI(); } yoo yoo who who amI amI %ebp amI amI
%esp amI

66 Example Stack who(…) { • • • amI(); } yoo yoo who %ebp who amI amI
%esp amI amI

67 Example Stack amI(…) { • } yoo yoo who who amI amI %ebp amI amI %esp

68 Example Stack who(…) { • • • amI(); } yoo yoo who %ebp who amI amI
%esp amI amI

69 Example Stack yoo(…) { • who(); } yoo %ebp yoo who %esp amI amI amI

70 IA32/Linux Stack Frame Current Stack Frame (“Top” to Bottom)
“Argument build:” Parameters for function about to call Local variables If can’t keep in registers Saved register context Old frame pointer Caller Stack Frame Return address Pushed by call instruction Arguments for this call Caller Frame Arguments Frame pointer %ebp Return Addr Old %ebp Saved Registers + Local Variables Argument Build Stack pointer %esp

71 Calling swap from call_swap
Revisiting swap Calling swap from call_swap int zip1 = 15213; int zip2 = 91125; void call_swap() { swap(&zip1, &zip2); } call_swap: • • • pushl $zip2 # Global Var pushl $zip1 # Global Var call swap Resulting Stack void swap(int *xp, int *yp) { int t0 = *xp; int t1 = *yp; *xp = t1; *yp = t0; } &zip2 &zip1 Rtn adr %esp

72 Revisiting swap Do on blackboard? swap: pushl %ebp movl %esp,%ebp
pushl %ebx movl 12(%ebp),%ecx movl 8(%ebp),%edx movl (%ecx),%eax movl (%edx),%ebx movl %eax,(%edx) movl %ebx,(%ecx) movl -4(%ebp),%ebx movl %ebp,%esp popl %ebp ret Set Up void swap(int *xp, int *yp) { int t0 = *xp; int t1 = *yp; *xp = t1; *yp = t0; } Body Finish Do on blackboard?

73 swap Setup #1 Entering Stack Resulting Stack • %ebp • %ebp &zip2 yp
xp Rtn adr %esp Rtn adr Old %ebp %esp swap: pushl %ebp movl %esp,%ebp pushl %ebx

74 swap Setup #1 Entering Stack • %ebp • %ebp &zip2 yp &zip1 xp Rtn adr
%esp Rtn adr Old %ebp %esp swap: pushl %ebp movl %esp,%ebp pushl %ebx

75 swap Setup #1 Entering Stack Resulting Stack • %ebp • &zip2 yp &zip1
xp Rtn adr %esp Rtn adr Old %ebp %ebp %esp swap: pushl %ebp movl %esp,%ebp pushl %ebx

76 swap Setup #1 Entering Stack • %ebp • &zip2 yp &zip1 xp Rtn adr %esp
Old %ebp %ebp %esp swap: pushl %ebp movl %esp,%ebp pushl %ebx

77 swap Setup #1 12 8 4 Entering Stack Resulting Stack • %ebp •
Offset relative to %ebp &zip2 12 yp &zip1 8 xp Rtn adr 4 %esp Rtn adr Old %ebp %ebp Old %ebx %esp movl 12(%ebp),%ecx # get yp movl 8(%ebp),%edx # get xp . . .

78 swap Finish #1 swap’s Stack Resulting Stack
yp yp xp xp Rtn adr Rtn adr Old %ebp %ebp Old %ebp %ebp Old %ebx %esp Old %ebx %esp movl -4(%ebp),%ebx movl %ebp,%esp popl %ebp ret Observation: Saved and restored register %ebx

79 swap Finish #2 swap’s Stack • • yp yp xp xp Rtn adr Rtn adr Old %ebp
Old %ebx %esp Old %ebx %esp movl -4(%ebp),%ebx movl %ebp,%esp popl %ebp ret

80 swap Finish #2 swap’s Stack Resulting Stack • • yp yp xp xp Rtn adr
Old %ebp %ebp Old %ebp %ebp %esp Old %ebx %esp movl -4(%ebp),%ebx movl %ebp,%esp popl %ebp ret

81 swap Finish #2 swap’s Stack • • yp yp xp xp Rtn adr Rtn adr Old %ebp
%esp Old %ebx %esp movl -4(%ebp),%ebx movl %ebp,%esp popl %ebp ret

82 swap Finish #3 swap’s Stack Resulting Stack • • %ebp yp yp xp xp
Rtn adr Rtn adr %esp Old %ebp %ebp Old %ebx %esp movl -4(%ebp),%ebx movl %ebp,%esp popl %ebp ret

83 swap Finish #4 swap’s Stack • • %ebp yp yp xp xp Rtn adr Rtn adr %esp
Old %ebp %ebp Old %ebx %esp movl -4(%ebp),%ebx movl %ebp,%esp popl %ebp ret

84 swap Finish #4 Observation swap’s Stack Resulting Stack
%ebp yp yp xp xp %esp Rtn adr Old %ebp %ebp Old %ebx %esp movl -4(%ebp),%ebx movl %ebp,%esp popl %ebp ret Observation Saved & restored register %ebx Didn’t do so for %eax, %ecx, or %edx

85 Disassembled swap Calling Code 080483a4 <swap>:
80483a4: 55 push %ebp 80483a5: 89 e5 mov %esp,%ebp 80483a7: 53 push %ebx 80483a8: 8b 55 08 mov 0x8(%ebp),%edx 80483ab: 8b 4d 0c mov 0xc(%ebp),%ecx 80483ae: 8b 1a mov (%edx),%ebx 80483b0: 8b 01 mov (%ecx),%eax 80483b2: 89 02 mov %eax,(%edx) 80483b4: 89 19 mov %ebx,(%ecx) 80483b6: 5b pop %ebx 80483b7: c9 leave 80483b8: c3 ret Calling Code 8048409: e8 96 ff ff ff call 80483a4 <swap> 804840e: 8b 45 f8 mov 0xfffffff8(%ebp),%eax

86 Register Saving Conventions
When procedure yoo calls who: yoo is the caller who is the callee Can Register be used for temporary storage? Contents of register %edx overwritten by who yoo: • • • movl $15213, %edx call who addl %edx, %eax ret who: • • • movl 8(%ebp), %edx addl $91125, %edx ret

87 Register Saving Conventions
When procedure yoo calls who: yoo is the caller who is the callee Can register be used for temporary storage? Conventions “Caller Save” Caller saves temporary in its frame before calling “Callee Save” Callee saves temporary in its frame before using

88 IA32/Linux Register Usage
%eax, %edx, %ecx Caller saves prior to call if values are used later %eax also used to return integer value %ebx, %esi, %edi Callee saves if wants to use them %esp, %ebp special %eax Caller-Save Temporaries %edx %ecx %ebx Callee-Save Temporaries %esi %edi %esp Special %ebp

89 Recursive Factorial Registers %eax used without first saving
.globl rfact .type rfact,@function rfact: pushl %ebp movl %esp,%ebp pushl %ebx movl 8(%ebp),%ebx cmpl $1,%ebx jle .L78 leal -1(%ebx),%eax pushl %eax call rfact imull %ebx,%eax jmp .L79 .align 4 .L78: movl $1,%eax .L79: movl -4(%ebp),%ebx movl %ebp,%esp popl %ebp ret Recursive Factorial int rfact(int x) { int rval; if (x <= 1) return 1; rval = rfact(x-1); return rval * x; } Registers %eax used without first saving %ebx used, but saved at beginning & restored at end

90 Pointer Code Recursive Procedure Top-Level Call
void s_helper (int x, int *accum) { if (x <= 1) return; else { int z = *accum * x; *accum = z; s_helper (x-1,accum); } int sfact(int x) { int val = 1; s_helper(x, &val); return val; } Pass pointer to update location

91 Creating & Initializing Pointer
int sfact(int x) { int val = 1; s_helper(x, &val); return val; } Variable val must be stored on stack Because: Need to create pointer to it Compute pointer as -4(%ebp) Push on stack as second argument 8 x 4 Rtn adr Initial part of sfact Old %ebp %ebp _sfact: pushl %ebp # Save %ebp movl %esp,%ebp # Set %ebp subl $16,%esp # Add 16 bytes movl 8(%ebp),%edx # edx = x movl $1,-4(%ebp) # val = 1 _sfact: pushl %ebp # Save %ebp movl %esp,%ebp # Set %ebp subl $16,%esp # Add 16 bytes movl 8(%ebp),%edx # edx = x movl $1,-4(%ebp) # val = 1 _sfact: pushl %ebp # Save %ebp movl %esp,%ebp # Set %ebp subl $16,%esp # Add 16 bytes movl 8(%ebp),%edx # edx = x movl $1,-4(%ebp) # val = 1 _sfact: pushl %ebp # Save %ebp movl %esp,%ebp # Set %ebp subl $16,%esp # Add 16 bytes movl 8(%ebp),%edx # edx = x movl $1,-4(%ebp) # val = 1 -4 val = 1 Unused Temp. Space %esp -8 -12 -16

92 Creating & Initializing Pointer
int sfact(int x) { int val = 1; s_helper(x, &val); return val; } Variable val must be stored on stack Because: Need to create pointer to it Compute pointer as -4(%ebp) Push on stack as second argument 8 x 4 Rtn adr Initial part of sfact Old %ebp %ebp _sfact: pushl %ebp # Save %ebp movl %esp,%ebp # Set %ebp subl $16,%esp # Add 16 bytes movl 8(%ebp),%edx # edx = x movl $1,-4(%ebp) # val = 1 _sfact: pushl %ebp # Save %ebp movl %esp,%ebp # Set %ebp subl $16,%esp # Add 16 bytes movl 8(%ebp),%edx # edx = x movl $1,-4(%ebp) # val = 1 _sfact: pushl %ebp # Save %ebp movl %esp,%ebp # Set %ebp subl $16,%esp # Add 16 bytes movl 8(%ebp),%edx # edx = x movl $1,-4(%ebp) # val = 1 _sfact: pushl %ebp # Save %ebp movl %esp,%ebp # Set %ebp subl $16,%esp # Add 16 bytes movl 8(%ebp),%edx # edx = x movl $1,-4(%ebp) # val = 1 -4 val = 1 Unused Temp. Space %esp -8 -12 -16

93 Passing Pointer Calling s_helper from sfact Stack at time of call
int sfact(int x) { int val = 1; s_helper(x, &val); return val; } 8 x 4 Rtn adr Old %ebp %ebp -4 val=x! val = 1 &val -8 Unused -12 -16 Calling s_helper from sfact leal -4(%ebp),%eax # Compute &val pushl %eax # Push on stack pushl %edx # Push x call s_helper # call movl -4(%ebp),%eax # Return val • • • # Finish leal -4(%ebp),%eax # Compute &val pushl %eax # Push on stack pushl %edx # Push x call s_helper # call movl -4(%ebp),%eax # Return val • • • # Finish leal -4(%ebp),%eax # Compute &val pushl %eax # Push on stack pushl %edx # Push x call s_helper # call movl -4(%ebp),%eax # Return val • • • # Finish %esp x

94 Passing Pointer Calling s_helper from sfact Stack at time of call
int sfact(int x) { int val = 1; s_helper(x, &val); return val; } 8 x 4 Rtn adr Old %ebp %ebp -4 val=x! val = 1 &val -8 Unused -12 -16 Calling s_helper from sfact leal -4(%ebp),%eax # Compute &val pushl %eax # Push on stack pushl %edx # Push x call s_helper # call movl -4(%ebp),%eax # Return val • • • # Finish leal -4(%ebp),%eax # Compute &val pushl %eax # Push on stack pushl %edx # Push x call s_helper # call movl -4(%ebp),%eax # Return val • • • # Finish leal -4(%ebp),%eax # Compute &val pushl %eax # Push on stack pushl %edx # Push x call s_helper # call movl -4(%ebp),%eax # Return val • • • # Finish %esp x

95 IA 32 Procedure Summary The Stack Makes Recursion Work
Private storage for each instance of procedure call Instantiations don’t clobber each other Addressing of locals + arguments can be relative to stack positions Managed by stack discipline Procedures return in inverse order of calls IA32 Procedures Combination of Instructions + Conventions Call / Ret instructions Register usage conventions Caller / Callee save %ebp and %esp Stack frame organization conventions Caller Frame Arguments Return Addr %ebp Old %ebp Saved Registers + Local Variables Argument Build %esp

96 Today Procedures (x86-64) Arrays Structures One-dimensional
Multi-dimensional (nested) Multi-level Structures

97 x86-64 Integer Registers %rax %r8 %rbx %r9 %rcx %r10 %rdx %r11 %rsi
%eax %r8d %rbx %r9 %ebx %r9d %rcx %r10 %ecx %r10d %rdx %r11 %edx %r11d %rsi %r12 %esi %r12d %rdi %r13 %edi %r13d %rsp %r14 %esp %r14d %rbp %r15 %ebp %r15d Twice the number of registers Accessible as 8, 16, 32, 64 bits

98 x86-64 Integer Registers %rax %r8 %rbx %r9 %rcx %r10 %rdx %r11 %rsi
Return value Argument #5 %rbx %r9 Callee saved Argument #6 %rcx %r10 Argument #4 Callee saved %rdx %r11 Argument #3 Used for linking %rsi %r12 Argument #2 C: Callee saved %rdi %r13 Argument #1 Callee saved %rsp %r14 Stack pointer Callee saved %rbp %r15 Callee saved Callee saved

99 x86-64 Registers Arguments passed to functions via registers
If more than 6 integral parameters, then pass rest on stack These registers can be used as caller-saved as well All references to stack frame via stack pointer Eliminates need to update %ebp/%rbp Other Registers 6+1 callee saved 2 or 3 have special uses

100 x86-64 Long Swap Operands passed in registers
movq (%rdi), %rdx movq (%rsi), %rax movq %rax, (%rdi) movq %rdx, (%rsi) ret void swap(long *xp, long *yp) { long t0 = *xp; long t1 = *yp; *xp = t1; *yp = t0; } Operands passed in registers First (xp) in %rdi, second (yp) in %rsi 64-bit pointers No stack operations required (except ret) Avoiding stack Can hold all local information in registers

101 x86-64 Locals in the Red Zone
swap_a: movq (%rdi), %rax movq %rax, -24(%rsp) movq (%rsi), %rax movq %rax, -16(%rsp) movq -16(%rsp), %rax movq %rax, (%rdi) movq -24(%rsp), %rax movq %rax, (%rsi) ret /* Swap, using local array */ void swap_a(long *xp, long *yp) { volatile long loc[2]; loc[0] = *xp; loc[1] = *yp; *xp = loc[1]; *yp = loc[0]; } Avoiding Stack Pointer Change Can hold all information within small window beyond stack pointer rtn Ptr %rsp −8 unused −16 loc[1] −24 loc[0]

102 x86-64 NonLeaf without Stack Frame
long scount = 0; /* Swap a[i] & a[i+1] */ void swap_ele_se (long a[], int i) { swap(&a[i], &a[i+1]); scount++; } No values held while swap being invoked No callee save registers needed swap_ele_se: movslq %esi,%rsi # Sign extend i leaq (%rdi,%rsi,8), %rdi # &a[i] leaq 8(%rdi), %rsi # &a[i+1] call swap # swap() incq scount(%rip) # scount++; ret

103 x86-64 Call using Jump Will disappear Blackboard? swap_ele:
long scount = 0; /* Swap a[i] & a[i+1] */ void swap_ele(long a[], int i) { swap(&a[i], &a[i+1]); } swap_ele: movslq %esi,%rsi # Sign extend i leaq (%rdi,%rsi,8), %rdi # &a[i] leaq 8(%rdi), %rsi # &a[i+1] jmp swap # swap() Will disappear Blackboard?

104 x86-64 Call using Jump When swap executes ret, it will return from swap_ele Possible since swap is a “tail call” (no instructions afterwards) long scount = 0; /* Swap a[i] & a[i+1] */ void swap_ele(long a[], int i) { swap(&a[i], &a[i+1]); } swap_ele: movslq %esi,%rsi # Sign extend i leaq (%rdi,%rsi,8), %rdi # &a[i] leaq 8(%rdi), %rsi # &a[i+1] jmp swap # swap()

105 x86-64 Stack Frame Example Blackboard?
swap_ele_su: movq %rbx, -16(%rsp) movslq %esi,%rbx movq %r12, -8(%rsp) movq %rdi, %r12 leaq (%rdi,%rbx,8), %rdi subq $16, %rsp leaq 8(%rdi), %rsi call swap movq (%r12,%rbx,8), %rax addq %rax, sum(%rip) movq (%rsp), %rbx movq 8(%rsp), %r12 addq $16, %rsp ret long sum = 0; /* Swap a[i] & a[i+1] */ void swap_ele_su (long a[], int i) { swap(&a[i], &a[i+1]); sum += a[i]; } Keeps values of a and i in callee save registers Must set up stack frame to save these registers Blackboard?

106 Understanding x86-64 Stack Frame
swap_ele_su: movq %rbx, -16(%rsp) # Save %rbx movslq %esi,%rbx # Extend & save i movq %r12, -8(%rsp) # Save %r12 movq %rdi, %r12 # Save a leaq (%rdi,%rbx,8), %rdi # &a[i] subq $16, %rsp # Allocate stack frame leaq 8(%rdi), %rsi # &a[i+1] call swap # swap() movq (%r12,%rbx,8), %rax # a[i] addq %rax, sum(%rip) # sum += a[i] movq (%rsp), %rbx # Restore %rbx movq 8(%rsp), %r12 # Restore %r12 addq $16, %rsp # Deallocate stack frame ret

107 Understanding x86-64 Stack Frame
swap_ele_su: movq %rbx, -16(%rsp) # Save %rbx movslq %esi,%rbx # Extend & save i movq %r12, -8(%rsp) # Save %r12 movq %rdi, %r12 # Save a leaq (%rdi,%rbx,8), %rdi # &a[i] subq $16, %rsp # Allocate stack frame leaq 8(%rdi), %rsi # &a[i+1] call swap # swap() movq (%r12,%rbx,8), %rax # a[i] addq %rax, sum(%rip) # sum += a[i] movq (%rsp), %rbx # Restore %rbx movq 8(%rsp), %r12 # Restore %r12 addq $16, %rsp # Deallocate stack frame ret rtn addr %r12 %rsp −8 %rbx −16 rtn addr %r12 %rsp +8 %rbx

108 Interesting Features of Stack Frame
Allocate entire frame at once All stack accesses can be relative to %rsp Do by decrementing stack pointer Can delay allocation, since safe to temporarily use red zone Simple deallocation Increment stack pointer No base/frame pointer needed

109 x86-64 Procedure Summary Heavy use of registers Minimal use of stack
Parameter passing More temporaries since more registers Minimal use of stack Sometimes none Allocate/deallocate entire block Many tricky optimizations What kind of stack frame to use Calling with jump Various allocation techniques

110 Today Procedures (x86-64) Arrays Structures One-dimensional
Multi-dimensional (nested) Multi-level Structures

111 Basic Data Types Integral Floating Point
Stored & operated on in general (integer) registers Signed vs. unsigned depends on instructions used Intel GAS Bytes C byte b 1 [unsigned] char word w 2 [unsigned] short double word l 4 [unsigned] int quad word q 8 [unsigned] long int (x86-64) Floating Point Stored & operated on in floating point registers Single s 4 float Double l 8 double Extended t 10/12/16 long double

112 Array Allocation Basic Principle T A[L];
Array of data type T and length L Contiguously allocated region of L * sizeof(T) bytes char string[12]; x x + 12 int val[5]; x x + 4 x + 8 x + 12 x + 16 x + 20 double a[3]; x + 24 x x + 8 x + 16 char *p[3]; x x + 4 x + 8 x + 12 IA32 x x + 8 x + 16 x + 24 x86-64

113 Array Access Basic Principle Reference Type Value Will disappear
T A[L]; Array of data type T and length L Identifier A can be used as a pointer to array element 0: Type T* Reference Type Value val[4] int 3 val int * x val+1 int * x + 4 &val[2] int * x + 8 val[5] int ?? *(val+1) int 5 val + i int * x + 4*i int val[5]; 1 5 2 1 3 x x + 4 x + 8 x + 12 x + 16 x + 20 Will disappear Blackboard?

114 Array Access Basic Principle Reference Type Value T A[L];
Array of data type T and length L Identifier A can be used as a pointer to array element 0: Type T* Reference Type Value val[4] int 3 val int * x val+1 int * x + 4 &val[2] int * x + 8 val[5] int ?? *(val+1) int 5 val + i int * x + 4*i int val[5]; 1 5 2 1 3 x x + 4 x + 8 x + 12 x + 16 x + 20

115 Array Example Declaration “zip_dig cmu” equivalent to “int cmu[5]”
typedef int zip_dig[5]; zip_dig cmu = { 1, 5, 2, 1, 3 }; zip_dig mit = { 0, 2, 1, 3, 9 }; zip_dig ucb = { 9, 4, 7, 2, 0 }; zip_dig cmu; 1 5 2 3 16 20 24 28 32 36 zip_dig mit; 2 1 3 9 36 40 44 48 52 56 zip_dig ucb; 9 4 7 2 56 60 64 68 72 76 Declaration “zip_dig cmu” equivalent to “int cmu[5]” Example arrays were allocated in successive 20 byte blocks Not guaranteed to happen in general

116 Array Accessing Example
zip_dig cmu; 1 5 2 3 16 20 24 28 32 36 int get_digit (zip_dig z, int dig) { return z[dig]; } Register %edx contains starting address of array Register %eax contains array index Desired digit at 4*%eax + %edx Use memory reference (%edx,%eax,4) IA32 # %edx = z # %eax = dig movl (%edx,%eax,4),%eax # z[dig]

117 Referencing Examples Reference Address Value Guaranteed?
zip_dig cmu; 1 5 2 1 3 16 20 24 28 32 36 zip_dig mit; 0 2 1 3 9 36 40 44 48 52 56 zip_dig ucb; 9 4 7 2 0 56 60 64 68 72 76 Reference Address Value Guaranteed? mit[3] 36 + 4*3 = 48 3 mit[5] 36 + 4*5 = 56 9 mit[-1] 36 + 4*-1 = 32 3 cmu[15] 16 + 4*15 = 76 ?? Will disappear Blackboard?

118 Referencing Examples Reference Address Value Guaranteed? Yes No
zip_dig cmu; 1 5 2 1 3 16 20 24 28 32 36 zip_dig mit; 0 2 1 3 9 36 40 44 48 52 56 zip_dig ucb; 9 4 7 2 0 56 60 64 68 72 76 Reference Address Value Guaranteed? mit[3] 36 + 4*3 = 48 3 mit[5] 36 + 4*5 = 56 9 mit[-1] 36 + 4*-1 = 32 3 cmu[15] 16 + 4*15 = 76 ?? No bound checking Out of range behavior implementation-dependent No guaranteed relative allocation of different arrays Yes No No No

119 Array Loop Example Original Transformed As generated by GCC
int zd2int(zip_dig z) { int i; int zi = 0; for (i = 0; i < 5; i++) { zi = 10 * zi + z[i]; } return zi; } Original Transformed As generated by GCC Eliminate loop variable i Convert array code to pointer code Express in do-while form (no test at entrance) int zd2int(zip_dig z) { int zi = 0; int *zend = z + 4; do { zi = 10 * zi + *z; z++; } while (z <= zend); return zi; }

120 Array Loop Implementation (IA32)
int zd2int(zip_dig z) { int zi = 0; int *zend = z + 4; do { zi = 10 * zi + *z; z++; } while(z <= zend); return zi; } # %ecx = z xorl %eax,%eax # zi = 0 leal 16(%ecx),%ebx # zend = z+4 .L59: leal (%eax,%eax,4),%edx # 5*zi movl (%ecx),%eax # *z addl $4,%ecx # z++ leal (%eax,%edx,2),%eax # zi = *z + 2*(5*zi) cmpl %ebx,%ecx # z : zend jle .L59 # if <= goto loop Will disappear Blackboard?

121 Array Loop Implementation (IA32)
Registers %ecx z %eax zi %ebx zend Computations 10*zi + *z implemented as *z + 2*(zi+4*zi) z++ increments by 4 int zd2int(zip_dig z) { int zi = 0; int *zend = z + 4; do { zi = 10 * zi + *z; z++; } while(z <= zend); return zi; } int zd2int(zip_dig z) { int zi = 0; int *zend = z + 4; do { zi = 10 * zi + *z; z++; } while(z <= zend); return zi; } int zd2int(zip_dig z) { int zi = 0; int *zend = z + 4; do { zi = 10 * zi + *z; z++; } while(z <= zend); return zi; } int zd2int(zip_dig z) { int zi = 0; int *zend = z + 4; do { zi = 10 * zi + *z; z++; } while(z <= zend); return zi; } int zd2int(zip_dig z) { int zi = 0; int *zend = z + 4; do { zi = 10 * zi + *z; z++; } while(z <= zend); return zi; } # %ecx = z xorl %eax,%eax # zi = 0 leal 16(%ecx),%ebx # zend = z+4 .L59: leal (%eax,%eax,4),%edx # 5*zi movl (%ecx),%eax # *z addl $4,%ecx # z++ leal (%eax,%edx,2),%eax # zi = *z + 2*(5*zi) cmpl %ebx,%ecx # z : zend jle .L59 # if <= goto loop # %ecx = z xorl %eax,%eax # zi = 0 leal 16(%ecx),%ebx # zend = z+4 .L59: leal (%eax,%eax,4),%edx # 5*zi movl (%ecx),%eax # *z addl $4,%ecx # z++ leal (%eax,%edx,2),%eax # zi = *z + 2*(5*zi) cmpl %ebx,%ecx # z : zend jle .L59 # if <= goto loop # %ecx = z xorl %eax,%eax # zi = 0 leal 16(%ecx),%ebx # zend = z+4 .L59: leal (%eax,%eax,4),%edx # 5*zi movl (%ecx),%eax # *z addl $4,%ecx # z++ leal (%eax,%edx,2),%eax # zi = *z + 2*(5*zi) cmpl %ebx,%ecx # z : zend jle .L59 # if <= goto loop # %ecx = z xorl %eax,%eax # zi = 0 leal 16(%ecx),%ebx # zend = z+4 .L59: leal (%eax,%eax,4),%edx # 5*zi movl (%ecx),%eax # *z addl $4,%ecx # z++ leal (%eax,%edx,2),%eax # zi = *z + 2*(5*zi) cmpl %ebx,%ecx # z : zend jle .L59 # if <= goto loop # %ecx = z xorl %eax,%eax # zi = 0 leal 16(%ecx),%ebx # zend = z+4 .L59: leal (%eax,%eax,4),%edx # 5*zi movl (%ecx),%eax # *z addl $4,%ecx # z++ leal (%eax,%edx,2),%eax # zi = *z + 2*(5*zi) cmpl %ebx,%ecx # z : zend jle .L59 # if <= goto loop

122 Nested Array Example “zip_dig pgh[4]” equivalent to “int pgh[4][5]”
#define PCOUNT 4 zip_dig pgh[PCOUNT] = {{1, 5, 2, 0, 6}, {1, 5, 2, 1, 3 }, {1, 5, 2, 1, 7 }, {1, 5, 2, 2, 1 }}; 1 5 2 6 1 5 2 3 1 5 2 7 1 5 2 zip_dig pgh[4]; 76 96 116 136 156 “zip_dig pgh[4]” equivalent to “int pgh[4][5]” Variable pgh: array of 4 elements, allocated contiguously Each element is an array of 5 int’s, allocated contiguously “Row-Major” ordering of all elements guaranteed

123 Multidimensional (Nested) Arrays
Declaration T A[R][C]; 2D array of data type T R rows, C columns Type T element requires K bytes Array Size R * C * K bytes Arrangement Row-Major Ordering A[0][0] A[0][C-1] A[R-1][0] • • • A[R-1][C-1] int A[R][C]; • • • A [0] [C-1] [1] [R-1] •  •  • 4*R*C Bytes

124 Nested Array Row Access
Row Vectors A[i] is array of C elements Each element of type T requires K bytes Starting address A + i * (C * K) int A[R][C]; • • • A [0] [C-1] A[0] • • • A [i] [0] [C-1] A[i] • • • A [R-1] [0] [C-1] A[R-1] •  •  • •  •  • A A+i*C*4 A+(R-1)*C*4

125 Nested Array Row Access Code
int *get_pgh_zip(int index) { return pgh[index]; } #define PCOUNT 4 zip_dig pgh[PCOUNT] = {{1, 5, 2, 0, 6}, {1, 5, 2, 1, 3 }, {1, 5, 2, 1, 7 }, {1, 5, 2, 2, 1 }}; What data type is pgh[index]? What is its starting address? # %eax = index leal (%eax,%eax,4),%eax # 5 * index leal pgh(,%eax,4),%eax # pgh + (20 * index) Will disappear Blackboard?

126 Nested Array Row Access Code
int *get_pgh_zip(int index) { return pgh[index]; } #define PCOUNT 4 zip_dig pgh[PCOUNT] = {{1, 5, 2, 0, 6}, {1, 5, 2, 1, 3 }, {1, 5, 2, 1, 7 }, {1, 5, 2, 2, 1 }}; # %eax = index leal (%eax,%eax,4),%eax # 5 * index leal pgh(,%eax,4),%eax # pgh + (20 * index) Row Vector pgh[index] is array of 5 int’s Starting address pgh+20*index IA32 Code Computes and returns address Compute as pgh + 4*(index+4*index)

127 Nested Array Element Access
Array Elements A[i][j] is element of type T, which requires K bytes Address A + i * (C * K) + j * K = A + (i * C + j)* K int A[R][C]; • • • A [0] [C-1] A[0] • • • • • • A [i] [j] A[i] • • • A [R-1] [0] [C-1] A[R-1] •  •  • •  •  • A A+i*C*4 A+(R-1)*C*4 A+i*C*4+j*4

128 Nested Array Element Access Code
int get_pgh_digit (int index, int dig) { return pgh[index][dig]; } # %ecx = dig # %eax = index leal 0(,%ecx,4),%edx # 4*dig leal (%eax,%eax,4),%eax # 5*index movl pgh(%edx,%eax,4),%eax # *(pgh + 4*dig + 20*index) Array Elements pgh[index][dig] is int Address: pgh + 20*index + 4*dig IA32 Code Computes address pgh + 4*dig + 4*(index+4*index) movl performs memory reference

129 Strange Referencing Examples
1 5 2 6 1 5 2 3 1 5 2 7 1 5 2 zip_dig pgh[4]; 76 96 116 136 156 Reference Address Value Guaranteed? pgh[3][3] 76+20*3+4*3 = 148 2 pgh[2][5] 76+20*2+4*5 = 136 1 pgh[2][-1] 76+20*2+4*-1 = 112 3 pgh[4][-1] 76+20*4+4*-1 = 152 1 pgh[0][19] 76+20*0+4*19 = 152 1 pgh[0][-1] 76+20*0+4*-1 = 72 ?? Will disappear

130 Strange Referencing Examples
1 5 2 0 6 1 5 2 1 3 1 5 2 1 7 1 5 2 2 1 zip_dig pgh[4]; 76 96 116 136 156 Reference Address Value Guaranteed? pgh[3][3] 76+20*3+4*3 = 148 2 pgh[2][5] 76+20*2+4*5 = 136 1 pgh[2][-1] 76+20*2+4*-1 = 112 3 pgh[4][-1] 76+20*4+4*-1 = 152 1 pgh[0][19] 76+20*0+4*19 = 152 1 pgh[0][-1] 76+20*0+4*-1 = 72 ?? Code does not do any bounds checking Ordering of elements within array guaranteed Yes Yes Yes Yes Yes No

131 Multi-Level Array Example
Variable univ denotes array of 3 elements Each element is a pointer 4 bytes Each pointer points to array of int’s zip_dig cmu = { 1, 5, 2, 1, 3 }; zip_dig mit = { 0, 2, 1, 3, 9 }; zip_dig ucb = { 9, 4, 7, 2, 0 }; #define UCOUNT 3 int *univ[UCOUNT] = {mit, cmu, ucb}; cmu 1 5 2 3 16 20 24 28 32 36 36 160 16 56 164 168 univ mit 2 1 3 9 36 40 44 48 52 56 ucb 9 4 7 2 56 60 64 68 72 76

132 Element Access in Multi-Level Array
int get_univ_digit (int index, int dig) { return univ[index][dig]; } # %ecx = index # %eax = dig leal 0(,%ecx,4),%edx # 4*index movl univ(%edx),%edx # Mem[univ+4*index] movl (%edx,%eax,4),%eax # Mem[...+4*dig] Will disappear Blackboard?

133 Element Access in Multi-Level Array
int get_univ_digit (int index, int dig) { return univ[index][dig]; } # %ecx = index # %eax = dig leal 0(,%ecx,4),%edx # 4*index movl univ(%edx),%edx # Mem[univ+4*index] movl (%edx,%eax,4),%eax # Mem[...+4*dig] Computation (IA32) Element access Mem[Mem[univ+4*index]+4*dig] Must do two memory reads First get pointer to row array Then access element within array

134 Array Element Accesses
Nested array Multi-level array int get_pgh_digit (int index, int dig) { return pgh[index][dig]; } int get_univ_digit (int index, int dig) { return univ[index][dig]; } Access looks similar, but element: Mem[pgh+20*index+4*dig] Mem[Mem[univ+4*index]+4*dig]

135 Strange Referencing Examples
cmu 1 5 2 3 16 20 24 28 32 36 36 160 16 56 164 168 univ mit 2 1 3 9 36 40 44 48 52 56 ucb 9 4 7 2 56 60 64 68 72 76 Reference Address Value Guaranteed? univ[2][3] 56+4*3 = 68 2 univ[1][5] 16+4*5 = 36 0 univ[2][-1] 56+4*-1 = 52 9 univ[3][-1] ?? ?? univ[1][12] 16+4*12 = 64 7 Will disappear

136 Strange Referencing Examples
cmu 1 5 2 3 16 20 24 28 32 36 36 160 16 56 164 168 univ mit 2 1 3 9 36 40 44 48 52 56 ucb 9 4 7 2 56 60 64 68 72 76 Reference Address Value Guaranteed? univ[2][3] 56+4*3 = 68 2 univ[1][5] 16+4*5 = 36 0 univ[2][-1] 56+4*-1 = 52 9 univ[3][-1] ?? ?? univ[1][12] 16+4*12 = 64 7 Code does not do any bounds checking Ordering of elements in different arrays not guaranteed Yes No No No No

137 Using Nested Arrays Strengths Limitation
#define N 16 typedef int fix_matrix[N][N]; Strengths C compiler handles doubly subscripted arrays Generates very efficient code Avoids multiply in index computation Limitation Only works for fixed array size /* Compute element i,k of fixed matrix product */ int fix_prod_ele (fix_matrix a, fix_matrix b, int i, int k) { int j; int result = 0; for (j = 0; j < N; j++) result += a[i][j]*b[j][k]; return result; } a b j-th column x i-th row

138 Dynamic Nested Arrays Strength Programming Performance
Can create matrix of any size Programming Must do index computation explicitly Performance Accessing single element costly Must do multiplication int * new_var_matrix(int n) { return (int *) calloc(sizeof(int), n*n); } int var_ele (int *a, int i, int j, int n) { return a[i*n+j]; } movl 12(%ebp),%eax # i movl 8(%ebp),%edx # a imull 20(%ebp),%eax # n*i addl 16(%ebp),%eax # n*i+j movl (%edx,%eax,4),%eax # Mem[a+4*(i*n+j)]

139 Dynamic Array Multiplication
Without Optimizations Multiplies: 3 2 for subscripts 1 for data Adds: 4 2 for array indexing 1 for loop index 1 for data /* Compute element i,k of variable matrix product */ int var_prod_ele (int *a, int *b, int i, int k, int n) { int j; int result = 0; for (j = 0; j < n; j++) result += a[i*n+j] * b[j*n+k]; return result; }

140 Optimizing Dynamic Array Multiplication
{ int j; int result = 0; for (j = 0; j < n; j++) result += a[i*n+j] * b[j*n+k]; return result; } Optimizations Performed when set optimization level to -O2 Code Motion Expression i*n can be computed outside loop Strength Reduction Incrementing j has effect of incrementing j*n+k by n Operations count 4 adds, 1 mult Compiler can optimize regular access patterns { int j; int result = 0; int iTn = i*n; int jTnPk = k; for (j = 0; j < n; j++) { result += a[iTn+j] * b[jTnPk]; jTnPk += n; } return result; }

141 Structures Memory Layout Concept Accessing Structure Member
struct rec { int i; int a[3]; int *p; }; Memory Layout i a p 4 16 20 Concept Contiguously-allocated region of memory Refer to members within structure by names Members may be of different types Accessing Structure Member void set_i(struct rec *r, int val) { r->i = val; } IA32 Assembly # %eax = val # %edx = r movl %eax,(%edx) # Mem[r] = val

142 Generating Pointer to Structure Member
struct rec { int i; int a[3]; int *p; }; r r+4+4*idx i a p 4 16 20 Generating Pointer to Array Element Offset of each structure member determined at compile time int *find_a (struct rec *r, int idx) { return &r->a[idx]; } # %ecx = idx # %edx = r leal 0(,%ecx,4),%eax # 4*idx leal 4(%eax,%edx),%eax # r+4*idx+4

143 Structure Referencing (Cont.)
C Code struct rec { int i; int a[3]; int *p; }; i a p 4 16 20 i a void set_p(struct rec *r) { r->p = &r->a[r->i]; } 4 16 20 Element i # %edx = r movl (%edx),%ecx # r->i leal 0(,%ecx,4),%eax # 4*(r->i) leal 4(%edx,%eax),%eax # r+4+4*(r->i) movl %eax,16(%edx) # Update r->p


Download ppt "Instructor: Erol Sahin"

Similar presentations


Ads by Google