diff --git a/.crossnote/config.js b/.crossnote/config.js new file mode 100644 index 0000000..80613c4 --- /dev/null +++ b/.crossnote/config.js @@ -0,0 +1,15 @@ +({ + katexConfig: { + "macros": {} +}, + + mathjaxConfig: { + "tex": {}, + "options": {}, + "loader": {} +}, + + mermaidConfig: { + "startOnLoad": false +}, +}) \ No newline at end of file diff --git a/.crossnote/head.html b/.crossnote/head.html new file mode 100644 index 0000000..079058b --- /dev/null +++ b/.crossnote/head.html @@ -0,0 +1,6 @@ + + \ No newline at end of file diff --git a/.crossnote/parser.js b/.crossnote/parser.js new file mode 100644 index 0000000..0f6b5a9 --- /dev/null +++ b/.crossnote/parser.js @@ -0,0 +1,12 @@ +({ + // Please visit the URL below for more information: + // https://shd101wyy.github.io/markdown-preview-enhanced/#/extend-parser + + onWillParseMarkdown: async function(markdown) { + return markdown; + }, + + onDidParseMarkdown: async function(html) { + return html; + }, +}) \ No newline at end of file diff --git a/.crossnote/style.less b/.crossnote/style.less new file mode 100644 index 0000000..38a0ec9 --- /dev/null +++ b/.crossnote/style.less @@ -0,0 +1,16 @@ + +/* Please visit the URL below for more information: */ +/* https://shd101wyy.github.io/markdown-preview-enhanced/#/customize-css */ + +.markdown-preview.markdown-preview { + // modify your style here + // eg: background-color: blue; + font-size: 10pt; + .mermaid { + background-color: white; + } + + div[data-cmd="sh"] { + font-size: 8pt; + } +} diff --git a/.gitignore b/.gitignore index 07cc0ea..8b6700f 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,6 @@ .vscode -.venv \ No newline at end of file +.venv + +*.out +notes/*.o +notes/*.s \ No newline at end of file diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..4e361d5 --- /dev/null +++ b/Makefile @@ -0,0 +1,2 @@ +all: + rm ./notes/*.out \ No newline at end of file diff --git a/notes/1.md b/notes/1.md index 609686c..a1da627 100644 --- 
a/notes/1.md +++ b/notes/1.md @@ -1,9 +1,55 @@ # Bits, Bytes, and Integers +In computers, everything consists of bits. +By encoding sets of bits in various ways, they are given meaning: + +* instructions +* and data (numbers, sets, strings, etc.) + +## Boolean Algebra + +* and `A & B` +* or `A | B` +* not `~A` +* xor `A ^ B` + +### in C + +* Shift (`<<`, `>>`) + * Left Shift(`<<`) + Zero fill on right + * Right Shift(`>>`) + Logical Shift: fill with 0's on left + Arithmetic Shift: replicate most significant bit on left + +```c {cmd="gcc" args=[-x c $input_file -O0 -m32 -o 1_1.out]} +#include + +int main() { + int a = 0x7fffffff; + int as = a << 1; + printf("shl of %d: %d(%08x)\n", a, as, as); + + + unsigned b = 0x7fffffff; + unsigned bs = b << 1; + printf("shl of %u: %u(%08x)\n", b, bs, bs); +} +``` + +```sh {cmd hide} +while ! [ -f 1_1.out ]; do sleep .1; done; ./1_1.out +``` + ## Integers -### Representation +### Representation & Encoding + +* for $w$ bits data $x$ + +$$B2U(X)=\sum_{i=0}^{w-1} x_i 2^{i}\quad B2T(X)=-x_{w-1}*2^{w-1} + \sum_{i=0}^{w-2}{x_i 2^i}$$ + ### Conversion diff --git a/notes/2.md b/notes/2.md index 2992031..05cf8d1 100644 --- a/notes/2.md +++ b/notes/2.md @@ -1,6 +1,207 @@ -# Machine Level Programming +# Floating Point -아키텍쳐(ISA) -* intel(x86): CISC -* ARM(aarch64, aarch32): RISC +## Fractional Binary Number +representation: + +* for $w = i + j + 1$ bits data $b$ +$$\sum_{k = -j}^{i}b_k\times 2^k$$ + +for example: +* $5+3/4 = 23/4 = 101.11_2$ +* $1+7/16 = 23/16 = 1.0111_2$ + +**Limitations** +* Can only exactly represent numbers of the form $x/2^k$ +* Just one setting of binary point within the $w$ bits, which means that very small values or very large values cannot be represented + +## IEEE Floating Point Definition + +**IEEE Standard 754** + +Driven by numerical concerns: +* Nice standards for rounding, overflow, underflow +* But hard to make fast in hardware + * Numerical analysts predominated over hw designers in defining standard + 
+### Representation + +**Form** + +$$(-1)^s M 2^E$$ + +* $s$: sign bit +* $M$: mantissa fractional value in $[1.0,2.0)$ +* $E$: exponent + +**Encoding** + +```mermaid +--- +title: "Single Precision" +config: + packet: + bitsPerRow: 32 + rowHeight: 32 +--- +packet ++1: "s" ++8: "exp" ++23: "frac" +``` + +```mermaid +--- +title: "Double Precision" +config: + packet: + bitsPerRow: 32 + rowHeight: 32 +--- +packet ++1: "s" ++11: "exp" ++52: "frac" +``` + +There are three kinds of `float`: **normalized**, **denormalized**, **special** + +**normalized** + +$E = \exp - B$ +$B = 2^{k-1}-1$ where $k$ is number of exp bits +* single: 127 +* double: 1023 + +$M = 1.xxxxx$ +minimum when $\text{frac} = 0000...\quad (M = 1.0)$ +maximum when $\text{frac} = 1111... \quad (M = 2.0 - \epsilon)$ + +**denormalized** + +when `exp=000...0` + +$E = 1 - B$ + +$M = 0.xxxxx$ + +**special** + +when `exp = 111...1` + +* case `exp = 111...1, frac = 000...0` + + * repr $\infty$ + * operation that overflows + +* case `exp = 111...1, frac != 000...0` + + * repr `NaN` + * repr case when no numeric value can be determined + * e.g., `sqrt(-1)`, `inf - inf`, `inf * 0` + +```c {cmd="gcc-14" args=[-x c $input_file --std=c23 -O0 -m32 -o 2_1.out]} +#include + +int main() { + unsigned x_a = 0b0'11111111'00000000000000000000000; + unsigned x_b = 0b0'11111111'00000000000000000000001; + unsigned x_c = 0b0'01111111'00000000000000000000000; + float a = *(float*)&x_a; + float b = *(float*)&x_b; + float c = *(float*)&x_c; + double cx = c; + printf("%08x: %f\n", x_a, a); + printf("%08x: %f\n", x_b, b); + printf("%08x: %f\n", x_c, c); + printf("%016llx: %f\n", *(unsigned long long *)&cx, cx); + return 0; +} +``` + +```sh {cmd hide} +while ! [ -f 2_1.out ]; do sleep .1; done; ./2_1.out +``` + +### Properties + +* FP0 is Same as Int0 + +* Can (almost) use unsigned int comparison + +### Arithmetic + +$x +_f y = \text{Round}(x+y)$ +$x \times_f y = \text{Round}(x\times y)$ + +Idea: +1. compute exact result +2. 
Make it fit into desired precision + * overflow if too large + * **round** to fit into frac + +#### Rounding + +* Towards zero +* Round down +* Round up +* **Nearest Even** (default) + +**Nearest Even** is the default rounding mode +Any other kind of rounding mode is hard to get without dropping into assembly, but C99 has support for rounding mode management. + +This rounding mode is used because it **reduces statistical bias**. + +For binary fractional numbers: +* "Even" when least significant bit is $0$ +* "Half way" when bits to right of rounding position are $100..._2$ + +so for example of rounding to nearest $1/4$: + +| Binary Value | Rounded | Action | +| ------------ | ------- | ---------- | +| `10.00011` | `10.00` | (<1/2)Down | +| `10.00110` | `10.01` | (>1/2)Up | +| `10.11100` | `11.00` | (=1/2)Up | +| `10.10100` | `10.10` | (=1/2)Down | + +`BBGRXXX` + +* `G`: **G**uard bit: LSB of result +* `R`: **R**ound bit: first bit removed +* `X`: **S**ticky bits: OR of remaining bits(001 = 1, 000 = 0) + +Round up conditions +1. R = 1, S = 1 -> `>.5` +2. 
G = 1, R = 1, S = 0 -> Round to even + +```c {cmd="gcc-14" args=[-x c $input_file --std=c23 -O0 -m32 -o 2_2.out]} +#include + +int main() { + unsigned long long + tb = 0b0'10000010000'0000000000000000000001010000000000000000000000000000; + unsigned xb = 0b0'10000001'01000000000000000000011; + double t = *(double*)&tb; + float x = t; + for(int i=31; i>=0;i--) { + if(i == 31 - 1) { + printf("/"); + } else if (i == 31 - 1 - 8){ + printf("/"); + } + printf("%d", !!((*(unsigned *)&x) & (1< + + + +```c +int swap (long *xp, long *yp) { + long t0 = *xp; + long t1 = *yp; + *xp = t1; + *yp = t0; +} +``` + + + +```nasm +swap: + movq (%rdi), %rax + movq (%rsi), %rdx + movq %rdx, (%rdi) + movq %rax, (%rsi) + ret +``` + + + + +Complete form of memory addressing modes: +`D(Rb, Ri, S)` means `Mem[Reg[Rb] + S*Reg[Ri] + D]` +* `D`: Constant "displacement" +* `Rb`: Base Register +* `Ri`: Index Register +* `S`: Scale Factor(1, 2, 4, or 8) + +for example: + +| `%rdx` | `%rcx` | +| -------- | -------- | +| `0xf000` | `0x0100` | + +* `0x8(%rdx)` = `0xf008` +* `(%rdx, %rcx)` = `0xf100` +* `(%rdx, %rcx, 4)` = `0xf400` +* `0x80(,%rdx, 2)` = `0x1e080` + +#### Arithmetic & Logical Operations + +* `leaq $src, $dst` + * computing address without memory reference like `p = &x[i]` + * computing arithmetic expression `x + k * y` + +* `addq $src, $dst` +* `subq $src, $dst` +* `imulq $src, $dst` +* `salq $src, $dst` +* `sarq $src, $dst` +* `shrq $src, $dst` +* `xorq $src, $dst` +* `andq $src, $dst` +* `orq $src, $dst` +all the above operator operates like `dest = dest # src` + +* `incq $dest` +* `decq $dest` +* `negq $dest` +* `notq $dest` + +## Control + +**Processor State(x86-64, Partial)** +* Temporary data(`%rax`, ...) 
+* Location of runtime stack(`%rsp`) +* Location of current code control point(`%rip`, instruction pointer) +* Status of recent tests(`CF`, `ZF`, `SF`, `OF`) + +### Condition Codes + +* Single bit registers + * `CF` Carry flag (for unsigned) + * `SF` Sign flag (for signed) + * `ZF` Zero flag + * `OF` Overflow flag (for signed) + +**Conditional Codes(Implicit Setting)** + +Implicit setting means the codes are set as a side effect of arithmetic operations(`addq`, `subq`, `mulq`) +for example: `addq`: `t = a + b` +* `CF` set if carry out from most significant bit or unsigned overflow +* `ZF` set if `t == 0` +* `SF` set if `t < 0` (as signed) +* `OF` set if two's-complement overflow or signed overflow +`(a > 0 && b > 0 && (a + b) < 0) || (a < 0 && b < 0 && (a + b) >= 0)` + +The codes are not implicitly set by `leaq`, because it is not designed to be used as arithmetic but as **address calculation**. So it does not affect the condition codes. + +**Conditional Codes(Explicit Setting)** + +The codes are set explicitly by the compare instruction. + +`cmpq b, a` is computing `a - b` without setting destination. + +* `CF` set if carry/borrow out from most significant bit or unsigned overflow +* `ZF` set if `a == b` or `a - b == 0` +* `SF` set if `(a - b) < 0` (as signed) +* `OF` set if two's-complement overflow or signed overflow +`(a > 0 && b < 0 && (a - b) < 0) || (a < 0 && b > 0 && (a - b) > 0)` + +They are also set explicitly by the test instruction. + +`testq b, a` is computing `a & b` without setting destination. + +It sets condition codes based on the value of `a & b`; it is useful to have one of the operands be a mask. 
+ +* `ZF` set when `(a & b) == 0` +* `SF` set when `(a & b) < 0` + +**Reading Condition Codes** + +`setX`: sets a single byte based on a combination of condition codes + +| setX | effect | desc | +| ------- | ---------------- | ------------------------- | +| `sete` | `ZF` | Equal / Zero | +| `setne` | `~ZF` | Not Equal / Not Zero | +| `sets` | `SF` | Negative | +| `setns` | `~SF` | Nonnegative | +| `setg` | `~(SF^OF) & ~ZF` | Greater (signed) | +| `setge` | `~(SF^OF)` | Greater or Equal (signed) | +| `setl` | `SF^OF` | Less (signed) | +| `setle` | `(SF^OF) \| ZF` | Less or Equal (signed) | +| `seta` | `~CF & ~ZF` | Above (unsigned) | +| `setb` | `CF` | Below (unsigned) | + +It does not alter the remaining bytes of the register; it only writes a 1-byte register (`%al`, `%bl`) + +```nasm +cmpq %rsi(y), %rdi(x) # compare x and y +setg %al # set when >(greater) +movzbl %al, %eax # move zero extend byte to long +ret +``` + +### Conditional Branches + +#### Jumping + +`jX` jumps to a different part of the code depending on condition codes. + +| jX | condition | desc | +| ----- | ---------------- | ------------------------- | +| `jmp` | 1 | Unconditional | +| `je` | `ZF` | Equal / Zero | +| `jne` | `~ZF` | Not Equal / Not Zero | +| `js` | `SF` | Negative | +| `jns` | `~SF` | Nonnegative | +| `jg` | `~(SF^OF) & ~ZF` | Greater (signed) | +| `jge` | `~(SF^OF)` | Greater or Equal (signed) | +| `jl` | `SF^OF` | Less (signed) | +| `jle` | `(SF^OF) \| ZF` | Less or Equal (signed) | +| `ja` | `~CF & ~ZF` | Above (unsigned) | +| `jb` | `CF` | Below (unsigned) | + +Old Style Conditional Branch + +```c {cmd=gcc args=[-Og -x c -fno-if-conversion -c $input_file -o 3_3.o]} +long absdiff(long x, long y) { + long result; + if (x > y) result = x - y; + else result = y - x; + return result; +} +``` + +```sh { cmd hide } +while ! 
[ -f 3_3.o ]; do sleep .1; done; objdump -d 3_3.o -Msuffix +``` + +**expressing with `goto`** + +```c {cmd=gcc args=[-Og -x c -fno-if-conversion -c $input_file -o 3_4.o]} +long absdiff_j(long x, long y) { + long result; + int ntest = x <= y; + if (ntest) goto Else; + result = x-y; + goto Done; +Else: + result = y-x; +Done: + return result; +} +``` + +#### Conditional Move + +Because branches are very disruptive to instruction flow through pipelines, **Conditional Moves** are widely used: they do not require control transfer. + +```c {cmd=gcc args=[-O3 -x c -c $input_file -o 3_5.o]} +long absdiff(long x, long y) { + long result; + if (x > y) result = x - y; + else result = y - x; + return result; +} +``` + +```sh {cmd hide} +while ! [ -f 3_5.o ]; do sleep .1; done; objdump -d 3_5.o -Msuffix +``` + +However, there are several *bad cases* for conditional move. + +* expensive computations +```c +val = Test(x) ? Hard1(x) : Hard2(x); +``` +because both values get computed, conditional moves are only effective for simple computations. +* risky computations +```c +val = p ? *p : 0; +``` +because both values get computed, this may have undesirable effects. +* Computations with side effects +```c +val = x > 0 ? x*=7 : x+=3; +``` +each expression has a side effect. + +### Loop + +#### do-while + + + + + + +
+ +```c +long pcount_do(unsigned long x) { + long result = 0; + do { + result += x & 0x1; + x >>= 1; + } while (x); + return result; +} +``` + + +```c {cmd=gcc args=[-Og -x c -c $input_file -o 3_6.o]} +long pcount_goto(unsigned long x) { + long result = 0; +loop: + result += x & 0x1; + x >>= 1; + if (x) goto loop; + return result; +} +``` +
+ +```sh {cmd hide} +while ! [ -f 3_6.o ]; do sleep .1; done; objdump -d 3_6.o -Msuffix +``` + +**general do-while translation** + + + + + + +
+ +```c +do { + Body +} while (Test); +``` + + +```c +loop: + Body + if (Test) goto loop; +``` +
+ +#### while + +**general while translation#1** + +it is called **jump-to-middle translation**, used with `-O0` (or `-Og`) flag. + + + + + + +
+ +```c +while(Test) { + Body +} +``` + + +```c + goto test; +loop: + Body +test: + if (Test) + goto loop; +done: +``` +
+ +```c {cmd=gcc args=[-Og -x c -c $input_file -o 3_7.o]} +long pcount_while(unsigned long x) { + long result = 0; + while (x) { + result += x & 0x1; + x >>= 1; + } + return result; +} +``` +```sh {cmd hide} +echo "jmp-to-middle translation" +while ! [ -f 3_7.o ]; do sleep .1; done; objdump -d 3_7.o -Msuffix +``` + +**general while translation#2** + +while to do-while conversion, used with `-O1` flag. + + + + + + + +
+ +```c +while(Test) { + Body +} +``` + + +```c +if (!Test) goto done; +do { + Body +} while (Test); +done: +``` + + +```c +if (!Test) goto done; +loop: + Body + if (Test) goto loop; +done: +``` +
+ +```c {cmd=gcc args=[-O1 -x c -c $input_file -o 3_8.o]} +long pcount_while(unsigned long x) { + long result = 0; + while (x) { + result += x & 0x1; + x >>= 1; + } + return result; +} +``` +```sh {cmd hide} +echo "while to do-while conversion" +while ! [ -f 3_8.o ]; do sleep .1; done; objdump -d 3_8.o -Msuffix +``` + +#### for loop form + + + + + + +
+ +```c +for (init; test; update) { + Body +} +``` +
+ +**for-to-while conversion** + + + + + + + + + + + + + +
+ +```c +for (Init; Test; Update) { + Body +} +``` + + +```c +Init; +while(Test) { + Body + Update; +} +``` +
+ +```c {cmd=gcc args=[-O3 -x c -c $input_file -o 3_9.o]} +#include +#define WSIZE 8 * sizeof(int) + +long pcount_for(unsigned long x) { + size_t i; + long result = 0; + for (i = 0; i < WSIZE; i++) { + unsigned bit = (x >> i) & 0x1; + result += bit; + } + return result; +} +``` + + +```c {cmd=gcc args=[-O3 -x c -c $input_file -o 3_10.o]} +#include +#define WSIZE 8 * sizeof(int) +long pcount_for(unsigned long x) { + size_t i; + long result = 0; + i = 0; + while(i < WSIZE) { + unsigned bit = (x >> i) & 0x1; + result += bit; + i++; + } + return result; +} +``` +
+ +```sh {cmd hide} +while ! [ -f 3_9.o ]; do sleep .1; done; objdump -d 3_9.o -Msuffix +``` + + +```sh {cmd hide} +while ! [ -f 3_10.o ]; do sleep .1; done; objdump -d 3_10.o -Msuffix +``` +
+ +for to do-while conversion, initial test can be optimized away. + +### Switch + +#### Jump Table Structure + +Switch form + + + + + + +
+ +```c {cmd=gcc args=[-Og -fno-asynchronous-unwind-tables -fno-stack-protector -x c -S $input_file -o 3_11.s]} +long switch_eg (long x, long y, long z) { + long w = 1; + switch(x) { + case 1: + w = y*z; + break; + case 2: + w = y/z; + /* Fall Through */ + case 3: + w += z; + break; + case 5: + case 6: + w -= z; + break; + case 7: + w *= z; + break; + default: + w = 2; + } + return w; +} +``` + + +```sh {cmd hide} +while ! [ -f 3_11.s ]; do sleep .1; done; cat 3_11.s +``` +
+## Procedures + +Mechanisms in Procedures + +* **Passing control** + * to beginning of procedure code + * back to return point +* **Passing data** + * procedure arguments + * return value +* **Memory management** + * allocate during procedure execution + * deallocate upon return + +These mechanisms are all implemented with machine instructions. The **x86-64 implementation** of a procedure uses only those mechanisms required. + +### Stack Structure + +**x86-64 Stack** + +Region of memory managed with *stack discipline*. It grows toward lower addresses. `%rsp` contains lowest stack address(address of top element). + +`pushq $src` +* fetches operand at src +* decrement `%rsp` by 8 +* write operand at address given by `%rsp` + +`popq $dest` +* read value at address given by `%rsp` +* increment `%rsp` by 8 +* store value at dest(must be register) + +### Procedure Control Flow + +```c {cmd=gcc args=[-Og -x c -c $input_file -o 3_12.o]} +long mult2(long x, long y) { + long t = x * y; + return t; +} + +void multstore(long x, long y, long *dest) { + long t = mult2(x, y); + *dest = t; +} + +``` + +```sh {cmd hide} +while ! [ -f 3_12.o ]; do sleep .1; done; objdump -d 3_12.o -Msuffix +``` + +Procedure call: `call label` +* push return address on stack +* jmp to label +Return address: +* Address of the next instruction right after call +Procedure return: `ret` \ No newline at end of file diff --git a/notes/4.md b/notes/4.md new file mode 100644 index 0000000..2992031 --- /dev/null +++ b/notes/4.md @@ -0,0 +1,6 @@ +# Machine Level Programming + +아키텍쳐(ISA) +* intel(x86): CISC +* ARM(aarch64, aarch32): RISC +