From c0677a1dcad48e372299cf00764eaf9f7a344b12 Mon Sep 17 00:00:00 2001 From: Minseong Jang Date: Tue, 17 Sep 2024 19:51:49 +0900 Subject: [PATCH] Update docs --- doc/docs/SUMMARY.md | 9 +- doc/docs/examples/cpu.md | 557 -- doc/docs/examples/cpu/decode.md | 116 + doc/docs/examples/cpu/exe.md | 127 + doc/docs/examples/cpu/fetch.md | 84 + doc/docs/examples/cpu/hazard.md | 218 + doc/docs/examples/cpu/implementation.md | 91 + doc/docs/examples/cpu/mem.md | 177 + doc/docs/examples/cpu/overview.md | 110 + doc/docs/examples/cpu/wb.md | 58 + doc/docs/figure/branch_mis.drawio.svg | 4 - doc/docs/figure/combinator-fifo-waveform.svg | 4870 +++++++++++++++++ doc/docs/figure/combinator-fifo.svg | 245 + .../figure/combinator-fsm-egress-waveform.svg | 4750 ++++++++++++++++ .../combinator-fsm-ingress-waveform.svg | 4736 ++++++++++++++++ .../figure/combinator-reg-fwd-waveform.svg | 4715 ++++++++++++++++ doc/docs/figure/combinator-reg-fwd.svg | 245 + doc/docs/figure/combinator-sink-waveform.svg | 4438 +++++++++++++++ doc/docs/figure/combinator-sink.svg | 141 + .../figure/combinator-source-waveform.svg | 4438 +++++++++++++++ doc/docs/figure/combinator-source.svg | 141 + doc/docs/figure/cpu-hazard-control-bp.svg | 754 +++ .../figure/cpu-hazard-control-exception.svg | 828 +++ .../cpu-hazard-data-bypassing-execute.svg | 357 ++ .../cpu-hazard-data-bypassing-memory.svg | 461 ++ .../cpu-hazard-data-bypassing-writeback.svg | 557 ++ doc/docs/figure/cpu-hazard-data-csr.svg | 674 +++ doc/docs/figure/cpu-hazard-data-load-use.svg | 465 ++ doc/docs/figure/cpu-hazard-structural.svg | 525 ++ doc/docs/figure/cpu-implementation-decode.svg | 515 ++ doc/docs/figure/cpu-implementation-exe.svg | 515 ++ doc/docs/figure/cpu-implementation-fetch.svg | 522 ++ doc/docs/figure/cpu-implementation-mem.svg | 907 +++ doc/docs/figure/cpu-implementation-wb.svg | 303 + doc/docs/figure/cpu-structure-ideal.svg | 295 + doc/docs/figure/cpu-structure.svg | 511 ++ doc/docs/figure/dcache_miss.drawio.svg | 4 - 
doc/docs/figure/dcahce_miss.drawio.svg | 4 - doc/docs/figure/fencei.drawio.svg | 4 - doc/docs/figure/hazard.svg | 276 + doc/docs/figure/interface.drawio.svg | 4 - doc/docs/figure/load_use_stall.drawio.svg | 4 - doc/docs/figure/pipeline_kill.drawio.svg | 4 - doc/docs/lang/combinator.md | 480 +- doc/docs/lang/interface.md | 45 +- doc/docs/lang/module.md | 20 +- doc/docs/tutorial/fir_filter.md | 4 +- doc/docs/tutorial/masked_merge.md | 12 +- doc/docs/tutorial/tutorial.md | 12 +- hazardflow-designs/src/cpu/exe.rs | 2 +- hazardflow-designs/src/cpu/mem.rs | 34 +- 51 files changed, 38634 insertions(+), 734 deletions(-) delete mode 100644 doc/docs/examples/cpu.md create mode 100644 doc/docs/examples/cpu/decode.md create mode 100644 doc/docs/examples/cpu/exe.md create mode 100644 doc/docs/examples/cpu/fetch.md create mode 100644 doc/docs/examples/cpu/hazard.md create mode 100644 doc/docs/examples/cpu/implementation.md create mode 100644 doc/docs/examples/cpu/mem.md create mode 100644 doc/docs/examples/cpu/overview.md create mode 100644 doc/docs/examples/cpu/wb.md delete mode 100644 doc/docs/figure/branch_mis.drawio.svg create mode 100644 doc/docs/figure/combinator-fifo-waveform.svg create mode 100644 doc/docs/figure/combinator-fifo.svg create mode 100644 doc/docs/figure/combinator-fsm-egress-waveform.svg create mode 100644 doc/docs/figure/combinator-fsm-ingress-waveform.svg create mode 100644 doc/docs/figure/combinator-reg-fwd-waveform.svg create mode 100644 doc/docs/figure/combinator-reg-fwd.svg create mode 100644 doc/docs/figure/combinator-sink-waveform.svg create mode 100644 doc/docs/figure/combinator-sink.svg create mode 100644 doc/docs/figure/combinator-source-waveform.svg create mode 100644 doc/docs/figure/combinator-source.svg create mode 100644 doc/docs/figure/cpu-hazard-control-bp.svg create mode 100644 doc/docs/figure/cpu-hazard-control-exception.svg create mode 100644 doc/docs/figure/cpu-hazard-data-bypassing-execute.svg create mode 100644 
doc/docs/figure/cpu-hazard-data-bypassing-memory.svg create mode 100644 doc/docs/figure/cpu-hazard-data-bypassing-writeback.svg create mode 100644 doc/docs/figure/cpu-hazard-data-csr.svg create mode 100644 doc/docs/figure/cpu-hazard-data-load-use.svg create mode 100644 doc/docs/figure/cpu-hazard-structural.svg create mode 100644 doc/docs/figure/cpu-implementation-decode.svg create mode 100644 doc/docs/figure/cpu-implementation-exe.svg create mode 100644 doc/docs/figure/cpu-implementation-fetch.svg create mode 100644 doc/docs/figure/cpu-implementation-mem.svg create mode 100644 doc/docs/figure/cpu-implementation-wb.svg create mode 100644 doc/docs/figure/cpu-structure-ideal.svg create mode 100644 doc/docs/figure/cpu-structure.svg delete mode 100644 doc/docs/figure/dcache_miss.drawio.svg delete mode 100644 doc/docs/figure/dcahce_miss.drawio.svg delete mode 100644 doc/docs/figure/fencei.drawio.svg create mode 100644 doc/docs/figure/hazard.svg delete mode 100644 doc/docs/figure/interface.drawio.svg delete mode 100644 doc/docs/figure/load_use_stall.drawio.svg delete mode 100644 doc/docs/figure/pipeline_kill.drawio.svg diff --git a/doc/docs/SUMMARY.md b/doc/docs/SUMMARY.md index 3b5ab43..8f1e272 100644 --- a/doc/docs/SUMMARY.md +++ b/doc/docs/SUMMARY.md @@ -19,7 +19,14 @@ # Case Studies -- [CPU Core (5-Stage Pipelined)](./examples/cpu.md) +- [CPU Core (5-Stage Pipelined)](./examples/cpu/overview.md) + + [Hazards](./examples/cpu/hazard.md) + + [Implementation](./examples/cpu/implementation.md) + + [Fetch](./examples/cpu/fetch.md) + + [Decode](./examples/cpu/decode.md) + + [Execute](./examples/cpu/exe.md) + + [Memory](./examples/cpu/mem.md) + + [Writeback](./examples/cpu/wb.md) - [NPU Core (Based on Systolic Array)](./examples/npu.md) # Appendix diff --git a/doc/docs/examples/cpu.md b/doc/docs/examples/cpu.md deleted file mode 100644 index 50dc3b4..0000000 --- a/doc/docs/examples/cpu.md +++ /dev/null @@ -1,557 +0,0 @@ -# CPU Core (5-Stage Pipelined) - -We will use the 
5-Stage pipelined Sodor CPU as an implementation example for the HazardFlow HDL. -The Sodor CPU is an educational, open-source processor developed in the [RISC-V](https://riscv.org/) project. - -## Pipelined Design - -Pipelined design can improve the overall processor performance with the trade-off of adding design complexity. - -- The overall performance is improved by breaking down the critical path into multiple stages, while multiple instructions are processing **simultaneously** at different stages. -* The design complexity comes from the necessity of **hazard** from the later stages to the earlier stages to make sure the execution result is correct. - -### Dataflow Overview - -

- -

- -**Payload:** - -* Each stage calculates its payload every clock cycle. -* Payloads flow horizontally from left to right through the stages. -* Payload will be passed to the next stage in the next clock cycle. -* Payload might get dropped (not passing to the next stage) because of hazards from later stages. -* Payload might get stalled (stay in the same stage) because of hazards from later stages. -* Payload might receive data from later stages' hazards and get updated before passing to the next stage. -* Payload sending to the next stage contains the necessary information for the next stage to calculate its payload, resolver, and update its state. - -**Resolver:** - -* Each stage calculates its resolver every clock cycle. -* Resolver flows horizontally from right to left through the stages. -* Resolver passes to the earlier stages within the same clock cycle. -* Resolvers from later stages contains the necessary information for the previous stages to construct their payloads and resolvers. - -**State:** - -* Each stage keeps its state in some registers (A.K.A latches). -* The state in each stage might be coming from 3 different sources: - * The payload from the previous stage (decode, execution, memory). - * The resolver from the earlier stage (fetch). - * The stage maintains its state (register file in write-back stage). -* The state can be used to calculate the payload or resolver within each stage. -* State might get extracted out of the registers and not pass to the next stage as a payload (get dropped) because of hazards from later stages. - -### 5-Stage Overview - -* Fetch: Retrieve the next instruction to be executed from the instruction memory. -* Decode: Decode the fetched instruction. -* Execution: Perform the operation specified by the instruction. -* Memory: Access data memory or CSR if the instruction involves memory operations or CSR operation. -* Write-Back: Write the result of the execution, memory access, or CSR access back to the register file. 
- -### Pipelining (w/o Hazard) - -The ideal pipelined design works like the following figure if there is no dependency between the instructions. - -

- -

- -### Pipelining (w/ Hazard) - -However, most of the time, there are some dependencies between instructions. -The later stages need to send back resolvers to the earlier stages to make sure the execution result is correct. -The resolvers might cause the early stages to stall or extract the payload out of their latches and drop their payloads (kill). - -#### Branch Misprediciton - -The fetch stage will try to fetch the next instruction as early as possible even if the next `pc_sel` has not yet been calculated in the execution stage. -We consider it a right prediction when the calculated `pc_sel` in the execution stage is PLUS 4; otherwise, it is a misprediction. -We need to discard the mispredicted instructions in the fetch stage and decode stage if there is a misprediction. - -``` -I1: be x1, x2, target -I2: ADD x5, x6, x7 -I3: LW x5, 8(x6) - -target: -# Instructions to execute if x1 == x2 -I4: SUB x5, x6, x7 -... -``` - -

- -

- -* At `Cycle 3`, the execution stage will pass its resolver `exe_r` to the decode stage. -* `exe_r` contains the control information to kill the mispredicted instruction in fetch stage and decode stage. -* The decode stage will extract the data from its latch and drop its payload in the next cycle when it receives the resolver from the execution stage. -* The decode stage will calculate its resolver, including the correct `pc_sel` and the kill signal, and send it to the fetch stage. -* The fetch stage will extract the data from its latch and drop its payload in the next cycle when it receives the resolver from the decode stage. -* The fetch stage will fetch the correct `pc` with the correct `pc_sel` in the next clock cycle. - - -#### The `FENCE.I` Instruction - -RISC-V does not guarantee that stores to instruction memory `imem` will be made visible to instruction fetches until a `FENCE.I` instruction is executed (The `FENCE.I` instruction reaches the memory stage). - -``` -I1: SW x2, 0(x1) # Store the value in x2 to instruction memory at address 0(x1) -I2: FENCE.I -I3: .... -``` - -

- -

- -* `I1` is writing data to the instruction memory `imem` at the address `0(x1)`. -* `I3` is fetched from instruction memory at the address `0(x1)`. -* The fetch stage **CANNOT** sees the changes to the instruction memory **until** the `FENCE.I` instruction reaches the memory stage. -* At cycle 5, the `FENCE.I` instruction reaches the memory stage. -* At cycle 5, the fetch stage can see the newest changes in the instruction memory and fetches `I3`. -* The `pc_sel` should stay the same at cycle 3 and cycle 4. - -#### Pipeline Kill - -There are 2 cases we need to kill the entire pipeline (the fetch stage, the decode stage, and the execution stage) and disregard stalls. -* When the processor returns from an exception. - * The CSR's response contains the `eret` signal. -* When the processor encounters an illegal instruction. - -Dataflow when the processor encounters an illegal instruction: -* An unsupported instruction or illegal is decoded in the decode stage. -* The exception information is passed as a payload to the execution stage. -* The execution stage will pass the exception information to the memory stage. -* The memory stage will make a CSR request and write the exception information to the CSR. -* The memory stage will send out the resolver to execution stage in the same clock cycle containing: - * The `pipeline_kill` signal to kill all the earlier stage. - * The address of the exception handler. -* The execution stage receives the resolver from memory stage. - * It extracts the payload from its latch and drops the payload in the next clock cycle. - * It updates the `pc_sel` to the exception handler. - * It sends out the resolver containing the updated `pc_sel` to decode stage in the same cycle. -* The decode stage receives the resolver from execution stage. - * It extracts the payload from its latch and drops the payload in the next clock cycle. 
- * It sends out the resolver containing the updated `pc_sel` and the kill signal `if_kill` to fetch stage in the same cycle. -* The fetch stage receives the resolver from decode stage. - * It extracts the payload from its latch and drops the payload in the next clock cycle. - * It will fetch the exception handler in the next clock cycle. - -``` -I1: some illegal instruction -I2: SW x2, 0(x1) -I3: ADD x5, x6, x7 -I4: LW x5, 8(x6) -``` - -

- -

- -#### Load-Use Stall - -When read-after-write (RAW) dependency happens, we need to stall the instruction in decode stage until the instruction in execution stage reaches the memory stage. - -``` -I1: ADD x3, x4, x5 -I2: LW x6, 8(x5) -I3: MV x1, x6 -``` - -* `I2` is reading the memory address `8(x5)`, then it will write the result to `x6`. -* `I3` needs to read the data in `x6`. -* The `x6` can only be updated when `I2` reaches the memory stage. -* We need to stall `I3` at the decode stage until `I2` reaches the memory stage (Adding a bubble between `I2` and `I3`). -* After `I2` gets the result from memory response, `I3` can be decoded with [data bypassing](#data-bypassing-in-decode-stage). - -

- -

- -#### Data Cache Miss - -If there is a data cache miss, the processor must go to the lower memory hierarchy to search the data, which will take multiple cycles. -* The instruction in execution stage needs to be stalled, since the memory stage is taking multiple cycles to get the memory response. -* The instruction in decode stage needs to be stalled for 2 reasons: - * It is a pipelined design and the execution stage is stalled. - * Data could be [bypassed](#data-bypassing-in-decode-stage) from the memory stage. -* The instruction in the fetch stage will be stalled since the `false` ready signal from the memory module will go all the way down to the fetch stage's egress resolver. (Minseong please have a look about this sentence) - -``` -I1: LW x5, 8(x3) -I2: ADD x3, x4, x6 -I3: MV x7, x3 -``` - -

- -

- -#### Data Bypassing in Decode Stage - -We must know certain registers' values in the decode stage then we can pass the instruction to the later stages for other processing. -The most straightforward method is to read the registers' value from the register file. -However, we do not need to always wait for the older instructions to reach the last write-back stage where writing the result to the register file happens. - -* Data bypassing from execution stage: - ``` - I1: ADD x3, x4, x5 - I2: LW x6, 0(x3) - ``` - * When `I1` is in the execution stage, `I2` is still in decode stage. - * The execution stage can bypass the result of `x3` to `I2` in the decode stage. - -

- -

- -* Data bypassing from the memory stage: - ``` - I1: LW x3, 0(x4) - I2: LW x6, 0(x3) - ``` - * When `I1` is in the execution stage, `I2` is still in decode stage, and [load-use stall](#load-use-stall) will happen. - * When `I1` reaches the memory stage, `I2` is still in the decode stage. - * When `I1` gets the value of `x3` from memory, the memory stage can bypass the value of `x3` to the `I2` in decode stage. - -

- -

- -* Data bypassing from the write-back stage: - ``` - I1: LW x3, 0(x4) - I2: ADD x5, x6, x7 - I3: SUB x8, x9, x1 - I4: LW x6, 0(x3) - ``` - * When `I1` reaches write-back stage, `I4` is in decode stage. - * Write-back stage can bypass the value of `x3` to `I4` in decode stage. - -

- -

- -#### Decode Stall by CSR - -The CSR could write its response to certain registers. If we need to decode the value of those registers in the decode stage, then we need to stall the instruction in the decode stage until the response coming back from the CSR in the memory stage. - -``` -I1: li t1, 2 -I2: csrr t0, mcause -I3: bne t1, t0, 0x80000210 -``` - -* The branch instruction `I3` needs to read the value of registers `t0` and `t1` in the decode stage. -* The value of `t0` is coming from the CSR, which is located in the memory stage. -* The branch instruction need to be stalled when the CSR instruction is in the execute stage. -* The value of `t0` will be bypassed from memory stage. - -

- -

- -* At cycle 4, the execution resolver stalls the payload in decode stage. -* At cycle 5, the value of `t0` will be bypassed to the decode stage from memory stage. - -## Specification - -In this section, we will explain the specification of each stage. - -### Fetch Stage - -The fetch stage must calculate the next program counter `pc` and drop the invalid `pc`. -* The egress payload of this stage is the next instruction's data and address from the instruction memory's response `mem_resp`. -* The egress resolver of this stage indicates if the current `pc` should be killed and the program counter selector `pc_sel`. -* This is the first stage, there is no ingress interface. - -

- -

- -**Calculate the Next Program Counter** - -* We can get the program counter selector `pc_sel` from the later stages as part of the egress resolver to the fetch stage. -* We use the current `pc` and `pc_sel` to calculate the next `pc`. -* The `pc_sel` specifies the next `pc` in 3 cases. - * The current `pc` + 4. - * A specific target. - * Stays the same as the current `pc`. - -

- -

- -**Store The Current PC and Extract The Invalid PC** - -* We need to store the current `pc` for two reasons: - * We must pass the current `pc` as a resolver to previous combinators for calculating the next `pc`. - * We want to extract the current `pc` from the register and drop the payload if certain hazards happen. -* Whether the current `pc` should be killed will be passed to the fetch stage as an egress resolver. - -

- -

- -**Request Instruction Memory and Discard Invalid PC Response** - -* We construct instruction memory request from the `pc`. -* The instruction memory is provided as a black box module, we can use `comb` to attach this module. -* We can assume that the `imem` can provide the response in the same cycle. -* We filter out the response if certain hazards happen and send it out as the egress payload of the fetch stage. - -

- -

- -### Decode Stage - -The decode stage decodes the instruction data from the fetch stage, -calculates the payload passing to the execution stage, -and also calculates the resolver to the fetch stage containing program counter selector `pc_sel`, -and information indicating if the current `pc` should be killed `if_kill`. - -* The ingress payload is the instruction memory response containing the instruction's data and address `mem_resp`. -* The ingress resolver is `pc_sel` and `if_kill`. -* The egress payload contains the information of the decoded instruction `DecEP`. -* The egress resolver contains the resolvers from later stage. - -

- -

- -**Calculate Ingress Resolver for Fetch Stage** - -* Calculate the resolver from decode stage to fetch stage. -* Resolvers from later stages will be used here to calculate `pc_sel` and `if_kill` - -

- -

- -**Store The Instruction Memory Response and Decode The Instruction** - -* We store the `imem` response into the latch. -* We decode the current `imem` response and calculate the instruction. - -

- -

- -**Stall the Payload and Pass Back The Instruction** - -* We need to stall the payload if certain certain hazards happen. -* We need to pass back the decoded instruction to previous combinators for calculating resolver. - -

- -

- -**Calculate the Egress Payload for Execution Stage** - -* Calculate the payload for execution stage. -* Drop the payload if certain hazards happen. - -

- -

- -### Execution Stage - -The execution stage executes instruction from the decode stage, -calculates the payload passing to the memory stage, -and also calculates the resolver passing to the decode stage. - -* The ingress payload is decode stage's egress payload `DecEP`. -* The ingress resolver is the resolvers from execute stage and later stages `(exe_r, mem_r, wb_r)`. -* The egress payload should contain necessary information for the memory stage `exe_ep`. -* The egress resolver contains the resolver from memory stage and write-back stage `(mem_r, wb_r)`. - -

- -

- -**Calculate Ingress Resolver for Decode Stage** - -* Calculate the execution stage resolver and pass it with `mem_r` and `wb_r` to the decode stage. - -

- -

- -**Store the Decode Stage Egress Payload** - -* Store the `dec_ep` into register for passing back to previous combinators for calculating `exe_r`. - -

- -

- -**Execute The Instruction** - -* Execute the instruction and pass the `alu_out` to the next combinator. - -

- -

- -**Stall The Payload and Pass Back The Result of ALU** - -* We need to stall the payload if certain certain hazards happen. -* We need to pass back the result of ALU `alu_out` to previous combinators for calculating resolver. - -

- -

- -**Calculate The Payload For Memory Stage** - -* Calculate the payload for the memory stage. -* Drop the payload if certain hazards happen. - -

- -

- -### Memory Stage - -The memory stage sends the requests to the memory `dmem` module and the CSR module according to the instruction types. -When the memory stage gets the response from `dmem` or CSR, it will calculate the egress payload of the memory stage and send the payload to the write-back stage. -Also, the memory stage will calculate its resolver from the response from `dmem` and CSR and send it back to the execution stage. - -* The ingress payload is execution stage's egress payload `ExeEP`. -* The ingress resolver is the resolvers from memory stage and the write-back stages `(mem_r, wb_r)`. -* The egress payload should contain necessary information for the write-back stage `mem_ep`. -* The egress resolver contains the resolver from the write-back stage `wb_r`. - -**Store The Execution Stage Egress Payload and Filter out Unnecessary Information** - -* Store the `exe_ep` into the latch to create a pipeline -* Filter out the unnecessary information and pass the resolver to execution stage. - -

- -

- -**Calculate The Resolver For Execution Stage** - -* Clear up the resolver from later combinators. -* Calculate the resolver for the execution stage. - -

- -

- -**Split The Ingress Interface For Different Memory Request** - -* We split the one ingress interface into three egress interfaces for requesting different module (memory or CSR). -* We need to calculate the branch selector for splitting interface. -* We only select one of the egress interfaces to transfer the payload, also combines all the egress interfaces' resolvers into the ingress resolver. - -Calculate the branch selector: - -

- -

- -Split the ingress interface into three egress interfaces - -

- -

- -**Calculate The Memory Request and Request The Memory Module** - -* The memory module's egress resolver ready signal is `false` until the memory module gets the data from memory. -* The memory module might take multiple cycles to find the desired data if data cache miss happens. -* If data cache miss happens, certain instructions in other stages need to be stalled. - -

- -

- -**Calculate The CSR Request and Request The CSR Module** - -* In RISC-V, handling exceptions and interrupts involves setting up the exception vectors and managing the states when exceptions occur. Here are relevant instructions and registers: -* CSR provides the exception and interrupt handler `evec`. -* CSR provides the state of the program to continue execution after handling the exception `eret`. - -

- -

- -**Pass Back The Execution Egress Payload To Earlier Combinators** - -* The earlier combinator needs the egress payload `exe_ep` for calculating the memory stage's resolver. -* We pass the `exe_ep` back in each branch. - -

- -

- -**Formatting The Ingress Interfaces For Merge** - -* To merge the three branches, we need to format the ingress interfaces. -* For the fields which is not in the ingress payload, we simply set it as `None`. - -

- -

- -**Merge The Branches** - -* This combinator will select one from the ingress interfaces to deliver the ingress payload to the egress payload and also leave the inner of the egress resolver untouched to the ingress interfaces. - -

- -

- -**Calculate The Egress Payload of Memory Stage** - -* Calculate the egress payload of memory stage and pass it to the write-back stage - -

- -

- -### Writeback Stage - -This is the last stage of the pipelined design. -In this stage we need to write the data back to the register file. - -* The ingress payload is the egress payload of the memory stage `mem_ep`. -* The ingress resolver is the resolver from write-back stage itself `wb_r`. -* This is the last stage of the pipelined design, there is no egress interface. - -**Calculate The Write-Back Stage Resolver** - -The resolver contains: -* The write-back register used in the decode stage data bypassing. -* The whole register file used in the decode stage to decode operands. -* The retire flag for writing back to the CSR module. - -

- -

- -**Store The Execution Stage Egress Payload Pass Back For Calculating** - -* Store the execution stage egress payload to create a latch. -* Pass the execution stage egress payload to previous combinator for calculating the write-back stage payload. - -

- -

- -**Register File** - -* We update the register file every clock cycle. -* Send back the whole register file as a resolver for the decode stage and retire flag for CSR module. - -

- -

diff --git a/doc/docs/examples/cpu/decode.md b/doc/docs/examples/cpu/decode.md new file mode 100644 index 0000000..b466c59 --- /dev/null +++ b/doc/docs/examples/cpu/decode.md @@ -0,0 +1,116 @@ +# Decode stage + +The decode stage mainly does the following things: + +1. Decodes the instruction. +2. Reads the values of the source registers. + +It can be decomposed into combinators as follows ([code](https://github.com/kaist-cp/hazardflow/blob/main/hazardflow-designs/src/cpu/decode.rs)): + +

+ +

+ +## Input and Output + +The IO interface type of the decode stage is as follows: + +### Ingress + +It takes an ingress interface with type `I<VrH<FetEP, DecR>, { Dep::Demanding }>`. + +You can check the explanation of `FetEP` and `DecR` [here](fetch.md#egress). + +### Egress + +It returns an egress interface with type `I<VrH<DecEP, ExeR>, { Dep::Demanding }>`. + +Each of `DecEP` and `ExeR` is defined as a struct with the following fields: + +**DecEP** (in [decode.rs](https://github.com/kaist-cp/hazardflow/blob/main/hazardflow-designs/src/cpu/decode.rs)): + +- `wb_info`: Writeback information which contains the writeback address and selector. +- `br_info`: Branch information which contains the branch type and the base and offset of the target address. +- `alu_input`: ALU input. +- `mem_info`: Memory information. +- `csr_info`: CSR information. +- `is_illegal`: Indicates whether the instruction is illegal. +- `pc`: PC. +- `debug_inst`: Instruction (for debugging purposes). + +**ExeR** (in [exe.rs](https://github.com/kaist-cp/hazardflow/blob/main/hazardflow-designs/src/cpu/exe.rs)): + +- `bypass_from_exe`: Bypassed data from the execute stage. +- `bypass_from_mem`: Bypassed data from the memory stage. +- `bypass_from_wb`: Bypassed data from the writeback stage. +- `stall`: Destination register address of load or CSR instruction in the execute stage. +- `redirect`: Redirection PC. +- `rf`: Register file. + +## Behavior + +Each combinator does the following things: + +**M0** ([`map_resolver_inner`](https://kaist-cp.github.io/hazardflow/docs/hazardflow_designs/std/hazard/struct.I.html#method.map_resolver_inner)): + +- Constructs the ingress resolver of the decode stage. + +**M1** ([`reg_fwd`](https://kaist-cp.github.io/hazardflow/docs/hazardflow_designs/std/hazard/struct.I.html#method.reg_fwd)): + +- Creates a pipelined stage before decoding the instruction. +- Sends a ready signal which indicates it will be free in the next cycle. 
+ +**M2** ([`map`](https://kaist-cp.github.io/hazardflow/docs/hazardflow_designs/std/hazard/struct.I.html#method.map-1)): + +- Decodes the instruction. + +**M3** ([`map_resolver_block`](https://kaist-cp.github.io/hazardflow/docs/hazardflow_designs/std/hazard/struct.I.html#method.map_resolver_block)): + +- Stalls until the values of the source registers are visible. + +**M4** ([`filter_map_drop_with_r`](https://kaist-cp.github.io/hazardflow/docs/hazardflow_designs/std/hazard/struct.I.html#method.filter_map_drop_with_r)): + +- Reads the values of the source registers and attaches them to the payload. +- Filters out the payload when the redirection happens. + + diff --git a/doc/docs/examples/cpu/exe.md b/doc/docs/examples/cpu/exe.md new file mode 100644 index 0000000..b0a78a1 --- /dev/null +++ b/doc/docs/examples/cpu/exe.md @@ -0,0 +1,127 @@ +# Execute stage + +The execute stage mainly does the following things: + +1. Executes the ALU. +2. Resolves the branch misprediction. + +It can be decomposed into combinators as follows ([code](https://github.com/kaist-cp/hazardflow/blob/main/hazardflow-designs/src/cpu/exe.rs)): + +

+ +

+ +## Input and Output + +The IO interface type of the execute stage is as follows: + +### Ingress + +It takes an ingress interface with type `I<VrH<DecEP, ExeR>, { Dep::Demanding }>`. + +You can check the explanation of `DecEP` and `ExeR` [here](decode.md#egress). + +### Egress + +It returns an egress interface with type `I<VrH<ExeEP, MemR>, { Dep::Demanding }>`. + +Each of `ExeEP` and `MemR` is defined as a struct with the following fields: + +**ExeEP** (in [exe.rs](https://github.com/kaist-cp/hazardflow/blob/main/hazardflow-designs/src/cpu/exe.rs)): + +- `wb_info`: Writeback information which contains the writeback address and selector. +- `alu_out`: ALU output. +- `mem_info`: Memory information. +- `csr_info`: CSR information. +- `is_illegal`: Indicates whether the instruction is illegal. +- `pc`: PC. +- `debug_inst`: Instruction (for debugging purposes). + +**MemR** (in [mem.rs](https://github.com/kaist-cp/hazardflow/blob/main/hazardflow-designs/src/cpu/mem.rs)): + +- `bypass_from_mem`: Bypassed data from the memory stage. +- `bypass_from_wb`: Bypassed data from the writeback stage. +- `redirect`: Redirection PC. +- `rf`: Register file. + +## Behavior + +Each combinator does the following things: + +**M0** ([`map_resolver_inner`](https://kaist-cp.github.io/hazardflow/docs/hazardflow_designs/std/hazard/struct.I.html#method.map_resolver_inner)): + +- Resolves the branch misprediction based on the branch type and ALU output. +- Constructs the ingress resolver of the execute stage. + + Attaches the bypassed data, stall, and redirection PC for resolving data hazards. + +**M1** ([`reg_fwd`](https://kaist-cp.github.io/hazardflow/docs/hazardflow_designs/std/hazard/struct.I.html#method.reg_fwd)): + +- Creates a pipelined stage before executing the ALU. +- Sends a ready signal which indicates it will be free in the next cycle. + +**M2** ([`map`](https://kaist-cp.github.io/hazardflow/docs/hazardflow_designs/std/hazard/struct.I.html#method.map-1)): + +- Executes the ALU. 
+ +**M3** ([`map_resolver_block_with_p`](https://kaist-cp.github.io/hazardflow/docs/hazardflow_designs/std/hazard/struct.I.html#method.map_resolver_block_with_p)): + +- Attaches the ALU output to the resolver signal for the redirection PC calculation. +- Stalls until the data hazards have been resolved. + +**M4** ([`filter_map_drop_with_r_inner`](https://kaist-cp.github.io/hazardflow/docs/hazardflow_designs/std/hazard/struct.I.html#method.filter_map_drop_with_r_inner)): + +- Attaches the ALU output to the payload. +- Filters out the payload when the redirection happens. + + diff --git a/doc/docs/examples/cpu/fetch.md b/doc/docs/examples/cpu/fetch.md new file mode 100644 index 0000000..b088340 --- /dev/null +++ b/doc/docs/examples/cpu/fetch.md @@ -0,0 +1,84 @@ +# Fetch stage + +The fetch stage mainly does the following things: + +1. Calculates the next PC for the upcoming instruction. +2. Accesses IMEM to retrieve the instruction bytecode. + +It can be decomposed into combinators as follows ([code](https://github.com/kaist-cp/hazardflow/blob/main/hazardflow-designs/src/cpu/fetch.rs)): + +

+ +

+ +## Input and Output + +The IO interface type of the fetch stage is as follows: + +### Ingress + +This is the first stage, so it does not take any ingress interface. + +### Egress + +It returns an egress interface with type `I<VrH<FetEP, DecR>, { Dep::Demanding }>`. + +Each of `FetEP` and `DecR` is defined as a struct with the following fields: + +**FetEP** (in [fetch.rs](https://github.com/kaist-cp/hazardflow/blob/main/hazardflow-designs/src/cpu/fetch.rs)): + +- `imem_resp`: IMEM response which contains the address (PC) and the data (inst bytecode). + +**DecR** (in [decode.rs](https://github.com/kaist-cp/hazardflow/blob/main/hazardflow-designs/src/cpu/decode.rs)): + +- `redirect`: Represents the redirection PC when the control hazard occurs. + +## Behavior + +Each combinator does the following: + +**M0** ([`source_drop`](https://kaist-cp.github.io/hazardflow/docs/hazardflow_designs/std/hazard/struct.I.html#method.source_drop)): + +- Forwards the current IMEM response and the redirection PC from the resolver to the payload. + +**M1** ([`filter_map`](https://kaist-cp.github.io/hazardflow/docs/hazardflow_designs/std/hazard/struct.I.html#method.filter_map-1)): + +- Calculates the next PC based on the incoming payload. + + If the redirection PC exists, goes to it. + + Otherwise, goes to the next sequential address (PC + 4). + +**M2** ([`reg_fwd_with_init`](https://kaist-cp.github.io/hazardflow/docs/hazardflow_designs/std/hazard/struct.I.html#method.reg_fwd_with_init)): + +- Creates a pipelined stage before accessing IMEM by storing the next PC in a register. +- When the circuit is reset, it is initialized with the designated start address (`START_ADDR`). 
+ +**M3** ([`map`](https://kaist-cp.github.io/hazardflow/docs/hazardflow_designs/std/hazard/struct.I.html#method.map-1) + [`comb`](https://kaist-cp.github.io/hazardflow/docs/hazardflow_designs/std/interface/trait.Interface.html#method.comb) + [`map`](https://kaist-cp.github.io/hazardflow/docs/hazardflow_designs/std/hazard/struct.I.html#method.map-1)): + +- Constructs the IMEM request with `map` combinator. +- Accesses the external IMEM module to fetch the instruction bytecode with `comb` combinator. + + We use an asynchronous memory, so it provides the response in the same cycle. + + We use the [`attach_resolver`](https://kaist-cp.github.io/hazardflow/docs/hazardflow_designs/std/valid_ready/fn.attach_resolver.html) module combinator to attach an additional resolver to the IMEM. +- Deconstructs the IMEM response with `map` combinator. + +**M4** ([`map_resolver_drop_with_p`](https://kaist-cp.github.io/hazardflow/docs/hazardflow_designs/std/hazard/struct.I.html#method.map_resolver_drop_with_p)): + +- Attaches the IMEM response to the resolver signal for the next PC calculation. +- Turns on the ready signal when a control hazard occurs to extract the payload from **M2**. + +**M5** ([`filter_map_drop_with_r_inner`](https://kaist-cp.github.io/hazardflow/docs/hazardflow_designs/std/hazard/struct.I.html#method.filter_map_drop_with_r_inner)): + +- Filters out the payload when the redirection happens. + + diff --git a/doc/docs/examples/cpu/hazard.md b/doc/docs/examples/cpu/hazard.md new file mode 100644 index 0000000..ce366eb --- /dev/null +++ b/doc/docs/examples/cpu/hazard.md @@ -0,0 +1,218 @@ +# Hazards + +In this section, we will explain the hazards that can occur in our baseline CPU. + +The hazards can be classified into three types: *[structural](#structural-hazards)*, *[data](#data-hazards)*, and *[control](#control-hazards)*. 
+ +## Structural Hazards + +A structural hazard occurs when multiple pipeline stages need to access the same hardware resource, such as memory, the register file, or the arithmetic logic unit (ALU), in the same clock cycle. +It should be resolved by *stalling* the stage that tries to access the resource later: + +### Solution: Stall + +```armasm +I1: sub x4, x3, x2 # read from x2 requires several cycles in the decode stage +I2: add x4, x3, x2 # needs to wait in the fetch stage +``` + +

+ +

+ +In the example above, suppose that `I1` has to wait several cycles in the decode stage to read the value of `x2` (you can find the example [here](#from-the-load-instruction)). + +As a result, a structural hazard occurs at cycle T+1 because `I2` is ready to move to the decode stage while `I1` still needs to occupy the decode stage. +To resolve this hazard, the decode stage asks `I2` to stall at cycle T+1. +It is usually done by turning off the ready bit in the [valid-ready protocol](../../lang/interface.md#motivation-handshake). + +## Data Hazards + +A data hazard occurs when the processing of a pipeline stage depends on the result of later stages. +It should be resolved by *stalling* the stage if its data dependency is not made available yet; +or *bypassing* the necessary data from later stages in the same clock cycle. + +For instance, a data hazard due to a read-after-write (RAW) dependency in the CPU core is resolved either by stalling the read instruction in the decode stage or by bypassing the result of the write instruction in the later stages to the read instruction in the decode stage. + +### Solution (1/2): Bypassing + +In the decode stage, we need to read the value of source registers. +When the RAW dependency happens, we can *bypass* the values from the later stages to the decode stage. + +#### From the execute stage: + +```armasm +I1: add x3, x2, x1 # write to x3 +I2: sub x5, x4, x3 # read from x3 +``` + +

+ +

+ +At cycle T+2, `I1` can bypass the value of `x3` to the `I2`. + +#### From the memory stage: + +```armasm +I1: add x3, x2, x1 # write to x3 +I2: nop +I3: sub x5, x4, x3 # read from x3 +``` + +

+ +

+ +At cycle T+3, `I1` can bypass the value of `x3` to the `I3`. + + + +#### From the writeback stage: + +```armasm +I1: add x3, x2, x1 # write to x3 +I2: nop +I3: nop +I4: sub x5, x4, x3 # read from x3 +``` + +

+ +

+ +At cycle T+4, `I1` can bypass the value of `x3` to the `I4`. + + + +### Solution (2/2): Stall + +When data from a later stage is not yet ready, bypassing is not possible, and a stall becomes necessary. +In our baseline CPU, there are two sources of stall: load instruction, and CSR instruction. + +#### From the load instruction: + +```armasm +I1: lw x2, 0(x1) # write to x2 +I2: sub x4, x3, x2 # read from x2 +``` + +

+ +

+ +- At cycle T+2, `I2` needs to be stalled at the decode stage because `I1` has not reached the memory yet. +- At cycle T+3, `I1` gets the value of `x2` from the memory and can now bypass the value to `I2`. + + + +#### From the CSR instruction: + + + +```armasm +I1: csrr x2, mcause # write to x2 +I2: bgez x2, 8000003c # read from x2 +``` + +

+ +

+ +In our baseline CPU, the CSR is located in the memory stage. + +- At cycle T+2, `I2` needs to be stalled at the decode stage because `I1` has not reached the CSR yet. +- At cycle T+3, `I1` gets the value of `x2` from the CSR and can now bypass the value to `I2`. + + + +## Control Hazards + +A control hazard occurs when a pipeline stage makes wrong predictions on which instructions to execute in the next clock cycle. +It should be resolved, e.g., when the execute stage detects a misprediction, by *discarding* the mispredicted instructions at the fetch and decode stages and *restarting* from the correct next instruction. + +### Solution: Discarding and Restarting + +In our baseline CPU, there are two sources of control hazards: branch mispredictions and exceptions. + +#### Branch Misprediction + +```armasm +I1: beq x2, x1, target # mispredicted to not taken +I2: add x5, x4, x3 # should be killed +I3: lw x5, 0(x4) # should be killed + +target: +I4: sub x5, x4, x3 # correct target address +``` + +

+ +

+ +At cycle T+1, the fetch stage speculates that `I1`'s next instruction is `I2` so that it is fetched from the instruction memory. +But at cycle T+2, the execute stage deduces that `I1`'s next instruction is in fact `I4`. +As such, the mispredicted instructions `I2` and `I3` are discarded at cycle T+2, and the fetch stage is restarted with the correct next instruction `I4` at cycle T+3. + +#### Exception + +```armasm +I1: unimp # illegal instruction, redirect to trap vector +I2: add x4, x3, x2 # should be killed +I3: sub x4, x3, x2 # should be killed +I4: lw x4, 0(x3) # should be killed + +trap_vector: +I5: csrr x5, mcause # trap handling logic +``` + +

+ +

+ +The illegal instruction `I1` generates the exception and should be redirected to the trap vector to handle the exception. +At cycle T+3, the illegal instruction `I1` reaches the CSR in the memory stage, and it returns the trap vector address. +As such, the mispredicted instructions `I2`, `I3`, and `I4` are discarded at cycle T+3, and the fetch stage is restarted with the correct next instruction `I5` at cycle T+4. + + diff --git a/doc/docs/examples/cpu/implementation.md b/doc/docs/examples/cpu/implementation.md new file mode 100644 index 0000000..b11eae9 --- /dev/null +++ b/doc/docs/examples/cpu/implementation.md @@ -0,0 +1,91 @@ +# Implementation + +With the existence of hazards, the 5-stage pipelined CPU core can be decomposed as follows: + +

+ +

+ +with the following implementation ([riscv32_5stage.rs](https://github.com/kaist-cp/hazardflow/blob/main/hazardflow-designs/src/cpu/riscv32_5stage.rs)): + +```rust,noplayground +const START_ADDR: u32 = 0x80000000; + +fn core( + imem: impl FnOnce(Vr) -> Vr, + dmem: impl FnOnce(Vr) -> Vr, +) { + fetch::<START_ADDR>(imem) + .comb(decode) + .comb(exe) + .comb(move |i| mem(i, dmem)) + .comb(wb) +} +``` + +- `imem` and `dmem` are modules for instruction memory and data memory, respectively. +- We chain the 5 sub-modules `fetch`, `decode`, `exe`, `mem`, and `wb` by using the `comb` method. + +In the following subsections, we will explain the implementation details for each stage. + + diff --git a/doc/docs/examples/cpu/mem.md b/doc/docs/examples/cpu/mem.md new file mode 100644 index 0000000..cb1fef8 --- /dev/null +++ b/doc/docs/examples/cpu/mem.md @@ -0,0 +1,177 @@ +# Memory stage + +The memory stage mainly does the following things: + +1. Accesses memory for load/store instructions. +2. Accesses the CSR for illegal instructions and CSR instructions. + +It can be decomposed into combinators as follows ([code](https://github.com/kaist-cp/hazardflow/blob/main/hazardflow-designs/src/cpu/mem.rs)): + +

+ +

+ +## Input and Output + +The IO interface type of the memory stage is as follows: + +### Ingress + +It takes an ingress interface with type `I<VrH<ExeEP, MemR>, { Dep::Demanding }>`. + +You can find the explanation of `ExeEP` and `MemR` [here](exe.md#egress). + +### Egress + +It returns an egress interface with type `I<VrH<MemEP, WbR>, { Dep::Demanding }>`. + +Each of `MemEP` and `WbR` is defined as a struct with the following fields: + +**MemEP** (in [mem.rs](https://github.com/kaist-cp/hazardflow/blob/main/hazardflow-designs/src/cpu/mem.rs)): + +- `wb_info`: Writeback information which contains the writeback address and data. +- `debug_pc`: PC (for debugging purposes). +- `debug_inst`: Instruction (for debugging purposes). + +**WbR** (in [wb.rs](https://github.com/kaist-cp/hazardflow/blob/main/hazardflow-designs/src/cpu/wb.rs)): + +- `bypass_from_wb`: Bypassed data from the writeback stage. +- `rf`: Register file. + +## Behavior + +Each combinator does the following: + +**M0** ([`map_resolver_inner`](https://kaist-cp.github.io/hazardflow/docs/hazardflow_designs/std/hazard/struct.I.html#method.map_resolver_inner)): + +- Constructs the ingress resolver of the memory stage. + + Attaches the bypassed data and redirection PC for resolving data hazards. + +**M1** ([`reg_fwd`](https://kaist-cp.github.io/hazardflow/docs/hazardflow_designs/std/hazard/struct.I.html#method.reg_fwd)): + +- Creates a pipelined stage before accessing DMEM or CSR. +- Sends a ready signal which indicates it will be free in the next cycle. + +**M2** ([`map`](https://kaist-cp.github.io/hazardflow/docs/hazardflow_designs/std/hazard/struct.I.html#method.map-1) + [`branch`](https://kaist-cp.github.io/hazardflow/docs/hazardflow_designs/std/hazard/struct.I.html#method.branch-14)): + +- Computes the branch selector with `map` combinator. +- Branches the interface into three for accessing different modules (DMEM / CSR / None). 
+ + +**M3** ([`map`](https://kaist-cp.github.io/hazardflow/docs/hazardflow_designs/std/hazard/struct.I.html#method.map-1) + [`comb`](https://kaist-cp.github.io/hazardflow/docs/hazardflow_designs/std/interface/trait.Interface.html#method.comb)): + +- Constructs the DMEM request with `map` combinator. +- Accesses the external DMEM module with `comb` combinator. + + We use an asynchronous memory, so it provides the response in the same cycle. + + We use [`attach_resolver`](https://kaist-cp.github.io/hazardflow/docs/hazardflow_designs/std/valid_ready/fn.attach_resolver.html) and [`attach_payload`](https://kaist-cp.github.io/hazardflow/docs/hazardflow_designs/std/valid_ready/fn.attach_payload.html) to attach an additional resolver/payload to the DMEM. + +**M4** ([`map_resolver_with_p`](https://kaist-cp.github.io/hazardflow/docs/hazardflow_designs/std/hazard/struct.I.html#method.map_resolver_with_p-1) + [`map`](https://kaist-cp.github.io/hazardflow/docs/hazardflow_designs/std/hazard/struct.I.html#method.map-1)): + +- Attaches the DMEM response to the resolver signal for the bypassing data calculation. +- Constructs the memory stage egress payload with `map` combinator. + +**M5** ([`map`](https://kaist-cp.github.io/hazardflow/docs/hazardflow_designs/std/hazard/struct.I.html#method.map-1) + [`comb`](https://kaist-cp.github.io/hazardflow/docs/hazardflow_designs/std/interface/trait.Interface.html#method.comb)): + +- Constructs the CSR request with `map` combinator. +- Accesses the CSR module with `comb` combinator. + + It provides the response in the same cycle. + +**M6** ([`map_resolver_with_p`](https://kaist-cp.github.io/hazardflow/docs/hazardflow_designs/std/hazard/struct.I.html#method.map_resolver_with_p-1) + [`map`](https://kaist-cp.github.io/hazardflow/docs/hazardflow_designs/std/hazard/struct.I.html#method.map-1)): + +- Attaches the CSR response to the resolver signal for the bypassing data calculation. + + It contains the redirection PC when an exception happens. 
+- Constructs the memory stage egress payload with `map` combinator. + +**M7** ([`map_resolver_with_p`](https://kaist-cp.github.io/hazardflow/docs/hazardflow_designs/std/hazard/struct.I.html#method.map_resolver_with_p-1) + [`map`](https://kaist-cp.github.io/hazardflow/docs/hazardflow_designs/std/hazard/struct.I.html#method.map-1)): + +- Directly attaches the payload to the resolver signal for the bypassing data calculation. +- Constructs the memory stage egress payload with `map` combinator. + +**M8** ([`merge`](https://kaist-cp.github.io/hazardflow/docs/hazardflow_designs/std/combinators/merge/trait.MergeExt.html#impl-MergeExt%3CN,+%7B+Dep::Demanding+%7D%3E-for-%5BI%3CAndH%3CH%3E,+D%3E;+N%5D)): + +- Selects one transferable egress interface among **M4** (DMEM), **M6** (CSR), and **M7** (None). + + It is guaranteed to be processed in an in-order manner because the maximum number of concurrent instructions in the memory stage is limited to one. + + diff --git a/doc/docs/examples/cpu/overview.md b/doc/docs/examples/cpu/overview.md new file mode 100644 index 0000000..dcd4238 --- /dev/null +++ b/doc/docs/examples/cpu/overview.md @@ -0,0 +1,110 @@ +# CPU Core (5-Stage Pipelined) + +Our baseline CPU implements the RV32I+Zicsr ISA using a 5-stage pipelined design. +It is based on the [riscv-sodor](https://github.com/ucb-bar/riscv-sodor) processor developed by Berkeley, with slight modifications. + +## Overview: Pipelined Design + +The 5-stage pipelined CPU core would ideally be decomposed as follows: + +

+ +

+ +At a high level, each stage performs the following tasks: + +- **Fetch**: Computes the next PC and fetches the instruction bytecode from memory. + +- **Decode**: Decodes the bytecode and reads the value of source registers. + +- **Execute**: Performs arithmetic and bitwise operations based on the instruction. + +- **Memory**: Accesses memory for load/store instructions. + +- **Writeback**: Writes the result back to the destination register. + +### Example: `lw` instruction + +Let's consider the following program, where the next PC was computed as `0x8000012c` in the fetch stage. + +By the previous `auipc` and `addi` instructions, the value of `x3` becomes `0x80002000`, which is the start address of the data section (we omit the details of `auipc` and `addi` instructions here). + +```text +Disassembly of section .text: + +... +80000124: 00002197 auipc x3, 0x2 # x3 <- 80002124 +80000128: edc18193 addi x3, x3, -292 # x3 <- 80002000 +8000012c: 0040a703 lw x4, 4(x3) # x4 <- mem[x3+4] +... + +Disassembly of section .data: + +... +80002004: deadbeef +... +``` + +Then the `lw` instruction will be processed in each stage as follows: + +- **Fetch**: Accesses memory with the next PC `0x8000012c` and fetches the bytecode `0x0040a703`. + +- **Decode**: + + + Decodes the bytecode `0x0040a703`. It contains the operations in the later stages: + + + In the execute stage, computes the memory access address `x3 + 4`. + + In the memory stage, loads data from the memory with the computed address. + + In the writeback stage, writes the loaded data to the destination register `x4`. + + + Reads the value of the source register `x3` (= `0x80002000`). + +- **Execute**: Computes the memory access address `x3 + 4` (= `0x80002004`). + +- **Memory**: Accesses the memory with address `0x80002004` and gets the data `0xdeadbeef`. + +- **Writeback**: Writes the data `0xdeadbeef` to `x4`. + +--- + +It is quite intuitive, but it becomes more complex due to hazards between pipeline stages. 
+ +We will look deeper into hazards and their handling in the following subsections. + + + + diff --git a/doc/docs/examples/cpu/wb.md b/doc/docs/examples/cpu/wb.md new file mode 100644 index 0000000..f0b517b --- /dev/null +++ b/doc/docs/examples/cpu/wb.md @@ -0,0 +1,58 @@ +# Writeback stage + +The writeback stage mainly does the following things: + +1. Writes the result back to the destination register. + +It can be decomposed into combinators as follows ([code](https://github.com/kaist-cp/hazardflow/blob/main/hazardflow-designs/src/cpu/wb.rs)): + +

+ +

+ +## Input and Output + +The IO interface type of the writeback stage is as follows: + +### Ingress + +It takes an ingress interface with type `I<VrH<MemEP, WbR>, { Dep::Demanding }>`. + +You can find the explanation of `MemEP` and `WbR` [here](mem.md#egress). + +### Egress + +This is the last stage, so it does not return any egress interface. + +## Behavior + +Each combinator does the following: + +**M0** ([`map_resolver_inner`](https://kaist-cp.github.io/hazardflow/docs/hazardflow_designs/std/hazard/struct.I.html#method.map_resolver_inner)): + +- Constructs the ingress resolver of the writeback stage. + + Attaches the bypassed data and register file for resolving data hazards. + +**M1** ([`reg_fwd`](https://kaist-cp.github.io/hazardflow/docs/hazardflow_designs/std/hazard/struct.I.html#method.reg_fwd)): + +- Creates a pipelined stage before accessing the register file. +- Sends a ready signal which indicates it will be free in the next cycle. + +**M2** ([`sink_fsm_map`](https://kaist-cp.github.io/hazardflow/docs/hazardflow_designs/std/hazard/struct.I.html#method.sink_fsm_map)): + +- Updates the register file. +- Attaches the register file to the resolver for reading the values of source registers. + + diff --git a/doc/docs/figure/branch_mis.drawio.svg b/doc/docs/figure/branch_mis.drawio.svg deleted file mode 100644 index 98936bb..0000000 --- a/doc/docs/figure/branch_mis.drawio.svg +++ /dev/null @@ -1,4 +0,0 @@ - - - -
Decode
Execute
Fetch
Memory
Write Back
Cycle 1
Cycle 2
Cycle 3
I1
I1
I2
exe_r
I1
kill, pc_sel
I2
I3
Cycle 4
I4
I1
\ No newline at end of file diff --git a/doc/docs/figure/combinator-fifo-waveform.svg b/doc/docs/figure/combinator-fifo-waveform.svg new file mode 100644 index 0000000..0fa4817 --- /dev/null +++ b/doc/docs/figure/combinator-fifo-waveform.svg @@ -0,0 +1,4870 @@ + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 0 +1 +2 +3 +4 +5 +6 +7 + + + clk + + + + + + + + + + + + + + + + + + + + + + + + + valid + + + + + + + + + + + + + + + + + + + + + payload + + + + + + + + + + + + + + + + + + 0 + 1 + 2 + 3 + 4 + + + + ready + + + + + + + + + + + + + + + + + + + + + state + + + + + + + + + + + + + + + + + + [0] + [1] + [2,1] + [3,2,1] + [3,2] + [4,3] + + + + valid + + + + + + + + + + + + + + + + + + + + + payload + + + + + + + + + + + + + + + + + + 0 + 1 + 2 + 3 + + + + ready + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + fwd + + + + + + bwd + + + + + + ingress + + + + + + fwd + + + + + + bwd + + + + + + egress + + + + + + diff --git a/doc/docs/figure/combinator-fifo.svg b/doc/docs/figure/combinator-fifo.svg new file mode 100644 index 0000000..d9e4dbb --- /dev/null +++ b/doc/docs/figure/combinator-fifo.svg @@ -0,0 +1,245 @@ + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/doc/docs/figure/combinator-fsm-egress-waveform.svg b/doc/docs/figure/combinator-fsm-egress-waveform.svg new file mode 100644 index 0000000..5f89038 --- /dev/null +++ 
b/doc/docs/figure/combinator-fsm-egress-waveform.svg @@ -0,0 +1,4750 @@ + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 0 +1 +2 +3 +4 +5 + + + clk + + + + + + + + + + + + + + + + + + + + + valid + + + + + + + + + + + + + + + + + payload + + + + + + + + + + + + + + 0 + 1 + 2 + + + + ready + + + + + + + + + + + + + + + + + + + + + state + + + + + + + + + + + + + + (0, 0) + (0, 1) + (0, 2) + (1, 0) + (1, 1) + (1, 2) + + + + + + + + valid + + + + + + + + + + + + + + + + + payload + + + + + + + + + + + + + + 0 + 1 + 2 + 1 + 2 + 3 + + + + ready + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + fwd + + + + + + bwd + + + + + + ingress + + + + + + fwd + + + + + + bwd + + + + + + egress + + + + + + diff --git a/doc/docs/figure/combinator-fsm-ingress-waveform.svg b/doc/docs/figure/combinator-fsm-ingress-waveform.svg new file mode 100644 index 0000000..4ba0940 --- /dev/null +++ b/doc/docs/figure/combinator-fsm-ingress-waveform.svg @@ -0,0 +1,4736 @@ + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 0 +1 +2 +3 +4 +5 + + 
+ clk + + + + + + + + + + + + + + + + + + + + + valid + + + + + + + + + + + + + + + + + payload + + + + + + + + + + + + + + 3 + 9 + 5 + 6 + 2 + + + + ready + + + + + + + + + + + + + + + + + + + + + state + + + + + + + + + + + + + + 0, F + 3, F + 12, T + 0, F + 5, F + 11, T + + + + + + + + valid + + + + + + + + + + + + + + + + + payload + + + + + + + + + + + + + + 12 + 11 + + + + ready + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + fwd + + + + + + bwd + + + + + + ingress + + + + + + fwd + + + + + + bwd + + + + + + egress + + + + + + diff --git a/doc/docs/figure/combinator-reg-fwd-waveform.svg b/doc/docs/figure/combinator-reg-fwd-waveform.svg new file mode 100644 index 0000000..5a0543a --- /dev/null +++ b/doc/docs/figure/combinator-reg-fwd-waveform.svg @@ -0,0 +1,4715 @@ + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 0 +1 +2 +3 +4 +5 + + + clk + + + + + + + + + + + + + + + + + + + + + valid + + + + + + + + + + + + + + + + + payload + + + + + + + + + + + + + + 11 + 12 + 13 + 14 + + + + ready + + + + + + + + + + + + + + + + + + + + + state + + + + + + + + + + + + + + 11 + 12 + 13 + + + + + + + + valid + + + + + + + + + + + + + + + + + payload + + + + + + + + + + + + + + 11 + 12 + 13 + + + + ready + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + fwd + + + + + + bwd + + + + + + ingress + + + + + + fwd + + + + + + bwd + + + + + + egress + + + + + + diff --git a/doc/docs/figure/combinator-reg-fwd.svg b/doc/docs/figure/combinator-reg-fwd.svg new file mode 100644 
index 0000000..c35722b --- /dev/null +++ b/doc/docs/figure/combinator-reg-fwd.svg @@ -0,0 +1,245 @@ + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/doc/docs/figure/combinator-sink-waveform.svg b/doc/docs/figure/combinator-sink-waveform.svg new file mode 100644 index 0000000..fbc0c44 --- /dev/null +++ b/doc/docs/figure/combinator-sink-waveform.svg @@ -0,0 +1,4438 @@ + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 0 +1 +2 +3 +4 +5 + + + clk + + + + + + + + + + + + + + + + + + + + + valid + + + + + + + + + + + + + + + + + payload + + + + + + + + + + + + + + 0 + 1 + 2 + 3 + 4 + + + + ready + + + + + + + + + + + + + + + + + inner + + + + + + + + + + + + + + 0 + 1 + 2 + 3 + 4 + + + + + + + + + + + + + + + + + + fwd + + + + + + bwd + + + + + + ingress + + + + + + diff --git a/doc/docs/figure/combinator-sink.svg b/doc/docs/figure/combinator-sink.svg new file mode 100644 index 0000000..ae74286 --- /dev/null +++ b/doc/docs/figure/combinator-sink.svg @@ -0,0 +1,141 @@ + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/doc/docs/figure/combinator-source-waveform.svg b/doc/docs/figure/combinator-source-waveform.svg new file mode 100644 index 0000000..cd3031c --- /dev/null +++ b/doc/docs/figure/combinator-source-waveform.svg @@ -0,0 +1,4438 @@ + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 0 +1 +2 +3 +4 +5 + + + clk + + + + + + + + + + + + + + + + + + + + + valid + + + + + + + + + + + + + + + + + payload + + + + + + + + + + + + + + 0 + 1 + 2 + 3 + 4 + + + + ready + + + + + + + + + + + + + + + + + inner + + + + + + + + + + + + + + 0 + 1 + 2 + 3 + 4 + + + + + + + + + + + + + + + + + + fwd + + + + + + bwd + + + + + + egress + + + + + + diff --git a/doc/docs/figure/combinator-source.svg b/doc/docs/figure/combinator-source.svg new file mode 100644 index 0000000..4422f70 --- /dev/null +++ b/doc/docs/figure/combinator-source.svg @@ -0,0 +1,141 @@ + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/doc/docs/figure/cpu-hazard-control-bp.svg b/doc/docs/figure/cpu-hazard-control-bp.svg new file mode 100644 index 0000000..c1ca25c --- /dev/null +++ b/doc/docs/figure/cpu-hazard-control-bp.svg @@ -0,0 +1,754 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/doc/docs/figure/cpu-hazard-control-exception.svg b/doc/docs/figure/cpu-hazard-control-exception.svg new file mode 100644 index 0000000..bf07e2d --- /dev/null +++ b/doc/docs/figure/cpu-hazard-control-exception.svg @@ -0,0 +1,828 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/doc/docs/figure/cpu-hazard-data-bypassing-execute.svg b/doc/docs/figure/cpu-hazard-data-bypassing-execute.svg new file mode 100644 index 0000000..2306fc8 --- /dev/null +++ b/doc/docs/figure/cpu-hazard-data-bypassing-execute.svg @@ -0,0 +1,357 @@ + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/doc/docs/figure/cpu-hazard-data-bypassing-memory.svg b/doc/docs/figure/cpu-hazard-data-bypassing-memory.svg new file mode 100644 index 0000000..6ee3624 --- /dev/null +++ b/doc/docs/figure/cpu-hazard-data-bypassing-memory.svg @@ -0,0 +1,461 @@ + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/doc/docs/figure/cpu-hazard-data-bypassing-writeback.svg b/doc/docs/figure/cpu-hazard-data-bypassing-writeback.svg new file mode 100644 index 0000000..f5247ae --- /dev/null +++ b/doc/docs/figure/cpu-hazard-data-bypassing-writeback.svg @@ -0,0 +1,557 @@ + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/doc/docs/figure/cpu-hazard-data-csr.svg b/doc/docs/figure/cpu-hazard-data-csr.svg new file mode 100644 index 0000000..cef926f --- /dev/null +++ b/doc/docs/figure/cpu-hazard-data-csr.svg @@ -0,0 +1,674 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + diff --git a/doc/docs/figure/cpu-hazard-data-load-use.svg b/doc/docs/figure/cpu-hazard-data-load-use.svg new file mode 100644 index 0000000..caa187d --- /dev/null +++ b/doc/docs/figure/cpu-hazard-data-load-use.svg @@ -0,0 +1,465 @@ + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/doc/docs/figure/cpu-hazard-structural.svg b/doc/docs/figure/cpu-hazard-structural.svg new file mode 100644 index 0000000..c9648f6 --- /dev/null +++ b/doc/docs/figure/cpu-hazard-structural.svg @@ -0,0 +1,525 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/doc/docs/figure/cpu-implementation-decode.svg b/doc/docs/figure/cpu-implementation-decode.svg new file mode 100644 index 0000000..1ba88e6 --- /dev/null +++ b/doc/docs/figure/cpu-implementation-decode.svg @@ -0,0 +1,515 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/doc/docs/figure/cpu-implementation-exe.svg b/doc/docs/figure/cpu-implementation-exe.svg new file mode 100644 index 0000000..5f252e0 --- /dev/null +++ b/doc/docs/figure/cpu-implementation-exe.svg @@ -0,0 +1,515 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/doc/docs/figure/cpu-implementation-fetch.svg b/doc/docs/figure/cpu-implementation-fetch.svg new file mode 100644 index 0000000..debdf42 --- /dev/null +++ b/doc/docs/figure/cpu-implementation-fetch.svg @@ -0,0 +1,522 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/doc/docs/figure/cpu-implementation-mem.svg b/doc/docs/figure/cpu-implementation-mem.svg new file mode 100644 index 0000000..4afb001 --- /dev/null +++ b/doc/docs/figure/cpu-implementation-mem.svg @@ -0,0 +1,907 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/doc/docs/figure/cpu-implementation-wb.svg b/doc/docs/figure/cpu-implementation-wb.svg new file mode 100644 index 0000000..1f94de7 --- /dev/null +++ b/doc/docs/figure/cpu-implementation-wb.svg @@ -0,0 +1,303 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/doc/docs/figure/cpu-structure-ideal.svg b/doc/docs/figure/cpu-structure-ideal.svg new file mode 100644 index 0000000..ea0c8f4 --- /dev/null +++ b/doc/docs/figure/cpu-structure-ideal.svg @@ -0,0 +1,295 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/doc/docs/figure/cpu-structure.svg b/doc/docs/figure/cpu-structure.svg new file mode 100644 index 0000000..01d57c8 --- /dev/null +++ b/doc/docs/figure/cpu-structure.svg @@ -0,0 +1,511 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/doc/docs/figure/dcache_miss.drawio.svg b/doc/docs/figure/dcache_miss.drawio.svg deleted file mode 100644 index 8a401bf..0000000 --- a/doc/docs/figure/dcache_miss.drawio.svg +++ /dev/null @@ -1,4 +0,0 @@ - - - -
Decode
Execute
Fetch
Memory
Write Back
Cycle 1
Cycle 2
Cycle 3
I1
I1
I2
I1
I2
I3
Cycle 4
I4
mem_r
I1
mem_r
I2
I3
Cycle 5
I4
mem_r
I1
I3
mem_r
I2
Cycle 6
I4
mem_r
I1
I3
mem_r
I2
Cycle 7
I5
I2
I4
I3
I1
\ No newline at end of file diff --git a/doc/docs/figure/dcahce_miss.drawio.svg b/doc/docs/figure/dcahce_miss.drawio.svg deleted file mode 100644 index 54197ba..0000000 --- a/doc/docs/figure/dcahce_miss.drawio.svg +++ /dev/null @@ -1,4 +0,0 @@ - - - -
Decode
Execute
Fetch
Memory
Write Back
Cycle 1
Cycle 2
Cycle 3
I1
I1
I2
I1
I2
I3
Cycle 4
I4
I1
I2
I3
Cycle 5
I4
mem_r
I1
I3
mem_r
I2
Cycle 6
I4
mem_r
I1
I3
mem_r
I2
Cycle 7
I5
I2
I4
I3
I1
\ No newline at end of file diff --git a/doc/docs/figure/fencei.drawio.svg b/doc/docs/figure/fencei.drawio.svg deleted file mode 100644 index 8d290ca..0000000 --- a/doc/docs/figure/fencei.drawio.svg +++ /dev/null @@ -1,4 +0,0 @@ - - - -
Decode
Execute
Fetch
Memory
Write Back
Cycle 1
Cycle 2
Cycle 3
I1
I1
I2
I1
kill, pc_sel
I2
I3
Cycle 4
I3
I1
exe_r
I2
kill, pc_sel
Cycle 5
I3
I2
I1
\ No newline at end of file diff --git a/doc/docs/figure/hazard.svg b/doc/docs/figure/hazard.svg new file mode 100644 index 0000000..e874341 --- /dev/null +++ b/doc/docs/figure/hazard.svg @@ -0,0 +1,276 @@ + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/doc/docs/figure/interface.drawio.svg b/doc/docs/figure/interface.drawio.svg deleted file mode 100644 index f95af91..0000000 --- a/doc/docs/figure/interface.drawio.svg +++ /dev/null @@ -1,4 +0,0 @@ - - - -
Interface
Fwd: Opt<H::P>
Bwd: H::R
fn ready(p: Self::P, r: Self::R) -> bool
\ No newline at end of file diff --git a/doc/docs/figure/load_use_stall.drawio.svg b/doc/docs/figure/load_use_stall.drawio.svg deleted file mode 100644 index e6a5ef2..0000000 --- a/doc/docs/figure/load_use_stall.drawio.svg +++ /dev/null @@ -1,4 +0,0 @@ - - - -
Decode
Execute
Fetch
Memory
Write Back
Cycle 1
Cycle 2
Cycle 3
I1
I1
I2
I1
I2
I3
Cycle 4
I4
I1
exe_r
I2
I3
Cycle 5
I4
I2
I3
I1
\ No newline at end of file diff --git a/doc/docs/figure/pipeline_kill.drawio.svg b/doc/docs/figure/pipeline_kill.drawio.svg deleted file mode 100644 index 5785086..0000000 --- a/doc/docs/figure/pipeline_kill.drawio.svg +++ /dev/null @@ -1,4 +0,0 @@ - - - -
Decode
Execute
Fetch
Memory
Write Back
Cycle 1
Cycle 2
Cycle 3
I1
I1
I2
I1
I2
I3
Cycle 4
I4
mem_r
I1
(exe_r, mem_r)
I2
kill, pc_sel
I3
\ No newline at end of file diff --git a/doc/docs/lang/combinator.md b/doc/docs/lang/combinator.md index b7d628b..6b5ae7c 100644 --- a/doc/docs/lang/combinator.md +++ b/doc/docs/lang/combinator.md @@ -395,7 +395,7 @@ impl Vr<(P, BoundedU)> { * `self` is the ingress interface `Vr<(P, BoundedU)>`. * We can interpret the `BoundedU` as the selector to choose egress interfaces for transferring the ingress payload. -* When the selected egress interface ready signal is `true,`, and also the ingress payload is valid, both ingress transfer and egress transfer happen, else both ingress and egress do not happen. +* When the selected egress interface ready signal is `true`, and also the ingress payload is valid, both ingress transfer and egress transfer happen, else both ingress and egress do not happen. * Ingress payload will only be transferred to the selected egress interface when both ingress and egress transfer happen. * Ingress resolver and all the egress resolvers are `()`. @@ -581,7 +581,6 @@ impl MergeExt for (Vr

, Vr

) { } ``` -* `SelH` wraps `H` with additional selector bit in payload `P = (H::P, BoundedU)`. * This combinator will select the first ingress interface, whose ingress payload is valid, from the array of the ingress interfaces, when the egress interface is ready to receive the payload. The example cycle-level behavior of `merge` is as follows: @@ -655,16 +654,9 @@ Similar to other combinators, register style combinators have other variants too #### `reg_fwd` -This combinator could be a pipeline when the `pipe` parameter is `true`. -* The current state is the valid egress payload. -* The ingress interface is ready to receive a valid payload whenever the current register is empty or the egress transfer happens. -* If the ingress transfer happens, then the register stores the new valid payload as the next state. -* If the egress transfer happens, but the ingress transfer does not happen, then the register will be empty in the next cycle. -* If neither ingress transfer nor egress transfer happens, then the next state stays the same as the current state. -* The only difference between `pipe` is `true` or `false` is the ingress transfer happens only when the current register is empty, delaying one cycle compared to the pipeline. +We can use `reg_fwd` to reduce the clock cycle times. -We demonstrate how to use `reg_fwd` with the ingress interface `Vr

` to reduce the clock cycle times. -Let's assume we have a circuit: +Let's assume we have a circuit with ingress interface `Vr

`:

@@ -679,45 +671,171 @@ If we add a `reg_fwd` combinator between those two `map` combinators. The total clock times is reduced to `Max(lat(f1), lat(f2))`. +

+ +

+ +```rust,noplayground +impl Vr

{ + fn reg_fwd(self, pipe: bool) -> Vr

{ + self.fsm::, HOption

>(None, |ip, er, s| { + let ep = s; + let et = ep.is_some_and(|p| er.ready); + + let ir = if pipe { + Ready::new(s.is_none() || et, (er.inner, s)) + } else { + Ready::new(s.is_none(), (er.inner, s)) + }; + let it = ip.is_some_and(|p| ir.ready); + + let s_next = if it { + ip + } else if et { + None + } else { + s + }; + + (ep, ir, s_next) + }) + } +} +``` + +This combinator could be a pipeline when the `pipe` parameter is `true`. +* The current state is the valid egress payload. +* The ingress interface is ready to receive a valid payload whenever the current register is empty or the egress transfer happens. +* If the ingress transfer happens, then the register stores the new valid payload as the next state. +* If the egress transfer happens, but the ingress transfer does not happen, then the register will be empty in the next cycle. +* If neither ingress transfer nor egress transfer happens, then the next state stays the same as the current state. +* When `pipe` is `false`, the only difference is that the ingress transfer happens only when the current register is empty, which delays the transfer by one cycle compared to the pipelined case. + Let's assume the ingress interface type is `Vr` and we turn on the `pipe`. -The cycle level behavior of `reg_fwd`: -| cycle | ip | er | state (= ep) | ir | -| ----- | ---------- | --- | ------------ | --- | -| 0 | `Some(11)` | `T` | `None` | `T` | -| 1 | `Some(12)` | `T` | `Some(11)` | `T` | -| 2 | `Some(13)` | `F` | `Some(12)` | `F` | -| 3 | `Some(13)` | `T` | `Some(12)` | `T` | -| 4 | `Some(14)` | `T` | `Some(13)` | `T` | +The example cycle-level behavior of `reg_fwd` is as follows: + +

+ +

+ +- Cycle 0, 1, 3, 5: Ingress transfer happens. +- Cycle 1, 2, 5: Egress transfer happens. -- At cycle 0, 1, 3, 4, ingress transfer happens. -- At cycle 1, 3, 4, egress transfer happens. + #### fifo This combinator is a pipelined FIFO queue, it can accept a new element every cycle if the queue is not full. -Let's assume the ingress interface type is `Vr` and queue size is 3. -The cycle level behavior of `fifo`: +

+ +

+ +```rust,noplayground +impl Vr

{ + fn fifo(self) -> Vr

{ + self.fsm::, FifoS>(FifoS::default(), |ip, er, s| { + let FifoS { inner, raddr, waddr, len } = s; + + let empty = len == U::from(0); + let full = len == U::from(N); + + let enq = ip.is_some() && !full; + let deq = er.ready && !empty; + + let ep = if s.len == 0.into_u() { None } else { Some(s.inner[s.raddr]) }; + let ir = Ready::new(!full, ()); + + let inner_next = if enq { inner.set(waddr.resize::<{ clog2(N) }>(), ip.unwrap()) } else { inner }; + let len_next = (len + U::from(enq).resize() - if deq { 1.into_u() } else { 0.into_u() }).resize(); + let raddr_next = if deq { wrapping_inc::<{ clog2(N) }>(raddr, N.into_u()) } else { raddr }; + let waddr_next = if enq { wrapping_inc::<{ clog2(N) }>(waddr, N.into_u()) } else { waddr }; + + let s_next = FifoS { inner: inner_next, raddr: raddr_next, waddr: waddr_next, len: len_next }; + + (ep, ir, s_next) + }) + } +} +``` + +Let's assume the ingress interface type is `Vr` and the capacity is 3. -| cycle | ip | er | state | ep | ir | -| ----- | --------- | --- | ----------------------------- | --------- | --- | -| 0 | `Some(0)` | `T` | `init_state (input): empty` | `None` | `T` | -| 1 | `Some(1)` | `T` | `[Some(0)]` | `Some(0)` | `T` | -| 2 | `Some(2)` | `F` | `[Some(1)]` | `Some(1)` | `T` | -| 3 | `Some(3)` | `F` | `[Some(2), Some(1)]` | `Some(1)` | `T` | -| 4 | `Some(4)` | `F` | `[Some(3), Some(2), Some(1)]` | `Some(1)` | `F` | -| 5 | `Some(4)` | `T` | `[Some(3), Some(2), Some(1)]` | `Some(1)` | `F` | -| 6 | `Some(4)` | `T` | `[Some(3), Some(2)]` | `Some(2)` | `T` | -| 7 | `Some(5)` | `T` | `[Some(4), Some(3)]` | `Some(3)` | `T` | +The example cycle-level behavior of `fifo` is as follows: -- At cycle 0, 1, 2, 3, 6, 7, ingress transfer happens. -- At cycle 1, 5, 6, 7, egress transfer happens. +

+ +

+ + - The ingress ready signal is true as long as the queue is not full. -- At `T5`, even though the egress transfer happens, the ingress transfer does not happen since the queue is still full. -- The ingress transfer happens again at `T6` since the queue is not full anymore. -- The state is updated in the next clock cycle. +- Pipeline is disabled for the FIFO queue. +- Cycle 0, 1, 2, 3, 6: Ingress transfer happens. +- Cycle 1, 5, 6, 7: Egress transfer happens. ### Source and sink @@ -730,12 +848,16 @@ We demonstrate the two most representative combinators: `source` and `sink`. This combinator immediately returns the data to the payload when the data is coming from resolver. The `source` combinator only has the egress interface. +

+ +

+ ```rust,noplayground impl I, { Dep::Demanding }> { fn source() -> I, { Dep::Demanding }> { ().fsm::, { Dep::Demanding }>, ()>((), |_, er, _| { - let ip = if er.ready { Some(er.inner) } else { None }; - (ip, (), ()) + let ep = if er.ready { Some(er.inner) } else { None }; + (ep, (), ()) }) } } @@ -745,24 +867,89 @@ impl I, { Dep::Demanding }> { - The egress transfer happens as long as the egress resolver ready signal is true. - It transfer the resolver to the payload within the same clock cycle with egress transfer happens. +When `P` is `u32`, the example cycle-level behavior of `source` is as follows: + +

+ +

+ + + +- Cycle 0, 1, 2, 4, 5: Egress transfer happens. +- Cycle 3: Egress transfer does not happen because egress ready signal is `false`. + #### sink This combinator maintains a state and calculates the ingress resolver with `f`, which takes the current state and ingress payload as inputs. The `sink` combinator only has the ingress interface. +

+ +

+ + ```rust,noplayground -impl Vr

{ +impl I>, { Dep::Helpful }> { fn sink(self) { self.fsm::<(), ()>((), |ip, _, _| { - let ir = Ready::valid(()); + let ir = Ready::valid(ip); ((), ir, ()) }) } } ``` +When `P` is `u32`, the example cycle-level behavior of `sink` is as follows: + +

+ +

+ + + - The ingress resolver is calculated every clock cycle. -- The state is updated only when ingress transfer happens. ### FSM @@ -777,67 +964,138 @@ Since they have more complex behavior, we demonstrate their usage here. #### `fsm_ingress` -This combinator allows you to accumulate successive ingress payloads into an internal FSM state, then output the -resulting state once it is ready. +It allows you to accumulate successive ingress payloads into an internal FSM state, then output the resulting state once it is ready. After the resulting state is transferred, the FSM is reset and starts accumulating again. ```rust,noplayground -// @zhao: please fill out this. -fn fsm_ingress(self, init: S, f: impl Fn(P, R, S) -> (S, bool)) -> I, { Dep::Helpful }> +impl Vr

{ + fn fsm_ingress(self, init: S, f: impl Fn(P, S) -> (S, bool)) -> Vr { + self.fsm::, (S, bool)>((init, false), |ip, er, (s, done)| { + let ir = Ready::new(!done, er.inner); + + let it = ip.is_some() && !done; + let et = er.ready && done; + + let ep = if done { Some(s) } else { None }; + + let s_next = if it { + f(ip.unwrap(), s) + } else if et { + (init, false) + } else { + (s, done) + }; + + (ep, ir, s_next) + }) + } +} ``` -* `self` is the ingress interface `I, D>`. -* `init` is the initial state for the FSM. -* `f` takes the ingress payload, the egress resolver, and the current FSM state as parameters. It returns the next state and whether the FSM is done. -* The combinator outputs the resulting FSM state after the FSM is done. +It takes the initial state and the combinational logic that calculates the next state and whether the FSM is done. + +For example, let's consider the following `sum_until_10` function. -Let's assume there is an interface `input` which outputs a number that increases by 1 each transfer. -If we want to sum 3 consecutive numbers and output the results (conceptually, [0, 1, 2, 3, 4, 5, ...] -> [3, 12, ...]), we can utilize `fsm_ingress`. +It accumulates the input numbers until the sum becomes greater than or equal to 10, and then outputs the result. 
```rust,noplayground -fn sum_3(input: Vr) -> Vr { +fn sum_until_10(input: Vr) -> Vr { input - .fsm_ingress((0, 0), |ip, _, (count, sum)| { - let count_next = count + 1; + .fsm_ingress(0, |ip, _, sum| { let sum_next = sum + ip; - let done_next = count_next == 3; - ((count_next, sum_next), done_next) + let done_next = sum_next >= 10; + + (sum_next, done_next) }) - .map(|(_, sum)| sum) } ``` -The cycle level behavior of `fsm_ingress` in `sum_3`: - -| cycle | ip | er | state | ep | ir | -| ----- | --------- | --- | -------------- | --------------- | --- | -| 0 | `Some(0)` | `T` | `((0, 0), F)` | `None` | `T` | -| 1 | `Some(1)` | `T` | `((1, 0), F)` | `None` | `T` | -| 2 | `Some(2)` | `T` | `((2, 1), F)` | `None` | `T` | -| 3 | `Some(3)` | `T` | `((3, 3), T)` | `Some((3, 3))` | `F` | -| 4 | `Some(3)` | `T` | `((0, 0), F)` | `None` | `T` | -| 5 | `Some(4)` | `T` | `((1, 3), F)` | `None` | `T` | -| 6 | `Some(5)` | `T` | `((2, 7), F)` | `None` | `T` | -| 7 | `Some(6)` | `T` | `((3, 12), T)` | `Some((3, 12))` | `F` | - -- At cycle 0-2, ingress transfer happens. -- At cycle 3, egress transfer happens. -- At cycle 4-6, ingress transfer happens. -- At cycle 7, egress transfer happens. - -* From T0 to T2, the combinator is accumulating. -* At T2, `f` returns true for `done_next`, signaling that the FSM is done. -* At T3, the combinator outputs the accumulated result `(3, 3)` and an egress transfer happens. -* The FSM is reset and the same thing repeats for T4-T7, transferring `(3, 12)` at T7. +The example cycle-level behavior of `fsm_ingress` is as follows: + +

+ +

+ + + +- At cycle 0-1 and cycle 3-4, ingress transfer happens (accumulates the input). +- At cycle 2 and cycle 5, egress transfer happens (outputs the result). #### `fsm_egress` -This combinator runs an FSM for each transferred ingress payload, allowing you to process an ingress payload using multiple FSM states. +It runs an FSM for each transferred ingress payload, allowing you to process an ingress payload using multiple FSM states. Only after the FSM is finished, the combinator can accept a new ingress payload. ```rust,noplayground -// @zhao: please fill out this. -fn fsm_egress(self, init: S, flow: bool, f: impl Fn(P, S) -> (EP, S, bool)) -> I, D> +impl Vr

{ + fn fsm_egress(self, init: S, flow: bool, f: impl Fn(P, S) -> (EP, S, bool)) -> Vr { + self.fsm::, (HOption

, S)>((None, init), |ip, er, (sp, s)| { + let (ep, s_next, is_last) = if let Some(p) = sp { + let (ep, s_next, is_last) = f(p, s); + (Some(ep), s_next, is_last) + } else if flow && ip.is_some() && sp.is_none() { + let (ep, s_next, is_last) = f(ip.unwrap(), s); + (Some(ep), s_next, is_last) + } else { + (None, s, false) + }; + + let et = ep.is_some() && er.ready; + let ir = Ready::new(sp.is_none() || (et && is_last), ()); + let it = ip.is_some() && ir.ready; + + let (sp_next, s_next) = if flow && it && et && sp.is_none() { + if is_last { + (None, init) + } else { + (ip, s_next) + } + } else if it { + (ip, init) + } else if et && is_last { + (None, init) + } else if et { + (sp, s_next) + } else { + (sp, s) + }; + + (ep, ir, (sp_next, s_next)) + }) + } +} ``` * `self` is the ingress interface `I, D>`. @@ -845,8 +1103,9 @@ fn fsm_egress(self, init: S, flow: bool, f: impl Fn(P, S) -> * `flow` determines whether the FSM starts immediately or from the next cycle of an ingress transfer. * `f` takes the current saved ingress payload and the current FSM state. It returns an egress payload, the next state, and whether this is the last state for the FSM. -Let's use the same interface `increasing` from the `fsm_ingress` example. -If we want to output 3 consecutive numbers starting from each number (conceptually, [0, 1, ...] -> [0, 1, 2, 1, 2, 3, ...]), we can utilize `fsm_egress`. +For example, let's consider the following `consecutive_3` example. + +It outputs 3 consecutive numbers starting from each input number. ```rust,noplayground fn consecutive_3(input: Vr) -> Vr { @@ -859,8 +1118,47 @@ fn consecutive_3(input: Vr) -> Vr { } ``` -The cycle level behavior of `fsm_egress` in `consecutive_3`: +The example cycle-level behavior of `fsm_egress` is as follows: +

+ +

+ + + + -- At cycle 0, both ingress and egress transfer happens. -- At cycle 1, egress transfer happens. -- At cycle 2, both ingress and egress transfer happens. -- At cycle 3-4, egress transfer happens. -- At cycle 5, both ingress and egress transfer happens. + * At T0, an ingress transfer happens and an FSM is started immediately (since `flow` is true). * The ingress payload `Some(0)` is saved to `sp`, and will be available from the next cycle. diff --git a/doc/docs/lang/interface.md b/doc/docs/lang/interface.md index 5503fe0..eaa0a60 100644 --- a/doc/docs/lang/interface.md +++ b/doc/docs/lang/interface.md @@ -64,13 +64,11 @@ pub trait Hazard { } ``` -For any hazard type `H`, its member type and functions has the following meaning: +For any hazard type `H`, its member type and functions have the following meaning: - `H::P`: Payload signal type. - `H::R`: Resolver signal type. -- `H::ready`: Indicates whether the receiver is ready to receive with current pair of payload and resolver. - - +- `H::ready`: Returns if the receiver is ready to receive with the current payload and resolver pair. ### Examples @@ -82,9 +80,9 @@ We provide a few handy primitive hazard interfaces for developers.

-`ValidH` represents a communication without backpressure. +`ValidH` represents a communication without backpressure (always ready to receive). -It always ready to receive the payload, it has the following specification: +It has the following specification: ```rust,noplayground struct ValidH; @@ -99,9 +97,7 @@ impl Hazard for ValidH { } ``` -For reusability, we allow `ValidH` to have resolver signals that simply flow from the receiver to the sender. - - +For reusability, we added additional resolver signals that simply flow from the receiver to the sender. #### `AndH` @@ -141,14 +137,14 @@ The `ready` field of the `Ready` struct represents the availability of the recei

-We define the valid-ready `VrH` specification as `AndH>`. - -For reusability, we allow `VrH` to have resolver signals that simply flow from the receiver to the sender. +We defined the valid-ready hazard `VrH` as `AndH>`. ```rust,noplayground type VrH = AndH>; ``` +For reusability, we added additional resolver signals that simply flow from the receiver to the sender. + +Typically, a single interface is composed of zero, one, or multiple hazard interfaces. ### Specification @@ -173,17 +167,20 @@ pub trait Interface { } ``` -For any interface type `I`, its member type has the following meaning: +For any interface type `I`, its member types have the following meaning: - `I::Fwd`: Forward signal type. - `I::Bwd`: Backward signal type. -It contains the other functions related to the [combinator](./combinator.md) and [module](./module.md), please refer to these sections for further reading. +It contains the other methods related to the [module](./module.md) and [combinator](./combinator.md), please refer to these sections for further reading. + + ### Hazard Interface

- +

For an arbitrary hazard specification `H`, we define the hazard interface `I`, where `D` is the dependency type. (For more information about the dependency, please refer to the [dependency section](../advanced/dependency.md)) @@ -215,7 +212,7 @@ type Vr

= I, { Dep::Helpful }>; Compound types such as tuple, struct, and array also implement the `Interface` trait. -These interfaces are commonly used for IO of "1-to-N" or "N-to-1" modules. +These interfaces are commonly used for IO of [1-to-N](combinator.md#1-to-n) or [N-to-1](combinator.md#n-to-1) combinators. #### Tuple @@ -229,8 +226,8 @@ impl Interface for (If1, If2) { } ``` -- The forward signal of the array interface is the tuple of the interface's forward signal. -- The backward signal of the array interface is the tuple of the interface's backward signal. +- The forward signal of the tuple interface is the tuple of the interfaces' forward signals. +- The backward signal of the tuple interface is the tuple of the interfaces' backward signals. #### Struct @@ -242,7 +239,7 @@ struct StructIf { } ``` -By applying the `Interface` derive macro to a struct in which all fields are of interface type, the struct itself can also become an interface type. +By applying the `Interface` derive macro to a struct in which all fields are interface types, the struct itself can also become an interface type. #### Array @@ -255,5 +252,5 @@ impl Interface for [If; N] { } ``` -- The forward signal of the array interface is the array of the interface's forward signal. -- The backward signal of the array interface is the array of the interface's backward signal. +- The forward signal of the array interface is the array of the interfaces' forward signals. +- The backward signal of the array interface is the array of the interfaces' backward signals. diff --git a/doc/docs/lang/module.md b/doc/docs/lang/module.md index c5b9ab5..7d14709 100644 --- a/doc/docs/lang/module.md +++ b/doc/docs/lang/module.md @@ -15,14 +15,14 @@ We can construct a module as a class of interface combinators. Please refer to t The `comb` method within the interface trait is used to combine the black box module to the given interface and return the egress interface. 
```rust,noplayground fn comb(self, m: impl FnOnce(Self) -> E) -> E { - m(self) + m(self) } ``` - Applying the given interface to the module is essentially applying the module function `m` to the ingress interface. - It is useful when we want to combine multiple modules together. -In the CPU core, we can combine the multiple stage modules by using `comb`. +For example, we can combine multiple stage modules with `comb` in the CPU core. ```rust,noplayground pub fn core( @@ -40,7 +40,21 @@ pub fn core( - `imem` and `dmem` are modules for instruction memory and data memory, respectively. - We chain the 5 sub-modules `fetch`, `decode`, `exe`, `mem`, and `wb` by using the `comb` method. -TODO: more module combinators @minseong +## Module Combinators + +We provide some handy module combinators that manipulates modules. + +### `from_fn` + +TODO(@minseong) + +### `seq` + +TODO(@minseong) + +### `flip` + +TODO(@minseong) diff --git a/doc/docs/tutorial/fir_filter.md b/doc/docs/tutorial/fir_filter.md index 121ece4..cb6cee8 100644 --- a/doc/docs/tutorial/fir_filter.md +++ b/doc/docs/tutorial/fir_filter.md @@ -26,7 +26,7 @@ For example, the IO signals of a FIR filter of order 2 with coefficients [4, 2, | 4 | 7 | 4·7 + 2·2 + 3·3 = 41 | | 5 | 0 | 4·0 + 2·7 + 3·2 = 20 | -For more details, please consult [Wikipedia](https://en.wikipedia.org/wiki/Finite_impulse_response). +For more details, please refer to [Wikipedia](https://en.wikipedia.org/wiki/Finite_impulse_response). ## Modular Design @@ -65,7 +65,7 @@ fn fir_filter(input: Valid) -> Valid { ``` We can describe the FIR filter with `window`, `weight`, and `sum` combinators in the HazardFlow HDL and we assume the input interface `Valid` is provided. -`Valid` is a **valid hazard interface**, its payload is `Opt`, the resolver is empty `()`, and its `ready` function always returns `true`. +`Valid` is a **valid interface**, its payload is `Opt`, the resolver is empty `()`, and its `ready` function always returns `true`. 
In other words, as long as the input interface's forward signal is `Some(u32)` at a specific clock cycle, the receiver receives a valid payload. We can interpret this input interface as a stream of signal values flowing through the wires. diff --git a/doc/docs/tutorial/masked_merge.md b/doc/docs/tutorial/masked_merge.md index 8ddbb32..90d2faa 100644 --- a/doc/docs/tutorial/masked_merge.md +++ b/doc/docs/tutorial/masked_merge.md @@ -12,9 +12,9 @@ The `masked_merge` combinator takes `N` valid-ready `Vr

` interfaces as the in * We can think of a valid-ready Interface as a valid interface `Valid

` with an extra ready signal (Boolean value) in its resolver. * The transfer happens only when the payload is `Some(P)`, and the ready signal in the resolver is `true`. * We can represent the ingress interface type as `[Vr; N]`. -* For more information about the valid-ready interface please refer to the [valid-ready interface](../lang/interface.md#valid-ready). +* For more information about the valid-ready interface please refer to the [valid-ready interface](../lang/interface.md#vrh). * For more information about the compound interface type, please refer to the [compound interface section](../lang/interface.md#compound-interface). -The Masked Merge combinator egress interface is also a valid-ready interface `I`. +The Masked Merge combinator egress interface is also a valid-ready hazard interface `I`. * We define the egress hazard as `type EH = VrH<(P, U<{ clog2(N) }>), Array>`. * The payload type is a tuple type. * `Opt

` contains the real data we want to send through the wires. @@ -72,9 +72,9 @@ The FIFO Queue egress interface is a simple valid-ready interface `Vr

`. * We use `u32` as the actual payload type for demonstrating a more concrete example. * We also set the number of ingress interfaces as 5, the same as the queue size. * `fifo_s.inner` is the inner elements of the queue. -* We `fold` the queue: - * The initializer is a Boolean array with all elements as `false` of size 5. - * The index of the array indicates the index of the ingress interfaces. +* We `fold` the inner elements of the queue: + * The initializer is a Boolean array with all elements as `false` of size 5. + * The index of the initializer array indicates the index of the ingress interfaces. * We iterate through all the elements within the queue and set the accumulator's value at the index in each queue element to `true`. * The final result indicates which ingress interfaces are present in the current queue. * We send back this resolver to the Masked Merge combinator to make decision for choosing the next ingress interface. @@ -95,6 +95,6 @@ pub fn m(ingress: [Vr; 5]) -> Vr { } ``` -You can find the implementation in [masked_merge.rs](https://github.com/kaist-cp/hazardflow/blob/main/hazardflow-designs/src/examples/masked_merge.rs). +You can find the full implementation in [masked_merge.rs](https://github.com/kaist-cp/hazardflow/blob/main/hazardflow-designs/src/examples/masked_merge.rs). Congratulations! You finished the tutorial! diff --git a/doc/docs/tutorial/tutorial.md b/doc/docs/tutorial/tutorial.md index 7fc1f72..91e6dd2 100644 --- a/doc/docs/tutorial/tutorial.md +++ b/doc/docs/tutorial/tutorial.md @@ -10,8 +10,8 @@ For more details please refer to the [Language Reference](./reference.md) sectio ### Hazard Interface -In HazardFlow HDL, we define the hazard as a protocol including the payload, resolver, and transfer condition. -We model the hazard interface as a structure containing its forward signal, backward signal, and the transfer condition specified by its hazard. 
+In HazardFlow HDL, we define the hazard as a protocol including the payload, resolver, and ready condition. +We model the hazard interface as a structure containing its forward signal, backward signal, and the ready condition specified by its hazard. We define the hazard protocol as `H`, its payload and resolver as `H::P` and `H::R` respectively.

@@ -28,11 +28,11 @@ We define the hazard protocol as `H`, its payload and resolver as `H::P` and `H: * It represents the hazard generated by the later stage from the circuits and it will be sent from the receiver to the sender. * We define the resolver signal as `H::R` -**Transfer condition:** +**Ready condition:** * Indicates whether the receiver is ready to receive the payload. -* We define the transfer condition as `fn ready(p: Self::P, r: Self::R) -> bool;` where the `p` is the payload and `r` is the resolver. -* When the forward signal is `Some(p)`, which means the forward signal is valid, **and** the transfer condition returns `true`, which means the receiver is ready to receive the valid payload, then the transfer happens. -* When the forward signal is `None`, which means the forward signal is invalid, **or** the transfer condition returns `false`, which means the receiver is not ready to receive the valid payload, then the transfer is not happening. +* We define the ready condition as `fn ready(p: Self::P, r: Self::R) -> bool;` where the `p` is the payload and `r` is the resolver. +* When the forward signal is `Some(p)`, which means the forward signal is valid, **and** the ready condition returns `true`, which means the receiver is ready to receive the valid payload, then the transfer happens. +* When the forward signal is `None`, which means the forward signal is invalid, **or** the ready condition returns `false`, which means the receiver is not ready to receive the valid payload, then the transfer is not happening. 
### Combinator diff --git a/hazardflow-designs/src/cpu/exe.rs b/hazardflow-designs/src/cpu/exe.rs index 26f5cdc..21eceea 100644 --- a/hazardflow-designs/src/cpu/exe.rs +++ b/hazardflow-designs/src/cpu/exe.rs @@ -148,5 +148,5 @@ pub fn exe(i: I, { Dep::Demanding }>) -> I, { AluOp::Mext(_) => todo!("assignment 3"), }) .map_resolver_block_with_p::>(|ip, er| (ip, er.inner)) - .filter_map_drop_with_r(|(ip, alu_out), er| gen_payload(ip, alu_out, er.inner)) + .filter_map_drop_with_r_inner(|(ip, alu_out), er| gen_payload(ip, alu_out, er)) } diff --git a/hazardflow-designs/src/cpu/mem.rs b/hazardflow-designs/src/cpu/mem.rs index 9f31e4a..e40affc 100644 --- a/hazardflow-designs/src/cpu/mem.rs +++ b/hazardflow-designs/src/cpu/mem.rs @@ -89,10 +89,10 @@ pub fn mem( dmem: impl FnOnce(Vr) -> Vr, ) -> I, { Dep::Demanding }> { let exep = i - .reg_fwd(true) .map_resolver_inner::<(HOption<(MemRespWithAddr, ExeEP)>, HOption<(CsrResp, ExeEP)>, (HOption, WbR))>( gen_resolver, - ); + ) + .reg_fwd(true); let (dmem_req, csr_req, exep) = exep .map(|p| { @@ -121,7 +121,11 @@ pub fn mem( }) .comb(attach_resolver(attach_payload(dmem))) .map_resolver_with_p::(|ip, _| ip) - .map(|ip| (Some(ip), None, None)); + .map(|(dmem_resp, ip)| MemEP { + wb_info: ip.wb_info.map(|(addr, _)| Register::new(addr, dmem_resp.data)), + debug_inst: ip.debug_inst, + debug_pc: ip.pc, + }); let csr_resp = csr_req .map(|ip| { @@ -133,15 +137,17 @@ pub fn mem( }) .comb(csr_wrap) .map_resolver_with_p::(|ip, _| ip) - .map(|ip| (None, Some(ip), None)); - - let exep = exep.map_resolver_with_p::(|ip, er| (ip, er.inner)).map(|ip| (None, None, Some(ip))); - - [dmem_resp, csr_resp, exep].merge().map(|(mem_resp, csr_resp, exep)| { - let exep = mem_resp.map(|(_, p)| p).or(csr_resp.map(|(_, p)| p)).or(exep).unwrap(); - let mem_resp = mem_resp.map(|(p, _)| p); - let csr_resp = csr_resp.map(|(p, _)| p); - - MemEP { wb_info: get_wb(exep, mem_resp, csr_resp), debug_inst: exep.debug_inst, debug_pc: exep.pc } - }) + .map(|(csr_resp, 
ip)| MemEP { + wb_info: ip.wb_info.map(|(addr, _)| Register::new(addr, csr_resp.rdata)), + debug_inst: ip.debug_inst, + debug_pc: ip.pc, + }); + + let exep = exep.map_resolver_with_p::(|ip, er| (ip, er.inner)).map(|ip| MemEP { + wb_info: ip.wb_info.map(|(addr, _)| Register::new(addr, ip.alu_out)), + debug_inst: ip.debug_inst, + debug_pc: ip.pc, + }); + + [dmem_resp, csr_resp, exep].merge() }