// ************************************************************************** //
//                                                                            //
//    eses                   eses                                             //
//   eses                     eses                                            //
//  eses    eseses  esesese    eses   Embedded Systems Group                  //
//  ese    ese  ese ese         ese                                           //
//  ese    eseseses eseseses    ese   Department of Computer Science          //
//  eses   eses          ese   eses                                           //
//   eses   eseses  eseseses  eses    University of Kaiserslautern            //
//    eses                   eses                                             //
//                                                                            //
// ************************************************************************** //
// The following module contains an erroneous processor pipeline that suffers //
// from various RAW and control conflicts. It is used for teaching purposes   //
// to show the potential errors due to pipeline conflicts.                    //
// Even though the pipeline constructed here ignores RAW conflicts between the//
// instructions inside the pipeline, it already considers the information flow//
// from instruction fetch to write back in that variables that are written in //
// one stage and used in later stages are forwarded to the later stages. To   //
// this end, one analyzes which variables are read and written by the modules //
// of the structural implementation given in ScalarHW (we ignore variables    //
// used for the memory transaction):                                          //
//                                                                            //
//    Decode:                                                                 //
//        read:   instr,Reg,overflw                                           //
//        write:  opc,fnc,adr,rd,opS,opL,opR                                  //
//    Execute                                                                 //  
//        read:   opc,fnc,opL,opR                                             //
//        write:  AluRes,CondRes                                              //
//    MemAccess                                                               //
//        read:   opc,fnc,opS,AluRes                                          //
//        write:  LoadRes                                                     //
//    WriteBack                                                               //
//        read:   opc,fnc,adr,rd,AluRes,LoadRes,CondRes                       //
//        write:  pc,Reg,overflw                                              //
//                                                                            //
// We therefore obtain the following lifetimes of the variables:              //
//                                                                            //
//    +------------------------------------+                                  //
//    | variable    generated   last used  |                                  //
//    |------------------------------------|                                  //
//    | opc         Decode      WriteBack  |                                  //
//    | fnc         Decode      WriteBack  |                                  //
//    | adr         Decode      WriteBack  |                                  //
//    | rd          Decode      WriteBack  |                                  //
//    | opS         Decode      MemAccess  |                                  //
//    | opL         Decode      Execute    |                                  //
//    | opR         Decode      Execute    |                                  //
//    | AluRes      Execute     WriteBack  |                                  //
//    | CondRes     Execute     WriteBack  |                                  //
//    | LoadRes     MemAccess   WriteBack  |                                  //
//    | Reg         WriteBack   --         |                                  //
//    | overflw     WriteBack   --         |                                  //
//    +------------------------------------+                                  //
//                                                                            //
// The implementation of the pipeline will therefore use additional copies of //
// the variables e.g. opc_ID, opc_EX, opc_MA, opc_WB to forward the values to // 
// the next pipeline stages (instead of AluRes,CondRes, and LoadRes, we use   //
// the shorter names alu_EX,alu_MA,alu_WB,cnd_EX,cnd_MA,cnd_WB,ld_MA,ld_WB):  //
//                                                                            //
//   +------+    +-------+    +--------+    +--------+    +--------+          //
//   |pc    |    |instr  |    | opc_EX |    | opc_MA |    | opc_WB |          //
//   |Mem[0]|    |overflw|    | fnc_EX |    | fnc_MA |    | fnc_WB |          //
//   |Mem[1]| IF |Reg[0] | ID | adr_EX | EX | adr_MA | MA | adr_WB | WB       //
//   |  :   |--->|   :   |--->|  rd_EX |--->|  rd_MA |--->|  rd_WB |---+      //
//   |      |    |Reg[7] |    | opS_EX |    | opS_MA |    | alu_WB |   |      //
//   |      |    |       |    | opL_EX |    | alu_MA |    | cnd_WB |   |      //
//   |      |    |       |    | opR_EX |    | cnd_MA |    |  ld_WB |   |      //
//   +------+    +-------+    +--------+    +--------+    +--------+   |      //
//      ^            ^        | alu_EX |    |  ld_MA |                 |      //
//      |            |        | cnd_EX |    +--------+                 |      //
//      |            |        +--------+                               |      //
//      |            |                                                 |      //
//      +------------+-------------------------------------------------+      //
//                                                                            //
// The blocks are thereby viewed as registers where the predecessor stage     //
// writes to (with a delayed write) and where the successor stage immediately //
// reads from. As can be seen, we also use variables alu_EX and cnd_EX which  //
// are immediately written by EX, so that result forwarding from EX to ID     //
// becomes possible. Similarly, we let MA immediately write a value ld_MA to  //
// forward the latter to ID one step earlier. Even though these variables are //
// written in EX and MA, respectively, we put them in the registers in front  //
// of these stages due to the immediate writes.                               //
//  The resulting pipeline has the following conflicts:                       //
//  (1) InstructionFetch reads the pc, which is written only in the WB phase. //
//  (2) InstructionDecode reads the register file which may be updated by WB. //
//      The same holds for the special register overflw.                      //
// Note that InstructionFetch also reads the main memory, but we distinguish  //
// between program and data memory, and therefore, there is no conflict. Since//
// the data memory is both read and written only by MA, there is also no      //
// conflict on the data memory.                                               //
// ************************************************************************** //

// Bit-width of the scalar registers. The same width is used in this file for
// the data bus, the ALU operands and the stored/loaded values; ALU results
// are declared twice as wide (2*DataWidth).
macro DataWidth =  8; // bit-width of registers


// PipeWithConflicts: processor pipeline that deliberately ignores RAW and
// control conflicts (teaching example, see the file header).
// After initialising pc, the module loops forever: each iteration waits one
// step and then invokes the stage modules Decode, Execute, MemAccess and
// WriteBack in program order. The stages exchange data only through the local
// *_ID/*_EX/*_MA/*_WB variables declared below; no stalling or conflict
// detection is visible in this module. Instruction fetch is not performed
// here: the instruction word is supplied from outside through `instr`.
// NOTE(review): the stage modules are defined elsewhere; the read/write roles
// given in the comments below are taken from the file header, not from their
// definitions -- confirm against the actual module interfaces.
module PipeWithConflicts (
    bv{16} ?instr,               // instruction to be performed now    
    nat pc,                      // program counter
    event nat !adrBus,           // address for memory access
    event bv{DataWidth} dataBus, // data for memory access
    event readMem,writeMem,      // whether data is read or written to memory
    event reqMem,ackMem,doneMem  // signals for memory transaction
    )
{
    // ----------------------------------------------------------------------
    // scalar registers
    // ----------------------------------------------------------------------    
    [8]bv{DataWidth} Reg;       // scalar registers
    bv{DataWidth} overflw;      // overflw register (completing ALU result)
    // ----------------------------------------------------------------------
    // auxiliary local variables
    // The suffix names the pipeline register block a copy belongs to
    // (see the diagram in the file header): x_EX is the copy of x in the
    // block read by Execute, x_MA the one read by MemAccess, x_WB the one
    // read by WriteBack. opc_ID is only passed to Decode in this module.
    // ----------------------------------------------------------------------    
    bv{6}  opc_ID,opc_EX,opc_MA,opc_WB;     // opcodes of instructions
    bv{7}  fnc_EX,fnc_MA,fnc_WB;            // function codes or 7-bit constant
    bv{10} adr_EX,adr_MA,adr_WB;            // jump addresses of J-type instruction
    nat{8} rd_EX, rd_MA, rd_WB;             // destination registers
    bv{DataWidth} opS_EX,opS_MA;            // values to be stored
    bv{DataWidth} opL_EX,opR_EX;            // ALU operands
    bv{2*DataWidth} alu_EX,alu_MA,alu_WB;   // ALU results
    bool cnd_EX,cnd_MA,cnd_WB;              // results of branch condition
    bv{DataWidth} ld_MA,ld_WB;              // load results
    
    // delayed assignment: pc is set to 0 for the following step
    next(pc) = 0;
    loop {
        // every iteration starts by waiting one step at this label
        waitInstr: pause;
        // instruction decode: determine operands and parts of instruction word
        // (per the header: reads instr,Reg,overflw and produces the opc, fnc,
        // adr, rd, opS, opL, opR values for the Execute stage)
        Decode(instr,Reg,overflw,
                     opc_ID,opc_EX,fnc_EX,adr_EX,rd_EX,opS_EX,opL_EX,opR_EX);
        // execute current instruction: (AluRes,CondRes) = opc-fnc(opL,opR)
        // (per the header: alu_EX/cnd_EX are written immediately by EX, while
        // the *_MA variables carry the values on to the MemAccess stage)
        Execute(opc_EX,fnc_EX,adr_EX,rd_EX,opS_EX,opL_EX,opR_EX,
                opc_MA,fnc_MA,adr_MA,rd_MA,opS_MA,alu_EX,alu_MA,cnd_EX,cnd_MA);
        // memory transaction
        // (opS has no _WB copy: per the header its last use is in MemAccess;
        // ld_MA is the immediately written load result, ld_WB the copy that
        // WriteBack reads)
        MemAccess(pc,opc_MA,fnc_MA,adr_MA,rd_MA,opS_MA,alu_MA,cnd_MA,
                     opc_WB,fnc_WB,adr_WB,rd_WB,      alu_WB,cnd_WB,
                     ld_MA,ld_WB,
                     adrBus,dataBus,
                     readMem,writeMem,reqMem,ackMem,doneMem);
        // write back results in registers
        // (per the header: this is the only stage that writes pc, Reg and
        // overflw, which is the source of the conflicts with fetch and decode)
        WriteBack(pc,opc_WB,fnc_WB,adr_WB,rd_WB,alu_WB,ld_WB,cnd_WB,
                     Reg,overflw);
    }
}
// Test driver DataConflictsALU: feeds a fixed 13-word program to the module,
// one instruction word per step, selected by pc.
// Words 0-4 load the constants 1..5 into R1..R5. I1 then writes R1, and
// I2, I3 and I4 all read R1 directly behind it; I4 additionally writes R4,
// which I2 has written before. The run therefore exercises back-to-back
// register dependencies between ALU instructions.
// NOTE(review): the trailing nops presumably let the last add pass through
// the remaining pipeline stages before the driver stops -- confirm.
drivenby DataConflictsALU {
    [13]bv{16} Prog;
    Prog = [
        0b1000000010000001,  //   0:     mov R1,1
        0b1000000100000010,  //   1:     mov R2,2
        0b1000000110000011,  //   2:     mov R3,3
        0b1000001000000100,  //   3:     mov R4,4
        0b1000001010000101,  //   4:     mov R5,5
        0b0000000010100110,  //   5: I1: add R1,R2,R3    
        0b0000001000011000,  //   6: I2: add R4,R1,R4    
        0b0000001010010010,  //   7: I3: add R5,R1,R1    
        0b0000001000010100,  //   8: I4: add R4,R1,R2    
        0b0000100000000000,  //   9:     nop
        0b0000100000000000,  //  10:     nop
        0b0000100000000000,  //  11:     nop
        0b0000100000000000   //  12:     nop
    ];
    // let one step pass before the first instruction is supplied
    // NOTE(review): presumably because the module's next(pc) = 0 only takes
    // effect in the following step -- confirm
    pause;
    // supply Prog[pc] in every step until pc has reached the last program
    // index (12); Prog has exactly 13 entries, so the condition also bounds
    // the index used below
    // NOTE(review): the abort is weak, so the assignment to instr in the step
    // where pc>=12 first holds is presumably still performed -- confirm
    weak abort {
        loop {
            instr = Prog[pc];
            pause;
        }
    } when(pc>=12);
}