// ************************************************************************** //
//                                                                            //
//    eses                   eses                                             //
//   eses                     eses                                            //
//  eses    eseses  esesese    eses   Embedded Systems Group                  //
//  ese    ese  ese ese         ese                                           //
//  ese    eseseses eseseses    ese   Department of Computer Science          //
//  eses   eses          ese   eses                                           //
//   eses   eseses  eseseses  eses    University of Kaiserslautern            //
//    eses                   eses                                             //
//                                                                            //
// ************************************************************************** //
// This file contains a first step towards a useful hardware implementation of//
// the scalar RISC processor defined in file ScalarBehav. The idea is thereby //
// to rewrite the definition of the instruction set architecture so that as   //
// few as possible operational units like adders, multipliers etc. are used.  //
// To this end, an ALU is used that is capable of the following operations:   //
//                                                                            //
//    ADDS,SUBS,MULS,DIVS,LESS,LEQS,                                          //
//    ADDU,SUBU,MULU,DIVU,LESU,LEQU,                                          //
//    SEQ,SNE,AND,OR,NAND,NOR.                                                //
//                                                                            //
// The decode phase should provide the operands opL and opR for the ALU, while//
// the opcode of the ALU is still the opcode of the instruction. The result of//
// the ALU will always be put in a variable AluRes of width 2*DataWidth. The  //
// writeback phase will then take the result of AluRes and will either        //
//    - put AluRes in register Reg[rd]                                        //
//    - perform a memory transaction with the address contained in AluRes     //
//    - perform a branch to the address contained in AluRes                   //
//                                                                            //
// The definition of the operands opL and opR as well as the target memory of //
// the result are given in the following table (derived from the ISA def.).   //
// As can be seen, almost all instructions can be mapped to ALU operations    //
// with operands opL and opR whose target is Reg[rd] or Reg[0]. Of course,    //
// branch and jump operations overwrite the program counter pc, and the new   //
// pc is obtained by address calculation of the ALU. The main exceptions are  //
// the store operations that have no target register, but three operands: opL //
// and opR are used for the address calculation and a further variable opS is //
// defined to maintain the value to be stored.                                // 
//                                                                            //
//--------------------------------------------------------------------------- //
//  instruction  |  encoding               | AluOp  opL      opR      target  //
//--------------------------------------------------------------------------- //
// add r1,r2,r3  | "000000"+r1+r2+r3+"0"   | ADDS   Reg[rs1] Reg[rs2] Reg[rd] //
// addu r1,r2,r3 | "000001"+r1+r2+r3+"0"   | ADDU   Reg[rs1] Reg[rs2] Reg[rd] //
// addi r1,r2,c  | "000010"+r1+r2+(SE 4 c) | ADDS   Reg[rs1] cst4     Reg[rd] //
// addiu r1,r2,c | "000011"+r1+r2+(ZE 4 c) | ADDU   Reg[rs1] cst4     Reg[rd] //
// sub r1,r2,r3  | "000100"+r1+r2+r3+"0"   | SUBS   Reg[rs1] Reg[rs2] Reg[rd] //
// subu r1,r2,r3 | "000101"+r1+r2+r3+"0"   | SUBU   Reg[rs1] Reg[rs2] Reg[rd] //
// subi r1,r2,c  | "000110"+r1+r2+(SE 4 c) | SUBS   Reg[rs1] cst4     Reg[rd] //
// subiu r1,r2,c | "000111"+r1+r2+(ZE 4 c) | SUBU   Reg[rs1] cst4     Reg[rd] //
// mul r1,r2,r3  | "001000"+r1+r2+r3+"0"   | MULS   Reg[rs1] Reg[rs2] Reg[rd] //
// mulu r1,r2,r3 | "001001"+r1+r2+r3+"0"   | MULU   Reg[rs1] Reg[rs2] Reg[rd] //
// muli r1,r2,c  | "001010"+r1+r2+(SE 4 c) | MULS   Reg[rs1] cst4     Reg[rd] //
// muliu r1,r2,c | "001011"+r1+r2+(ZE 4 c) | MULU   Reg[rs1] cst4     Reg[rd] //
// div r1,r2,r3  | "001100"+r1+r2+r3+"0"   | DIVS   Reg[rs1] Reg[rs2] Reg[rd] //
// divu r1,r2,r3 | "001101"+r1+r2+r3+"0"   | DIVU   Reg[rs1] Reg[rs2] Reg[rd] //
// divi r1,r2,c  | "001110"+r1+r2+(SE 4 c) | DIVS   Reg[rs1] cst4     Reg[rd] //
// diviu r1,r2,c | "001111"+r1+r2+(ZE 4 c) | DIVU   Reg[rs1] cst4     Reg[rd] //
// slt r1,r2,r3  | "010000"+r1+r2+r3+"0"   | LESS   Reg[rs1] Reg[rs2] Reg[rd] //
// sltu r1,r2,r3 | "010001"+r1+r2+r3+"0"   | LESU   Reg[rs1] Reg[rs2] Reg[rd] //
// sle r1,r2,r3  | "010010"+r1+r2+r3+"0"   | LEQS   Reg[rs1] Reg[rs2] Reg[rd] //
// sleu r1,r2,r3 | "010011"+r1+r2+r3+"0"   | LEQU   Reg[rs1] Reg[rs2] Reg[rd] //
// seq r1,r2,r3  | "010100"+r1+r2+r3+"0"   | SEQ    Reg[rs1] Reg[rs2] Reg[rd] //
// sne r1,r2,r3  | "010101"+r1+r2+r3+"0"   | SNE    Reg[rs1] Reg[rs2] Reg[rd] //
// and r1,r2,r3  | "010110"+r1+r2+r3+"0"   | AND    Reg[rs1] Reg[rs2] Reg[rd] //
// or r1,r2,r3   | "010111"+r1+r2+r3+"0"   | OR     Reg[rs1] Reg[rs2] Reg[rd] //
// nand r1,r2,r3 | "011000"+r1+r2+r3+"0"   | NAND   Reg[rs1] Reg[rs2] Reg[rd] //
// nor r1,r2,r3  | "011001"+r1+r2+r3+"0"   | NOR    Reg[rs1] Reg[rs2] Reg[rd] //
// ld r1,r2,c    | "011010"+r1+r2+(ZE 4 c) | ADDU   Reg[rs1] cst4     Reg[rd] //
// st r1,r2,c    | "011011"+r1+r2+(ZE 4 c) | ADDU   Reg[rs1] cst4     --      //
// ll r1,r2,c    | "011110"+r1+r2+(ZE 4 c) | ADDU   Reg[rs1] cst4     Reg[rd] //
// sc r1,r2,c    | "011111"+r1+r2+(ZE 4 c) | ADDU   Reg[rs1] cst4     --      //
// mov r1,c      | "100000"+r1+(SE 7 c)    | ADDS   Reg[0]   cst7     Reg[rd] //
// movu r1,c     | "100001"+r1+(ZE 7 c)    | ADDU   Reg[0]   cst7     Reg[rd] //
// bez r1,r2,c   | "100010"+r1+r2+(SE 4 c) | ADDS   Reg[rs1] cst4     pc      //
// bnz r1,r2,c   | "100011"+r1+r2+(SE 4 c) | ADDS   Reg[rs1] cst4     pc      //
// jmp r1,c      | "100100"+r1+(SE 7 c)    | ADDS   Reg[rd]  cst7     pc      //
// j c           | "100101"+(SE 10 c)      | ADDS   Reg[0]   Reg[0]   pc      //
// sync          | "100111"+"000"+"0000000"| ADDS   Reg[0]   Reg[0]   Reg[0]  //
// ovf r1        | "100111"+r1+"0000001"   | ADDU   Reg[0]   overflw  Reg[rd] //
//--------------------------------------------------------------------------- //
//                                                                            //
// In contrast to the later pipeline implementation, the modules used here for//
// decode, execute, memory access and write back essentially perform immediate//
// assignments (except for the assignments to memory and the registers). The  //
// body statements of these modules only consist of instantaneous code (except//
// for the memory access module). To avoid synchronization between the        //
// modules, these are called in a sequence whose computation time is the time //
// required for the memory access.                                            //
// ************************************************************************** //

// DataWidth is the common bit-width used in this file for the scalar
// registers (Reg, overflw), the ALU operands (opL, opR), the store operand
// (opS), the load result (LoadRes) and the memory data port (dataMem).
// The ALU result AluRes is declared with twice this width.
macro DataWidth =  8; // bit-width of registers


// ScalarHW: top-level module of the ALU-based scalar processor.
//
// After scheduling pc to become 0 in the next step, the module loops forever.
// Each iteration first pauses at label waitInstr and then calls the modules
// Decode, Execute, MemAccess and WriteBack one after the other. These four
// modules are defined outside this module; the descriptions below follow the
// file header and the argument lists only.
//
// Interface:
//   instr     - 16-bit instruction word (marked '?', i.e. input)
//   pc        - program counter; passed on to MemAccess and WriteBack
//   adrMem    - memory address (marked '!', i.e. output)
//   dataMem   - DataWidth-bit data of a memory access
//   readMem, writeMem       - events selecting read or write access
//   reqMem, ackMem, doneMem - handshake events of a memory transaction
//               NOTE(review): the exact handshake protocol is implemented in
//               MemAccess, which is not visible here - confirm there.
module ScalarHW (
    bv{16} ?instr,               // instruction to be performed now    
    nat pc,                      // program counter
    event nat !adrMem,           // address for memory access
    event bv{DataWidth} dataMem, // data for memory access
    event readMem,writeMem,      // whether data is read or written to memory
    event reqMem,ackMem,doneMem  // signals for memory transaction
    )
{
    // ----------------------------------------------------------------------
    // scalar registers
    // ----------------------------------------------------------------------    
    [8]bv{DataWidth} Reg;       // eight scalar registers of DataWidth bits each
    bv{DataWidth} overflw;      // overflw register (completing ALU result)
                                // NOTE(review): presumably holds the part of
                                // the double-width AluRes that does not fit
                                // into Reg[rd] - confirm in WriteBack.
    // ----------------------------------------------------------------------
    // auxiliary local variables (passed between the four phase modules)
    // ----------------------------------------------------------------------    
    bv{6} opc;              // opcode of instr
    bv{7} fnc;              // function code or 7-bit constant
    nat{8} rd;              // destination register
    bv{10} adr;             // jump address of J-type instruction
    bv{DataWidth} opS;      // value to be stored (third operand of stores)
    bv{DataWidth} opL,opR;  // left and right ALU operands
    bv{2*DataWidth} AluRes; // ALU result, twice as wide as the operands
    bv{DataWidth} LoadRes;  // Load result
    bool CondRes;           // result of branch condition
    
    // delayed assignment: pc takes the value 0 in the next step
    next(pc) = 0;
    loop {
        // every iteration starts with a pause, i.e. in a new step
        waitInstr: pause;
        // instruction decode: determine operands and parts of instruction word
        // (called with instr, Reg and overflw plus the decoded fields
        // opc, fnc, adr, rd and the operands opS, opL, opR)
        Decode(instr,Reg,overflw,opc,fnc,adr,rd,opS,opL,opR);
        // execute current instruction: (AluRes,CondRes) = opc-fnc(opL,opR)
        Execute(opc,fnc,opL,opR,AluRes,CondRes);
        // memory transaction; according to the file header this is the only
        // phase that is not purely instantaneous, and the address of the
        // access is the one contained in AluRes
        MemAccess(pc,opc,fnc,opS,AluRes,LoadRes,
                  adrMem,dataMem,
                  readMem,writeMem,reqMem,ackMem,doneMem);
        // write back results in registers
        // NOTE(review): per the file header, branches and jumps also obtain
        // the new pc from AluRes in this phase - confirm in WriteBack.
        WriteBack(pc,opc,fnc,adr,rd,AluRes,LoadRes,CondRes,Reg,overflw);
    }
}