// ************************************************************************** //
//                                                                            //
//    eses                   eses                                             //
//   eses                     eses                                            //
//  eses    eseses  esesese    eses   Embedded Systems Group                  //
//  ese    ese  ese ese         ese                                           //
//  ese    eseseses eseseses    ese   Department of Computer Science          //
//  eses   eses          ese   eses                                           //
//   eses   eseses  eseseses  eses    University of Kaiserslautern            //
//    eses                   eses                                             //
//                                                                            //
// ************************************************************************** //
//
// This file defines the instruction set architecture of the MiniMIPS processor
// used for teaching and research by the Embedded Systems Group at the Univ.
// of Kaiserslautern. The module below defines the single-cycle behavior, where
// all instructions except memory transactions require only one cycle for
// their execution. Its purpose is the specification of the behavior of the CPU.
//
// Instructions have 16 bits, where the uppermost 6 bits encode the opcode of
// the instruction. The other bits depend on the opcode, where the following
// types of instructions can be distinguished:
//
//    R-Type (3 registers):
//        i{15:10} : opcode
//        i{9:7}   : index of destination register
//        i{6:4}   : index of register holding the left operand
//        i{3:1}   : index of register holding the right operand
//        i{0}     : additional R-type opcode, e.g. encoding vector/scalar op.
//    I-Type (2 registers; 1 immediate operand): 
//        i{15:10} : opcode
//        i{9:7}   : index of destination register
//        i{6:4}   : index of register holding the left operand
//        i{3:0}   : 4-bit constant operand contained in the instruction
//    S-Type (1 register; 1 immediate operand):
//        i{15:10} : opcode
//        i{9:7}   : index of destination register
//        i{6:0}   : 7-bit constant (either operand or function code)
//    J-Type (0 register; 1 immediate operand): 
//        i{15:10} : opcode
//        i{9:0}   : 10-bit constant for address offset
//
// The communication with the instruction memory is rather simple: The CPU
// writes via its output "pc" the index of the desired instruction, and the
// environment provides via input "instr" the corresponding instruction word.
//
// The communication with the data memory is more complicated: First, the
// CPU has to apply for memory access by emitting "reqMem" until the memory
// responds with "ackMem". At that point of time, the address "adrMem" and
// the signals "readMem"/"writeMem" have to be provided, since the memory
// system receives its task. In case of a write, also the data to be written
// must be provided over "dataMem". Then, the CPU has to wait again until
// signal "doneMem" occurs which completes the memory transaction. At that
// point of time, the data can be fetched from "dataMem" in case of a load.
// Emitting readMem and writeMem at the same time instructs the memory system
// to write back the cache content.
// ************************************************************************** //

// ----------------------------------------------------------------------------
// opcodes of the instructions
// ----------------------------------------------------------------------------

// Arithmetic opcodes. Each operation comes in four variants: the plain and
// "U" forms take both operands from registers, the "I" and "IU" forms take
// the right operand from the 4-bit constant of the instruction. In the
// single cycle module of this file the plain/"I" forms are evaluated on
// bv2int values and the "U"/"IU" forms on bv2nat values.
macro ADD   = 0b000000;
macro ADDU  = 0b000001;
macro ADDI  = 0b000010;
macro ADDIU = 0b000011;
macro SUB   = 0b000100;
macro SUBU  = 0b000101;
macro SUBI  = 0b000110;
macro SUBIU = 0b000111;
macro MUL   = 0b001000;
macro MULU  = 0b001001;
macro MULI  = 0b001010;
macro MULIU = 0b001011;
macro DIV   = 0b001100;
macro DIVU  = 0b001101;
macro DIVI  = 0b001110;
macro DIVIU = 0b001111;

// Comparison opcodes (set less than / less or equal / equal / not equal);
// the "U" forms compare bv2nat values, SLT/SLE compare bv2int values.
macro SLT   = 0b010000;
macro SLTU  = 0b010001;
macro SLE   = 0b010010;
macro SLEU  = 0b010011;
macro SEQ   = 0b010100;
macro SNE   = 0b010101;

// Bitwise logic opcodes.
macro AND   = 0b010110;
macro OR    = 0b010111;
macro NAND  = 0b011000;
macro NOR   = 0b011001;

// Memory access and constant move opcodes. LD/LL and ST/SC trigger a data
// memory transaction in the single cycle module of this file; LVWS and SVWS
// are only declared here and have no behavior in that module. MOV/MOVU move
// the 7-bit constant of the instruction to a register (bv2int vs. bv2nat).
macro LD    = 0b011010;
macro ST    = 0b011011;
macro LVWS  = 0b011100;
macro SVWS  = 0b011101;
macro LL    = 0b011110;
macro SC    = 0b011111;
macro MOV   = 0b100000;
macro MOVU  = 0b100001;

// Branch and jump opcodes; these are the only instructions that set the next
// program counter themselves instead of using pc+1.
macro BEZ   = 0b100010;
macro BNZ   = 0b100011;
macro JMP   = 0b100100;
macro J     = 0b100101;

// Note: the encoding 0b100110 is not assigned to any opcode in this list.

macro SYNC  = 0b100111; // note that the instructions of this group share the
macro OVF   = 0b100111; // same opcode and differ only in the additional
macro MVTM  = 0b100111; // function code (instruction bits 6:0) listed below
macro MVFM  = 0b100111;
macro MVTL  = 0b100111;
macro MVFL  = 0b100111;

// Function codes (7 bits) that select the instruction within the group that
// shares opcode 0b100111.
macro fn_SYNC  = 0b0000000;
macro fn_OVF   = 0b0000001;
macro fn_MVTM  = 0b0000010;
macro fn_MVFM  = 0b0000011;
macro fn_MVTL  = 0b0000100;
macro fn_MVFL  = 0b0000101;



// **************************************************************************
//                   macros for implementing the behavior
// **************************************************************************

// Field extraction from a 16-bit instruction word i (see the instruction
// formats described in the header of this file).
macro OpCode(i)  = i{15:10}; // opcode of instruction i
macro DestReg(i) = i{9:7};   // register index of destination
macro SrcLReg(i) = i{6:4};   // register index of left operand
macro SrcRReg(i) = i{3:1};   // register index of right operand
macro VctFlag(i) = i{0};     // additional R-type bit: vector/scalar flag
macro ConstOp(i) = i{3:0};   // constant operand of instruction i
macro JumpAdr(i) = i{9:0};   // address of jump instruction
macro FctCode(i) = i{6:0};   // function code of instruction i

// Data path width and helpers for splitting a double-width ALU result.
macro DataWidth     =  8;                         // bit-width of registers
macro One           = {true::DataWidth};          // bitvector consisting of 1s
macro Zero          = {false::DataWidth};         // bitvector consisting of 0s
macro UpperWord(r)  = r{2*DataWidth-1:DataWidth}; // upper half of double word
macro LowerWord(r)  = r{DataWidth-1:0};           // lower half of double word



// **************************************************************************
//                      single cycle behavior of the CPU
// **************************************************************************

// ----------------------------------------------------------------------------
// SingleCycleScalarBehav: reference behavior of the scalar MiniMIPS CPU.
//
// In every iteration of the main loop the module waits one macro step, decodes
// the instruction word given on "instr", executes it, and determines the next
// value of "pc". All instructions take a single step except those that access
// the data memory (LD, LL, ST, SC, and SYNC with function code fn_SYNC): these
// keep "pc" unchanged while waiting for "ackMem" and then for "doneMem".
//
// Opcodes without a matching case (e.g. vector variants with the flag bit set,
// LVWS/SVWS, or the MVTM/MVFM/MVTL/MVFL function codes) execute as a no-op
// that only increments "pc".
// ----------------------------------------------------------------------------
module SingleCycleScalarBehav (
    bv{16} ?instr,               // instruction to be performed now    
    nat pc,                      // program counter
    event nat !adrMem,           // address for memory access
    event bv{DataWidth} dataMem, // data for memory access
    event readMem,writeMem,      // whether data is read or written to memory
    event reqMem,ackMem,doneMem  // signals for memory transaction
    )
{
    // ----------------------------------------------------------------------
    // registers for scalar data (required in all versions)
    // ----------------------------------------------------------------------    
    [8]bv{DataWidth} Reg;       // scalar registers
    bv{DataWidth} overflw;      // overflw register (completing result)
    bv{2*DataWidth} AluOut;     // intermediate result of ALU operations

    // ----------------------------------------------------------------------
    // auxiliary local variables to increase readability
    // ----------------------------------------------------------------------    
    bv{6} opc;                  // opcode of instr
    nat{8} rd,rs1,rs2;          // register indices taken from instr
    bv{4} cst;                  // constant operand of I-type instructions
    bv{7} fnc;                  // constant operand of S-type instructions
    bv{10} adr;                 // jump address of J-type instruction
    bool vct;                   // whether it's a vector instruction

    // execution starts with the instruction at index 0
    next(pc) = 0;
    loop {
        waitInstr: pause;
        // --------------------------------------------------------------------
        // instruction decode: all fields are extracted unconditionally; which
        // of them are meaningful depends on the instruction type
        // --------------------------------------------------------------------
        opc = OpCode(instr);
        rd  = bv2nat(DestReg(instr));
        rs1 = bv2nat(SrcLReg(instr));
        rs2 = bv2nat(SrcRReg(instr));
        cst = ConstOp(instr);
        fnc = FctCode(instr);
        adr = JumpAdr(instr);
        vct = VctFlag(instr);

        // --------------------------------------------------------------------
        // execute current instruction
        // --------------------------------------------------------------------
        case
            // ----------------------------------------------------------------
            // arithmetic instructions with register operands: the result is
            // computed with double width; the lower half is written to the
            // destination register, the upper half to the overflw register
            // ----------------------------------------------------------------
            (opc==ADD & !vct)  do {
                AluOut = int2bv(bv2int(Reg[rs1]) + bv2int(Reg[rs2]),2*DataWidth);
                next(overflw) = UpperWord(AluOut);
                next(Reg[rd]) = LowerWord(AluOut);
                }
            (opc==ADDU & !vct) do {
                AluOut = nat2bv(bv2nat(Reg[rs1]) + bv2nat(Reg[rs2]),2*DataWidth);
                next(overflw) = UpperWord(AluOut);
                next(Reg[rd]) = LowerWord(AluOut);
                }
            (opc==SUB & !vct) do {
                AluOut = int2bv(bv2int(Reg[rs1]) - bv2int(Reg[rs2]),2*DataWidth);
                next(overflw) = UpperWord(AluOut);
                next(Reg[rd]) = LowerWord(AluOut);
                }
            (opc==SUBU & !vct) do {
                AluOut = nat2bv(bv2nat(Reg[rs1]) - bv2nat(Reg[rs2]),2*DataWidth);
                next(overflw) = UpperWord(AluOut);
                next(Reg[rd]) = LowerWord(AluOut);
                }
            (opc==MUL & !vct) do {
                AluOut = int2bv(bv2int(Reg[rs1]) * bv2int(Reg[rs2]),2*DataWidth);
                next(overflw) = UpperWord(AluOut);
                next(Reg[rd]) = LowerWord(AluOut);
                }
            (opc==MULU & !vct) do {
                AluOut = nat2bv(bv2nat(Reg[rs1]) * bv2nat(Reg[rs2]),2*DataWidth);
                next(overflw) = UpperWord(AluOut);
                next(Reg[rd]) = LowerWord(AluOut);
                }
            // division: quotient goes to the destination register and the
            // remainder to the overflw register
            // NOTE(review): a zero divisor is not checked here - confirm that
            // this is intended to be left unspecified
            (opc==DIV & !vct) do {
                next(overflw) = int2bv(bv2int(Reg[rs1]) % bv2int(Reg[rs2]),DataWidth);
                next(Reg[rd]) = int2bv(bv2int(Reg[rs1]) / bv2int(Reg[rs2]),DataWidth);
                }
            (opc==DIVU & !vct) do {
                next(overflw) = nat2bv(bv2nat(Reg[rs1]) % bv2nat(Reg[rs2]),DataWidth);
                next(Reg[rd]) = nat2bv(bv2nat(Reg[rs1]) / bv2nat(Reg[rs2]),DataWidth);
                }
            // ----------------------------------------------------------------
            // arithmetic instructions with constant operands: same as above,
            // but the right operand is the 4-bit constant of the instruction
            // ----------------------------------------------------------------
            (opc==ADDI) do {
                AluOut = int2bv(bv2int(Reg[rs1]) + bv2int(cst),2*DataWidth);
                next(overflw) = UpperWord(AluOut);
                next(Reg[rd]) = LowerWord(AluOut);
                }
            (opc==ADDIU) do {
                AluOut = nat2bv(bv2nat(Reg[rs1]) + bv2nat(cst),2*DataWidth);
                next(overflw) = UpperWord(AluOut);
                next(Reg[rd]) = LowerWord(AluOut);
                }
            (opc==SUBI) do {
                AluOut = int2bv(bv2int(Reg[rs1]) - bv2int(cst),2*DataWidth);
                next(overflw) = UpperWord(AluOut);
                next(Reg[rd]) = LowerWord(AluOut);
                }
            (opc==SUBIU) do {
                AluOut = nat2bv(bv2nat(Reg[rs1]) - bv2nat(cst),2*DataWidth);
                next(overflw) = UpperWord(AluOut);
                next(Reg[rd]) = LowerWord(AluOut);
                }
            (opc==MULI) do {
                AluOut = int2bv(bv2int(Reg[rs1]) * bv2int(cst),2*DataWidth);
                next(overflw) = UpperWord(AluOut);
                next(Reg[rd]) = LowerWord(AluOut);
                }
            (opc==MULIU) do {
                AluOut = nat2bv(bv2nat(Reg[rs1]) * bv2nat(cst),2*DataWidth);
                next(overflw) = UpperWord(AluOut);
                next(Reg[rd]) = LowerWord(AluOut);
                }
            (opc==DIVI) do {
                next(overflw) = int2bv(bv2int(Reg[rs1]) % bv2int(cst),DataWidth);
                next(Reg[rd]) = int2bv(bv2int(Reg[rs1]) / bv2int(cst),DataWidth);
                }
            (opc==DIVIU) do {
                next(overflw) = nat2bv(bv2nat(Reg[rs1]) % bv2nat(cst),DataWidth);
                next(Reg[rd]) = nat2bv(bv2nat(Reg[rs1]) / bv2nat(cst),DataWidth);
                }
            // ----------------------------------------------------------------
            // comparison instructions: the destination register is set to
            // One (all bits 1) if the comparison holds and to Zero otherwise
            // ----------------------------------------------------------------
            (opc==SLT & !vct) do 
                next(Reg[rd]) = (bv2int(Reg[rs1]) <  bv2int(Reg[rs2])?One:Zero);
            (opc==SLTU & !vct) do 
                next(Reg[rd]) = (bv2nat(Reg[rs1]) <  bv2nat(Reg[rs2])?One:Zero);
            (opc==SLE & !vct) do 
                next(Reg[rd]) = (bv2int(Reg[rs1]) <= bv2int(Reg[rs2])?One:Zero);
            (opc==SLEU & !vct) do 
                next(Reg[rd]) = (bv2nat(Reg[rs1]) <= bv2nat(Reg[rs2])?One:Zero);
            (opc==SEQ & !vct) do
                next(Reg[rd]) =  (Reg[rs1] == Reg[rs2]?One:Zero);
            // no negation of the selected bitvector here: negating it would
            // turn "set if not equal" into "set if equal", i.e. into SEQ
            (opc==SNE & !vct) do
                next(Reg[rd]) =  (Reg[rs1] != Reg[rs2]?One:Zero);
            // ----------------------------------------------------------------
            // logic instructions (bitwise on the register contents)
            // NOTE(review): unlike the other R-type cases, these do not test
            // the vector flag - confirm whether that is intended
            // ----------------------------------------------------------------
            (opc==AND)  do next(Reg[rd]) =   Reg[rs1] & Reg[rs2];
            (opc==OR)   do next(Reg[rd]) =   Reg[rs1] |  Reg[rs2];
            (opc==NAND) do next(Reg[rd]) = !(Reg[rs1] & Reg[rs2]);
            (opc==NOR)  do next(Reg[rd]) = !(Reg[rs1] | Reg[rs2]);
            // ----------------------------------------------------------------
            // load and store instructions
            // ----------------------------------------------------------------
            (opc==LD | opc==LL | opc==ST | opc==SC | opc==SYNC & fnc==fn_SYNC) do {
                // apply for memory access by emitting reqMem until
                // ackMem holds; may take time on multiprocessors
                weak immediate abort {
                    loop {
                        emit(reqMem);
                        // keep the pc while waiting so that the same
                        // instruction is still provided in the next step
                        if(!ackMem) next(pc) = pc;
                        waitMem1: pause;
                    }
                } when(ackMem);
                // provide address and read/write request signal
                // until memory transaction is done
                weak immediate abort {
                    loop {
                        adrMem = bv2nat(Reg[rs1]) + bv2nat(cst);
                        case
                            (opc==LD)   do emit(readMem);
                            (opc==LL)   do emit(readMem);
                            (opc==ST)   do {emit(writeMem); dataMem = Reg[rd];}
                            (opc==SC)   do {emit(writeMem); dataMem = Reg[rd];}
                            // both signals together request a write back
                            // of the cache content
                            (opc==SYNC) do {emit(readMem); emit(writeMem);}
                        default nothing;
                        if(!doneMem) next(pc) = pc;
                        waitMem2: pause;
                    }
                } when(doneMem);
                // in case of load, store the data in the register
                if(opc==LD | opc==LL)
                    next(Reg[rd]) = dataMem;
                }
            // ----------------------------------------------------------------
            // moving constants to registers (7-bit constant of the S-type
            // format, read via bv2int for MOV and via bv2nat for MOVU)
            // ----------------------------------------------------------------
            (opc==MOV)  do next(Reg[rd]) = int2bv(bv2int(fnc),DataWidth);
            (opc==MOVU) do next(Reg[rd]) = nat2bv(bv2nat(fnc),DataWidth);
            // ----------------------------------------------------------------
            // branch and jump instructions: targets are relative to the
            // current pc; BEZ/BNZ test register rd and fall through to pc+1
            // if the condition does not hold
            // ----------------------------------------------------------------
            (opc==BEZ) do 
                next(pc) = (Reg[rd]==Zero?pc+bv2int(Reg[rs1])+bv2int(cst):pc+1);
            (opc==BNZ) do 
                next(pc) = (Reg[rd]!=Zero?pc+bv2int(Reg[rs1])+bv2int(cst):pc+1);
            (opc==JMP) do 
                next(pc) = pc+bv2int(Reg[rd])+bv2int(fnc);
            (opc==J) do 
                next(pc) = pc+bv2int(adr);
            // ----------------------------------------------------------------
            // move content of overflow register to destination register rd
            // ----------------------------------------------------------------
            (opc==OVF & fnc==fn_OVF) do 
                next(Reg[rd]) = overflw;
            // ----------------------------------------------------------------
        default nothing;
        // --------------------------------------------------------------------
        // if no branch instruction is executed, the pc has to be incremented
        // to fetch the next instruction
        // --------------------------------------------------------------------
        if(opc!=BEZ & opc!=BNZ & opc!=JMP & opc!=J)
            next(pc) = pc+1;
    }
}