// ************************************************************************** // // // // eses eses // // eses eses // // eses eseses esesese eses Embedded Systems Group // // ese ese ese ese ese // // ese eseseses eseseses ese Department of Computer Science // // eses eses ese eses // // eses eseses eseseses eses University of Kaiserslautern // // eses eses // // // // ************************************************************************** // // This file implements a transport triggered architecture whose function // // units are connected by a bus with several sockets and lanes. All input and // // output ports of each functional unit have a unique address so that data can// // be transported from each of these output ports to each of the input ports. // // The register file has four inputs and can read and write to one of its // // registers in parallel. The control unit is able to deal with the following // // move instructions: // // // // * 00@src@tgt: data transfer from output port src to input port tgt // // * 01@cst@tgt: data transfer of constant cst to input port tgt // // * 10@src@cst: add cst to pc if value received from port src is non-zero // // * 11@src@tgt: no operation // // // // The control unit maintains the program counter pc and reads the program // // memory for fetching the next instruction to be executed. The general way // // of execution is that the control unit decodes the instruction, and puts the// // addresses for a data transfer on the bus. The function units with these // // addresses will automatically answer, and as a side effect, new results are // // computed when a function unit receives new input. 
// In more detail:
//
// If a "00" instruction is fetched, then the control unit puts the addresses
// src and tgt found in the instruction on the bus, so that the function unit
// having an output with address src will put the corresponding value on the
// bus, and the function unit having an input with address tgt will read this
// value from the bus.
//
// If a "01" instruction is fetched, then the control unit puts the address
// tgt and the constant cst as the value on the bus, and the function unit
// having an input with address tgt will read this value from the bus. Since
// the src part of the bus is reset to zero, and zero is not used as an
// address, no function unit will put a value on the bus.
//
// If a "10" instruction is fetched, then the control unit puts address src
// on the bus, so that the function unit having an output with address src
// will put the corresponding value on the bus. The control unit will then
// read the value, and if it is not zero, then the value cst is added to the
// current pc.
//
// If a "11" instruction is fetched, then the control unit puts nothing on
// the bus, and therefore no other function unit will answer.
// ************************************************************************** //

// ---------------------------------------------------------------------------
// global parameters
// ---------------------------------------------------------------------------
// NumPorts is the size of the port arrays e and x of module CPU, i.e., valid
// port indices are 0..NumPorts-1 = 0..36 (address 0 is never used as a port).
// NOTE: the comparator addresses CMPU_* / CMPS_* below (37..52) lie outside of
// this range; NumPorts has to be enlarged before such units are instantiated.
macro NumPorts = 37;
macro NumBus   = 4;     // number of busses
macro AW       = 8;     // address width
macro DW       = 8;     // data width
macro PL       = 10;    // length of program

// ---------------------------------------------------------------------------
// macros to construct machine instructions
// (instruction layout: 2 bit opcode @ AW bit src field @ AW bit tgt field)
// ---------------------------------------------------------------------------
macro MvData(src,tgt) = 0b00@nat2bv(src,AW)@nat2bv(tgt,AW);
macro MvCnst(cst,tgt) = 0b01@int2bv(cst,AW)@nat2bv(tgt,AW);
macro RdCnst(src,cst) = 0b10@nat2bv(src,AW)@int2bv(cst,AW);
macro NOP             = 0b11@{false::AW}@{false::AW};

// ---------------------------------------------------------------------------
// addresses of the ports of the function units
// ---------------------------------------------------------------------------
macro REG_ARG1  = 1;
macro REG_ARG2  = 2;
macro REG_ARG3  = 3;
macro REG_RES   = 4;

macro ADDU_ARG1 = 5;
macro ADDU_ARG2 = 6;
macro ADDU_RES1 = 7;
macro ADDU_RES2 = 8;

macro SUBU_ARG1 = 9;
macro SUBU_ARG2 = 10;
macro SUBU_RES1 = 11;
macro SUBU_RES2 = 12;

macro MULU_ARG1 = 13;
macro MULU_ARG2 = 14;
macro MULU_RES1 = 15;
macro MULU_RES2 = 16;

macro DIVU_ARG1 = 17;
macro DIVU_ARG2 = 18;
macro DIVU_RES1 = 19;
macro DIVU_RES2 = 20;

macro ADDS_ARG1 = 21;
macro ADDS_ARG2 = 22;
macro ADDS_RES1 = 23;
macro ADDS_RES2 = 24;

macro SUBS_ARG1 = 25;
macro SUBS_ARG2 = 26;
macro SUBS_RES1 = 27;
macro SUBS_RES2 = 28;

macro MULS_ARG1 = 29;
macro MULS_ARG2 = 30;
macro MULS_RES1 = 31;
macro MULS_RES2 = 32;

macro DIVS_ARG1 = 33;
macro DIVS_ARG2 = 34;
macro DIVS_RES1 = 35;
macro DIVS_RES2 = 36;

// Every comparison result has its own macro name, so that no macro is
// defined twice.
macro CMPU_ARG1 = 37;
macro CMPU_ARG2 = 38;
macro CMPU_RES1 = 39;   // equal
macro CMPU_RES2 = 40;   // not equal
macro CMPU_RES3 = 41;   // less than
macro CMPU_RES4 = 42;   // less than or equal
macro CMPU_RES5 = 43;   // greater than
macro CMPU_RES6 = 44;   // greater than or equal

macro CMPS_ARG1 = 45;
macro CMPS_ARG2 = 46;
macro CMPS_RES1 = 47;   // equal
macro CMPS_RES2 = 48;   // not equal
macro CMPS_RES3 = 49;   // less than
macro CMPS_RES4 = 50;   // less than or equal
macro CMPS_RES5 = 51;   // greater than
macro CMPS_RES6 = 52;   // greater than or equal

// ---------------------------------------------------------------------------
// macros for accessing contained data
// ---------------------------------------------------------------------------
macro OpcInstr(x) = x{2*AW+1:2*AW};  // operation code of instruction
macro SrcInstr(x) = x{2*AW-1:AW};    // source address or constant operand
macro TgtInstr(x) = x{AW-1:0};       // target address or constant operand
macro SrcBus(x)   = x.0;             // source address component of a bus
macro TgtBus(x)   = x.1;             // target address component of a bus
macro ValBus(x)   = x.2;             // data value component of a bus


// ---------------------------------------------------------------------------
// Module CPU: control unit plus function units connected by NumBus busses.
//   prog : program memory; prog[pc] holds NumBus move instructions that are
//          issued in the same step, one per bus
//   pc   : program counter maintained by the control unit
// ---------------------------------------------------------------------------
module CPU([PL][NumBus]bv{2*AW+2} ?prog,nat{PL+1} pc) {
    // each bus carries a triple (source address, target address, data value)
    event [NumBus](bv{AW} * bv{AW} * bv{DW}) bus;
    // emitted in a step where a "10" instruction has modified the pc
    event branch;
    // per-port signals handed to the bus connectors and function units,
    // indexed by the port addresses defined above
    event [NumPorts]bool e;
    event [NumPorts]bv{DW} x;

    // -------------------------------------------------------------------------
    // the control unit
    // -------------------------------------------------------------------------
    {
        pause;
        loop {
            // decode the instruction of every bus of the current program line
            for(i=0..NumBus-1) {
                let(instr = prog[pc][i])
                case
                    (OpcInstr(instr) == 0b00) do {
                        // move data from SrcInstr(instr) to TgtInstr(instr)
                        SrcBus(bus[i]) = SrcInstr(instr);
                        TgtBus(bus[i]) = TgtInstr(instr);
                        // ValBus(bus[i]) will be defined by function unit
                        // having output address SrcInstr(instr)
                    }
                    (OpcInstr(instr) == 0b01) do {
                        // move constant SrcInstr(instr) to TgtInstr(instr)
                        SrcBus(bus[i]) = {false::AW};
                        TgtBus(bus[i]) = TgtInstr(instr);
                        // the AW bit wide constant field is used as a DW bit
                        // wide data value, which relies on AW == DW
                        ValBus(bus[i]) = SrcInstr(instr);
                    }
                    (OpcInstr(instr) == 0b10) do {
                        // move data from SrcInstr(instr) to control unit
                        // and modify the pc if the received value is not zero
                        SrcBus(bus[i]) = SrcInstr(instr);
                        TgtBus(bus[i]) = {false::AW};
                        // ValBus(bus[i]) will be defined by function unit
                        // having output address SrcInstr(instr)
                        if(bv2nat(ValBus(bus[i]))!=0) {
                            // the tgt field holds the signed pc offset
                            next(pc) = pc + bv2int(TgtInstr(instr));
                            emit(branch);
                        }
                    }
                    default {
                        // no operation: address 0 is not used by any port
                        SrcBus(bus[i]) = {false::AW};
                        TgtBus(bus[i]) = {false::AW};
                        ValBus(bus[i]) = {false::DW};
                    }
            }
            // proceed with the next program line unless a branch was taken
            if(!branch)
                next(pc) = pc + 1;
            pause;
        }
    }
    // -------------------------------------------------------------------------
    // function units (each one is paired with a bus connector that is given
    // the bus, the unit's port signals and the unit's port addresses)
    // -------------------------------------------------------------------------
    ||
    BC_REG: BusRegConnect(bus,e[REG_ARG1],e[REG_ARG2],e[REG_ARG3],e[REG_RES],
                          x[REG_ARG1],x[REG_ARG2],x[REG_ARG3],x[REG_RES],
                          REG_ARG1, REG_ARG2, REG_ARG3, REG_RES);
    ||
    REG: REG(e[REG_ARG1],e[REG_ARG2],e[REG_ARG3],e[REG_RES],
             x[REG_ARG1],x[REG_ARG2],x[REG_ARG3],x[REG_RES]);
    ||
    BC_ADDU: BusConnect(bus,e[ADDU_ARG1],e[ADDU_ARG2],e[ADDU_RES1],e[ADDU_RES2],
                        x[ADDU_ARG1],x[ADDU_ARG2],x[ADDU_RES1],x[ADDU_RES2],
                        ADDU_ARG1, ADDU_ARG2, ADDU_RES1, ADDU_RES2);
    ||
    ADDU: ADDU(e[ADDU_ARG1],e[ADDU_ARG2],e[ADDU_RES1],e[ADDU_RES2],
               x[ADDU_ARG1],x[ADDU_ARG2],x[ADDU_RES1],x[ADDU_RES2]);
    ||
    BC_SUBU: BusConnect(bus,e[SUBU_ARG1],e[SUBU_ARG2],e[SUBU_RES1],e[SUBU_RES2],
                        x[SUBU_ARG1],x[SUBU_ARG2],x[SUBU_RES1],x[SUBU_RES2],
                        SUBU_ARG1, SUBU_ARG2, SUBU_RES1, SUBU_RES2);
    ||
    SUBU: SUBU(e[SUBU_ARG1],e[SUBU_ARG2],e[SUBU_RES1],e[SUBU_RES2],
               x[SUBU_ARG1],x[SUBU_ARG2],x[SUBU_RES1],x[SUBU_RES2]);
    ||
    BC_MULU: BusConnect(bus,e[MULU_ARG1],e[MULU_ARG2],e[MULU_RES1],e[MULU_RES2],
                        x[MULU_ARG1],x[MULU_ARG2],x[MULU_RES1],x[MULU_RES2],
                        MULU_ARG1, MULU_ARG2, MULU_RES1, MULU_RES2);
    ||
    MULU: MULU(e[MULU_ARG1],e[MULU_ARG2],e[MULU_RES1],e[MULU_RES2],
               x[MULU_ARG1],x[MULU_ARG2],x[MULU_RES1],x[MULU_RES2]);
    ||
    BC_DIVU: BusConnect(bus,e[DIVU_ARG1],e[DIVU_ARG2],e[DIVU_RES1],e[DIVU_RES2],
                        x[DIVU_ARG1],x[DIVU_ARG2],x[DIVU_RES1],x[DIVU_RES2],
                        DIVU_ARG1, DIVU_ARG2, DIVU_RES1, DIVU_RES2);
    ||
    DIVU: DIVU(e[DIVU_ARG1],e[DIVU_ARG2],e[DIVU_RES1],e[DIVU_RES2],
               x[DIVU_ARG1],x[DIVU_ARG2],x[DIVU_RES1],x[DIVU_RES2]);
    ||
    BC_ADDS: BusConnect(bus,e[ADDS_ARG1],e[ADDS_ARG2],e[ADDS_RES1],e[ADDS_RES2],
                        x[ADDS_ARG1],x[ADDS_ARG2],x[ADDS_RES1],x[ADDS_RES2],
                        ADDS_ARG1, ADDS_ARG2, ADDS_RES1, ADDS_RES2);
    ||
    ADDS: ADDS(e[ADDS_ARG1],e[ADDS_ARG2],e[ADDS_RES1],e[ADDS_RES2],
               x[ADDS_ARG1],x[ADDS_ARG2],x[ADDS_RES1],x[ADDS_RES2]);
    ||
    BC_SUBS: BusConnect(bus,e[SUBS_ARG1],e[SUBS_ARG2],e[SUBS_RES1],e[SUBS_RES2],
                        x[SUBS_ARG1],x[SUBS_ARG2],x[SUBS_RES1],x[SUBS_RES2],
                        SUBS_ARG1, SUBS_ARG2, SUBS_RES1, SUBS_RES2);
    ||
    SUBS: SUBS(e[SUBS_ARG1],e[SUBS_ARG2],e[SUBS_RES1],e[SUBS_RES2],
               x[SUBS_ARG1],x[SUBS_ARG2],x[SUBS_RES1],x[SUBS_RES2]);
    ||
    BC_MULS: BusConnect(bus,e[MULS_ARG1],e[MULS_ARG2],e[MULS_RES1],e[MULS_RES2],
                        x[MULS_ARG1],x[MULS_ARG2],x[MULS_RES1],x[MULS_RES2],
                        MULS_ARG1, MULS_ARG2, MULS_RES1, MULS_RES2);
    ||
    MULS: MULS(e[MULS_ARG1],e[MULS_ARG2],e[MULS_RES1],e[MULS_RES2],
               x[MULS_ARG1],x[MULS_ARG2],x[MULS_RES1],x[MULS_RES2]);
    ||
    BC_DIVS: BusConnect(bus,e[DIVS_ARG1],e[DIVS_ARG2],e[DIVS_RES1],e[DIVS_RES2],
                        x[DIVS_ARG1],x[DIVS_ARG2],x[DIVS_RES1],x[DIVS_RES2],
                        DIVS_ARG1, DIVS_ARG2, DIVS_RES1, DIVS_RES2);
    ||
    DIVS: DIVS(e[DIVS_ARG1],e[DIVS_ARG2],e[DIVS_RES1],e[DIVS_RES2],
               x[DIVS_ARG1],x[DIVS_ARG2],x[DIVS_RES1],x[DIVS_RES2]);
}
drivenby p1 {
    // -------------------------------------------------------------------------
    // The following implements a program to compute the sum of the first n=10
    // natural numbers. To this end, we send n+1 and 1 as arguments to SUBS, so
    // that 10 is obtained as SUBS_RES1. Then, the VLIW instruction prog[1]
    // repeats itself until SUBS_RES1 is zero (the branch offset 0 keeps the
    // pc unchanged as long as the value read from SUBS_RES1 is non-zero).
    // Note that even though it will be finally zero, which lets the CU
    // increase the pc to 2, SUBS and ADDU will perform further action in
    // parallel (so that we need to use SUBS instead of SUBU to avoid a runtime
    // error). Note further that the register file is not used in this program
    // except for storing the final result!
    // -------------------------------------------------------------------------
    prog[0][0] = MvCnst(11,SUBS_ARG1);          // 11 --> SUBS.ARG1
    prog[0][1] = MvCnst( 1,SUBS_ARG2);          //  1 --> SUBS.ARG2
    prog[0][2] = NOP;
    prog[0][3] = NOP;

    prog[1][0] = MvData(SUBS_RES1,SUBS_ARG1);
    prog[1][1] = MvData(SUBS_RES1,ADDU_ARG1);
    prog[1][2] = MvData(ADDU_RES1,ADDU_ARG2);
    prog[1][3] = RdCnst(SUBS_RES1,0);

    prog[2][0] = MvCnst(1,REG_ARG1);
    prog[2][1] = MvData(ADDU_RES1,REG_ARG2);    // ADDU.RES -> reg[1]
    prog[2][2] = NOP;
    prog[2][3] = NOP;

    pause; // is required since all FUs start with a (non-immediate) await
    await(pc==2);
}
drivenby p2 {
    // -------------------------------------------------------------------------
    // The following program implements y[i] := y[i] + a * x[i] where x and y
    // are in registers reg[0..15] and reg[16..31], respectively, and a=3. The
    // instructions prog[0..1] will first write values reg[i]:=i to this end.
    // If prog[6] is reached, we should have x[0..15] = reg[0..15] = 0..15 and
    // y[0..15] = reg[16..31] = 16 20 24 .. 76.
    // -------------------------------------------------------------------------
    prog[0][0] = MvCnst(32,SUBS_ARG1);
    prog[0][1] = MvCnst( 1,SUBS_ARG2);
    prog[0][2] = NOP;
    prog[0][3] = NOP;

    prog[1][0] = MvData(SUBS_RES1,SUBS_ARG1);
    prog[1][1] = MvData(SUBS_RES1,REG_ARG1);
    prog[1][2] = MvData(SUBS_RES1,REG_ARG2);
    prog[1][3] = RdCnst(SUBS_RES1,0);

    prog[2][0] = MvCnst(32,SUBU_ARG1);
    prog[2][1] = MvCnst( 1,SUBU_ARG2);
    prog[2][2] = MvCnst(16,SUBS_ARG1);
    prog[2][3] = MvCnst( 1,SUBS_ARG2);

    prog[3][0] = MvCnst(3,MULU_ARG1);
    prog[3][1] = MvData(REG_RES,MULU_ARG2);
    prog[3][2] = MvData(SUBS_RES1,REG_ARG3);
    prog[3][3] = NOP;

    prog[4][0] = MvData(MULU_RES1,ADDU_ARG1);
    prog[4][1] = MvData(REG_RES,ADDU_ARG2);
    prog[4][2] = MvData(SUBU_RES1,REG_ARG3);
    prog[4][3] = MvData(SUBS_RES1,SUBS_ARG1);

    prog[5][0] = MvData(SUBU_RES1,REG_ARG1);
    prog[5][1] = MvData(ADDU_RES1,REG_ARG2);
    prog[5][2] = MvData(SUBU_RES1,SUBU_ARG1);
    prog[5][3] = RdCnst(SUBS_RES1,-2);          // jump back to prog[3]

    prog[6][0] = NOP;
    prog[6][1] = NOP;
    prog[6][2] = NOP;
    prog[6][3] = NOP;

    pause; // is required since all FUs start with a (non-immediate) await
    await(pc==6);
}