// the module CacheMESI implements the cache running MESI protocol in a multi-processor architecture.
// in the multi-processor architecture, each CPU is attached a cache. When the CPU is trying to do a
// read / write operation, it first checks if the corresponding line is in the cache. If not, we have
// a cache miss. The cache the communicates with other caches / memory w.r.t. the MESI protocol.
//
// If it is necessary to access the memory, the cache first have to compete for the bus.
// When it occupied the bus, it then checks if the state of the memory is busy or not.
// If the memory is busy, the cache has to release the bus, and re-apply for the bus again
// to check if the memory is still busy. Only when the cache get the bus and the memory is free,
// can the cache proceed the memory access.

macro NumProcs   = 2;      // number of processors

macro DataWidth   =  8;     // bit-width of registers, i.e. data words
macro ByteSize    = 256;    // nat{ByteSize} is the data type used for bytes
// macro WordSize    = 1;      // the number of bytes within a word
macro MemSize     = 10;     // size of main memory in words, i.e. bv{DataWidth}
macro NumOfLines  =  2;     // number of cache lines
macro LineSize    =  2;     // size of cache lines in words, i.e. bv{DataWidth}
macro NumOfSets   =  2;     // set associativity

macro DivUp(n,m) = ((n-1)/m)+1; // compute n/m, but round towards infinity
macro CacheSize  = NumOfLines * NumOfSets * LineSize;
macro LinesInMem = DivUp(MemSize,LineSize);
macro TagSize    = DivUp(LinesInMem,NumOfLines);
macro LineIdRng  = MemSize % NumOfLines;

// states of the memory
macro MemBusy    = 0;      // memory is busy working
macro MemFree    = 1;      // memory is free
macro MemDone    = 2;      // memory has finished working and loads the bus with valid data

// MESI states of cache lines
macro Modified  = 0;    // if cache line is modified in the cache
macro Exclusive = 1;    // if cache line is exclusive, but not modified
macro Shared    = 2;    // if cache line is held in several caches
macro Invalid   = 3;    // if cache line is not valid

// bus messages of CPU that owns the bus
macro ReadLine  = 1; // if CPU that owns the bus wants to read line with address busAdr
macro WriteLine = 2; // if CPU that owns the bus wants to write line with address busAdr

// bus messages of CPU that owns a requested line
macro NoMessage = 0;            // no message on the bus ????
macro FromOtherCacheClean = 1;  // if the CPU sends a unmodified line on the bus
macro FromOtherCacheDirty = 2;  // if the CPU sends a   modified line on the bus 
//macro FromMemory = 3;           // if the CPU does not send a line on the bus, thus
                                // the memory reacts

// inputs:
//   nat{NumProcs} ? pidIn,       the id of the CPU
//   event bool ? reqCache,       the signal indicating CPU tries to access the cache
//   event bool ? ackMemBus,      the memory grants the access for the bus
//   event bool ? ackCacheBus,    the cache is granted to broadcast to other caches
//   nat{3} stateMem,             the state of the memory
//   event bool ? readCPU,        the CPU requires to read the cache
//   event bool ? writeCPU,       the CPU requires to write the cache
// outputs:
//   event bool ! ackCache,       the cache grants CPU for accessing the cache, for the interface of SingleProcessor
//   event bool ! doneCache,      the cache fetched the data needed by the CPU
//   event bool ! reqMemBus,      the cache requires access to the memory
//   event bool ! reqCacheBus,    the cache requires to broadcast to other caches
//   event bool ! readMem,        the cache requires reading the memory
//   event bool ! writeMem,       the cache requires writing the memory
// inouts:
//   event nat{NumProcs} pidOut,  the bus holding the id of a CPU
//   event nat{MemSize} adrMem,   the address of the word in memory to access
//   event bv{DataWidth} dataMem, the data to be read from / written to
//   event bool snoop,            the signal indicating a snoop requesting the bus
module CacheMESI(
       nat{NumProcs} ? pidIn,
       event nat{NumProcs} pidOut,
       event bool ? reqCache, ! ackCache, ! doneCache,
       event bool ! reqMemBus, ? ackMemBus,
       // event bool ! reqCacheBus, ? ackCacheBus,
       nat{3} ? stateMem,
       //event nat{MemSize} ? adrCPU,
       event nat ? adrCPU,
       event bv{DataWidth} dataCPU, 
       // event nat{ByteSize} dataCPU,
       //event nat{MemSize} adrMem,
       event nat adrMem,
       // event [LineSize]nat{ByteSize} dataMem,
       event [LineSize]bv{DataWidth} dataMem,
       //event nat{MemSize} adrCache,
       //event [LineSize]nat{ByteSize} dataCache,
       event bool snoop,
       event bool ?readCPU, ?writeCPU,
       event bool !readMem, !writeMem,
       event nat{3} BusMsgOut, 
       event nat{4} BusMsgIn
    ){
    
    // The following local variables implement the storage of the cache,
    // i.e., for each cache line and each set, we have a state (see below),
    // a tag and the corresponding data in the cache line (which consists
    // of ByteSize many bytes.
    
    [NumOfLines][NumOfSets]nat{4} stateC;
    [NumOfLines][NumOfSets]nat{5} tagC;
    [NumOfLines][NumOfSets][LineSize]bv{DataWidth} lineC;

    // the following auxiliary variables are used for read/write:
    nat{LineSize} adrInL;                             // address of word in line
    nat{MemSize/LineSize} lineAdr;                    // line address of word
    nat{MemSize/LineSize} tag;                        // expected tag of the line 
    nat{NumOfLines} lineId;                           // index of line if in cache
    nat{NumOfSets} set;                               // set index in case of hit
    event invalid;                                     // signal invalidating other caches
    event hit;                                        // true if word is in cache
    event free;                                       // false if a line need to be written back
    
    // the following auxiliary variables are used for snoop:
    nat{LineSize} adrInLSNP;                          // address of word in line
    nat{MemSize/LineSize} lineAdrSNP;                 // line address of word
    nat{MemSize/LineSize} tagSNP;                     // expected tag of the line 
    nat{NumOfLines} lineIdSNP;                        // index of line if in cache
    nat{NumOfSets} setSNP;                            // set index in case of hit
    event hitSNP;                                     // true if word is in cache

    // bool snoopArbiter;                                // arbitration for snooping and read/write operations

    // initialize the cache for testing
    stateC[0][0]=Invalid;
    stateC[0][1]=Invalid;
    stateC[1][0]=Invalid;
    stateC[1][1]=Invalid;
    
    loop {
        if(reqCache) {
            // CPU wants to read or write a word from address adrCPU.
            // An assumption of the CPU:
            // the CPU first emit reqCache and waits for ackCache. The cache should immediately
            // reply by emitting ackCache. Then the CPU sends its id, address and data as well as
            // the required operation in the following cycles, and waits until the doneCPU signal. 
            // In the meantime, the cache reads these data and tries to proceed.
            emit(ackCache);
            
            // We first determine the index and tag of the cached line and the address in the line
            adrInL  = adrCPU % LineSize;   // address of the word in the requested line
            lineAdr = adrCPU / LineSize;   // line number of the requested word
            lineId = lineAdr % NumOfLines;  // index of the addressed line in cache
            tag    = adrCPU / NumOfLines; // expected tag of the cached line

            // test whether a set contains the requested line in a valid state
            for(s=0..NumOfSets-1)
                if(tagC[lineId][s]==tag & stateC[lineId][s]!=Invalid) {
                    set = s;
                    // in principle, the above condition says that we have a cache hit;
                    // however, write accesses to lines in state shared are also misses
                    // didn't consider when readCPU & writeCPU both are true
                    hit = readCPU | stateC[lineId][s]!=Shared;
                    // if a write to shared line, the line in other caches
                    // need to be invalidated
                    if(stateC[lineId][s]==Shared)
                        invalid = true;
                    else invalid = false;
                }
            if(hit) {
                // cache hit; no memory transaction required
                // thus, we instruct CPU to immediately send data
                // and the CPU may proceed afterwards with its work
                next(doneCache) = true; 
                if(readCPU)
                    next(dataCPU) = lineC[lineId][set][adrInL];
                else if (writeCPU) {
                    if(stateC[lineId][set]==Exclusive)
                        next(stateC[lineId][set]) = Modified;
                    next(lineC[lineId][set][adrInL]) = dataCPU;
                } // make room for the case when readCPU & writeCPU both are true
            } else {
                // cache missed; the cache has to fetch the needed data
                // from other caches, or even from the main memory
                // first the cache checks if it has any free room
                // if a write on shared line, then "set" is the place,
                // else we find a set that is not Modified.
                if (!invalid){ // be aware of the write confliction!
                    if(stateC[lineId][0]!=Modified) {
                        set = 0;
                        emit(free);
                    } else {
                        for(s=1..NumOfSets-1)
                            if(stateC[lineId][s-1]==Modified & stateC[lineId][s]!=Modified) {
                                set = s;
                                emit(free);
                            }
                    }
                }
               // all sets are modified, a write back is needed
               // for making a free room for a line
                if (!free) {
                    // write the first line back
                    set = 0; // ?also set the stateC flag?
                    // if there is a need from the snoop, leave the bus for snooping
                    immediate suspend{
                        emit(reqMemBus);
                        // wait until the cache gets both the bus and the memory
                        while(!(ackMemBus & stateMem==MemFree)) {
                            pause; 
                            emit(reqMemBus); 
                        }
                    }
                    when(snoop);
                    // write back the cache line
                    writeMem = true;
                    adrMem = tagC[lineId][set] * NumOfLines; // the address of the line in memory
                    for(i=0..LineSize-1)
                       dataMem[i] =lineC[lineId][set][i];
                    
                    // release the bus
                    pause;
                }
                // try asking for other caches for the data
                // if there is a need from the snoop, leave the bus for snooping
                immediate suspend{
                    emit(reqMemBus);
                    // wait until the cache gets both the bus
                    // no need for the memory!
                    while(!ackMemBus) {
                        pause; 
                        emit(reqMemBus); 
                    }
                }
                when(snoop);
                // send the requirements
                if (readCPU)
                    BusMsgOut = ReadLine;
                else if (writeCPU)  // TODO: do we need other cache to send back the line first, then overwrite?
                    BusMsgOut = WriteLine;
                pidOut = pidIn; // broadcast its own pid to the bus
                adrMem = adrCPU; // the address passed to the cache bus
                // release the cache bus
                pause;
                // check for the feedbacks from other caches
                // if nobody replies, then access to memory is needed
                if(BusMsgIn==NoMessage) {
                    // if a write, do the write directly on the cache without writing it back to memory--write back cache
                    if (writeCPU) {
                        next(lineC[lineId][set][adrInL]) = dataCPU;
                        next(stateC[lineId][set]) = Modified;
                        next(tagC[lineId][set]) = tag;
                    }
                    else if (readCPU) {  // else apply for the read from memory {
                        // apply for the memory bus
                        // if there is a need from the snoop, leave the bus for snooping
                        immediate suspend{
                            emit(reqMemBus);
                            // wait until the cache gets both the bus and the memory
                            while(!(ackMemBus & stateMem==MemFree)) {
                                pause; 
                                emit(reqMemBus); 
                            }
                        } when(snoop);
                        readMem = readCPU;
                        adrMem = adrCPU;
                        pause;                 
                        // wait for the memory. when the memory is done, it is sure that
                        // the data is valid for this cache, as it is the only cache that occupied the memory
                        immediate await(stateMem==MemDone);
                        // update the cache line
                        next(tagC[lineId][set]) = tag; // adrMem from bus should be equal to adrCPU from CPU
                        for(i=0..LineSize-1)
                            next(lineC[lineId][set][i]) = dataMem[i];
                        // return read value to CPU
                        next(dataCPU) = dataMem[adrInL];
                        next(stateC[lineId][set]) = Exclusive;
                    } 
                } else if (BusMsgIn==FromOtherCacheClean){ // some other cache has the up-to-date data
                    if (readCPU) { // if reading, change state of cache line to shared
                        next(stateC[lineId][set]) = Shared;
                        next(dataCPU) = dataMem[adrInL];
                        for(i=0..LineSize-1){
                            next(lineC[lineId][set][i]) = dataMem[i];
                        }
                    }
                    else if(writeCPU) {
                        next(stateC[lineId][set]) = Modified;
                        for(i=0..LineSize-1){
                            if (i!=adrInL) next(lineC[lineId][set][i]) = dataMem[i]; // keep the untouched word of line
                            else next(lineC[lineId][set][i]) = dataCPU; // only update the written word
                        }
                    }
                    // update the cache line
                    next(tagC[lineId][set]) = tag; // adrMem from bus should be equal to adrCPU from CPU

                } else if (BusMsgIn==FromOtherCacheDirty) { // some other cache has the modified data
                    if (readCPU) { // if reading, change state of cache line to exclusive
                        next(stateC[lineId][set]) = Exclusive;
                        next(dataCPU) = dataMem[adrInL];
                        for(i=0..LineSize-1){
                            next(lineC[lineId][set][i]) = dataMem[i];
                        }
                    } else if (writeCPU) {
                        next(stateC[lineId][set]) = Modified;
                        for(i=0..LineSize-1){
                            if (i!=adrInL) next(lineC[lineId][set][i]) = dataMem[i]; // keep the untouched word of line
                            else next(lineC[lineId][set][i]) = dataCPU; // only update the written word
                        }
                    }
                    // update the cache line
                    next(tagC[lineId][set]) = tag; // adrMem from bus should be equal to adrCPU from CPU
                } // by now, the data are fatched from the bus,
                  // provided by either other CPU's caches, or the memory
                // finally, emit the signal telling the CPU that the data is ready.
                next(doneCache) = true;
            }
        }
        pause;
    } // end of loop
    ||
    loop { // snooping
        if ((BusMsgOut == ReadLine | BusMsgOut == WriteLine) & (pidIn != pidOut)) { // some cache needs to read a line
           // We first determine the index and tag of the cached line and the address in the line
           // adrInLSNP  = adrCache % LineSize;     // address of the word in the requested line
           lineAdrSNP = adrMem / LineSize;          // line number of the requested word
           lineIdSNP  = lineAdrSNP % NumOfLines;    // index of the addressed line in cache
           tagSNP     = adrMem / NumOfLines;        // expected tag of the cached line
           // check if local cache has the line
            for(s=0..NumOfSets-1)
                // for modified / shared / exclusive, the matched line should be send back
                if(tagC[lineIdSNP][s]==tagSNP & stateC[lineIdSNP][s]!=Invalid) {
                    setSNP = s;
                    hitSNP = true;
            }
            // if had the line, try transfer it to the cache bus
            if(hitSNP) {
                if(BusMsgOut == ReadLine) {
                    pause;
                    emit(snoop); // block all the memory transactions on bus
                    // directly write back the shared data
                    for(i=0..LineSize-1)
                        dataMem[i] = lineC[lineIdSNP][setSNP][i];
                    // change the state of line accordingly
                    if(stateC[lineIdSNP][setSNP]==Shared) {
                        BusMsgIn = FromOtherCacheClean;
                    } else if(stateC[lineIdSNP][setSNP]==Modified) {
                        BusMsgIn = FromOtherCacheDirty;
                        next(stateC[lineIdSNP][setSNP]) = Invalid;
                    } else if(stateC[lineIdSNP][setSNP]==Exclusive) {
                        BusMsgIn = FromOtherCacheClean;
                        next(stateC[lineIdSNP][setSNP]) = Shared;
                    }
                } else if(BusMsgOut == WriteLine) {
                    pause;
                    if (stateC[lineIdSNP][setSNP] != Invalid) {
                        // shoud we send a BusMsgIn ?
                        // if we send a message, then the memory is not written.
                        emit(snoop);
                        for(i=0..LineSize-1) // transfer the data onto the bus
                            dataMem[i] = lineC[lineIdSNP][setSNP][i];
                        BusMsgIn = FromOtherCacheDirty; // this could also be FromOtherCacheClean, since the reactions are the same
                        next(stateC[lineIdSNP][setSNP]) = Invalid;
                    }
                }// end of ReadLine / WriteLine test
            }// end of hitSNP test
        } //end of BusMsgOut test
        pause;
    } // end of snooping loop

    
}