// ************************************************************************** //
//                                                                            //
//    eses                   eses                                             //
//   eses                     eses                                            //
//  eses    eseses  esesese    eses   Embedded Systems Group                  //
//  ese    ese  ese ese         ese                                           //
//  ese    eseseses eseseses    ese   Department of Computer Science          //
//  eses   eses          ese   eses                                           //
//   eses   eseses  eseseses  eses    University of Kaiserslautern            //
//    eses                   eses                                             //
//                                                                            //
// ************************************************************************** //
// Given an array a[0..N-1], the algorithm below determines its maximum using //
// O(N^1.5) processors in constant time. Thus, it improves Maximum_O1 in that //
// less processors are required while still requiring only O(1) time.         //
// It is possible to generate another algorithm that applies the algorithm    //
// below analogously to the use of Maximum_O1 here which then yields another  //
// algorithm to compute the maximum in time O(1) using only O(N^1.25) proc.   //
// This can be repeated as often as desired to generate algorithms that run   //
// in time O(1) requiring only O(N^1.5), O(N^1.25), O(N^1.125), ... processors//
// so that we can speak of an algorithm that requires only O(N^{1+eps}) many  //
// processors to compute the maximum in time O(1).                            //
// ************************************************************************** //

macro M = 8;
macro N = M*M;
macro R = 32;

macro isArrayMax(a,x)   = forall(i=0..N-1) (a[i] <= x);
macro isArrayMin(a,x)   = forall(i=0..N-1) (a[i] >= x);
macro contains(a,x)     = exists(i=0..N-1) (a[i] == x);


module Maximum_O1Opt([N]int{R} ?a, int{R} max) {
    event [N]bool m0;
    event [M]bool m1;
    event [M]int{R} a1;

    // apply algorithm Maximum_O1 to each subsequence of size M in parallel; 
    // this requires time O(1) using M^3 processors
    for(k=0..M-1) {
        // compute maximum of block a[k*M..(k+1)*M-1]
        // set m0[i] iff some a[j] is greater than a[i]
        for(i=0..M-1)
            for(j=0..M-1)
                if(a[k*M+i] < a[k*M+j])
                    m0[k*M+i] = true;
        // thus, those i where m[i] is false are the indices of the maximum a[i]
        // note: there might be several such indices (containing the same value)
        for(i=0..M-1)
            if(!m0[k*M+i])
                a1[k] = a[k*M+i];
    }
    // at this stage, we determined the maxima m1[0..M-1] of all subsequences of
    // size M; thus, we finally apply algorithm Maximum_O1 once more to compute
    // the maximum of this array which is also the maximum of array a
    // set m1[i] iff some m0[j] is greater than m0[i]
    for(i=0..M-1)
        for(j=0..M-1)
            if(a1[i] < a1[j])
                m1[i] = true;
    // thus, those i where m1[i] is false are the indices of the maximum a[i]
    // again, there might be several such indices (containing the same value)
    for(i=0..M-1)
        if(!m1[i])
            max = a1[i];
}
satisfies {
    correct : assert(contains(a,max) & isArrayMax(a,max));
}
drivenby {
    for(i=0..N/2-1)
        a[i] = i;
    for(i=N/2..N-1)
        a[i] = N-1-i;
    assert(contains(a,max) & isArrayMax(a,max));
}