// ************************************************************************** // // // // eses eses // // eses eses // // eses eseses esesese eses Embedded Systems Group // // ese ese ese ese ese // // ese eseseses eseseses ese Department of Computer Science // // eses eses ese eses // // eses eseses eseseses eses University of Kaiserslautern // // eses eses // // // // ************************************************************************** // // The module below implements a one-dimensional systolic array for computing // // the edit distance of a list of pairs of sequences a[0..M-1] and b[0..N-1]. // // To this end, the array elements compute the values of the diagonals of the // // corresponding distance matrix d (where d[i][j] is the distance between // // a[0..i-1] and b[0..j-1] as explained in module EditDistance). Since the // // computation of the values of a diagonal requires the values of the previous// // two diagonals, we have to forward the values of each diagonal to its next // // two successor diagonals. To this end, we have added an array e in the two- // // dimensional systolic array EditDistanceSystolic2D. In the one-dimensional // // case below, the array elements alternate with two behaviors: either the // // previously computed value is simply stored and the incoming values of the // // sequence elements are piped through without modifying the internally stored// // value or these values (the internal store and the two incoming values of // // the stream) are used to compute a new diagonal element. // // Sequence a is piped through from right to left while sequence b is piped // // through from left to right, only every other cycle can be used to pipe in a// // value. The picture below shows one computation of the edit distance for M=4// // and N=5: // // // // t==0: [ ] [ ] [ ] [ ] [ ] [d00] [ ] [ ] [ ] [ ] // // t==1: [ ] [ ] [ ] [ ] [d10] [e00] [d01] [ ] [ ] [ ] // // t==2: [ ] [ ] [ ] [d20] [e10] [d11] [e01] [d02] [ ] [ ] // // t==3: [ ] [ ] [d30] [e20] [d21] [e11] [d12] [e02] [d03] [ ] // // t==4: [ ] [ ] [ ] [d31] [e21] [d22] [e12] [d13] [e03] [d04] // // t==5: [ ] [ ] [ ] [ ] [d32] [e22] [d23] [e13] [d14] [ ] // // t==6: [ ] [ ] [ ] [ ] [ ] [d33] [e23] [d24] [ ] [ ] // // t==7: [ ] [ ] [ ] [ ] [ ] [ ] [d34] [ ] [ ] [ ] // // t==8: [ ] [ ] [ ] [ ] [ ] [ ] [ ] [ ] [ ] [ ] // // // // The position of the sequence elements at even and odd points of time are // // therefore as follows: // // // // t=2i: // // a[i] a[i-1] ... a[0] a[0] ... b[0] b[0] ... b[i-1] b[i] // // [] [] ... [] [] ... [] [] ... [] [] // // 0 1 2i-1 2i L-2i-1 L-2i L-2 L-1 // // // // t=2i+1: // // a[i] a[i] ... a[0] a[0] ... b[0] b[0] ... b[i] b[i] // // [] [] ... [] [] ... [] [] ... [] [] // // 0 1 2i 2i+1 L-2i-2 L-2i-1 L-2 L-1 // // // // To determine the length L of the systolic array, we have to make sure that // // L is large enough so that each a[i] meets every element of b[j] and vice // // versa. We moreover demand that L must be an even number so that the two // // occurrences of a[i] meet both occurrences of every b[j]. // // If a[0] leaves the array, it must meet at least at that point of time the// // last element b[N-1] since otherwise b[N-1] is piped in too late, i.e., // // after a[0] has already left the array. This means that a[0] and b[N-1] are // // both at elements L-2 and L-1 which can only happen at an odd point of time,// // i.e.: // // // // t=2i+1 --> 2i+1 = L-1 and i=N-1, thus 2N-1=L-1, and therefore L=2N // // // // Analogously, if b[0] leaves the array, it must meet at least at that point // // of time the last element a[M-1] since otherwise a[M-1] is too late, i.e., // // it enters the array after b[0] has left it. This means that b[0] and a[M-1]// // are both at elements 0 and 1 which can only happen at an odd point of time,// // i.e.: // // // // t=2i+1 --> L-2i-2 = 0 and i=M-1, thus L=2M // // // // To satisfy both conditions, we conclude L>=2max(M,N) and therefore define // // the length L of the array as L:=2max(M,N). // // Using this length L, we can next determine the point of time t0 where the // // first elements a[0] and b[0] meet in the array: // // // // t0=2i --> 2i = L-2i, i.e., 4i = L // // t0=2i+1 --> 2i = L-2i-2, i.e., 4i = L-2 // // // // Thus, if L=4k, then the meeting point is t0=2k, otherwise L=4k+2 must hold // // (since L is even) and then t0=2k+1 follows. Thus, in both cases, we have // // t0=L/2=max(M,N) and a[0] and b[0] meet at this point of time at elements // // with index t0-1 and t0. // // At time t0, the upper left corner element d[1][1] of the diagonal matrix // // is computed, but assigned with a delayed assignment. Hence, the values of // // the first diagonal are available at time t0+1, and in general, diagonal i // // is available at time t0+i. Since there are M+N-1 diagonals, the final value// // is obtained at time t0+M+N-1 = max(M,N)+M+N-1. // // To summarize, the following facts of a computation whose first elements // // entered the array at time t_off (so that both occurrences entered at time // // t_off+1) can be easily derived: // // (1) The elements a[0] and b[0] match at time MAX+t_off. // // (2) Both occurrences of the last elements of the computation entered at // // time 2*MAX-1+t_off. // // (2) The first elements of the next computation entered at 2*MAX+t_off. // // (3) The values of diagonal i=1,…,M+N-1 are available in array x at the // // times MAX+i+t_off. // // (4) The final edit distance is available at time MAX+M+N-1+t_off. // // (5) The elements a[0] and b[0] of the next computation match at time // // 3*MAX+t_off due to (1). Thus, there is an off-time where no diagonal// // elements are in x of length 3*MAX+t_off-(MAX+MAX+MIN-1+t_off), i.e. // // MAX-MIN+1 (note that M+N=MAX+MIN). // // (6) Computations are repeated with period 2*MAX, i.e. the length of the // // array. // // // // Finally, we have to determine the index of the array element where the // // result can be obtained. To this end, note that the last element of the // // shorter and longer sequences entered the array at time 2*MIN-1+t_off and // // 2*MAX-1+t_off. Since the final result is available at time MAX+M+N-1+t_off,// // i.e., 2*MAX+MIN-1+t_off, the last element of the longer sequence moves // // further (2*MAX+MIN-1+t_off)-(2*MAX-1+t_off) = MIN steps while the last one // // of the shorter one moves (2*MAX+MIN-1+t_off)-(2*MIN-1+t_off) = 2*MAX-MIN // // steps. Thus, the index of the final result is: // // (1) if M<=N holds, then 2*MAX-MIN-1 or 2*MAX-MIN (depending on m[i]) // // (2) if M>N holds, then MIN-1 or MIN (depending on m[i]) // // // // For example, computing the edit distance of a = 1334 and b = 20345 followed// // by computing the edit distance of a = 1234 and b = 32345 starting at time // // t_off=2 yields the following computations of the diagonals (the distance // // matrices are shown on the right: // // // // step 7: match of a[0] and b[0] // // step 8: x : - - - - - 1 - - - - 0 | 1 2 3 4 5 // // step 9: x : - - - - 2 1 2 - - - --+---------- // // step 10: x : - - - 3 2 2 2 3 - - 1 | 1 2 3 4 5 // // step 11: x : - - 4 3 3 2 2 3 4 - 2 | 2 2 2 3 4 // // step 12: x : - - - 4 3 2 2 3 4 5 3 | 3 3 2 3 4 // // step 13: x : - - - - 3 2 3 3 4 - 4 | 4 4 3 2 3 // // step 14: x : - - - - - 2 3 4 - - // // step 15: x : - - - - - - 3 - - - // // step 16: off-time // // step 17: off-time and match of a[0] and b[0] // // step 18: x : - - - - - 1 - - - - 0 | 1 2 3 4 5 // // step 19: x : - - - - 2 1 2 - - - --+---------- // // step 20: x : - - - 2 2 1 2 3 - - 1 | 1 2 3 4 5 // // step 21: x : - - 3 2 2 1 2 3 4 - 2 | 2 1 2 3 4 // // step 22: x : - - - 3 2 1 2 3 4 5 3 | 2 2 1 2 3 // // step 23: x : - - - - 2 1 2 3 4 - 4 | 3 3 2 1 2 // // step 24: x : - - - - - 1 2 3 - - // // step 25: x : - - - - - - 2 - - - // // // // ************************************************************************** // macro M = 4; // size of first sequence macro N = 5; // size of second sequence macro C = 8; // sequence elements have type nat{C} macro P = M+N+1; // number of pairs of input sequences macro max(x,y) = (x<y ? y : x); macro min(x,y) = (x<y ? x : y); macro min3(x,y,z) = min(x,min(y,z)); macro MIN = min(M,N); macro MAX = max(M,N); macro L = 2*MAX; module EditDistance1D(event ?s_init,?t_init,nat{C} ?s_elem,?t_elem,!dist,event valid) { [L]nat{C} a_elem,b_elem; // current element of the sequence [L]bool a_init,b_init; // holds for the head of a sequence [L]nat a_len,b_len; // counting the number of elements passed by [L]nat x; // current value of the edit distance [L]bool m; // modus, i.e., whether eij or dij is stored in x nat steps; // counting the steps to determine end of computation for(i=0..L-1) // modus will always be alternating m[i] = (i%2)==0; loop { pause; for(i=0..L-1) { let(aLen = (a_init[i] ? 1 : 1+(a_len[i]+1)/2)) // # of elements passed in a let(bLen = (b_init[i] ? 1 : 1+(b_len[i]+1)/2)) // # of elements passed in b let(diff = (a_elem[i]==b_elem[i] ? 0 : 1)) // whether a_elem[i]==b_elem[i] let(d_diag = (a_init[i] ? bLen-1 : (b_init[i] ? aLen-1 : x[i])) + diff) let(d_left = (b_init[i] ? aLen : (i==0 ? MAX : x[i-1]+1))) let(d_right = (a_init[i] ? bLen : (i==L-1 ? MAX : x[i+1]+1))) { next(x[i]) = (m[i] ? x[i] : min3(d_left,d_diag,d_right)); next(m[i]) = not m[i]; next(a_elem[i]) = (i==0 ? s_elem : a_elem[i-1]); next(b_elem[i]) = (i==L-1 ? t_elem : b_elem[i+1]); next(a_init[i]) = (i==0 ? s_init : a_init[i-1]); next(b_init[i]) = (i==L-1 ? t_init : b_init[i+1]); next(a_len[i]) = (a_init[i] ? 1 : a_len[i]+1); next(b_len[i]) = (b_init[i] ? 1 : b_len[i]+1); } } // count the computation steps to determine whether result is valid // (steps corresponds to time with offset 1) if(steps==2*MAX+MIN) emit(valid); next(steps) = (valid ? MIN+1 : steps+1); // read the edit distance (only reasonable if valid holds) let(indexL = (M<=N ? 2*MAX-MIN-1 : MIN-1)) let(offset = (m[indexL] ? 0 : 1)) dist = x[indexL+offset]; } } drivenby { [M+N+1][M]nat{C} s_seq; [M+N+1][N]nat{C} t_seq; // input sequence of sequences for s s_seq = [ [1,3,3,4], [1,2,3,4], [1,3,2,4], [1,3,5,4], [5,3,3,4], [5,2,3,3], [4,1,2,3], [1,3,4,5], [1,1,1,1], [3,3,3,3] ]; // input sequence of sequences for t t_seq = [ [2,0,3,4,5], [3,2,3,4,5], [3,5,5,4,5], [5,3,6,4,5], [5,5,3,3,4], [4,2,3,3,3], [1,2,3,1,6], [1,1,1,1,1], [3,2,3,1,3], [0,2,2,5,6] ]; pause; // pipe in the input sequences s_seq for(i=0..P-1) { // for all input pairs s_init = true; t_init = true; next(s_init) = true; next(t_init) = true; for(j=0..MAX-1) { s_elem = (j<M ? s_seq[i][j] : 0); t_elem = (j<N ? t_seq[i][j] : 0); pause; pause; } } // wait until sequences were piped through for(i=1..MIN) pause; }