const int t0 = OP(2841 * blk[1 * step] + 565 * blk[7 * step]); \
const int t1 = OP( 565 * blk[1 * step] - 2841 * blk[7 * step]); \
const int t2 = OP(1609 * blk[5 * step] + 2408 * blk[3 * step]); \
const int t3 = OP(2408 * blk[5 * step] - 1609 * blk[3 * step]); \
const int t4 = OP(1108 * blk[2 * step] - 2676 * blk[6 * step]); \
const int t5 = OP(2676 * blk[2 * step] + 1108 * blk[6 * step]); \
const int t6 = ((blk[0 * step] + blk[4 * step]) * (1 << dshift)) + bias; \
const int t7 = ((blk[0 * step] - blk[4 * step]) * (1 << dshift)) + bias; \
const int t8 = t0 + t2; \
const int t9 = t0 - t2; \
const int tA = (int)(181U * (t9 + (t1 - t3)) + 0x80) >> 8; \
const int tB = (int)(181U * (t9 - (t1 - t3)) + 0x80) >> 8; \
const int tC = t1 + t3; \
\
blk[0 * step] = (t6 + t5 + t8) >> shift; \
blk[1 * step] = (t7 + t4 + tA) >> shift; \
blk[2 * step] = (t7 - t4 + tB) >> shift; \
blk[3 * step] = (t6 - t5 + tC) >> shift; \
blk[4 * step] = (t6 - t5 - tC) >> shift; \
blk[5 * step] = (t7 - t4 - tB) >> shift; \
blk[6 * step] = (t7 + t4 - tA) >> shift; \
blk[7 * step] = (t6 + t5 - t8) >> shift; \