1 2 module fast_dct; 3 4 // borrowed from microsoft test-suite (mpeg2dec). 5 6 /**********************************************************/ 7 /* inverse two dimensional DCT, Chen-Wang algorithm */ 8 /* (cf. IEEE ASSP-32, pp. 803-816, Aug. 1984) */ 9 /* 32-bit integer arithmetic (8 bit coefficients) */ 10 /* 11 mults, 29 adds per DCT */ 11 /* sE, 18.8.91 */ 12 /**********************************************************/ 13 /* coefficients extended to 12 bit for IEEE1180-1990 */ 14 /* compliance sE, 2.1.94 */ 15 /**********************************************************/ 16 17 /* this code assumes >> to be a two's-complement arithmetic */ 18 /* right shift: (-2)>>1 == -1 , (-3)>>1 == -2 */ 19 20 static assert((-2)>>1 == -1); 21 static assert((-3)>>1 == -2); 22 23 import std.algorithm; 24 import math; 25 import des.ts; 26 27 const W1 = 2841; /* 2048*sqrt(2)*cos(1*pi/16) */ 28 const W2 = 2676; /* 2048*sqrt(2)*cos(2*pi/16) */ 29 const W3 = 2408; /* 2048*sqrt(2)*cos(3*pi/16) */ 30 const W5 = 1609; /* 2048*sqrt(2)*cos(5*pi/16) */ 31 const W6 = 1108; /* 2048*sqrt(2)*cos(6*pi/16) */ 32 const W7 = 565; /* 2048*sqrt(2)*cos(7*pi/16) */ 33 34 /* private data */ 35 private shared short[1024] iclip; /* clipping table */ 36 private shared short* iclp; 37 38 /* row (horizontal) IDCT 39 * 40 * 7 pi 1 41 * dst[k] = sum c[l] * src[l] * cos( -- * ( k + - ) * l ) 42 * l=0 8 2 43 * 44 * where: c[0] = 128 45 * c[1..7] = 128*sqrt(2) 46 */ 47 48 private void idctrow(short[] blk) 49 { 50 int x0, x1, x2, x3, x4, x5, x6, x7, x8; 51 52 /* shortcut */ 53 if (!((x1 = blk[4]<<11) | (x2 = blk[6]) | (x3 = blk[2]) | 54 (x4 = blk[1]) | (x5 = blk[7]) | (x6 = blk[5]) | (x7 = blk[3]))) 55 { 56 blk[0]=blk[1]=blk[2]=blk[3]=blk[4]=blk[5]=blk[6]=blk[7]=cast(short)(blk[0]<<3); 57 return; 58 } 59 60 x0 = (blk[0]<<11) + 128; /* for proper rounding in the fourth stage */ 61 62 /* first stage */ 63 x8 = W7*(x4+x5); 64 x4 = x8 + (W1-W7)*x4; 65 x5 = x8 - (W1+W7)*x5; 66 x8 = W3*(x6+x7); 67 x6 = x8 - (W3-W5)*x6; 68 x7 = x8 - (W3+W5)*x7; 69 70 /* second stage */ 71 x8 = x0 + x1; 72 x0 -= x1; 73 x1 = W6*(x3+x2); 74 x2 = x1 - (W2+W6)*x2; 75 x3 = x1 + (W2-W6)*x3; 76 x1 = x4 + x6; 77 x4 -= x6; 78 x6 = x5 + x7; 79 x5 -= x7; 80 81 /* third stage */ 82 x7 = x8 + x3; 83 x8 -= x3; 84 x3 = x0 + x2; 85 x0 -= x2; 86 x2 = (181*(x4+x5)+128)>>8; 87 x4 = (181*(x4-x5)+128)>>8; 88 89 /* fourth stage */ 90 blk[0] = cast(short) ((x7+x1)>>8); 91 blk[1] = cast(short) ((x3+x2)>>8); 92 blk[2] = cast(short) ((x0+x4)>>8); 93 blk[3] = cast(short) ((x8+x6)>>8); 94 blk[4] = cast(short) ((x8-x6)>>8); 95 blk[5] = cast(short) ((x0-x4)>>8); 96 blk[6] = cast(short) ((x3-x2)>>8); 97 blk[7] = cast(short) ((x7-x1)>>8); 98 } 99 100 /* column (vertical) IDCT 101 * 102 * 7 pi 1 103 * dst[8*k] = sum c[l] * src[8*l] * cos( -- * ( k + - ) * l ) 104 * l=0 8 2 105 * 106 * where: c[0] = 1/1024 107 * c[1..7] = (1/1024)*sqrt(2) 108 */ 109 static void idctcol(short [] blk) 110 { 111 int x0, x1, x2, x3, x4, x5, x6, x7, x8; 112 113 /* shortcut */ 114 if (!((x1 = (blk[8*4]<<8)) | (x2 = blk[8*6]) | (x3 = blk[8*2]) | 115 (x4 = blk[8*1]) | (x5 = blk[8*7]) | (x6 = blk[8*5]) | (x7 = blk[8*3]))) 116 { 117 blk[8*0]=blk[8*1]=blk[8*2]=blk[8*3]=blk[8*4]=blk[8*5]=blk[8*6]=blk[8*7]= 118 iclp[(blk[8*0]+32)>>6]; 119 return; 120 } 121 122 x0 = (blk[8*0]<<8) + 8192; 123 124 /* first stage */ 125 x8 = W7*(x4+x5) + 4; 126 x4 = (x8+(W1-W7)*x4)>>3; 127 x5 = (x8-(W1+W7)*x5)>>3; 128 x8 = W3*(x6+x7) + 4; 129 x6 = (x8-(W3-W5)*x6)>>3; 130 x7 = (x8-(W3+W5)*x7)>>3; 131 132 /* second stage */ 133 x8 = x0 + x1; 134 x0 -= x1; 135 x1 = W6*(x3+x2) + 4; 136 x2 = (x1-(W2+W6)*x2)>>3; 137 x3 = (x1+(W2-W6)*x3)>>3; 138 x1 = x4 + x6; 139 x4 -= x6; 140 x6 = x5 + x7; 141 x5 -= x7; 142 143 /* third stage */ 144 x7 = x8 + x3; 145 x8 -= x3; 146 x3 = x0 + x2; 147 x0 -= x2; 148 x2 = (181*(x4+x5)+128)>>8; 149 x4 = (181*(x4-x5)+128)>>8; 150 151 /* fourth stage */ 152 blk[8*0] = iclp[(x7+x1)>>14]; 153 blk[8*1] = iclp[(x3+x2)>>14]; 154 blk[8*2] = iclp[(x0+x4)>>14]; 155 blk[8*3] = iclp[(x8+x6)>>14]; 156 blk[8*4] = iclp[(x8-x6)>>14]; 157 blk[8*5] = iclp[(x0-x4)>>14]; 158 blk[8*6] = iclp[(x3-x2)>>14]; 159 blk[8*7] = iclp[(x7-x1)>>14]; 160 } 161 162 /* two dimensional inverse discrete cosine transform */ 163 void Fast_IDCT(ref short[64] block) 164 { 165 int i; 166 167 for (i=0; i<8; i++) 168 idctrow(block[8*i..8*(i+1)]); 169 170 for (i=0; i<8; i++) 171 idctcol(block[i..$]); 172 } 173 174 void fast_idct_annexA(ref short[64] block) 175 { 176 Fast_IDCT(block); 177 foreach(i, v; block) 178 { 179 block[i] = saturate!(short,-256,255)(v); 180 } 181 } 182 183 private void Initialize_Fast_IDCT() 184 { 185 int i; 186 187 iclp = iclip.ptr + 512; 188 for (i= -512; i<512; i++) 189 iclp[i] = cast(short)((i<-256) ? -256 : ((i>255) ? 255 : i)); 190 } 191 192 shared static this() 193 { 194 Initialize_Fast_IDCT(); 195 } 196 197 unittest // check DC 2D 198 { 199 const N2 = 64; 200 short[N2] block; 201 auto reference = new real[N2]; 202 203 block[0] = 64; 204 fill(reference, 8.0); 205 auto orig = block; 206 207 Fast_IDCT(block); 208 209 auto err = norm(block, reference); 210 assertEqApprox(err, 0, 10e-5); 211 assertEq(block, reference); 212 } 213 214 unittest // check Parseval's identity 215 { 216 import std.random; 217 218 foreach(i; 0..100) 219 { 220 auto coefs = uniformDistribution(64); 221 short[64] block; 222 foreach(j;0..64) 223 block[j] = cast(short) coefs[j]; 224 auto orig = block; 225 226 Fast_IDCT(block); 227 228 assertEqApprox(norm(block), norm(orig), 10e-5); 229 } 230 }