00001
00016 #include "global.h"
00017 #include "transform.h"
00018
00019 #if !defined(USEMMX) // MMX, SSE, SSE2 intrinsic support
00020
00021
00022 void forward4x4(int **block, int **tblock, int pos_y, int pos_x)
00023 {
00024 int i, ii;
00025 int tmp[16];
00026 int *pTmp = tmp, *pblock;
00027 int p0,p1,p2,p3;
00028 int t0,t1,t2,t3;
00029
00030
00031 for (i=pos_y; i < pos_y + BLOCK_SIZE; i++)
00032 {
00033 pblock = &block[i][pos_x];
00034 p0 = *(pblock++);
00035 p1 = *(pblock++);
00036 p2 = *(pblock++);
00037 p3 = *(pblock );
00038
00039 t0 = p0 + p3;
00040 t1 = p1 + p2;
00041 t2 = p1 - p2;
00042 t3 = p0 - p3;
00043
00044 *(pTmp++) = t0 + t1;
00045 *(pTmp++) = (t3 << 1) + t2;
00046 *(pTmp++) = t0 - t1;
00047 *(pTmp++) = t3 - (t2 << 1);
00048 }
00049
00050
00051 for (i=0; i < BLOCK_SIZE; i++)
00052 {
00053 pTmp = tmp + i;
00054 p0 = *pTmp;
00055 p1 = *(pTmp += BLOCK_SIZE);
00056 p2 = *(pTmp += BLOCK_SIZE);
00057 p3 = *(pTmp += BLOCK_SIZE);
00058
00059 t0 = p0 + p3;
00060 t1 = p1 + p2;
00061 t2 = p1 - p2;
00062 t3 = p0 - p3;
00063
00064 ii = pos_x + i;
00065 tblock[pos_y ][ii] = t0 + t1;
00066 tblock[pos_y + 1][ii] = t2 + (t3 << 1);
00067 tblock[pos_y + 2][ii] = t0 - t1;
00068 tblock[pos_y + 3][ii] = t3 - (t2 << 1);
00069 }
00070 }
00071
00072 void inverse4x4(int **tblock, int **block, int pos_y, int pos_x)
00073 {
00074 int i, ii;
00075 int tmp[16];
00076 int *pTmp = tmp, *pblock;
00077 int p0,p1,p2,p3;
00078 int t0,t1,t2,t3;
00079
00080
00081 for (i = pos_y; i < pos_y + BLOCK_SIZE; i++)
00082 {
00083 pblock = &tblock[i][pos_x];
00084 t0 = *(pblock++);
00085 t1 = *(pblock++);
00086 t2 = *(pblock++);
00087 t3 = *(pblock );
00088
00089 p0 = t0 + t2;
00090 p1 = t0 - t2;
00091 p2 = (t1 >> 1) - t3;
00092 p3 = t1 + (t3 >> 1);
00093
00094 *(pTmp++) = p0 + p3;
00095 *(pTmp++) = p1 + p2;
00096 *(pTmp++) = p1 - p2;
00097 *(pTmp++) = p0 - p3;
00098 }
00099
00100
00101 for (i = 0; i < BLOCK_SIZE; i++)
00102 {
00103 pTmp = tmp + i;
00104 t0 = *pTmp;
00105 t1 = *(pTmp += BLOCK_SIZE);
00106 t2 = *(pTmp += BLOCK_SIZE);
00107 t3 = *(pTmp += BLOCK_SIZE);
00108
00109 p0 = t0 + t2;
00110 p1 = t0 - t2;
00111 p2 =(t1 >> 1) - t3;
00112 p3 = t1 + (t3 >> 1);
00113
00114 ii = i + pos_x;
00115 block[pos_y ][ii] = p0 + p3;
00116 block[pos_y + 1][ii] = p1 + p2;
00117 block[pos_y + 2][ii] = p1 - p2;
00118 block[pos_y + 3][ii] = p0 - p3;
00119 }
00120 }
00121
00122
00123 void hadamard4x4(int **block, int **tblock)
00124 {
00125 int i;
00126 int tmp[16];
00127 int *pTmp = tmp, *pblock;
00128 int p0,p1,p2,p3;
00129 int t0,t1,t2,t3;
00130
00131
00132 for (i = 0; i < BLOCK_SIZE; i++)
00133 {
00134 pblock = block[i];
00135 p0 = *(pblock++);
00136 p1 = *(pblock++);
00137 p2 = *(pblock++);
00138 p3 = *(pblock );
00139
00140 t0 = p0 + p3;
00141 t1 = p1 + p2;
00142 t2 = p1 - p2;
00143 t3 = p0 - p3;
00144
00145 *(pTmp++) = t0 + t1;
00146 *(pTmp++) = t3 + t2;
00147 *(pTmp++) = t0 - t1;
00148 *(pTmp++) = t3 - t2;
00149 }
00150
00151
00152 for (i = 0; i < BLOCK_SIZE; i++)
00153 {
00154 pTmp = tmp + i;
00155 p0 = *pTmp;
00156 p1 = *(pTmp += BLOCK_SIZE);
00157 p2 = *(pTmp += BLOCK_SIZE);
00158 p3 = *(pTmp += BLOCK_SIZE);
00159
00160 t0 = p0 + p3;
00161 t1 = p1 + p2;
00162 t2 = p1 - p2;
00163 t3 = p0 - p3;
00164
00165 tblock[0][i] = (t0 + t1) >> 1;
00166 tblock[1][i] = (t2 + t3) >> 1;
00167 tblock[2][i] = (t0 - t1) >> 1;
00168 tblock[3][i] = (t3 - t2) >> 1;
00169 }
00170 }
00171
00172
00173 void ihadamard4x4(int **tblock, int **block)
00174 {
00175 int i;
00176 int tmp[16];
00177 int *pTmp = tmp, *pblock;
00178 int p0,p1,p2,p3;
00179 int t0,t1,t2,t3;
00180
00181
00182 for (i = 0; i < BLOCK_SIZE; i++)
00183 {
00184 pblock = tblock[i];
00185 t0 = *(pblock++);
00186 t1 = *(pblock++);
00187 t2 = *(pblock++);
00188 t3 = *(pblock );
00189
00190 p0 = t0 + t2;
00191 p1 = t0 - t2;
00192 p2 = t1 - t3;
00193 p3 = t1 + t3;
00194
00195 *(pTmp++) = p0 + p3;
00196 *(pTmp++) = p1 + p2;
00197 *(pTmp++) = p1 - p2;
00198 *(pTmp++) = p0 - p3;
00199 }
00200
00201
00202 for (i = 0; i < BLOCK_SIZE; i++)
00203 {
00204 pTmp = tmp + i;
00205 t0 = *pTmp;
00206 t1 = *(pTmp += BLOCK_SIZE);
00207 t2 = *(pTmp += BLOCK_SIZE);
00208 t3 = *(pTmp += BLOCK_SIZE);
00209
00210 p0 = t0 + t2;
00211 p1 = t0 - t2;
00212 p2 = t1 - t3;
00213 p3 = t1 + t3;
00214
00215 block[0][i] = p0 + p3;
00216 block[1][i] = p1 + p2;
00217 block[2][i] = p1 - p2;
00218 block[3][i] = p0 - p3;
00219 }
00220 }
00221
/**
 * Forward Hadamard transform of a 2-row by 4-column array.
 *
 * Step 1 combines the two input rows element-wise: the sums
 * block[0][k] + block[1][k] and the differences block[0][k] - block[1][k]
 * (k = 0..3) are staged in a local buffer.
 * Step 2 runs a 4-point sum/difference butterfly over the staged sums and
 * over the staged differences; the results go to tblock[0][0..3] and
 * tblock[1][0..3] respectively. No scaling is applied.
 *
 * All input is read before the first output is written, so `block` and
 * `tblock` may be the same array.
 *
 * \param block   source, rows 0..1 with 4 entries each (read only)
 * \param tblock  destination, rows 0..1 with 4 entries each
 */
void hadamard4x2(int **block, int **tblock)
{
  int stage[8];
  int k, r;

  /* Two-point butterfly across the rows: sums first, then differences. */
  for (k = 0; k < 4; k++)
  {
    stage[k] = block[0][k] + block[1][k];
  }
  for (k = 0; k < 4; k++)
  {
    stage[4 + k] = block[0][k] - block[1][k];
  }

  /* Four-point butterfly along each staged row. */
  for (r = 0; r < 2; r++)
  {
    const int *src = stage + 4 * r;
    const int outer_sum  = src[0] + src[3];
    const int inner_sum  = src[1] + src[2];
    const int inner_diff = src[1] - src[2];
    const int outer_diff = src[0] - src[3];

    tblock[r][0] = outer_sum  + inner_sum;
    tblock[r][1] = outer_diff + inner_diff;
    tblock[r][2] = outer_sum  - inner_sum;
    tblock[r][3] = outer_diff - inner_diff;
  }
}
00261
/**
 * Inverse Hadamard transform of a 2-row by 4-column coefficient array,
 * written out transposed.
 *
 * Step 1 combines the two input rows element-wise: the sums
 * tblock[0][k] + tblock[1][k] and the differences
 * tblock[0][k] - tblock[1][k] (k = 0..3) are staged in a local buffer.
 * Step 2 runs a 4-point inverse butterfly over the staged sums and over
 * the staged differences. The four results for the sums are stored down
 * column 0 of `block` (block[0..3][0]); those for the differences go
 * down column 1 (block[0..3][1]). The output is therefore 4 rows by
 * 2 columns, i.e. transposed relative to the input. No scaling is
 * applied.
 *
 * \param tblock  source, rows 0..1 with 4 entries each (read only)
 * \param block   destination, rows 0..3 with at least 2 entries each
 */
void ihadamard4x2(int **tblock, int **block)
{
  int stage[8];
  int k, c;

  /* Two-point butterfly across the rows: sums first, then differences. */
  for (k = 0; k < 4; k++)
  {
    stage[k] = tblock[0][k] + tblock[1][k];
  }
  for (k = 0; k < 4; k++)
  {
    stage[4 + k] = tblock[0][k] - tblock[1][k];
  }

  /* Four-point inverse butterfly; each staged row becomes an output column. */
  for (c = 0; c < 2; c++)
  {
    const int *src = stage + 4 * c;
    const int even_sum  = src[0] + src[2];
    const int even_diff = src[0] - src[2];
    const int odd_diff  = src[1] - src[3];
    const int odd_sum   = src[1] + src[3];

    block[0][c] = even_sum  + odd_sum;
    block[1][c] = even_diff + odd_diff;
    block[2][c] = even_diff - odd_diff;
    block[3][c] = even_sum  - odd_sum;
  }
}
00302
00303
/**
 * Forward 2x2 Hadamard transform of four samples spaced four apart.
 *
 * The inputs are block[0][0], block[0][4], block[4][0] and block[4][4].
 * NOTE(review): presumably these are the DC terms of four neighbouring
 * 4x4 blocks -- confirm against the caller.
 *
 * With a = block[0][0], b = block[0][4], c = block[4][0], d = block[4][4]
 * the outputs are
 *     tblock[0] = a + b + c + d
 *     tblock[1] = a - b + c - d
 *     tblock[2] = a + b - c - d
 *     tblock[3] = a - b - c + d
 * No scaling is applied.
 *
 * \param block   source rows; rows 0 and 4 need at least 5 entries
 * \param tblock  destination, 4 entries
 */
void hadamard2x2(int **block, int tblock[4])
{
  const int top_left     = block[0][0];
  const int top_right    = block[0][4];
  const int bottom_left  = block[4][0];
  const int bottom_right = block[4][4];

  const int top_sum     = top_left    + top_right;
  const int top_diff    = top_left    - top_right;
  const int bottom_sum  = bottom_left + bottom_right;
  const int bottom_diff = bottom_left - bottom_right;

  tblock[0] = top_sum  + bottom_sum;
  tblock[1] = top_diff + bottom_diff;
  tblock[2] = top_sum  - bottom_sum;
  tblock[3] = top_diff - bottom_diff;
}
00318
/**
 * Inverse 2x2 Hadamard transform on a flat array of four coefficients.
 *
 * With a..d = tblock[0..3] the outputs are
 *     block[0] = a + b + c + d
 *     block[1] = a - b + c - d
 *     block[2] = a + b - c - d
 *     block[3] = a - b - c + d
 * No scaling is applied.
 *
 * All four inputs are combined into locals before the first output is
 * stored, so `tblock` and `block` may be the same array.
 *
 * \param tblock  source, 4 entries (read only)
 * \param block   destination, 4 entries
 */
void ihadamard2x2(int tblock[4], int block[4])
{
  const int first_sum   = tblock[0] + tblock[1];
  const int first_diff  = tblock[0] - tblock[1];
  const int second_sum  = tblock[2] + tblock[3];
  const int second_diff = tblock[2] - tblock[3];

  block[0] = first_sum  + second_sum;
  block[1] = first_diff + second_diff;
  block[2] = first_sum  - second_sum;
  block[3] = first_diff - second_diff;
}
00333
00334
00335
00336
00337
00338
00339
00340
00341
00342
00343
00344
00345
00346
00347
00348
00349
00350
00351
00352
00353
00354
00355 void forward8x8(int **block, int **tblock, int pos_y, int pos_x)
00356 {
00357 int i, ii;
00358 int tmp[64];
00359 int *pTmp = tmp, *pblock;
00360 int a0, a1, a2, a3;
00361 int p0, p1, p2, p3, p4, p5 ,p6, p7;
00362 int b0, b1, b2, b3, b4, b5, b6, b7;
00363
00364
00365 for (i=pos_y; i < pos_y + BLOCK_SIZE_8x8; i++)
00366 {
00367 pblock = &block[i][pos_x];
00368 p0 = *(pblock++);
00369 p1 = *(pblock++);
00370 p2 = *(pblock++);
00371 p3 = *(pblock++);
00372 p4 = *(pblock++);
00373 p5 = *(pblock++);
00374 p6 = *(pblock++);
00375 p7 = *(pblock );
00376
00377 a0 = p0 + p7;
00378 a1 = p1 + p6;
00379 a2 = p2 + p5;
00380 a3 = p3 + p4;
00381
00382 b0 = a0 + a3;
00383 b1 = a1 + a2;
00384 b2 = a0 - a3;
00385 b3 = a1 - a2;
00386
00387 a0 = p0 - p7;
00388 a1 = p1 - p6;
00389 a2 = p2 - p5;
00390 a3 = p3 - p4;
00391
00392 b4 = a1 + a2 + ((a0 >> 1) + a0);
00393 b5 = a0 - a3 - ((a2 >> 1) + a2);
00394 b6 = a0 + a3 - ((a1 >> 1) + a1);
00395 b7 = a1 - a2 + ((a3 >> 1) + a3);
00396
00397 *(pTmp++) = b0 + b1;
00398 *(pTmp++) = b4 + (b7 >> 2);
00399 *(pTmp++) = b2 + (b3 >> 1);
00400 *(pTmp++) = b5 + (b6 >> 2);
00401 *(pTmp++) = b0 - b1;
00402 *(pTmp++) = b6 - (b5 >> 2);
00403 *(pTmp++) = (b2 >> 1) - b3;
00404 *(pTmp++) = (b4 >> 2) - b7;
00405 }
00406
00407
00408 for (i=0; i < BLOCK_SIZE_8x8; i++)
00409 {
00410 pTmp = tmp + i;
00411 p0 = *pTmp;
00412 p1 = *(pTmp += BLOCK_SIZE_8x8);
00413 p2 = *(pTmp += BLOCK_SIZE_8x8);
00414 p3 = *(pTmp += BLOCK_SIZE_8x8);
00415 p4 = *(pTmp += BLOCK_SIZE_8x8);
00416 p5 = *(pTmp += BLOCK_SIZE_8x8);
00417 p6 = *(pTmp += BLOCK_SIZE_8x8);
00418 p7 = *(pTmp += BLOCK_SIZE_8x8);
00419
00420 a0 = p0 + p7;
00421 a1 = p1 + p6;
00422 a2 = p2 + p5;
00423 a3 = p3 + p4;
00424
00425 b0 = a0 + a3;
00426 b1 = a1 + a2;
00427 b2 = a0 - a3;
00428 b3 = a1 - a2;
00429
00430 a0 = p0 - p7;
00431 a1 = p1 - p6;
00432 a2 = p2 - p5;
00433 a3 = p3 - p4;
00434
00435 b4 = a1 + a2 + ((a0 >> 1) + a0);
00436 b5 = a0 - a3 - ((a2 >> 1) + a2);
00437 b6 = a0 + a3 - ((a1 >> 1) + a1);
00438 b7 = a1 - a2 + ((a3 >> 1) + a3);
00439
00440 ii = pos_x + i;
00441 tblock[pos_y ][ii] = b0 + b1;
00442 tblock[pos_y + 1][ii] = b4 + (b7 >> 2);
00443 tblock[pos_y + 2][ii] = b2 + (b3 >> 1);
00444 tblock[pos_y + 3][ii] = b5 + (b6 >> 2);
00445 tblock[pos_y + 4][ii] = b0 - b1;
00446 tblock[pos_y + 5][ii] = b6 - (b5 >> 2);
00447 tblock[pos_y + 6][ii] = (b2 >> 1) - b3;
00448 tblock[pos_y + 7][ii] = (b4 >> 2) - b7;
00449 }
00450 }
00451
00452 void inverse8x8(int **tblock, int **block, int pos_y, int pos_x)
00453 {
00454 int i, ii;
00455 int tmp[64];
00456 int *pTmp = tmp, *pblock;
00457 int a0, a1, a2, a3;
00458 int p0, p1, p2, p3, p4, p5 ,p6, p7;
00459 int b0, b1, b2, b3, b4, b5, b6, b7;
00460
00461
00462 for (i=pos_y; i < pos_y + BLOCK_SIZE_8x8; i++)
00463 {
00464 pblock = &tblock[i][pos_x];
00465 p0 = *(pblock++);
00466 p1 = *(pblock++);
00467 p2 = *(pblock++);
00468 p3 = *(pblock++);
00469 p4 = *(pblock++);
00470 p5 = *(pblock++);
00471 p6 = *(pblock++);
00472 p7 = *(pblock );
00473
00474 a0 = p0 + p4;
00475 a1 = p0 - p4;
00476 a2 = p6 - (p2 >> 1);
00477 a3 = p2 + (p6 >> 1);
00478
00479 b0 = a0 + a3;
00480 b2 = a1 - a2;
00481 b4 = a1 + a2;
00482 b6 = a0 - a3;
00483
00484 a0 = -p3 + p5 - p7 - (p7 >> 1);
00485 a1 = p1 + p7 - p3 - (p3 >> 1);
00486 a2 = -p1 + p7 + p5 + (p5 >> 1);
00487 a3 = p3 + p5 + p1 + (p1 >> 1);
00488
00489
00490 b1 = a0 + (a3>>2);
00491 b3 = a1 + (a2>>2);
00492 b5 = a2 - (a1>>2);
00493 b7 = a3 - (a0>>2);
00494
00495 *(pTmp++) = b0 + b7;
00496 *(pTmp++) = b2 - b5;
00497 *(pTmp++) = b4 + b3;
00498 *(pTmp++) = b6 + b1;
00499 *(pTmp++) = b6 - b1;
00500 *(pTmp++) = b4 - b3;
00501 *(pTmp++) = b2 + b5;
00502 *(pTmp++) = b0 - b7;
00503 }
00504
00505
00506 for (i=0; i < BLOCK_SIZE_8x8; i++)
00507 {
00508 pTmp = tmp + i;
00509 p0 = *pTmp;
00510 p1 = *(pTmp += BLOCK_SIZE_8x8);
00511 p2 = *(pTmp += BLOCK_SIZE_8x8);
00512 p3 = *(pTmp += BLOCK_SIZE_8x8);
00513 p4 = *(pTmp += BLOCK_SIZE_8x8);
00514 p5 = *(pTmp += BLOCK_SIZE_8x8);
00515 p6 = *(pTmp += BLOCK_SIZE_8x8);
00516 p7 = *(pTmp += BLOCK_SIZE_8x8);
00517
00518 a0 = p0 + p4;
00519 a1 = p0 - p4;
00520 a2 = p6 - (p2>>1);
00521 a3 = p2 + (p6>>1);
00522
00523 b0 = a0 + a3;
00524 b2 = a1 - a2;
00525 b4 = a1 + a2;
00526 b6 = a0 - a3;
00527
00528 a0 = -p3 + p5 - p7 - (p7 >> 1);
00529 a1 = p1 + p7 - p3 - (p3 >> 1);
00530 a2 = -p1 + p7 + p5 + (p5 >> 1);
00531 a3 = p3 + p5 + p1 + (p1 >> 1);
00532
00533
00534 b1 = a0 + (a3 >> 2);
00535 b7 = a3 - (a0 >> 2);
00536 b3 = a1 + (a2 >> 2);
00537 b5 = a2 - (a1 >> 2);
00538
00539 ii = i + pos_x;
00540 block[pos_y ][ii] = b0 + b7;
00541 block[pos_y + 1][ii] = b2 - b5;
00542 block[pos_y + 2][ii] = b4 + b3;
00543 block[pos_y + 3][ii] = b6 + b1;
00544 block[pos_y + 4][ii] = b6 - b1;
00545 block[pos_y + 5][ii] = b4 - b3;
00546 block[pos_y + 6][ii] = b2 + b5;
00547 block[pos_y + 7][ii] = b0 - b7;
00548 }
00549 }
00550
00551 #endif