00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016 #include "contributors.h"
00017
00018 #include <limits.h>
00019
00020 #include "global.h"
00021 #include "image.h"
00022 #include "memalloc.h"
00023 #include "mb_access.h"
00024 #include "refbuf.h"
00025 #include "mv_search.h"
00026 #include "me_distortion.h"
00027
00028
00029
00030
00031
00032
00033
00034
00035
/*!
 ************************************************************************
 * \brief
 *    Adds up iabs() of the 16 values of a 4x4 difference block.
 * \param diff
 *    first of 16 consecutive difference values
 * \param min_dist
 *    not read by this metric; present so the function has the same
 *    signature as the other 4x4 metrics installed by select_distortion()
 * \return
 *    the accumulated total
 ************************************************************************
 */
int distortion4x4SAD(int* diff, int min_dist)
{
  int idx;
  int total = 0;

  for (idx = 0; idx < 16; ++idx)
    total += iabs(diff[idx]);

  return total;
}
00047
00048
00049
00050
00051
00052
00053
/*!
 ************************************************************************
 * \brief
 *    Adds up iabs2() of the 16 values of a 4x4 difference block.
 * \param diff
 *    first of 16 consecutive difference values
 * \param min_dist
 *    not read by this metric
 * \return
 *    the accumulated total
 ************************************************************************
 */
int distortion4x4SSE(int* diff, int min_dist)
{
  int idx;
  int total = 0;

  for (idx = 0; idx < 16; ++idx)
    total += iabs2(diff[idx]);

  return total;
}
00063
00064
00065
00066
00067
00068
00069
00070
/*!
 ************************************************************************
 * \brief
 *    4x4 metric that simply forwards to HadamardSAD4x4().
 * \param diff
 *    4x4 difference block (16 values)
 * \param min_dist
 *    not read by this metric
 * \return
 *    whatever HadamardSAD4x4() returns for diff
 ************************************************************************
 */
int distortion4x4SATD(int* diff, int min_dist)
{
  const int satd = HadamardSAD4x4(diff);

  return satd;
}
00075
00076
00077
00078
00079
00080
00081
00082
/*!
 ************************************************************************
 * \brief
 *    Sum of iabs() over an 8x8 difference block with early termination.
 *    After each group of 8 values the running total is compared with
 *    max_dist; once it exceeds the threshold the partial total is
 *    returned without reading the remaining values.
 * \param diff
 *    first of 64 consecutive difference values
 * \param max_dist
 *    threshold for early termination
 * \return
 *    the full total, or the partial total that first exceeded max_dist
 ************************************************************************
 */
int distortion8x8SADthres(int* diff, int max_dist)
{
  int distortion = 0, i, j;

  for (j = 0; j < 8; j++)
  {
    for (i = 0; i < 8; i++)
    {
      distortion += iabs(*diff++);
    }

    /* Compare the accumulated cost, not the next input sample: testing
     * *diff here used an unrelated value and, after the last row, read
     * one element past the end of the 64-entry block. */
    if (distortion > max_dist)
      return distortion;
  }

  return distortion;
}
00099
00100
00101
00102
00103
00104
00105
/*!
 ************************************************************************
 * \brief
 *    Adds up iabs() of the 64 values of an 8x8 difference block.
 * \param diff
 *    first of 64 consecutive difference values
 * \param min_diff
 *    not read by this metric
 * \return
 *    the accumulated total
 ************************************************************************
 */
int distortion8x8SAD(int* diff, int min_diff)
{
  int idx;
  int total = 0;

  for (idx = 0; idx < 64; ++idx)
    total += iabs(diff[idx]);

  return total;
}
00117
00118
00119
00120
00121
00122
00123
/*!
 ************************************************************************
 * \brief
 *    Adds up iabs2() of the 64 values of an 8x8 difference block.
 * \param diff
 *    first of 64 consecutive difference values
 * \param min_dist
 *    not read by this metric
 * \return
 *    the accumulated total
 ************************************************************************
 */
int distortion8x8SSE(int* diff, int min_dist)
{
  int idx;
  int total = 0;

  for (idx = 0; idx < 64; ++idx)
    total += iabs2(diff[idx]);

  return total;
}
00134
00135
00136
00137
00138
00139
00140
/*!
 ************************************************************************
 * \brief
 *    8x8 metric that simply forwards to HadamardSAD8x8().
 * \param diff
 *    8x8 difference block (64 values)
 * \param min_dist
 *    not read by this metric
 * \return
 *    whatever HadamardSAD8x8() returns for diff
 ************************************************************************
 */
int distortion8x8SATD(int* diff, int min_dist)
{
  const int satd = HadamardSAD8x8(diff);

  return satd;
}
00145
00146
00147 void select_distortion(ImageParameters *p_Img, InputParameters *p_Inp)
00148 {
00149 switch(p_Inp->ModeDecisionMetric)
00150 {
00151 case ERROR_SAD:
00152 p_Img->distortion4x4 = distortion4x4SAD;
00153 p_Img->distortion8x8 = distortion8x8SAD;
00154 break;
00155 case ERROR_SSE:
00156 p_Img->distortion4x4 = distortion4x4SSE;
00157 p_Img->distortion8x8 = distortion8x8SSE;
00158 break;
00159 case ERROR_SATD :
00160 default:
00161 p_Img->distortion4x4 = distortion4x4SATD;
00162 p_Img->distortion8x8 = distortion8x8SATD;
00163 break;
00164 }
00165 }
00166
00167
00168
00169
00170
00171
00172
00173
/*!
 ************************************************************************
 * \brief
 *    Runs two add/subtract butterfly stages down each column and two
 *    along each row of a 4x4 difference block (a Hadamard transform,
 *    going by the function name), adds up iabs() of the 16 results and
 *    returns (sum + 1) >> 1.
 * \param diff
 *    16 difference values; index r*4+c is treated as row r, column c
 * \return
 *    the rounded, halved sum of absolute transformed values
 ************************************************************************
 */
int HadamardSAD4x4 (int* diff)
{
  int col, row, total = 0;
  int vert[16];

  /* column pass: pair the outer rows (0,3) and the inner rows (1,2) */
  for (col = 0; col < 4; ++col)
  {
    const int outer_sum = diff[col]     + diff[12 + col];
    const int inner_sum = diff[4 + col] + diff[ 8 + col];
    const int inner_dif = diff[4 + col] - diff[ 8 + col];
    const int outer_dif = diff[col]     - diff[12 + col];

    vert[     col] = outer_sum + inner_sum;
    vert[ 4 + col] = inner_dif + outer_dif;
    vert[ 8 + col] = outer_sum - inner_sum;
    vert[12 + col] = outer_dif - inner_dif;
  }

  /* row pass: same pairing inside each row, accumulated directly */
  for (row = 0; row < 16; row += 4)
  {
    const int *v  = vert + row;
    const int s03 = v[0] + v[3];
    const int s12 = v[1] + v[2];
    const int d12 = v[1] - v[2];
    const int d03 = v[0] - v[3];

    total += iabs(s03 + s12);
    total += iabs(s03 - s12);
    total += iabs(d12 + d03);
    total += iabs(d03 - d12);
  }

  return ((total + 1) >> 1);
}
00258
00259
00260
00261
00262
00263
00264
/*!
 ************************************************************************
 * \brief
 *    One 8-point, three-stage add/subtract butterfly.
 *    Reads src[0], src[src_step], ... src[7*src_step] and writes the
 *    eight results to dst[0], dst[dst_step], ... dst[7*dst_step].
 *    All inputs are read before any output is written, so src and dst
 *    may refer to the same elements.
 ************************************************************************
 */
static void hadamard8_pass(const int *src, int src_step, int *dst, int dst_step)
{
  int s1[8], s2[8];
  int k;

  /* stage 1: element k with element k+4 */
  for (k = 0; k < 4; k++)
  {
    const int lo = src[k * src_step];
    const int hi = src[(k + 4) * src_step];

    s1[k]     = lo + hi;
    s1[k + 4] = lo - hi;
  }

  /* stage 2: within each half, element k with element k+2 */
  for (k = 0; k < 8; k += 4)
  {
    s2[k]     = s1[k]     + s1[k + 2];
    s2[k + 1] = s1[k + 1] + s1[k + 3];
    s2[k + 2] = s1[k]     - s1[k + 2];
    s2[k + 3] = s1[k + 1] - s1[k + 3];
  }

  /* stage 3: adjacent pairs */
  for (k = 0; k < 8; k += 2)
  {
    dst[k * dst_step]       = s2[k] + s2[k + 1];
    dst[(k + 1) * dst_step] = s2[k] - s2[k + 1];
  }
}

/*!
 ************************************************************************
 * \brief
 *    Applies the 8-point butterfly to every row and then to every
 *    column of an 8x8 difference block (a Hadamard transform, going by
 *    the function name), adds up iabs() of the 64 results and returns
 *    (sum + 2) >> 2.
 * \param diff
 *    64 difference values; index r*8+c is treated as row r, column c
 * \return
 *    the rounded, quartered sum of absolute transformed values
 ************************************************************************
 */
int HadamardSAD8x8 (int* diff)
{
  int coef[64];
  int n, total = 0;

  /* horizontal pass, one row at a time */
  for (n = 0; n < 8; n++)
    hadamard8_pass(diff + (n << 3), 1, coef + (n << 3), 1);

  /* vertical pass, one column at a time, in place */
  for (n = 0; n < 8; n++)
    hadamard8_pass(coef + n, 8, coef + n, 8);

  for (n = 0; n < 64; n++)
    total += iabs (coef[n]);

  return ((total + 2) >> 2);
}
00341
00342
00343
00344
00345
00346
00347
00348 int computeSAD(StorablePicture *ref1,
00349 MEBlock *mv_block,
00350 int min_mcost,
00351 MotionVector *cand)
00352 {
00353 int mcost = 0;
00354 int y,x;
00355 short blocksize_x = mv_block->blocksize_x;
00356 short blocksize_y = mv_block->blocksize_y;
00357 ImageParameters *p_Img = mv_block->p_Img;
00358 int pad_size_x = p_Img->padded_size_x - blocksize_x;
00359 #if (JM_MEM_DISTORTION)
00360 int *imgpel_abs = p_Img->imgpel_abs;
00361 #endif
00362
00363 imgpel *src_line, *ref_line;
00364
00365 src_line = mv_block->orig_pic[0];
00366 ref_line = UMVLine4X (ref1, cand->mv_y, cand->mv_x);
00367 for (y=0; y<blocksize_y; y++)
00368 {
00369 for (x = 0; x < blocksize_x; x+=4)
00370 {
00371 #if (JM_MEM_DISTORTION)
00372 mcost += imgpel_abs[ *src_line++ - *ref_line++ ];
00373 mcost += imgpel_abs[ *src_line++ - *ref_line++ ];
00374 mcost += imgpel_abs[ *src_line++ - *ref_line++ ];
00375 mcost += imgpel_abs[ *src_line++ - *ref_line++ ];
00376 #else
00377 mcost += iabs( *src_line++ - *ref_line++ );
00378 mcost += iabs( *src_line++ - *ref_line++ );
00379 mcost += iabs( *src_line++ - *ref_line++ );
00380 mcost += iabs( *src_line++ - *ref_line++ );
00381 #endif
00382 }
00383 if (mcost >= min_mcost) return mcost;
00384 ref_line += pad_size_x;
00385 }
00386 if ( mv_block->ChromaMEEnable )
00387 {
00388
00389 int blocksize_x_cr = mv_block->blocksize_cr_x;
00390 int blocksize_y_cr = mv_block->blocksize_cr_y;
00391 int cr_pad_size_x = p_Img->cr_padded_size_x - blocksize_x_cr;
00392 int k;
00393 int mcr_cost = 0;
00394
00395 for (k=0; k < 2; k++)
00396 {
00397 src_line = mv_block->orig_pic[k+1];
00398 ref_line = UMVLine8X_chroma ( ref1, k+1, cand->mv_y, cand->mv_x);
00399 mcr_cost = 0;
00400
00401 for (y = 0; y < blocksize_y_cr; y++)
00402 {
00403 for (x = 0; x < blocksize_x_cr; x += 2)
00404 {
00405 #if (JM_MEM_DISTORTION)
00406 mcr_cost += imgpel_abs[ *src_line++ - *ref_line++ ];
00407 mcr_cost += imgpel_abs[ *src_line++ - *ref_line++ ];
00408 #else
00409 mcr_cost += iabs( *src_line++ - *ref_line++ );
00410 mcr_cost += iabs( *src_line++ - *ref_line++ );
00411 #endif
00412 }
00413 ref_line += cr_pad_size_x;
00414 }
00415 mcost += mv_block->ChromaMEWeight * mcr_cost;
00416 if (mcost >= min_mcost) return mcost;
00417 }
00418 }
00419
00420 return mcost;
00421 }
00422
00423
00424
00425
00426
00427
00428
00429 int computeSADWP(StorablePicture *ref1,
00430 MEBlock *mv_block,
00431 int min_mcost,
00432 MotionVector *cand
00433 )
00434 {
00435 int mcost = 0;
00436 int y, x;
00437 int weighted_pel;
00438 short blocksize_x = mv_block->blocksize_x;
00439 short blocksize_y = mv_block->blocksize_y;
00440
00441 ImageParameters *p_Img = mv_block->p_Img;
00442 Slice *currSlice = mv_block->p_slice;
00443 int pad_size_x = p_Img->padded_size_x - blocksize_x;
00444 int max_imgpel_value = p_Img->max_imgpel_value;
00445 short weight = mv_block->weight_luma;
00446 short offset = mv_block->offset_luma;
00447
00448 int wp_luma_round = currSlice->wp_luma_round;
00449 short luma_log_weight_denom = currSlice->luma_log_weight_denom;
00450
00451 imgpel *src_line = mv_block->orig_pic[0];
00452 imgpel *ref_line = UMVLine4X (ref1, cand->mv_y, cand->mv_x);
00453
00454 for (y=0; y<blocksize_y; y++)
00455 {
00456 for (x = 0; x < blocksize_x; x+=4)
00457 {
00458 weighted_pel = iClip1( max_imgpel_value, ((weight * *ref_line++ + wp_luma_round) >> luma_log_weight_denom) + offset);
00459 mcost += iabs( *src_line++ - weighted_pel );
00460 weighted_pel = iClip1( max_imgpel_value, ((weight * *ref_line++ + wp_luma_round) >> luma_log_weight_denom) + offset);
00461 mcost += iabs( *src_line++ - weighted_pel );
00462 weighted_pel = iClip1( max_imgpel_value, ((weight * *ref_line++ + wp_luma_round) >> luma_log_weight_denom) + offset);
00463 mcost += iabs( *src_line++ - weighted_pel );
00464 weighted_pel = iClip1( max_imgpel_value, ((weight * *ref_line++ + wp_luma_round) >> luma_log_weight_denom) + offset);
00465 mcost += iabs( *src_line++ - weighted_pel );
00466 }
00467 if (mcost >= min_mcost) return mcost;
00468 ref_line += pad_size_x;
00469 }
00470 if ( mv_block->ChromaMEEnable )
00471 {
00472
00473 int blocksize_x_cr = mv_block->blocksize_cr_x;
00474 int blocksize_y_cr = mv_block->blocksize_cr_y;
00475 int cr_pad_size_x = p_Img->cr_padded_size_x - blocksize_x_cr;
00476 int k;
00477 int mcr_cost = 0;
00478 int max_imgpel_value_uv = p_Img->max_pel_value_comp[1];
00479 int wp_chroma_round = currSlice->wp_chroma_round;
00480 short chroma_log_weight_denom = currSlice->chroma_log_weight_denom;
00481
00482 for (k=0; k < 2; k++)
00483 {
00484 weight = mv_block->weight_cr[k];
00485 offset = mv_block->offset_cr[k];
00486
00487 mcr_cost = 0;
00488 src_line = mv_block->orig_pic[k+1];
00489 ref_line = UMVLine8X_chroma ( ref1, k+1, cand->mv_y, cand->mv_x);
00490 for (y=0; y<blocksize_y_cr; y++)
00491 {
00492 for (x = 0; x < blocksize_x_cr; x+=2)
00493 {
00494 weighted_pel = iClip1( max_imgpel_value_uv, ((weight * *ref_line++ + wp_chroma_round) >> chroma_log_weight_denom) + offset);
00495 mcr_cost += iabs( *src_line++ - weighted_pel );
00496 weighted_pel = iClip1( max_imgpel_value_uv, ((weight * *ref_line++ + wp_chroma_round) >> chroma_log_weight_denom) + offset);
00497 mcr_cost += iabs( *src_line++ - weighted_pel );
00498 }
00499 ref_line += cr_pad_size_x;
00500 }
00501 mcost += mv_block->ChromaMEWeight * mcr_cost;
00502 if (mcost >= min_mcost) return mcost;
00503 }
00504 }
00505
00506 return mcost;
00507 }
00508
00509
00510
00511
00512
00513
00514
00515 int computeBiPredSAD1(StorablePicture *ref1,
00516 StorablePicture *ref2,
00517 MEBlock *mv_block,
00518 int min_mcost,
00519 MotionVector *cand1,
00520 MotionVector *cand2)
00521 {
00522 int mcost = 0;
00523 int bi_diff;
00524 int y,x;
00525 short blocksize_x = mv_block->blocksize_x;
00526 short blocksize_y = mv_block->blocksize_y;
00527 ImageParameters *p_Img = mv_block->p_Img;
00528 int pad_size_x = p_Img->padded_size_x - blocksize_x;
00529 #if (JM_MEM_DISTORTION)
00530 int *imgpel_abs = p_Img->imgpel_abs;
00531 #endif
00532
00533 imgpel *src_line = mv_block->orig_pic[0];
00534 imgpel *ref2_line = UMVLine4X(ref2, cand2->mv_y, cand2->mv_x);
00535 imgpel *ref1_line = UMVLine4X(ref1, cand1->mv_y, cand1->mv_x);
00536
00537 for (y = 0; y < blocksize_y; y++)
00538 {
00539 for (x = 0; x < blocksize_x; x+=4)
00540 {
00541 #if (JM_MEM_DISTORTION)
00542 bi_diff = (*src_line++) - ((*ref1_line++ + *ref2_line++ + 1)>>1);
00543 mcost += imgpel_abs[ bi_diff ];
00544 bi_diff = (*src_line++) - ((*ref1_line++ + *ref2_line++ + 1)>>1);
00545 mcost += imgpel_abs[ bi_diff ];
00546 bi_diff = (*src_line++) - ((*ref1_line++ + *ref2_line++ + 1)>>1);
00547 mcost += imgpel_abs[ bi_diff ];
00548 bi_diff = (*src_line++) - ((*ref1_line++ + *ref2_line++ + 1)>>1);
00549 mcost += imgpel_abs[ bi_diff ];
00550 #else
00551 bi_diff = (*src_line++) - ((*ref1_line++ + *ref2_line++ + 1)>>1);
00552 mcost += iabs(bi_diff);
00553 bi_diff = (*src_line++) - ((*ref1_line++ + *ref2_line++ + 1)>>1);
00554 mcost += iabs(bi_diff);
00555 bi_diff = (*src_line++) - ((*ref1_line++ + *ref2_line++ + 1)>>1);
00556 mcost += iabs(bi_diff);
00557 bi_diff = (*src_line++) - ((*ref1_line++ + *ref2_line++ + 1)>>1);
00558 mcost += iabs(bi_diff);
00559 #endif
00560 }
00561 if (mcost >= min_mcost) return mcost;
00562 ref2_line += pad_size_x;
00563 ref1_line += pad_size_x;
00564 }
00565
00566 if ( mv_block->ChromaMEEnable )
00567 {
00568
00569 int blocksize_x_cr = mv_block->blocksize_cr_x;
00570 int blocksize_y_cr = mv_block->blocksize_cr_y;
00571 int cr_pad_size_x = p_Img->cr_padded_size_x - blocksize_x_cr;
00572 int k;
00573 int mcr_cost = 0;
00574
00575 for (k=1; k<3; k++)
00576 {
00577 mcr_cost = 0;
00578 src_line = mv_block->orig_pic[k];
00579 ref2_line = UMVLine8X_chroma ( ref2, k, cand2->mv_y, cand2->mv_x);
00580 ref1_line = UMVLine8X_chroma ( ref1, k, cand1->mv_y, cand1->mv_x);
00581
00582 for (y=0; y<blocksize_y_cr; y++)
00583 {
00584 for (x = 0; x < blocksize_x_cr; x+=2)
00585 {
00586 bi_diff = (*src_line++) - ((*ref1_line++ + *ref2_line++ + 1)>>1);
00587 mcr_cost += iabs(bi_diff);
00588 bi_diff = (*src_line++) - ((*ref1_line++ + *ref2_line++ + 1)>>1);
00589 mcr_cost += iabs(bi_diff);
00590 }
00591 ref2_line += cr_pad_size_x;
00592 ref1_line += cr_pad_size_x;
00593 }
00594 mcost += mv_block->ChromaMEWeight * mcr_cost;
00595 if (mcost >= min_mcost) return mcost;
00596 }
00597 }
00598 return mcost;
00599 }
00600
00601
00602
00603
00604
00605
00606
00607 int computeBiPredSAD2(StorablePicture *ref1,
00608 StorablePicture *ref2,
00609 MEBlock *mv_block,
00610 int min_mcost,
00611 MotionVector *cand1,
00612 MotionVector *cand2)
00613 {
00614 int mcost = 0;
00615 int bi_diff;
00616 ImageParameters *p_Img = mv_block->p_Img;
00617 Slice *currSlice = mv_block->p_slice;
00618 int denom = currSlice->luma_log_weight_denom + 1;
00619 int lround = 2 * currSlice->wp_luma_round;
00620 int max_imgpel_value = p_Img->max_imgpel_value;
00621 int y,x;
00622 int weighted_pel, pixel1, pixel2;
00623 short blocksize_x = mv_block->blocksize_x;
00624 short blocksize_y = mv_block->blocksize_y;
00625 short weight1 = mv_block->weight1;
00626 short weight2 = mv_block->weight2;
00627 short offsetBi = mv_block->offsetBi;
00628
00629 int pad_size_x = p_Img->padded_size_x - blocksize_x;
00630
00631 imgpel *src_line = mv_block->orig_pic[0];
00632 imgpel *ref2_line = UMVLine4X(ref2, cand2->mv_y, cand2->mv_x);
00633 imgpel *ref1_line = UMVLine4X(ref1, cand1->mv_y, cand1->mv_x);
00634
00635 for (y=0; y<blocksize_y; y++)
00636 {
00637 for (x = 0; x < blocksize_x; x+=4)
00638 {
00639 pixel1 = weight1 * (*ref1_line++);
00640 pixel2 = weight2 * (*ref2_line++);
00641 weighted_pel = iClip1( max_imgpel_value, ((pixel1 + pixel2 + lround) >> denom) + offsetBi);
00642 bi_diff = (*src_line++) - weighted_pel;
00643 mcost += iabs(bi_diff);
00644
00645 pixel1 = weight1 * (*ref1_line++);
00646 pixel2 = weight2 * (*ref2_line++);
00647 weighted_pel = iClip1( max_imgpel_value, ((pixel1 + pixel2 + lround) >> denom) + offsetBi);
00648 bi_diff = (*src_line++) - weighted_pel;
00649 mcost += iabs(bi_diff);
00650
00651 pixel1 = weight1 * (*ref1_line++);
00652 pixel2 = weight2 * (*ref2_line++);
00653 weighted_pel = iClip1( max_imgpel_value, ((pixel1 + pixel2 + lround) >> denom) + offsetBi);
00654 bi_diff = (*src_line++) - weighted_pel;
00655 mcost += iabs(bi_diff);
00656
00657 pixel1 = weight1 * (*ref1_line++);
00658 pixel2 = weight2 * (*ref2_line++);
00659 weighted_pel = iClip1( max_imgpel_value, ((pixel1 + pixel2 + lround) >> denom) + offsetBi);
00660 bi_diff = (*src_line++) - weighted_pel;
00661 mcost += iabs(bi_diff);
00662 }
00663 if (mcost >= min_mcost) return mcost;
00664 ref2_line += pad_size_x;
00665 ref1_line += pad_size_x;
00666 }
00667
00668 if ( mv_block->ChromaMEEnable )
00669 {
00670
00671 int blocksize_x_cr = mv_block->blocksize_cr_x;
00672 int blocksize_y_cr = mv_block->blocksize_cr_y;
00673 int cr_pad_size_x = p_Img->cr_padded_size_x - blocksize_x_cr;
00674 int k;
00675 int mcr_cost = 0;
00676 int max_imgpel_value_uv = p_Img->max_pel_value_comp[1];
00677
00678 for (k=0; k<2; k++)
00679 {
00680 weight1 = mv_block->weight1_cr[k];
00681 weight2 = mv_block->weight2_cr[k];
00682 offsetBi = mv_block->offsetBi_cr[k];
00683
00684 mcr_cost = 0;
00685 src_line = mv_block->orig_pic[k+1];
00686 ref2_line = UMVLine8X_chroma ( ref2, k+1, cand2->mv_y, cand2->mv_x);
00687 ref1_line = UMVLine8X_chroma ( ref1, k+1, cand1->mv_y, cand1->mv_x);
00688
00689 for (y=0; y<blocksize_y_cr; y++)
00690 {
00691 for (x = 0; x < blocksize_x_cr; x+=2)
00692 {
00693 pixel1 = weight1 * (*ref1_line++);
00694 pixel2 = weight2 * (*ref2_line++);
00695 weighted_pel = iClip1( max_imgpel_value_uv, ((pixel1 + pixel2 + lround) >> denom) + offsetBi);
00696 bi_diff = (*src_line++) - weighted_pel;
00697 mcr_cost += iabs(bi_diff);
00698
00699 pixel1 = weight1 * (*ref1_line++);
00700 pixel2 = weight2 * (*ref2_line++);
00701 weighted_pel = iClip1( max_imgpel_value_uv, ((pixel1 + pixel2 + lround) >> denom) + offsetBi);
00702 bi_diff = (*src_line++) - weighted_pel;
00703 mcr_cost += iabs(bi_diff);
00704 }
00705 ref2_line += cr_pad_size_x;
00706 ref1_line += cr_pad_size_x;
00707 }
00708 mcost += mv_block->ChromaMEWeight * mcr_cost;
00709 if (mcost >= min_mcost) return mcost;
00710
00711 }
00712 }
00713 return mcost;
00714 }
00715
00716
00717
00718
00719
00720
00721
00722 int computeSATD(StorablePicture *ref1,
00723 MEBlock *mv_block,
00724 int min_mcost,
00725 MotionVector *cand
00726 )
00727 {
00728 int mcost = 0;
00729 int y, x, y4, *d;
00730 int src_size_x, src_size_mul;
00731 short blocksize_x = mv_block->blocksize_x;
00732 short blocksize_y = mv_block->blocksize_y;
00733 ImageParameters *p_Img = mv_block->p_Img;
00734 imgpel *src_tmp = mv_block->orig_pic[0];
00735 int diff[MB_PIXELS];
00736 imgpel *src_line, *ref_line;
00737
00738 if ( !mv_block->test8x8 )
00739 {
00740 src_size_x = blocksize_x - BLOCK_SIZE;
00741 src_size_mul = blocksize_x * BLOCK_SIZE;
00742 for (y = cand->mv_y; y < cand->mv_y + (blocksize_y<<2); y += (BLOCK_SIZE_SP))
00743 {
00744 for (x=0; x<blocksize_x; x += BLOCK_SIZE)
00745 {
00746 d = diff;
00747 ref_line = UMVLine4X (ref1, y, cand->mv_x + (x<<2));
00748 src_line = src_tmp + x;
00749 for (y4 = 0; y4 < BLOCK_SIZE; y4++ )
00750 {
00751 *d++ = *src_line++ - *ref_line++ ;
00752 *d++ = *src_line++ - *ref_line++ ;
00753 *d++ = *src_line++ - *ref_line++ ;
00754 *d++ = *src_line++ - *ref_line++ ;
00755
00756 ref_line += p_Img->padded_size_x_m4x4;
00757 src_line += src_size_x;
00758 }
00759 if ((mcost += HadamardSAD4x4 (diff)) > min_mcost) return mcost;
00760 }
00761 src_tmp += src_size_mul;
00762 }
00763 }
00764 else
00765 {
00766 src_size_x = (blocksize_x - BLOCK_SIZE_8x8);
00767 src_size_mul = blocksize_x * BLOCK_SIZE_8x8;
00768 for (y = cand->mv_y; y < cand->mv_y + (blocksize_y<<2); y += (BLOCK_SIZE_8x8_SP) )
00769 {
00770 for (x=0; x<blocksize_x; x += BLOCK_SIZE_8x8 )
00771 {
00772 d = diff;
00773 ref_line = UMVLine4X (ref1, y, cand->mv_x + (x<<2));
00774 src_line = src_tmp + x;
00775 for (y4 = 0; y4 < BLOCK_SIZE_8x8; y4++ )
00776 {
00777 *d++ = *src_line++ - *ref_line++ ;
00778 *d++ = *src_line++ - *ref_line++ ;
00779 *d++ = *src_line++ - *ref_line++ ;
00780 *d++ = *src_line++ - *ref_line++ ;
00781 *d++ = *src_line++ - *ref_line++ ;
00782 *d++ = *src_line++ - *ref_line++ ;
00783 *d++ = *src_line++ - *ref_line++ ;
00784 *d++ = *src_line++ - *ref_line++ ;
00785
00786 ref_line += p_Img->padded_size_x_m8x8;
00787 src_line += src_size_x;
00788 }
00789 if ((mcost += HadamardSAD8x8 (diff)) > min_mcost) return mcost;
00790 }
00791 src_tmp += src_size_mul;
00792 }
00793 }
00794 return mcost;
00795 }
00796
00797
00798
00799
00800
00801
00802
00803 int computeSATDWP(StorablePicture *ref1,
00804 MEBlock *mv_block,
00805 int min_mcost,
00806 MotionVector *cand
00807 )
00808 {
00809 int mcost = 0;
00810 int y, x, y4, *d;
00811 int weighted_pel;
00812 int src_size_x, src_size_mul;
00813 short blocksize_x = mv_block->blocksize_x;
00814 short blocksize_y = mv_block->blocksize_y;
00815
00816 imgpel *src_tmp = mv_block->orig_pic[0];
00817 ImageParameters *p_Img = mv_block->p_Img;
00818 Slice *currSlice = mv_block->p_slice;
00819 short luma_log_weight_denom = currSlice->luma_log_weight_denom;
00820 short weight = mv_block->weight_luma;
00821 short offset = mv_block->offset_luma;
00822
00823 int wp_luma_round = currSlice->wp_luma_round;
00824 int max_imgpel_value = p_Img->max_imgpel_value;
00825 int diff[MB_PIXELS];
00826 imgpel *src_line, *ref_line;
00827
00828 if ( !mv_block->test8x8 )
00829 {
00830 src_size_x = (blocksize_x - BLOCK_SIZE);
00831 src_size_mul = blocksize_x * BLOCK_SIZE;
00832 for (y = cand->mv_y; y < cand->mv_y + (blocksize_y<<2); y += (BLOCK_SIZE_SP))
00833 {
00834 for (x=0; x<blocksize_x; x += BLOCK_SIZE)
00835 {
00836 d = diff;
00837 ref_line = UMVLine4X (ref1, y, cand->mv_x + (x<<2));
00838 src_line = src_tmp + x;
00839 for (y4 = 0; y4 < BLOCK_SIZE; y4++ )
00840 {
00841 weighted_pel = iClip1( max_imgpel_value, ((weight * *ref_line++ + wp_luma_round) >> luma_log_weight_denom) + offset);
00842 *d++ = *src_line++ - weighted_pel;
00843 weighted_pel = iClip1( max_imgpel_value, ((weight * *ref_line++ + wp_luma_round) >> luma_log_weight_denom) + offset);
00844 *d++ = *src_line++ - weighted_pel;
00845 weighted_pel = iClip1( max_imgpel_value, ((weight * *ref_line++ + wp_luma_round) >> luma_log_weight_denom) + offset);
00846 *d++ = *src_line++ - weighted_pel;
00847 weighted_pel = iClip1( max_imgpel_value, ((weight * *ref_line++ + wp_luma_round) >> luma_log_weight_denom) + offset);
00848 *d++ = *src_line++ - weighted_pel;
00849
00850 ref_line += p_Img->padded_size_x_m4x4;
00851 src_line += src_size_x;
00852 }
00853 if ((mcost += HadamardSAD4x4 (diff)) > min_mcost) return mcost;
00854 }
00855 src_tmp += src_size_mul;
00856 }
00857 }
00858 else
00859 {
00860 src_size_x = (blocksize_x - BLOCK_SIZE_8x8);
00861 src_size_mul = blocksize_x * BLOCK_SIZE_8x8;
00862 for (y = cand->mv_y; y < cand->mv_y + (blocksize_y<<2); y += (BLOCK_SIZE_8x8_SP) )
00863 {
00864 for (x=0; x<blocksize_x; x += BLOCK_SIZE_8x8 )
00865 {
00866 d = diff;
00867 ref_line = UMVLine4X (ref1, y, cand->mv_x + (x<<2));
00868 src_line = src_tmp + x;
00869 for (y4 = 0; y4 < BLOCK_SIZE_8x8; y4++ )
00870 {
00871 weighted_pel = iClip1( max_imgpel_value, ((weight * *ref_line++ + wp_luma_round) >> luma_log_weight_denom) + offset);
00872 *d++ = *src_line++ - weighted_pel;
00873 weighted_pel = iClip1( max_imgpel_value, ((weight * *ref_line++ + wp_luma_round) >> luma_log_weight_denom) + offset);
00874 *d++ = *src_line++ - weighted_pel;
00875 weighted_pel = iClip1( max_imgpel_value, ((weight * *ref_line++ + wp_luma_round) >> luma_log_weight_denom) + offset);
00876 *d++ = *src_line++ - weighted_pel;
00877 weighted_pel = iClip1( max_imgpel_value, ((weight * *ref_line++ + wp_luma_round) >> luma_log_weight_denom) + offset);
00878 *d++ = *src_line++ - weighted_pel;
00879 weighted_pel = iClip1( max_imgpel_value, ((weight * *ref_line++ + wp_luma_round) >> luma_log_weight_denom) + offset);
00880 *d++ = *src_line++ - weighted_pel;
00881 weighted_pel = iClip1( max_imgpel_value, ((weight * *ref_line++ + wp_luma_round) >> luma_log_weight_denom) + offset);
00882 *d++ = *src_line++ - weighted_pel;
00883 weighted_pel = iClip1( max_imgpel_value, ((weight * *ref_line++ + wp_luma_round) >> luma_log_weight_denom) + offset);
00884 *d++ = *src_line++ - weighted_pel;
00885 weighted_pel = iClip1( max_imgpel_value, ((weight * *ref_line++ + wp_luma_round) >> luma_log_weight_denom) + offset);
00886 *d++ = *src_line++ - weighted_pel;
00887
00888 ref_line += p_Img->padded_size_x_m8x8;
00889 src_line += src_size_x;
00890 }
00891 if ((mcost += HadamardSAD8x8 (diff)) > min_mcost) return mcost;
00892 }
00893 src_tmp += src_size_mul;
00894 }
00895 }
00896 return mcost;
00897 }
00898
00899
00900
00901
00902
00903
00904
00905 int computeBiPredSATD1(StorablePicture *ref1,
00906 StorablePicture *ref2,
00907 MEBlock *mv_block,
00908 int min_mcost,
00909 MotionVector *cand1,
00910 MotionVector *cand2)
00911 {
00912 int mcost = 0;
00913 int y, x, y4, *d;
00914 int src_size_x, src_size_mul;
00915 imgpel *src_tmp = mv_block->orig_pic[0];
00916 int diff[MB_PIXELS];
00917 imgpel *src_line, *ref1_line, *ref2_line;
00918 short blocksize_x = mv_block->blocksize_x;
00919 short blocksize_y = mv_block->blocksize_y;
00920 ImageParameters *p_Img = mv_block->p_Img;
00921
00922 if ( !mv_block->test8x8 )
00923 {
00924 src_size_x = (blocksize_x - BLOCK_SIZE);
00925 src_size_mul = blocksize_x * BLOCK_SIZE;
00926 for (y=0; y<(blocksize_y<<2); y += (BLOCK_SIZE_SP))
00927 {
00928 for (x=0; x<blocksize_x; x += BLOCK_SIZE)
00929 {
00930 d = diff;
00931 src_line = src_tmp + x;
00932 ref2_line = UMVLine4X(ref2, cand2->mv_y + y, cand2->mv_x + (x<<2));
00933 ref1_line = UMVLine4X(ref1, cand1->mv_y + y, cand1->mv_x + (x<<2));
00934 for (y4 = 0; y4 < BLOCK_SIZE; y4++ )
00935 {
00936 *d++ = (*src_line++) - ((*ref1_line++ + *ref2_line++ + 1)>>1);
00937 *d++ = (*src_line++) - ((*ref1_line++ + *ref2_line++ + 1)>>1);
00938 *d++ = (*src_line++) - ((*ref1_line++ + *ref2_line++ + 1)>>1);
00939 *d++ = (*src_line++) - ((*ref1_line++ + *ref2_line++ + 1)>>1);
00940
00941 ref1_line += p_Img->padded_size_x_m4x4;
00942 ref2_line += p_Img->padded_size_x_m4x4;
00943 src_line += src_size_x;
00944 }
00945 if ((mcost += HadamardSAD4x4 (diff)) > min_mcost) return mcost;
00946 }
00947 src_tmp += src_size_mul;
00948 }
00949 }
00950 else
00951 {
00952 src_size_x = (blocksize_x - BLOCK_SIZE_8x8);
00953 src_size_mul = blocksize_x * BLOCK_SIZE_8x8;
00954 for (y=0; y<(blocksize_y << 2); y += BLOCK_SIZE_8x8_SP )
00955 {
00956 int y_pos2 = cand2->mv_y + y;
00957 int y_pos1 = cand1->mv_y + y;
00958 for (x=0; x<blocksize_x; x += BLOCK_SIZE_8x8 )
00959 {
00960 d = diff;
00961 src_line = src_tmp + x;
00962 ref2_line = UMVLine4X(ref2, y_pos2, cand2->mv_x + (x<<2));
00963 ref1_line = UMVLine4X(ref1, y_pos1, cand1->mv_x + (x<<2));
00964 for (y4 = 0; y4 < BLOCK_SIZE_8x8; y4++ )
00965 {
00966 *d++ = (*src_line++) - ((*ref1_line++ + *ref2_line++ + 1)>>1);
00967 *d++ = (*src_line++) - ((*ref1_line++ + *ref2_line++ + 1)>>1);
00968 *d++ = (*src_line++) - ((*ref1_line++ + *ref2_line++ + 1)>>1);
00969 *d++ = (*src_line++) - ((*ref1_line++ + *ref2_line++ + 1)>>1);
00970 *d++ = (*src_line++) - ((*ref1_line++ + *ref2_line++ + 1)>>1);
00971 *d++ = (*src_line++) - ((*ref1_line++ + *ref2_line++ + 1)>>1);
00972 *d++ = (*src_line++) - ((*ref1_line++ + *ref2_line++ + 1)>>1);
00973 *d++ = (*src_line++) - ((*ref1_line++ + *ref2_line++ + 1)>>1);
00974
00975 ref1_line += p_Img->padded_size_x_m8x8;
00976 ref2_line += p_Img->padded_size_x_m8x8;
00977 src_line += src_size_x;
00978 }
00979 if ((mcost += HadamardSAD8x8 (diff)) > min_mcost) return mcost;
00980 }
00981 src_tmp += src_size_mul;
00982 }
00983 }
00984 return mcost;
00985 }
00986
00987
00988
00989
00990
00991
00992
00993 int computeBiPredSATD2(StorablePicture *ref1,
00994 StorablePicture *ref2,
00995 MEBlock *mv_block,
00996 int min_mcost,
00997 MotionVector *cand1,
00998 MotionVector *cand2)
00999 {
01000 int mcost = 0;
01001 int y, x, y4, *d;
01002 int weighted_pel, pixel1, pixel2;
01003 ImageParameters *p_Img = mv_block->p_Img;
01004 Slice *currSlice = mv_block->p_slice;
01005 int denom = currSlice->luma_log_weight_denom + 1;
01006 int lround = 2 * currSlice->wp_luma_round;
01007 short weight1 = mv_block->weight1;
01008 short weight2 = mv_block->weight2;
01009 short offsetBi = mv_block->offsetBi;
01010
01011
01012 int max_imgpel_value = p_Img->max_imgpel_value;
01013 int src_size_x, src_size_mul;
01014 imgpel *src_tmp = mv_block->orig_pic[0];
01015 int diff[MB_PIXELS];
01016 imgpel *src_line, *ref1_line, *ref2_line;
01017 short blocksize_x = mv_block->blocksize_x;
01018 short blocksize_y = mv_block->blocksize_y;
01019
01020 if ( !mv_block->test8x8 )
01021 {
01022 src_size_x = (blocksize_x - BLOCK_SIZE);
01023 src_size_mul = blocksize_x * BLOCK_SIZE;
01024 for (y=0; y<(blocksize_y<<2); y += BLOCK_SIZE_SP)
01025 {
01026 for (x=0; x<blocksize_x; x += BLOCK_SIZE)
01027 {
01028 d = diff;
01029 src_line = src_tmp + x;
01030 ref2_line = UMVLine4X(ref2, cand2->mv_y + y, cand2->mv_x + (x<<2));
01031 ref1_line = UMVLine4X(ref1, cand1->mv_y + y, cand1->mv_x + (x<<2));
01032 for (y4 = 0; y4 < BLOCK_SIZE; y4++ )
01033 {
01034
01035 pixel1 = weight1 * (*ref1_line++);
01036 pixel2 = weight2 * (*ref2_line++);
01037 weighted_pel = iClip1( max_imgpel_value, ((pixel1 + pixel2 + lround) >> denom) + offsetBi);
01038 *d++ = (*src_line++) - weighted_pel;
01039
01040 pixel1 = weight1 * (*ref1_line++);
01041 pixel2 = weight2 * (*ref2_line++);
01042 weighted_pel = iClip1( max_imgpel_value, ((pixel1 + pixel2 + lround) >> denom) + offsetBi);
01043 *d++ = (*src_line++) - weighted_pel;
01044
01045 pixel1 = weight1 * (*ref1_line++);
01046 pixel2 = weight2 * (*ref2_line++);
01047 weighted_pel = iClip1( max_imgpel_value, ((pixel1 + pixel2 + lround) >> denom) + offsetBi);
01048 *d++ = (*src_line++) - weighted_pel;
01049
01050 pixel1 = weight1 * (*ref1_line++);
01051 pixel2 = weight2 * (*ref2_line++);
01052 weighted_pel = iClip1( max_imgpel_value, ((pixel1 + pixel2 + lround) >> denom) + offsetBi);
01053 *d++ = (*src_line++) - weighted_pel;
01054
01055 ref1_line += p_Img->padded_size_x_m4x4;
01056 ref2_line += p_Img->padded_size_x_m4x4;
01057 src_line += src_size_x;
01058 }
01059 if ((mcost += HadamardSAD4x4 (diff)) > min_mcost) return mcost;
01060 }
01061 src_tmp += src_size_mul;
01062 }
01063 }
01064 else
01065 {
01066 src_size_x = (blocksize_x - BLOCK_SIZE_8x8);
01067 src_size_mul = blocksize_x * BLOCK_SIZE_8x8;
01068 for (y=0; y < (blocksize_y << 2); y += BLOCK_SIZE_8x8_SP )
01069 {
01070 int y_pos2 = cand2->mv_y + y;
01071 int y_pos1 = cand1->mv_y + y;
01072 for (x=0; x<blocksize_x; x += BLOCK_SIZE_8x8 )
01073 {
01074 d = diff;
01075 src_line = src_tmp + x;
01076 ref2_line = UMVLine4X(ref2, y_pos2, cand2->mv_x + (x<<2));
01077 ref1_line = UMVLine4X(ref1, y_pos1, cand1->mv_x + (x<<2));
01078 for (y4 = 0; y4 < BLOCK_SIZE_8x8; y4++ )
01079 {
01080
01081 pixel1 = weight1 * (*ref1_line++);
01082 pixel2 = weight2 * (*ref2_line++);
01083 weighted_pel = iClip1( max_imgpel_value, ((pixel1 + pixel2 + lround) >> denom) + offsetBi);
01084 *d++ = (*src_line++) - weighted_pel;
01085
01086 pixel1 = weight1 * (*ref1_line++);
01087 pixel2 = weight2 * (*ref2_line++);
01088 weighted_pel = iClip1( max_imgpel_value, ((pixel1 + pixel2 + lround) >> denom) + offsetBi);
01089 *d++ = (*src_line++) - weighted_pel;
01090
01091 pixel1 = weight1 * (*ref1_line++);
01092 pixel2 = weight2 * (*ref2_line++);
01093 weighted_pel = iClip1( max_imgpel_value, ((pixel1 + pixel2 + lround) >> denom) + offsetBi);
01094 *d++ = (*src_line++) - weighted_pel;
01095
01096 pixel1 = weight1 * (*ref1_line++);
01097 pixel2 = weight2 * (*ref2_line++);
01098 weighted_pel = iClip1( max_imgpel_value, ((pixel1 + pixel2 + lround) >> denom) + offsetBi);
01099 *d++ = (*src_line++) - weighted_pel;
01100
01101 pixel1 = weight1 * (*ref1_line++);
01102 pixel2 = weight2 * (*ref2_line++);
01103 weighted_pel = iClip1( max_imgpel_value, ((pixel1 + pixel2 + lround) >> denom) + offsetBi);
01104 *d++ = (*src_line++) - weighted_pel;
01105
01106 pixel1 = weight1 * (*ref1_line++);
01107 pixel2 = weight2 * (*ref2_line++);
01108 weighted_pel = iClip1( max_imgpel_value, ((pixel1 + pixel2 + lround) >> denom) + offsetBi);
01109 *d++ = (*src_line++) - weighted_pel;
01110
01111 pixel1 = weight1 * (*ref1_line++);
01112 pixel2 = weight2 * (*ref2_line++);
01113 weighted_pel = iClip1( max_imgpel_value, ((pixel1 + pixel2 + lround) >> denom) + offsetBi);
01114 *d++ = (*src_line++) - weighted_pel;
01115
01116 pixel1 = weight1 * (*ref1_line++);
01117 pixel2 = weight2 * (*ref2_line++);
01118 weighted_pel = iClip1( max_imgpel_value, ((pixel1 + pixel2 + lround) >> denom) + offsetBi);
01119 *d++ = (*src_line) - weighted_pel;
01120
01121 ref1_line += p_Img->padded_size_x_m8x8;
01122 ref2_line += p_Img->padded_size_x_m8x8;
01123 src_line += src_size_x;
01124 }
01125 if ((mcost += HadamardSAD8x8 (diff)) > min_mcost) return mcost;
01126 }
01127 src_tmp += src_size_mul;
01128 }
01129 }
01130 return mcost;
01131 }
01132
01133
01134
01135
01136
01137
01138
01139
01140 int computeSSE(StorablePicture *ref1,
01141 MEBlock *mv_block,
01142 int min_mcost,
01143 MotionVector *cand
01144 )
01145 {
01146 int mcost = 0;
01147 int y,x;
01148 short blocksize_x = mv_block->blocksize_x;
01149 short blocksize_y = mv_block->blocksize_y;
01150 ImageParameters *p_Img = mv_block->p_Img;
01151 int pad_size_x = p_Img->padded_size_x - blocksize_x;
01152
01153 imgpel *src_line = mv_block->orig_pic[0];
01154 imgpel *ref_line = UMVLine4X (ref1, cand->mv_y, cand->mv_x);
01155
01156 for (y=0; y<blocksize_y; y++)
01157 {
01158 for (x = 0; x < blocksize_x; x+=4)
01159 {
01160 mcost += iabs2( *src_line++ - *ref_line++ );
01161 mcost += iabs2( *src_line++ - *ref_line++ );
01162 mcost += iabs2( *src_line++ - *ref_line++ );
01163 mcost += iabs2( *src_line++ - *ref_line++ );
01164 }
01165 if (mcost >= min_mcost) return mcost;
01166 ref_line += pad_size_x;
01167 }
01168
01169 if ( mv_block->ChromaMEEnable )
01170 {
01171
01172 int blocksize_x_cr = mv_block->blocksize_cr_x;
01173 int blocksize_y_cr = mv_block->blocksize_cr_y;
01174 int cr_pad_size_x = p_Img->cr_padded_size_x - blocksize_x_cr;
01175 int k;
01176 int mcr_cost = 0;
01177
01178 for (k=0; k<2; k++)
01179 {
01180 mcr_cost = 0;
01181 src_line = mv_block->orig_pic[k+1];
01182 ref_line = UMVLine8X_chroma ( ref1, k+1, cand->mv_y, cand->mv_x);
01183 for (y=0; y<blocksize_y_cr; y++)
01184 {
01185 for (x = 0; x < blocksize_x_cr; x+=2)
01186 {
01187 mcr_cost += iabs2( *src_line++ - *ref_line++ );
01188 mcr_cost += iabs2( *src_line++ - *ref_line++ );
01189 }
01190 ref_line += cr_pad_size_x;
01191 }
01192 mcost += mv_block->ChromaMEWeight * mcr_cost;
01193 if (mcost >= min_mcost) return mcost;
01194 }
01195 }
01196
01197 return mcost;
01198 }
01199
01200
01201
01202
01203
01204
01205
01206
01207 int computeSSEWP(StorablePicture *ref1,
01208 MEBlock *mv_block,
01209 int min_mcost,
01210 MotionVector *cand
01211 )
01212 {
01213 int mcost = 0;
01214 int y,x;
01215 int weighted_pel;
01216 short blocksize_x = mv_block->blocksize_x;
01217 short blocksize_y = mv_block->blocksize_y;
01218 ImageParameters *p_Img = mv_block->p_Img;
01219 Slice *currSlice = mv_block->p_slice;
01220 short weight = mv_block->weight_luma;
01221 short offset = mv_block->offset_luma;
01222
01223 int wp_luma_round = currSlice->wp_luma_round;
01224 int pad_size_x = p_Img->padded_size_x - blocksize_x;
01225 int max_imgpel_value = p_Img->max_imgpel_value;
01226 short luma_log_weight_denom = currSlice->luma_log_weight_denom;
01227
01228 imgpel *src_line = mv_block->orig_pic[0];
01229 imgpel *ref_line = UMVLine4X (ref1, cand->mv_y, cand->mv_x);
01230
01231 for (y=0; y<blocksize_y; y++)
01232 {
01233 for (x = 0; x < blocksize_x; x+=4)
01234 {
01235 weighted_pel = iClip1( max_imgpel_value, ((weight * *ref_line++ + wp_luma_round) >> luma_log_weight_denom) + offset);
01236 mcost += iabs2( *src_line++ - weighted_pel );
01237 weighted_pel = iClip1( max_imgpel_value, ((weight * *ref_line++ + wp_luma_round) >> luma_log_weight_denom) + offset);
01238 mcost += iabs2( *src_line++ - weighted_pel );
01239 weighted_pel = iClip1( max_imgpel_value, ((weight * *ref_line++ + wp_luma_round) >> luma_log_weight_denom) + offset);
01240 mcost += iabs2( *src_line++ - weighted_pel );
01241 weighted_pel = iClip1( max_imgpel_value, ((weight * *ref_line++ + wp_luma_round) >> luma_log_weight_denom) + offset);
01242 mcost += iabs2( *src_line++ - weighted_pel );
01243 }
01244 if (mcost >= min_mcost) return mcost;
01245 ref_line += pad_size_x;
01246 }
01247
01248 if ( mv_block->ChromaMEEnable )
01249 {
01250
01251
01252 int blocksize_x_cr = mv_block->blocksize_cr_x;
01253 int blocksize_y_cr = mv_block->blocksize_cr_y;
01254 int cr_pad_size_x = p_Img->cr_padded_size_x - blocksize_x_cr;
01255 int k;
01256 int mcr_cost = 0;
01257 int max_imgpel_value_uv = p_Img->max_pel_value_comp[1];
01258 int wp_chroma_round = currSlice->wp_chroma_round;
01259 short chroma_log_weight_denom = currSlice->chroma_log_weight_denom;
01260
01261 for (k=0; k<2; k++)
01262 {
01263 weight = mv_block->weight_cr[k];
01264 offset = mv_block->offset_cr[k];
01265
01266 mcr_cost = 0;
01267 src_line = mv_block->orig_pic[k+1];
01268 ref_line = UMVLine8X_chroma ( ref1, k+1, cand->mv_y, cand->mv_x);
01269 for (y=0; y<blocksize_y_cr; y++)
01270 {
01271
01272 for (x = 0; x < blocksize_x_cr; x+=2)
01273 {
01274 weighted_pel = iClip1( max_imgpel_value_uv, ((weight * *ref_line++ + wp_chroma_round) >> chroma_log_weight_denom) + offset);
01275 mcr_cost += iabs2( *src_line++ - weighted_pel );
01276 weighted_pel = iClip1( max_imgpel_value_uv, ((weight * *ref_line++ + wp_chroma_round) >> chroma_log_weight_denom) + offset);
01277 mcr_cost += iabs2( *src_line++ - weighted_pel );
01278 }
01279 ref_line += cr_pad_size_x;
01280 }
01281 mcost += mv_block->ChromaMEWeight * mcr_cost;
01282 if (mcost >= min_mcost) return mcost;
01283 }
01284 }
01285
01286 return mcost;
01287 }
01288
01289
01290
01291
01292
01293
01294
01295 int computeBiPredSSE1(StorablePicture *ref1,
01296 StorablePicture *ref2,
01297 MEBlock *mv_block,
01298 int min_mcost,
01299 MotionVector *cand1,
01300 MotionVector *cand2)
01301 {
01302 int mcost = 0;
01303 int bi_diff;
01304 int y,x;
01305 short blocksize_x = mv_block->blocksize_x;
01306 short blocksize_y = mv_block->blocksize_y;
01307 ImageParameters *p_Img = mv_block->p_Img;
01308 int pad_size_x = p_Img->padded_size_x - blocksize_x;
01309
01310 imgpel *src_line = mv_block->orig_pic[0];
01311 imgpel *ref2_line = UMVLine4X(ref2, cand2->mv_y, cand2->mv_x);
01312 imgpel *ref1_line = UMVLine4X(ref1, cand1->mv_y, cand1->mv_x);
01313
01314 for (y = 0; y < blocksize_y; y++)
01315 {
01316 for (x = 0; x < blocksize_x; x+=4)
01317 {
01318 bi_diff = (*src_line++) - ((*ref1_line++ + *ref2_line++ + 1)>>1);
01319 mcost += iabs2(bi_diff);
01320 bi_diff = (*src_line++) - ((*ref1_line++ + *ref2_line++ + 1)>>1);
01321 mcost += iabs2(bi_diff);
01322 bi_diff = (*src_line++) - ((*ref1_line++ + *ref2_line++ + 1)>>1);
01323 mcost += iabs2(bi_diff);
01324 bi_diff = (*src_line++) - ((*ref1_line++ + *ref2_line++ + 1)>>1);
01325 mcost += iabs2(bi_diff);
01326 }
01327
01328 if (mcost >= min_mcost) return mcost;
01329 ref2_line += pad_size_x;
01330 ref1_line += pad_size_x;
01331 }
01332
01333 if ( mv_block->ChromaMEEnable )
01334 {
01335
01336 int blocksize_x_cr = mv_block->blocksize_cr_x;
01337 int blocksize_y_cr = mv_block->blocksize_cr_y;
01338 int cr_pad_size_x = p_Img->cr_padded_size_x - blocksize_x_cr;
01339 int k;
01340 int mcr_cost = 0;
01341
01342 for (k=0; k<2; k++)
01343 {
01344 mcr_cost = 0;
01345 src_line = mv_block->orig_pic[k+1];
01346 ref2_line = UMVLine8X_chroma ( ref2, k+1, cand2->mv_y, cand2->mv_x);
01347 ref1_line = UMVLine8X_chroma ( ref1, k+1, cand1->mv_y, cand1->mv_x);
01348
01349 for (y=0; y<blocksize_y_cr; y++)
01350 {
01351 for (x = 0; x < blocksize_x_cr; x+=2)
01352 {
01353 bi_diff = (*src_line++) - ((*ref1_line++ + *ref2_line++ + 1)>>1);
01354 mcr_cost += iabs2(bi_diff);
01355 bi_diff = (*src_line++) - ((*ref1_line++ + *ref2_line++ + 1)>>1);
01356 mcr_cost += iabs2(bi_diff);
01357 }
01358 ref2_line += cr_pad_size_x;
01359 ref1_line += cr_pad_size_x;
01360 }
01361 mcost += mv_block->ChromaMEWeight * mcr_cost;
01362 if (mcost >= min_mcost) return mcost;
01363 }
01364 }
01365
01366 return mcost;
01367 }
01368
01369
01370
01371
01372
01373
01374
01375
01376 int computeBiPredSSE2(StorablePicture *ref1,
01377 StorablePicture *ref2,
01378 MEBlock *mv_block,
01379 int min_mcost,
01380 MotionVector *cand1,
01381 MotionVector *cand2)
01382 {
01383 int mcost = 0;
01384 int bi_diff;
01385 ImageParameters *p_Img = mv_block->p_Img;
01386 Slice *currSlice = mv_block->p_slice;
01387 int denom = currSlice->luma_log_weight_denom + 1;
01388 int lround = 2 * currSlice->wp_luma_round;
01389 int max_imgpel_value = p_Img->max_imgpel_value;
01390 int y,x;
01391 int weighted_pel, pixel1, pixel2;
01392 short weight1 = mv_block->weight1;
01393 short weight2 = mv_block->weight2;
01394 short offsetBi = mv_block->offsetBi;
01395
01396 short blocksize_x = mv_block->blocksize_x;
01397 short blocksize_y = mv_block->blocksize_y;
01398 int pad_size_x = p_Img->padded_size_x - blocksize_x;
01399
01400 imgpel *src_line = mv_block->orig_pic[0];
01401 imgpel *ref2_line = UMVLine4X(ref2, cand2->mv_y, cand2->mv_x);
01402 imgpel *ref1_line = UMVLine4X(ref1, cand1->mv_y, cand1->mv_x);
01403 for (y=0; y<blocksize_y; y++)
01404 {
01405 for (x = 0; x < blocksize_x; x+=4)
01406 {
01407 pixel1 = weight1 * (*ref1_line++);
01408 pixel2 = weight2 * (*ref2_line++);
01409 weighted_pel = iClip1( max_imgpel_value, ((pixel1 + pixel2 + lround) >> denom) + offsetBi);
01410 bi_diff = (*src_line++) - weighted_pel;
01411 mcost += bi_diff * bi_diff;
01412
01413 pixel1 = weight1 * (*ref1_line++);
01414 pixel2 = weight2 * (*ref2_line++);
01415 weighted_pel = iClip1( max_imgpel_value, ((pixel1 + pixel2 + lround) >> denom) + offsetBi);
01416 bi_diff = (*src_line++) - weighted_pel;
01417 mcost += bi_diff * bi_diff;
01418
01419 pixel1 = weight1 * (*ref1_line++);
01420 pixel2 = weight2 * (*ref2_line++);
01421 weighted_pel = iClip1( max_imgpel_value, ((pixel1 + pixel2 + lround) >> denom) + offsetBi);
01422 bi_diff = (*src_line++) - weighted_pel;
01423 mcost += bi_diff * bi_diff;
01424
01425 pixel1 = weight1 * (*ref1_line++);
01426 pixel2 = weight2 * (*ref2_line++);
01427 weighted_pel = iClip1( max_imgpel_value, ((pixel1 + pixel2 + lround) >> denom) + offsetBi);
01428 bi_diff = (*src_line++) - weighted_pel;
01429 mcost += bi_diff * bi_diff;
01430 }
01431 if (mcost >= min_mcost) return mcost;
01432 ref2_line += pad_size_x;
01433 ref1_line += pad_size_x;
01434 }
01435
01436 if ( mv_block->ChromaMEEnable )
01437 {
01438
01439 int blocksize_x_cr = mv_block->blocksize_cr_x;
01440 int blocksize_y_cr = mv_block->blocksize_cr_y;
01441 int cr_pad_size_x = p_Img->cr_padded_size_x - blocksize_x_cr;
01442 int k;
01443 int mcr_cost = 0;
01444 int max_imgpel_value_uv = p_Img->max_pel_value_comp[1];
01445
01446 for (k=0; k<2; k++)
01447 {
01448 weight1 = mv_block->weight1_cr[k];
01449 weight2 = mv_block->weight2_cr[k];
01450 offsetBi = mv_block->offsetBi_cr[k];
01451
01452 mcr_cost = 0;
01453 src_line = mv_block->orig_pic[k+1];
01454 ref2_line = UMVLine8X_chroma ( ref2, k+1, cand2->mv_y, cand2->mv_x);
01455 ref1_line = UMVLine8X_chroma ( ref1, k+1, cand1->mv_y, cand1->mv_x);
01456
01457 for (y=0; y<blocksize_y_cr; y++)
01458 {
01459 for (x = 0; x < blocksize_x_cr; x+=2)
01460 {
01461 pixel1 = weight1 * (*ref1_line++);
01462 pixel2 = weight2 * (*ref2_line++);
01463 weighted_pel = iClip1( max_imgpel_value_uv, ((pixel1 + pixel2 + lround) >> denom) + offsetBi);
01464 bi_diff = (*src_line++) - weighted_pel;
01465 mcr_cost += bi_diff * bi_diff;
01466
01467 pixel1 = weight1 * (*ref1_line++);
01468 pixel2 = weight2 * (*ref2_line++);
01469 weighted_pel = iClip1( max_imgpel_value_uv, ((pixel1 + pixel2 + lround) >> denom) + offsetBi);
01470 bi_diff = (*src_line++) - weighted_pel;
01471 mcr_cost += bi_diff * bi_diff;
01472 }
01473 ref2_line += cr_pad_size_x;
01474 ref1_line += cr_pad_size_x;
01475 }
01476 mcost += mv_block->ChromaMEWeight * mcr_cost;
01477 if (mcost >= min_mcost) return mcost;
01478 }
01479 }
01480
01481 return mcost;
01482 }
01483
01484