H.264/AVC Reference Software Encoder: transform8x8.c Source File

00001 /*!
00002  ***************************************************************************
00003  * \file transform8x8.c
00004  *
00005  * \brief
00006  *    8x8 transform functions
00007  *
00008  * \author
00009  *    Main contributors (see contributors.h for copyright, address and affiliation details)
00010  *    - Yuri Vatis
00011  *    - Jan Muenster
00012  *    - Lowell Winger                   <lwinger@lsil.com>
00013  * \date
00014  *    12. October 2003
00015  **************************************************************************
00016  */
00017 
00018 #include <math.h>
00019 #include <limits.h>
00020 
00021 #include "global.h"
00022 
00023 #include "image.h"
00024 #include "mb_access.h"
00025 #include "elements.h"
00026 #include "vlc.h"
00027 #include "transform8x8.h"
00028 #include "transform.h"
00029 #include "macroblock.h"
00030 #include "symbol.h"
00031 #include "mc_prediction.h"
00032 #include "md_distortion.h"
00033 #include "quant8x8.h"
00034 #include "rdoq.h"
00035 #include "q_matrix.h"
00036 #include "q_offsets.h"
00037 #include "rdopt.h"
00038 #include "md_common.h"
00039 #include "intra8x8.h"
00040 #include "rdopt_coding_state.h"
00041 
00042 //! single scan pattern
00043 static const byte SNGL_SCAN8x8[64][2] = {
00044   {0,0}, {1,0}, {0,1}, {0,2}, {1,1}, {2,0}, {3,0}, {2,1},
00045   {1,2}, {0,3}, {0,4}, {1,3}, {2,2}, {3,1}, {4,0}, {5,0},
00046   {4,1}, {3,2}, {2,3}, {1,4}, {0,5}, {0,6}, {1,5}, {2,4},
00047   {3,3}, {4,2}, {5,1}, {6,0}, {7,0}, {6,1}, {5,2}, {4,3},
00048   {3,4}, {2,5}, {1,6}, {0,7}, {1,7}, {2,6}, {3,5}, {4,4},
00049   {5,3}, {6,2}, {7,1}, {7,2}, {6,3}, {5,4}, {4,5}, {3,6},
00050   {2,7}, {3,7}, {4,6}, {5,5}, {6,4}, {7,3}, {7,4}, {6,5},
00051   {5,6}, {4,7}, {5,7}, {6,6}, {7,5}, {7,6}, {6,7}, {7,7}
00052 };
00053 
00054 
00055 //! field scan pattern
00056 static const byte FIELD_SCAN8x8[64][2] = {   // 8x8
00057   {0,0}, {0,1}, {0,2}, {1,0}, {1,1}, {0,3}, {0,4}, {1,2},
00058   {2,0}, {1,3}, {0,5}, {0,6}, {0,7}, {1,4}, {2,1}, {3,0},
00059   {2,2}, {1,5}, {1,6}, {1,7}, {2,3}, {3,1}, {4,0}, {3,2},
00060   {2,4}, {2,5}, {2,6}, {2,7}, {3,3}, {4,1}, {5,0}, {4,2},
00061   {3,4}, {3,5}, {3,6}, {3,7}, {4,3}, {5,1}, {6,0}, {5,2},
00062   {4,4}, {4,5}, {4,6}, {4,7}, {5,3}, {6,1}, {6,2}, {5,4},
00063   {5,5}, {5,6}, {5,7}, {6,3}, {7,0}, {7,1}, {6,4}, {6,5},
00064   {6,6}, {6,7}, {7,2}, {7,3}, {7,4}, {7,5}, {7,6}, {7,7}
00065 };
00066 
00067 
00068 //! array used to find expensive coefficients
00069 static const byte COEFF_COST8x8[2][64] =
00070 {
00071   {3,3,3,3,2,2,2,2,2,2,2,2,1,1,1,1,
00072    1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,
00073    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
00074    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0},
00075   {9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
00076    9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
00077    9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
00078    9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9}
00079 };
00080 
00081 
// Shorthand names for the 25 entries of a predictor sample array.
// Each macro expands to one element of whatever `PredPel` is in scope at
// the point of use; P_Z is entry 0 and P_A..P_X are entries 1..24.
#define P_Z   (PredPel[0])
#define P_A   (PredPel[1])
#define P_B   (PredPel[2])
#define P_C   (PredPel[3])
#define P_D   (PredPel[4])
#define P_E   (PredPel[5])
#define P_F   (PredPel[6])
#define P_G   (PredPel[7])
#define P_H   (PredPel[8])
#define P_I   (PredPel[9])
#define P_J   (PredPel[10])
#define P_K   (PredPel[11])
#define P_L   (PredPel[12])
#define P_M   (PredPel[13])
#define P_N   (PredPel[14])
#define P_O   (PredPel[15])
#define P_P   (PredPel[16])
#define P_Q   (PredPel[17])
#define P_R   (PredPel[18])
#define P_S   (PredPel[19])
#define P_T   (PredPel[20])
#define P_U   (PredPel[21])
#define P_V   (PredPel[22])
#define P_W   (PredPel[23])
#define P_X   (PredPel[24])
00108 
00109 
00110 
00111 /*!
00112 ************************************************************************
00113 * \brief
00114 *    Residual DPCM for Intra lossless coding
00115 *
00116 * \par Input:
00117 *    block_x,block_y: Block position inside a macro block (0,8).
00118 ************************************************************************
00119 */
00120 //For residual DPCM
00121 static int Residual_DPCM_8x8(int ipmode, int **ores, int **rres,int block_y, int block_x)
00122 {
00123   int i,j;
00124   int temp[8][8];
00125 
00126   if(ipmode==VERT_PRED)
00127   { 
00128 
00129     for (j=0; j<8; j++)
00130      temp[0][j] = ores[block_y][block_x+j];
00131 
00132     for (i=1; i<8; i++) 
00133       for (j=0; j<8; j++)
00134         temp[i][j] =  ores[block_y+i][block_x+j] - ores[block_y+i-1][block_x+j];
00135 
00136     for (i = 0; i < 8; i++)
00137       for (j = 0; j < 8; j++)
00138         rres[block_y+i][block_x+j] = temp[i][j];
00139   }
00140   else  //HOR_PRED
00141   {
00142     for (i=0; i<8; i++)
00143      temp[i][0] = ores[block_y + i][block_x];
00144 
00145     for (i=0; i<8; i++)
00146       for (j=1; j<8; j++)
00147         temp[i][j] = ores[block_y+i][block_x+j] - ores[block_y+i][block_x+j-1];
00148 
00149     for (i=0; i<8; i++)
00150       for (j=0; j<8; j++)
00151         rres[block_y+i][block_x+j] = temp[i][j];
00152   }
00153   return 0;
00154 }
00155 
00156 /*!
00157 ************************************************************************
00158 * \brief
00159 *    Inverse residual DPCM for Intra lossless coding
00160 *
00161 * \par Input:
00162 *    block_x,block_y: Block position inside a macro block (0,8).
00163 ************************************************************************
00164 */
00165 //For residual DPCM
00166 static int Inv_Residual_DPCM_8x8(Macroblock *currMB, int **m7, int block_y, int block_x)  
00167 {
00168   int i;
00169   int temp[8][8];
00170 
00171   if(currMB->ipmode_DPCM == VERT_PRED)
00172   {
00173     for(i=0; i<8; i++)
00174     {
00175       temp[0][i] = m7[block_y+0][block_x+i];
00176       temp[1][i] = temp[0][i] + m7[block_y+1][block_x+i];
00177       temp[2][i] = temp[1][i] + m7[block_y+2][block_x+i];
00178       temp[3][i] = temp[2][i] + m7[block_y+3][block_x+i];
00179       temp[4][i] = temp[3][i] + m7[block_y+4][block_x+i];
00180       temp[5][i] = temp[4][i] + m7[block_y+5][block_x+i];
00181       temp[6][i] = temp[5][i] + m7[block_y+6][block_x+i];
00182       temp[7][i] = temp[6][i] + m7[block_y+7][block_x+i];
00183     }
00184     for(i=0; i<8; i++)
00185     {
00186       m7[block_y+1][block_x+i] = temp[1][i];
00187       m7[block_y+2][block_x+i] = temp[2][i];
00188       m7[block_y+3][block_x+i] = temp[3][i];
00189       m7[block_y+4][block_x+i] = temp[4][i];
00190       m7[block_y+5][block_x+i] = temp[5][i];
00191       m7[block_y+6][block_x+i] = temp[6][i];
00192       m7[block_y+7][block_x+i] = temp[7][i];
00193     }
00194   }
00195   else //HOR_PRED
00196   {
00197     for(i=0; i<8; i++)
00198     {
00199       temp[i][0] = m7[block_y+i][block_x+0];
00200       temp[i][1] = temp[i][0] + m7[block_y+i][block_x+1];
00201       temp[i][2] = temp[i][1] + m7[block_y+i][block_x+2];
00202       temp[i][3] = temp[i][2] + m7[block_y+i][block_x+3];
00203       temp[i][4] = temp[i][3] + m7[block_y+i][block_x+4];
00204       temp[i][5] = temp[i][4] + m7[block_y+i][block_x+5];
00205       temp[i][6] = temp[i][5] + m7[block_y+i][block_x+6];
00206       temp[i][7] = temp[i][6] + m7[block_y+i][block_x+7];
00207     }
00208     for(i=0; i<8; i++)
00209     {
00210       m7[block_y+i][block_x+1] = temp[i][1];
00211       m7[block_y+i][block_x+2] = temp[i][2];
00212       m7[block_y+i][block_x+3] = temp[i][3];
00213       m7[block_y+i][block_x+4] = temp[i][4];
00214       m7[block_y+i][block_x+5] = temp[i][5];
00215       m7[block_y+i][block_x+6] = temp[i][6];
00216       m7[block_y+i][block_x+7] = temp[i][7];
00217     }
00218   }
00219   return 0;
00220 }
00221 
00222 /*!
00223  *************************************************************************************
00224  * \brief
00225  *    8x8 Intra mode decision for a macroblock
00226  *************************************************************************************
00227  */
00228 int Mode_Decision_for_Intra8x8Macroblock (Macroblock *currMB, double lambda, double *min_cost)
00229 {
00230   Slice *currSlice = currMB->p_slice;
00231   int cur_cbp = 0, b8;
00232   double cost8x8;
00233   int cr_cbp[3] = { 0, 0, 0}; 
00234 
00235   *min_cost = (int)floor(6.0 * lambda + 0.4999);
00236 
00237   if (currSlice->P444_joined == 0)
00238   {
00239     for (b8=0; b8<4; b8++)
00240     {
00241       if (currSlice->Mode_Decision_for_8x8IntraBlocks (currMB, b8, lambda, &cost8x8))
00242       {
00243         cur_cbp |= (1<<b8);
00244       }
00245       *min_cost += cost8x8;      
00246     }
00247   }
00248   else
00249   { 
00250     int k;
00251     currSlice->cmp_cbp[1] = currSlice->cmp_cbp[2] = 0;
00252     currMB->cr_cbp[0] = 0;
00253     currMB->cr_cbp[1] = 0;
00254     currMB->cr_cbp[2] = 0;
00255 
00256     for (b8 = 0; b8 < 4; b8++)
00257     {
00258       if (currSlice->Mode_Decision_for_8x8IntraBlocks (currMB, b8, lambda, &cost8x8))
00259       {
00260         cur_cbp |= (1<<b8);
00261       }
00262       *min_cost += cost8x8;
00263 
00264       for (k = 1; k < 3; k++)
00265       {
00266         if (cr_cbp[k])
00267         {
00268           currSlice->cmp_cbp[k] |= (1 << b8);
00269           cur_cbp |= currSlice->cmp_cbp[k];
00270           currSlice->cmp_cbp[k] = cur_cbp;
00271         }
00272       }      
00273     }
00274   }
00275 
00276   return cur_cbp;
00277 }
00278 
00279 
00280 /*!
00281  *************************************************************************************
00282  * \brief
00283  *    R-D Cost for an 8x8 Intra block
00284  *************************************************************************************
00285  */
00286 
00287 double rdcost_for_8x8_intra_blocks(Macroblock *currMB, int *nonzero, int b8, int ipmode, double lambda, double min_rdcost, int mostProbableMode)
00288 {
00289   ImageParameters *p_Img = currMB->p_Img;
00290   Slice *currSlice = currMB->p_slice;
00291 
00292   double  rdcost = 0.0;
00293   int     dummy = 0;
00294   int     rate;
00295   int64   distortion  = 0;
00296   int     block_x     = (b8 & 0x01) << 3;
00297   int     block_y     = (b8 >> 1) << 3;
00298   int     pic_pix_x   = currMB->pix_x + block_x;
00299   int     pic_pix_y   = currMB->pix_y + block_y;
00300   int     pic_opix_y  = currMB->opix_y + block_y;
00301 
00302   SyntaxElement  se;
00303   const int      *partMap   = assignSE2partition[currSlice->partition_mode];
00304   DataPartition  *dataPart;
00305 
00306   //===== perform DCT, Q, IQ, IDCT, Reconstruction =====
00307   *nonzero = currMB->trans_8x8 (currMB, PLANE_Y, b8, &dummy, 1);
00308 
00309   //===== get distortion (SSD) of 8x8 block =====
00310   distortion += compute_SSE8x8(&p_Img->pCurImg[pic_opix_y], &p_Img->enc_picture->imgY[pic_pix_y], pic_pix_x, pic_pix_x);
00311 
00312   if ((double) distortion > min_rdcost)
00313   {
00314     //currSlice->reset_coding_state (currMB, currSlice->p_RDO->cs_cm);
00315     return (double) distortion;
00316   }
00317 
00318   currMB->ipmode_DPCM = NO_INTRA_PMODE;  
00319 
00320   //===== RATE for INTRA PREDICTION MODE  (SYMBOL MODE MUST BE SET TO CAVLC) =====
00321   se.value1 = (mostProbableMode == ipmode) ? -1 : ipmode < mostProbableMode ? ipmode : ipmode-1;
00322 
00323   //--- set position and type ---
00324   se.context = b8;
00325   se.type    = SE_INTRAPREDMODE;
00326 
00327   //--- choose data partition ---
00328   if (currSlice->slice_type != B_SLICE)
00329     dataPart = &(currSlice->partArr[partMap[SE_INTRAPREDMODE]]);
00330   else
00331     dataPart = &(currSlice->partArr[partMap[SE_BFRAME]]);
00332 
00333   //--- encode and update rate ---
00334   currSlice->writeIntraPredMode (&se, dataPart);
00335 
00336   rate = se.len;
00337 
00338   //===== RATE for LUMINANCE COEFFICIENTS =====
00339 
00340   if (currSlice->symbol_mode == CAVLC)
00341   {      
00342     rate  += currSlice->writeCoeff4x4_CAVLC (currMB, LUMA, b8, 0, 0);
00343     rate  += currSlice->writeCoeff4x4_CAVLC (currMB, LUMA, b8, 1, 0);
00344     rate  += currSlice->writeCoeff4x4_CAVLC (currMB, LUMA, b8, 2, 0);
00345     rate  += currSlice->writeCoeff4x4_CAVLC (currMB, LUMA, b8, 3, 0);
00346   }
00347   else
00348   {
00349     rate  += writeCoeff8x8_CABAC (currMB, PLANE_Y, b8, 1);
00350   }
00351 
00352   rdcost = (double)distortion + lambda*(double)rate;
00353 
00354   currSlice->reset_coding_state (currMB, currSlice->p_RDO->cs_cm);
00355 
00356   return rdcost;
00357 }
00358 
00359 
00360 /*!
00361  *************************************************************************************
00362  * \brief
00363  *    R-D Cost for an 8x8 Intra block
00364  *************************************************************************************
00365  */
00366 
00367 double rdcost_for_8x8_intra_blocks_444(Macroblock *currMB, int *nonzero, int b8, int ipmode, double lambda, double min_rdcost, int mostProbableMode)
00368 {
00369   ImageParameters *p_Img = currMB->p_Img;
00370   Slice *currSlice = currMB->p_slice;
00371 
00372   double  rdcost = 0.0;
00373   int     dummy = 0;
00374   int     rate;
00375   int64   distortion  = 0;
00376   int     block_x     = (b8 & 0x01) << 3;
00377   int     block_y     = (b8 >> 1) << 3;
00378   int     pic_pix_x   = currMB->pix_x + block_x;
00379   int     pic_pix_y   = currMB->pix_y + block_y;
00380   int     pic_opix_y  = currMB->opix_y + block_y;
00381 
00382   SyntaxElement  se;
00383   const int      *partMap   = assignSE2partition[currSlice->partition_mode];
00384   DataPartition  *dataPart;
00385 
00386   if(currSlice->P444_joined == 0) 
00387   {
00388     //===== perform DCT, Q, IQ, IDCT, Reconstruction =====
00389     *nonzero = currMB->trans_8x8 (currMB, PLANE_Y, b8, &dummy, 1);
00390 
00391     //===== get distortion (SSD) of 8x8 block =====
00392     distortion += compute_SSE8x8(&p_Img->pCurImg[pic_opix_y], &p_Img->enc_picture->imgY[pic_pix_y], pic_pix_x, pic_pix_x);
00393 
00394     currMB->ipmode_DPCM = NO_INTRA_PMODE;  
00395 
00396     //===== RATE for INTRA PREDICTION MODE  (SYMBOL MODE MUST BE SET TO CAVLC) =====
00397     se.value1 = (mostProbableMode == ipmode) ? -1 : ipmode < mostProbableMode ? ipmode : ipmode-1;
00398 
00399     //--- set position and type ---
00400     se.context = b8;
00401     se.type    = SE_INTRAPREDMODE;
00402 
00403     //--- choose data partition ---
00404     if (currSlice->slice_type != B_SLICE)
00405       dataPart = &(currSlice->partArr[partMap[SE_INTRAPREDMODE]]);
00406     else
00407       dataPart = &(currSlice->partArr[partMap[SE_BFRAME]]);
00408 
00409     //--- encode and update rate ---
00410     currSlice->writeIntraPredMode (&se, dataPart);
00411 
00412     rate = se.len;
00413 
00414     //===== RATE for LUMINANCE COEFFICIENTS =====
00415 
00416     if (currSlice->symbol_mode == CAVLC)
00417     {      
00418       rate  += currSlice->writeCoeff4x4_CAVLC (currMB, LUMA, b8, 0, 0);
00419       rate  += currSlice->writeCoeff4x4_CAVLC (currMB, LUMA, b8, 1, 0);
00420       rate  += currSlice->writeCoeff4x4_CAVLC (currMB, LUMA, b8, 2, 0);
00421       rate  += currSlice->writeCoeff4x4_CAVLC (currMB, LUMA, b8, 3, 0);
00422     }
00423     else
00424     {
00425       rate  += writeCoeff8x8_CABAC (currMB, PLANE_Y, b8, 1);
00426     }
00427   }
00428   else
00429   {
00430     ColorPlane k;
00431     //===== perform DCT, Q, IQ, IDCT, Reconstruction =====
00432     *nonzero = currMB->trans_8x8 (currMB, PLANE_Y, b8, &dummy, 1);
00433 
00434     //===== get distortion (SSD) of 8x8 block =====
00435     distortion += compute_SSE8x8(&p_Img->pCurImg[pic_opix_y], &p_Img->enc_picture->imgY[pic_pix_y], pic_pix_x, pic_pix_x);
00436 
00437     for (k = PLANE_U; k <= PLANE_V; k++)
00438     {
00439       select_plane(p_Img, k);
00440       currMB->c_nzCbCr[k ]= currMB->trans_8x8(currMB, k, b8, &dummy,1);
00441       distortion += compute_SSE8x8(&p_Img->pImgOrg[k][pic_opix_y], &p_Img->enc_picture->p_curr_img[pic_pix_y], pic_pix_x, pic_pix_x);
00442     }
00443     currMB->ipmode_DPCM = NO_INTRA_PMODE;
00444     select_plane(p_Img, PLANE_Y);
00445 
00446     //===== RATE for INTRA PREDICTION MODE  (SYMBOL MODE MUST BE SET TO CAVLC) =====
00447     se.value1 = (mostProbableMode == ipmode) ? -1 : ipmode < mostProbableMode ? ipmode : ipmode-1;
00448 
00449     //--- set position and type ---
00450     se.context = b8;
00451     se.type    = SE_INTRAPREDMODE;
00452 
00453     //--- choose data partition ---
00454     if (currSlice->slice_type != B_SLICE)
00455       dataPart = &(currSlice->partArr[partMap[SE_INTRAPREDMODE]]);
00456     else
00457       dataPart = &(currSlice->partArr[partMap[SE_BFRAME]]);
00458 
00459     //--- encode and update rate ---
00460     currSlice->writeIntraPredMode (&se, dataPart);
00461     rate = se.len;
00462 
00463     //===== RATE for LUMINANCE COEFFICIENTS =====
00464 
00465     if (currSlice->symbol_mode == CAVLC)
00466     {      
00467       int b4;
00468       for(b4=0; b4<4; b4++)
00469       {
00470         rate  += currSlice->writeCoeff4x4_CAVLC (currMB, LUMA, b8, b4, 0);
00471         rate  += currSlice->writeCoeff4x4_CAVLC (currMB, CB, b8, b4, 0);
00472         rate  += currSlice->writeCoeff4x4_CAVLC (currMB, CR, b8, b4, 0);
00473       }
00474     }
00475     else
00476     {
00477       rate  += writeCoeff8x8_CABAC (currMB, PLANE_Y, b8, 1);
00478       rate  += writeCoeff8x8_CABAC (currMB, PLANE_U, b8, 1);
00479       rate  += writeCoeff8x8_CABAC (currMB, PLANE_V, b8, 1);
00480     }
00481   }
00482   rdcost = (double)distortion + lambda*(double)rate;
00483 
00484   currSlice->reset_coding_state (currMB, currSlice->p_RDO->cs_cm);
00485 
00486   return rdcost;
00487 }
00488 
00489 static inline int check_zero(int **mb_ores, int block_x)
00490 {
00491   int i, j, k = 0;
00492 
00493   for (j = 0; (j < BLOCK_SIZE_8x8) && (k == 0); j++)
00494   {
00495     for (i = block_x; (i< block_x + BLOCK_SIZE_8x8) && (k == 0); i++)
00496     {
00497       //k |= (mb_ores[j][i] != 0);
00498       k |= mb_ores[j][i];
00499     }
00500   }
00501   return k;
00502 }
00503 
00504 /*!
00505  ************************************************************************
00506  * \brief
00507  *    The routine performs transform,quantization,inverse transform, adds the diff.
00508  *    to the prediction and writes the result to the decoded luma frame. Includes the
00509  *    RD constrained quantization also.
00510  *
00511  * \par Input:
00512  *    b8: Block position inside a macro block (0,1,2,3).
00513  *
00514  * \par Output:
00515  *    nonzero: 0 if no levels are nonzero.  1 if there are nonzero levels.
00516  *    coeff_cost: Counter for nonzero coefficients, used to discard expensive levels.
00517  ************************************************************************
00518  */
00519 int dct_8x8(Macroblock *currMB, ColorPlane pl, int b8, int *coeff_cost, int intra)
00520 {
00521   ImageParameters *p_Img = currMB->p_Img;
00522   int nonzero = FALSE; 
00523 
00524   int block_x = 8*(b8 & 0x01);
00525   int block_y = 8*(b8 >> 1);
00526   int pl_off = b8+ (pl<<2);
00527   Slice *currSlice = currMB->p_slice;
00528   imgpel **img_enc = p_Img->enc_picture->p_curr_img;
00529 
00530   imgpel **mb_pred = currSlice->mb_pred[pl];
00531   int    **mb_ores = currSlice->mb_ores[pl];
00532   int    **mb_rres = currSlice->mb_rres[pl];
00533 
00534   int max_imgpel_value   = p_Img->max_imgpel_value;
00535 
00536   if (check_zero(&mb_ores[block_y], block_x) != 0)
00537   {
00538     int qp = currMB->qp_scaled[pl];
00539 
00540     // Variable p_Quant and some of its parameters could be all set outside 
00541     // to speed up the code (e.g. field mode, coeff_cost, etc). 
00542     QuantParameters *p_Quant = p_Img->p_Quant;
00543 
00544     QuantMethods quant_methods;
00545     quant_methods.block_x = block_x;
00546     quant_methods.block_y = block_y;
00547 
00548     quant_methods.ACLevel = currSlice->cofAC[pl_off][0][0];
00549     quant_methods.ACRun   = currSlice->cofAC[pl_off][0][1];
00550 
00551     quant_methods.qp         = qp;
00552     quant_methods.q_params   = p_Quant->q_params_8x8[pl][intra][qp]; 
00553     quant_methods.fadjust    = p_Img->AdaptiveRounding ? (&p_Img->ARCofAdj8x8[pl][currMB->ar_mode][block_y]) : NULL;
00554     quant_methods.coeff_cost = coeff_cost;
00555     quant_methods.pos_scan   = currMB->is_field_mode ? FIELD_SCAN8x8 : SNGL_SCAN8x8;    
00556     quant_methods.c_cost     = COEFF_COST8x8[currSlice->disthres];
00557 
00558     // Forward 8x8 transform
00559     forward8x8(mb_ores, mb_rres, block_y, block_x);
00560 
00561     // Quantization process
00562     nonzero = currSlice->quant_8x8(currMB, &mb_rres[block_y], &quant_methods);
00563   }
00564   else
00565   {
00566     currSlice->cofAC[pl_off][0][0][0] = 0;
00567   }
00568 
00569   if (nonzero)
00570   {
00571     // Inverse 8x8 transform
00572     inverse8x8(mb_rres, mb_rres, block_y, block_x);
00573 
00574     // generate final block
00575     sample_reconstruct (&img_enc[currMB->pix_y + block_y], &mb_pred[block_y], &mb_rres[block_y], block_x, currMB->pix_x + block_x, BLOCK_SIZE_8x8, BLOCK_SIZE_8x8, max_imgpel_value, DQ_BITS_8);
00576   }
00577   else // if (nonzero) => No transformed residual. Just use prediction.
00578   {
00579     copy_image_data_8x8 (&img_enc[currMB->pix_y + block_y], &mb_pred[block_y], currMB->pix_x + block_x, block_x);
00580   }  
00581 
00582   //  Decoded block moved to frame memory
00583   return nonzero;
00584 }
00585 
00586 /*!
00587  ************************************************************************
00588  * \brief
00589  *    The routine performs transform,quantization,inverse transform, adds the diff.
00590  *    to the prediction and writes the result to the decoded luma frame. Includes the
00591  *    RD constrained quantization also. Used for CAVLC.
00592  *
00593  * \par Input:
00594  *    b8: Block position inside a macro block (0,1,2,3).
00595  *
00596  * \par Output:
00597  *    nonzero: 0 if no levels are nonzero.  1 if there are nonzero levels.
00598  *    coeff_cost: Counter for nonzero coefficients, used to discard expensive levels.
00599  ************************************************************************
00600  */
00601 int dct_8x8_cavlc(Macroblock *currMB, ColorPlane pl, int b8, int *coeff_cost, int intra)
00602 {
00603   ImageParameters *p_Img = currMB->p_Img;
00604   int nonzero = FALSE; 
00605 
00606   int block_x = 8*(b8 & 0x01);
00607   int block_y = 8*(b8 >> 1);
00608   int pl_off = b8+ (pl<<2);
00609   imgpel **img_enc = p_Img->enc_picture->p_curr_img;
00610   Slice *currSlice = currMB->p_slice;
00611   imgpel **mb_pred = currSlice->mb_pred[pl];  
00612   int    **mb_ores = currSlice->mb_ores[pl];   
00613   int    **mb_rres = currSlice->mb_rres[pl];   
00614 
00615   int max_imgpel_value   = p_Img->max_imgpel_value;
00616   
00617   int qp = currMB->qp_scaled[pl];
00618 
00619   //if (check_zero(&mb_ores[block_y], block_x) != 0)
00620   {
00621     // Variable p_Quant and some of its parameters could be all set outside 
00622     // to speed up the code (e.g. field mode, coeff_cost, etc). 
00623     QuantParameters *p_Quant = p_Img->p_Quant;
00624 
00625     QuantMethods quant_methods;
00626     quant_methods.block_x    = block_x;
00627     quant_methods.block_y    = block_y;
00628     quant_methods.qp         = qp;
00629     quant_methods.q_params   = p_Quant->q_params_8x8[pl][intra][qp]; 
00630     quant_methods.fadjust    = p_Img->AdaptiveRounding ? (&p_Img->ARCofAdj8x8[pl][currMB->ar_mode][block_y]) : NULL;
00631     quant_methods.coeff_cost = coeff_cost;
00632     quant_methods.pos_scan   = currMB->is_field_mode ? FIELD_SCAN8x8 : SNGL_SCAN8x8;    
00633     quant_methods.c_cost     = COEFF_COST8x8[currSlice->disthres];
00634 
00635     // Forward 8x8 transform
00636     forward8x8(mb_ores, mb_rres, block_y, block_x);
00637 
00638     // Quantization process
00639     nonzero = currSlice->quant_8x8cavlc(currMB, &mb_rres[block_y], &quant_methods, currSlice->cofAC[pl_off]);
00640   }
00641 
00642   if (nonzero)
00643   {
00644     // Inverse 8x8 transform
00645     inverse8x8(mb_rres, mb_rres, block_y, block_x);
00646 
00647     // generate final block
00648     sample_reconstruct (&img_enc[currMB->pix_y + block_y], &mb_pred[block_y], &mb_rres[block_y], block_x, currMB->pix_x + block_x, BLOCK_SIZE_8x8, BLOCK_SIZE_8x8, max_imgpel_value, DQ_BITS_8);
00649   }
00650   else // if (nonzero) => No transformed residual. Just use prediction.
00651   {
00652     copy_image_data_8x8(&img_enc[currMB->pix_y + block_y], &mb_pred[block_y], currMB->pix_x + block_x, block_x);
00653   }  
00654 
00655   //  Decoded block moved to frame memory
00656   return nonzero;
00657 }
00658 
00659 int dct_8x8_ls(Macroblock *currMB, ColorPlane pl, int b8, int *coeff_cost, int intra)
00660 {
00661   ImageParameters *p_Img = currMB->p_Img;
00662   int i,j,coeff_ctr;
00663   int scan_pos = 0,run = -1;
00664   int nonzero = FALSE;  
00665 
00666   int block_x = 8*(b8 & 0x01);
00667   int block_y = 8*(b8 >> 1);
00668   int pl_off = b8 + (pl<<2);
00669   Slice *currSlice = currMB->p_slice;
00670   int*  ACLevel = currSlice->cofAC[pl_off][0][0];
00671   int*  ACRun   = currSlice->cofAC[pl_off][0][1];  
00672   imgpel **img_enc = p_Img->enc_picture->p_curr_img;
00673   imgpel **mb_pred = currSlice->mb_pred[pl];
00674   int    **mb_ores = currSlice->mb_ores[pl];
00675   int    **mb_rres = currSlice->mb_rres[pl];
00676 
00677   int scan_poss[4] = { 0 }, runs[4] = { -1, -1, -1, -1 };
00678   int MCcoeff = 0;
00679   int *m7;
00680   int is_cavlc = (currSlice->symbol_mode == CAVLC);
00681 
00682   const byte (*pos_scan)[2] = currMB->is_field_mode ? FIELD_SCAN8x8 : SNGL_SCAN8x8;
00683 
00684   int **fadjust8x8 = p_Img->AdaptiveRounding ? (&p_Img->ARCofAdj8x8[pl][currMB->ar_mode][block_y]) :NULL;
00685   
00686   runs[0]=runs[1]=runs[2]=runs[3]=-1;
00687   scan_poss[0] = scan_poss[1] = scan_poss[2] = scan_poss[3] = 0;
00688 
00689   if( (currMB->ipmode_DPCM < 2)&&(intra))
00690   {
00691     Residual_DPCM_8x8(currMB->ipmode_DPCM, mb_ores, mb_rres, block_y, block_x);
00692   }
00693 
00694   for (coeff_ctr=0; coeff_ctr < 64; coeff_ctr++)
00695   {
00696     i=pos_scan[coeff_ctr][0];
00697     j=pos_scan[coeff_ctr][1];
00698 
00699     run++;
00700 
00701     if (currMB->luma_transform_size_8x8_flag && is_cavlc)
00702     {
00703       MCcoeff = (coeff_ctr & 3);
00704       runs[MCcoeff]++;
00705     }
00706 
00707     m7 = &mb_rres[block_y + j][block_x + i];
00708 
00709     if (p_Img->AdaptiveRounding)
00710     {
00711       fadjust8x8[j][block_x+i] = 0;
00712     }
00713 
00714     if (*m7 != 0)
00715     {
00716       nonzero = TRUE;
00717 
00718       if (currMB->luma_transform_size_8x8_flag && is_cavlc)
00719       {
00720         *m7 = iClip3(-CAVLC_LEVEL_LIMIT, CAVLC_LEVEL_LIMIT, *m7);
00721         *coeff_cost += MAX_VALUE;
00722 
00723         currSlice->cofAC[pl_off][MCcoeff][0][scan_poss[MCcoeff]  ] = *m7;
00724         currSlice->cofAC[pl_off][MCcoeff][1][scan_poss[MCcoeff]++] = runs[MCcoeff];
00725         ++scan_pos;
00726         runs[MCcoeff]=-1;
00727       }
00728       else
00729       {
00730         *coeff_cost += MAX_VALUE;
00731         ACLevel[scan_pos  ] = *m7;
00732         ACRun  [scan_pos++] = run;
00733         run=-1;                     // reset zero level counter
00734       }
00735     }
00736   }
00737 
00738   if (!currMB->luma_transform_size_8x8_flag || !is_cavlc)
00739     ACLevel[scan_pos] = 0;
00740   else
00741   {
00742     for(i=0; i<4; i++)
00743       currSlice->cofAC[pl_off][i][0][scan_poss[i]] = 0;
00744   }
00745 
00746   if( (currMB->ipmode_DPCM < 2) && (intra))
00747   {
00748     Inv_Residual_DPCM_8x8(currMB, mb_rres, block_y, block_x);
00749   }
00750 
00751   for( j=block_y; j<block_y + BLOCK_SIZE_8x8; j++)
00752   {            
00753     for( i=block_x; i< block_x + BLOCK_SIZE_8x8; i++)
00754     {
00755       mb_rres[j][i] += (int) mb_pred[j][i];
00756       img_enc[currMB->pix_y + j][currMB->pix_x + i]= (imgpel) mb_rres[j][i];
00757     }
00758   }  
00759 
00760   //  Decoded block moved to frame memory
00761   return nonzero;
00762 }
00763 
00764 
00765 /*static inline void compute_diff(int *diff, imgpel *cimg, imgpel *cmpr, int width)
00766 {
00767   int i;
00768   for (i = 0; i < width; i++)
00769   {
00770     *(diff++) = *(cimg++) - *(cmpr++);
00771   }
00772 }*/
00773 
00774 /*!
00775 *************************************************************************************
00776 * \brief
00777 *     distortion for an 8x8 Intra block 
00778 *************************************************************************************
00779 */
00780 void compute_comp8x8_cost(ImageParameters *p_Img, imgpel **cur_img, imgpel **mpr8x8, int pic_opix_x, int *cost, int min_cost)
00781 {
00782   int diff64[64];
00783 
00784   int i, j;
00785   int *diff = &diff64[0];
00786   imgpel *cimg, *cmpr;
00787 
00788   for (j=0; j<8; j++)
00789   {
00790   //  compute_diff(diff, &cur_img[j][pic_opix_x], &mpr8x8[j][0], BLOCK_SIZE_8x8);
00791 
00792     cimg = &cur_img[j][pic_opix_x];
00793     cmpr = &mpr8x8[j][0];
00794     for (i=0; i<8; i++)
00795     {
00796       *diff++ = *cimg++ - *cmpr++;
00797     }
00798 
00799   }
00800   *cost += p_Img->distortion8x8 (diff64, min_cost);
00801 }
00802 
00803 
00804 /*!
00805 *************************************************************************************
00806 * \brief
00807 *     SAD distortion for an 8x8 Intra block 
00808 *************************************************************************************
00809 */
00810 void compute_sad8x8_cost(ImageParameters *p_Img, imgpel **cur_img, imgpel **mpr8x8, int pic_opix_x, int *cost, int min_cost)
00811 {
00812   int i, j;
00813   imgpel *cimg, *cmpr;
00814 
00815   for (j=0; j<8; j++)
00816   {
00817     cimg = &cur_img[j][pic_opix_x];
00818     cmpr = &mpr8x8[j][0];
00819     for (i=0; i<8; i++)
00820     {
00821       *cost += iabs(*cimg++ - *cmpr++);
00822     }
00823     if (*cost > min_cost)
00824     {
00825       break;
00826     }
00827   }
00828 }
00829 
00830 /*!
00831 *************************************************************************************
00832 * \brief
00833 *     SSE distortion for an 8x8 Intra block 
00834 *************************************************************************************
00835 */
00836 void compute_sse8x8_cost(ImageParameters *p_Img, imgpel **cur_img, imgpel **mpr8x8, int pic_opix_x, int *cost, int min_cost)
00837 {
00838   int i, j;
00839   imgpel *cimg, *cmpr;
00840 
00841   for (j=0; j<8; j++)
00842   {
00843     cimg = &cur_img[j][pic_opix_x];
00844     cmpr = &mpr8x8[j][0];
00845     for (i=0; i<8; i++)
00846     {
00847       *cost += iabs2(*cimg++ - *cmpr++);
00848     }
00849     if (*cost > min_cost)
00850     {
00851       break;
00852     }
00853   }
00854 }
00855 
00856 /*!
00857 *************************************************************************************
00858 * \brief
00859 *     SATD distortion for an 8x8 Intra block 
00860 *************************************************************************************
00861 */
00862 void compute_satd8x8_cost(ImageParameters *p_Img, imgpel **cur_img, imgpel **mpr8x8, int pic_opix_x, int *cost, int min_cost)
00863 {
00864   int i, j;
00865 #if defined(USEMMX)
00866 #if defined(_MSC_VER) || defined(__INTEL_COMPILER) // ICC
00867   __declspec(align(16)) int diff64[64];
00868 #else // GCC
00869   int diff64[64] __attribute__ ((aligned (16)));
00870 #endif
00871 #else
00872   int diff64[64];
00873 #endif
00874 
00875   int *diff = &diff64[0];
00876   imgpel *cimg, *cmpr;
00877 
00878   for (j=0; j<8; j++)
00879   {
00880     cimg = &cur_img[j][pic_opix_x];
00881     cmpr = &mpr8x8[j][0];
00882     for (i=0; i<8; i++)
00883     {
00884       *diff++ = *cimg++ - *cmpr++;
00885     }
00886   }
00887   *cost += HadamardSAD8x8 (diff64);
00888 }
00889