Presentation is loading. Please wait.

Presentation is loading. Please wait.

段志學 x264 code trace. Group4 block2. function Intra frame (16x16) x264_mb_encode_i16x16 predict x264_predict_lossless_16x16 DCT sub16x16_dct sub8x8_dct.

Similar presentations


Presentation on theme: "段志學 x264 code trace. Group4 block2. function Intra frame (16x16) x264_mb_encode_i16x16 predict x264_predict_lossless_16x16 DCT sub16x16_dct sub8x8_dct."— Presentation transcript:

1 9862568 段志學 x264 code trace. Group4 block2

2 function Intra frame (16x16) x264_mb_encode_i16x16 predict x264_predict_lossless_16x16 DCT sub16x16_dct sub8x8_dct sub4x4_dct x264_mb_encode_i16x16 x264_predict_lossless_16x16 x264_macroblock_encode sub16x16_dct sub8x8_dct sub4x4_dct

3 Macroblock encode x264_predict_lossless_16x16 x264_mb_encode_i16x16 Type = Intra 16x16 Lossless YN predict_16x16 sub16x16_dct sub8x8_dct sub4x4_dct

4 Code if( h->mb.i_type == I_16x16 ) { const int i_mode = h->mb.i_intra16x16_pred_mode; h->mb.b_transform_8x8 = 0; if( h->mb.b_lossless ) x264_predict_lossless_16x16( h, i_mode ); else h->predict_16x16[i_mode]( h->mb.pic.p_fdec[0] ); /* encode the 16x16 macroblock */ x264_mb_encode_i16x16( h, i_qp ); } In x264_macroblock_encode

5 x264_predict_lossless_16x16 Function x264_predict_lossless_16x16 argument x264_t *h, int i_mode return void description Copy pixel value into decode memory directly if the neighboring blocks are losslessly coded

6 x264_predict_lossless_16x16 Intra_16x16 4 modes: Vertical prediction Horizontal prediction DC-prediction Plane-prediction enum intra16x16_pred_e{ I_PRED_16x16_V = 0, I_PRED_16x16_H = 1, I_PRED_16x16_DC = 2, I_PRED_16x16_P = 3, I_PRED_16x16_DC_LEFT = 4, I_PRED_16x16_DC_TOP = 5, I_PRED_16x16_DC_128 = 6, };

7 example x264_predict_lossless_16x16

8 Intra_16x16 4 modes: Vertical prediction Horizontal prediction DC-prediction Plane-prediction void x264_predict_lossless_16x16( x264_t *h, int i_mode ) { int stride = h->fenc->i_stride[0] << h->mb.b_interlaced; if( i_mode == I_PRED_16x16_V ) h->mc.copy[PIXEL_16x16]( h->mb.pic.p_fdec[0], FDEC_STRIDE, h->mb.pic.p_fenc_plane[0]-stride, stride, 16 ); else if( i_mode == I_PRED_16x16_H ) h->mc.copy_16x16_unaligned( h->mb.pic.p_fdec[0], FDEC_STRIDE, h->mb.pic.p_fenc_plane[0]-1, stride, 16 ); else h->predict_16x16[i_mode]( h->mb.pic.p_fdec[0] ); }

9 x264_mb_encode_i16x16 Function x264_mb_encode_i16x16 argument x264_t *h, int i_qp return void description Encode intra 16x16 macroblock

10 x264_mb_encode_i16x16 Encode intra 16x16 macroblock DCT and quantization Decimate (after sub16x16_dct and quantization ) Trellis quantization (after dct4x4dc)

11 x264_mb_encode_i16x16 Copy Residual into luma4x4 block in ZigZag order sub16x16_dct (DCT for residual) dct4x4dc (DCT for DC) quantization dequantization ZigZag Scan quantization dequantization ZigZag Scan iDCT for DC add iDCT (write to decode) lossless N Y

12 x264_mb_encode_i16x16 If lossless mode is used if( h->mb.b_lossless ) { for( i = 0; i < 16; i++ ) { int oe = block_idx_xy_fenc[i]; int od = block_idx_xy_fdec[i]; nz = h->zigzagf.sub_4x4ac( h->dct.luma4x4[i], p_src+oe, p_dst+od, &dct_dc4x4[block_idx_yx_1d[i]] ); h->mb.cache.non_zero_count[x264_scan8[i]] = nz; h->mb.i_cbp_luma |= nz; } h->mb.i_cbp_luma *= 0xf; h->mb.cache.non_zero_count[x264_scan8[24]] = array_non_zero( dct_dc4x4 ); h->zigzagf.scan_4x4( h->dct.luma16x16_dc, dct_dc4x4 ); return; } x264_mb_encode_i16x16 Copy Residual into luma4x4 block in ZigZag order lossless Y

13 x264_mb_encode_i16x16 16 x16 macroblock DCT DC sub16x16_dct (DCT for residual) dct4x4dc (DCT for DC) quantization dequantization ZigZag Scan quantization dequantization ZigZag Scan iDCT for DC add iDCT (write to decode) DC

14 x264_mb_encode_i16x16 16 x 16 macroblock DCT h->dctf.sub16x16_dct( dct4x4, p_src, p_dst ); for( i = 0; i < 16; i++ ) { /* copy dc coeff */ dct_dc4x4[block_idx_xy_1d[i]] = dct4x4[i][0]; dct4x4[i][0] = 0; /* quant/scan/dequant */ nz = x264_quant_4x4( h, dct4x4[i], i_qp, DCT_LUMA_AC, 1, i ); h->mb.cache.non_zero_count[x264_scan8[i]] = nz; if( nz ) { h->zigzagf.scan_4x4( h->dct.luma4x4[i], dct4x4[i] ); h->quantf.dequant_4x4( dct4x4[i], h->dequant4_mf[CQM_4IY], i_qp ); if( decimate_score < 6 ) decimate_score += h->quantf.decimate_score15( h->dct.luma4x4[i] ); h->mb.i_cbp_luma = 0xf; } } if( decimate_score < 6 ) { h->mb.i_cbp_luma = 0; CLEAR_16x16_NNZ } sub16x16_dct (DCT for residual) quantization dequantization ZigZag Scan

15 x264_mb_encode_i16x16 4 x 4 DC DCT h->dctf.dct4x4dc( dct_dc4x4 ); if( h->mb.b_trellis ) nz = x264_quant_dc_trellis( h, dct_dc4x4, CQM_4IY, i_qp, DCT_LUMA_DC, 1, 0 ); else nz = h->quantf.quant_4x4_dc( dct_dc4x4, h->quant4_mf[CQM_4IY][i_qp][0]>>1, h->quant4_bias[CQM_4IY][i_qp][0]<<1 ); h->mb.cache.non_zero_count[x264_scan8[24]] = nz; if( nz ) { h->zigzagf.scan_4x4( h->dct.luma16x16_dc, dct_dc4x4 ); /* output samples to fdec */ h->dctf.idct4x4dc( dct_dc4x4 ); h->quantf.dequant_4x4_dc( dct_dc4x4, h->dequant4_mf[CQM_4IY], i_qp ); /* XXX not inversed */ if( h->mb.i_cbp_luma ) for( i = 0; i < 16; i++ ) dct4x4[i][0] = dct_dc4x4[block_idx_xy_1d[i]]; } /* put pixels to fdec */ if( h->mb.i_cbp_luma ) h->dctf.add16x16_idct( p_dst, dct4x4 ); else if( nz ) h->dctf.add16x16_idct_dc( p_dst, dct_dc4x4 ); dct4x4dc (DCT for DC) quantization dequantization ZigZag Scan iDCT for DC add iDCT (write to decode)

16 sub16x16_dct Function sub16x16_dct argument int16_t dct[16][16], uint8_t *pix1, uint8_t *pix2 return static void output dct[16][16] description Divide the 16x16 block into four 8x8 sub-blocks and call sub8x8_dct on each

17 sub8x8_dct Function sub8x8_dct argument int16_t dct[4][16], uint8_t *pix1, uint8_t *pix2 return static void output dct[4][16] description Divide the 8x8 block into four 4x4 sub-blocks and call sub4x4_dct on each

18 sub4x4_dct Function sub4x4_dct argument int16_t dct[16], uint8_t *pix1, uint8_t *pix2 return static void output dct[16] description Compute the DCT of the residual block(pix1) - block(pix2)

19 Sub_dct static void sub8x8_dct( int16_t dct[4][16], uint8_t *pix1, uint8_t *pix2 ) { sub4x4_dct( dct[0], &pix1[0], &pix2[0] ); sub4x4_dct( dct[1], &pix1[4], &pix2[4] ); sub4x4_dct( dct[2], &pix1[4*FENC_STRIDE+0], &pix2[4*FDEC_STRIDE+0] ); sub4x4_dct( dct[3], &pix1[4*FENC_STRIDE+4], &pix2[4*FDEC_STRIDE+4] ); } static void sub16x16_dct( int16_t dct[16][16], uint8_t *pix1, uint8_t *pix2 ) { sub8x8_dct( &dct[ 0], &pix1[0], &pix2[0] ); sub8x8_dct( &dct[ 4], &pix1[8], &pix2[8] ); sub8x8_dct( &dct[ 8], &pix1[8*FENC_STRIDE+0], &pix2[8*FDEC_STRIDE+0] ); sub8x8_dct( &dct[12], &pix1[8*FENC_STRIDE+8], &pix2[8*FDEC_STRIDE+8] ); }

20 sub4x4_dct static void sub4x4_dct( int16_t dct[16], uint8_t *pix1, uint8_t *pix2 ) { int16_t d[16]; int16_t tmp[16]; int i; pixel_sub_wxh( d, 4, pix1, FENC_STRIDE, pix2, FDEC_STRIDE ); for( i = 0; i < 4; i++ ) { const int s03 = d[i*4+0] + d[i*4+3]; const int s12 = d[i*4+1] + d[i*4+2]; const int d03 = d[i*4+0] - d[i*4+3]; const int d12 = d[i*4+1] - d[i*4+2]; tmp[0*4+i] = s03 + s12; tmp[1*4+i] = 2*d03 + d12; tmp[2*4+i] = s03 - s12; tmp[3*4+i] = d03 - 2*d12; } for( i = 0; i < 4; i++ ) { const int s03 = tmp[i*4+0] + tmp[i*4+3]; const int s12 = tmp[i*4+1] + tmp[i*4+2]; const int d03 = tmp[i*4+0] - tmp[i*4+3]; const int d12 = tmp[i*4+1] - tmp[i*4+2]; dct[i*4+0] = s03 + s12; dct[i*4+1] = 2*d03 + d12; dct[i*4+2] = s03 - s12; dct[i*4+3] = d03 - 2*d12; } }

21 sub4x4_dct DCT $Y = A X A^{T}$, $X = A^{T} Y A$ core scaling factor $d = c/b \approx 0.414$ To simplify the implementation: $d \approx 0.5$ To ensure orthogonal, b is modified $a = \tfrac{1}{2}$, $b = \sqrt{2/5}$, $d = \tfrac{1}{2}$ (2nd, 4th rows of $C$, 2nd, 4th columns of $C^{T}$) $\times 2$, …

22 sub4x4_dct for( i = 0; i < 4; i++ ) { const int s03 = d[i*4+0] + d[i*4+3]; const int s12 = d[i*4+1] + d[i*4+2]; const int d03 = d[i*4+0] - d[i*4+3]; const int d12 = d[i*4+1] - d[i*4+2]; tmp[0*4+i] = s03 + s12; tmp[1*4+i] = 2*d03 + d12; tmp[2*4+i] = s03 - s12; tmp[3*4+i] = d03 - 2*d12; } for( i = 0; i < 4; i++ ) { const int s03 = tmp[i*4+0] + tmp[i*4+3]; const int s12 = tmp[i*4+1] + tmp[i*4+2]; const int d03 = tmp[i*4+0] - tmp[i*4+3]; const int d12 = tmp[i*4+1] - tmp[i*4+2]; dct[i*4+0] = s03 + s12; dct[i*4+1] = 2*d03 + d12; dct[i*4+2] = s03 - s12; dct[i*4+3] = d03 - 2*d12; } Table lookup $Z = \mathrm{round}(W \cdot PF / Q_{step})$, where $PF / Q_{step} = MF / 2^{qbits}$ Pre-scaling matrix Input matrix $qbits = 15 + \lfloor QP/6 \rfloor$ Transform Quantization

23 end


Download ppt "段志學 x264 code trace. Group4 block2. function Intra frame (16x16) x264_mb_encode_i16x16 predict x264_predict_lossless_16x16 DCT sub16x16_dct sub8x8_dct."

Similar presentations


Ads by Google