diff --git a/common/common.c b/common/common.c index 8a8f660..7e091bc 100644 --- common/common.c +++ common/common.c @@ -117,6 +117,7 @@ void x264_param_default( x264_param_t *param ) | X264_ANALYSE_PSUB16x16 | X264_ANALYSE_BSUB16x16; param->analyse.i_direct_mv_pred = X264_DIRECT_PRED_SPATIAL; param->analyse.i_me_method = X264_ME_HEX; + param->analyse.i_rdcmp = X264_CMP_PSY; param->analyse.i_me_range = 16; param->analyse.i_subpel_refine = 5; param->analyse.b_chroma_me = 1; @@ -464,6 +465,8 @@ int x264_param_parse( x264_param_t *p, const char *name, const char *value ) p->analyse.i_mv_range_thread = atoi(value); OPT2("subme", "subq") p->analyse.i_subpel_refine = atoi(value); + OPT("rdcmp") + b_error |= parse_enum( value, x264_rdcmp_names, &p->analyse.i_rdcmp ); OPT("bime") p->analyse.b_bidir_me = atobool(value); OPT("chroma-me") @@ -856,6 +859,7 @@ char *x264_param2string( x264_param_t *p, int b_res ) s += sprintf( s, " analyse=%#x:%#x", p->analyse.intra, p->analyse.inter ); s += sprintf( s, " me=%s", x264_motion_est_names[ p->analyse.i_me_method ] ); s += sprintf( s, " subme=%d", p->analyse.i_subpel_refine ); + s += sprintf( s, " rdcmp=%s", x264_rdcmp_names[ p->analyse.i_rdcmp ] ); s += sprintf( s, " brdo=%d", p->analyse.b_bframe_rdo ); s += sprintf( s, " mixed_ref=%d", p->analyse.b_mixed_references ); s += sprintf( s, " me_range=%d", p->analyse.i_me_range ); diff --git a/encoder/analyse.c b/encoder/analyse.c index f1aa034..f81c7fb 100644 --- encoder/analyse.c +++ encoder/analyse.c @@ -1901,7 +1901,7 @@ static void x264_mb_analyse_p_rd( x264_t *h, x264_mb_analysis_t *a, int i_satd ) static void x264_mb_analyse_b_rd( x264_t *h, x264_mb_analysis_t *a, int i_satd_inter ) { - int thresh = i_satd_inter * 17/16; + int thresh = i_satd_inter * (17 + (h->param.analyse.i_rdcmp == X264_CMP_PSY))/16; if( a->b_direct_available && a->i_rd16x16direct == COST_MAX ) { diff --git a/encoder/rdo.c b/encoder/rdo.c index 8607e07..a6eb3e3 100644 --- encoder/rdo.c +++ encoder/rdo.c @@ 
-52,21 +52,52 @@ static uint16_t cabac_prefix_size[15][128]; #define COPY_CABAC h->mc.memcpy_aligned( &cabac_tmp.f8_bits_encoded, &h->cabac.f8_bits_encoded, \ sizeof(x264_cabac_t) - offsetof(x264_cabac_t,f8_bits_encoded) ) - -static int ssd_mb( x264_t *h ) + +#define ADD_ABS_SATD(satdtype, pixel)\ + satd += (h->pixf.satdtype[pixel]( zero, 0, fdec, FDEC_STRIDE ) - dc_coefs[0]) - \ + (h->pixf.satdtype[pixel]( zero, 0, fenc, FENC_STRIDE ) - dc_coefs[1]); + +/* Psy RD distortion metric: SSD plus "Absolute Difference of Complexities" */ +/* SATD and SA8D (taken against an all-zero block, with the dc_coefs term subtracted) are used to measure block complexity. */ +/* Blocks with a complexity most similar to that of the source are scored best. */ +/* Both the SATD difference and the SA8D difference are used, to avoid bias from the DCT size. Using SATD */ +/* only, for example, results in overuse of 8x8dct, while the opposite occurs when using SA8D. */ +/* This is because frequencies stored in an 8x8dct sum up to a larger value when viewed through a 4x4 */ +/* transform and vice versa with a 4x4dct and an 8x8 transform. */ +/* The weights chosen (5 for each) are arbitrary. They should probably be the same for SATD and SA8D but */ +/* the overall weight is chosen for no particularly good reason. */ +/* SSD is still used as the primary RD metric; this value is merely added to it for psy purposes. */ + +/* FIXME: Is there a better metric than averaged SATD/SA8D difference for complexity difference? */ +/* Hadamard transform is recursive, so a SATD+SA8D can be done faster by taking advantage of this fact. */ +/* We can factor out the fenc SATD/SA8D so they're only done once. 
*/ + +static inline int ssd_plane( x264_t *h, int size, int p, int x, int y ) { - return h->pixf.ssd[PIXEL_16x16]( h->mb.pic.p_fenc[0], FENC_STRIDE, - h->mb.pic.p_fdec[0], FDEC_STRIDE ) - + h->pixf.ssd[PIXEL_8x8]( h->mb.pic.p_fenc[1], FENC_STRIDE, - h->mb.pic.p_fdec[1], FDEC_STRIDE ) - + h->pixf.ssd[PIXEL_8x8]( h->mb.pic.p_fenc[2], FENC_STRIDE, - h->mb.pic.p_fdec[2], FDEC_STRIDE ); + DECLARE_ALIGNED_16(uint8_t zero[16]) = {0}; + int satd = 0; + uint8_t *fdec = h->mb.pic.p_fdec[p] + x + y*FDEC_STRIDE; + uint8_t *fenc = h->mb.pic.p_fenc[p] + x + y*FENC_STRIDE; + if(p == 0 && h->param.analyse.i_rdcmp == X264_CMP_PSY) + { + int dc_coefs[2]; + dc_coefs[0] = h->pixf.sad[size]( zero, 0, fdec, FDEC_STRIDE ) >> 1; + dc_coefs[1] = h->pixf.sad[size]( zero, 0, fenc, FENC_STRIDE ) >> 1; + ADD_ABS_SATD(satd, size); + /* If the plane is smaller than 8x8, we can't do an SA8D (the SATD difference is doubled instead); this probably isn't a big problem. */ + if(size <= PIXEL_8x8) + { + ADD_ABS_SATD(sa8d, size); + } + else + satd *= 2; + } + return h->pixf.ssd[size]( fenc, FENC_STRIDE, fdec, FDEC_STRIDE ) + abs(satd) * 5; } -static int ssd_plane( x264_t *h, int size, int p, int x, int y ) +static inline int ssd_mb( x264_t *h ) { - return h->pixf.ssd[size]( h->mb.pic.p_fenc[p] + x+y*FENC_STRIDE, FENC_STRIDE, - h->mb.pic.p_fdec[p] + x+y*FDEC_STRIDE, FDEC_STRIDE ); + return ssd_plane(h, PIXEL_16x16, 0, 0, 0) + ssd_plane(h, PIXEL_8x8, 1, 0, 0) + ssd_plane(h, PIXEL_8x8, 2, 0, 0); } static int x264_rd_cost_mb( x264_t *h, int i_lambda2 ) diff --git a/x264.c b/x264.c index 8484cad..fc3729f 100644 --- x264.c +++ x264.c @@ -243,6 +243,10 @@ static void Help( x264_param_t *defaults, int b_longhelp ) H0( " -m, --subme Subpixel motion estimation and partition\n" " decision quality: 1=fast, 7=best. [%d]\n", defaults->analyse.i_subpel_refine ); H0( " --b-rdo RD based mode decision for B-frames. 
Requires subme 6.\n" ); + H0( " --rdcmp Metric used for RD mode decision [\"%s\"]\n", + strtable_lookup( x264_rdcmp_names, defaults->analyse.i_rdcmp ) ); + H0( " - ssd: normal (maximum PSNR)\n" + " - psy: psychovisual (sharper)\n" ); H0( " --mixed-refs Decide references on a per partition basis\n" ); H1( " --no-chroma-me Ignore chroma in motion estimation\n" ); H1( " --bime Jointly optimize both MVs in B-frames\n" ); @@ -411,6 +415,7 @@ static int Parse( int argc, char **argv, { "mvrange", required_argument, NULL, 0 }, { "mvrange-thread", required_argument, NULL, 0 }, { "subme", required_argument, NULL, 'm' }, + { "rdcmp", required_argument, NULL, 0 }, { "b-rdo", no_argument, NULL, 0 }, { "mixed-refs", no_argument, NULL, 0 }, { "no-chroma-me", no_argument, NULL, 0 }, diff --git a/x264.h b/x264.h index c41d6f0..7b32c31 100644 --- x264.h +++ x264.h @@ -85,9 +85,12 @@ typedef struct x264_t x264_t; #define X264_AQ_NONE 0 #define X264_AQ_LOCAL 1 #define X264_AQ_GLOBAL 2 +#define X264_CMP_SSD 0 +#define X264_CMP_PSY 1 static const char * const x264_direct_pred_names[] = { "none", "spatial", "temporal", "auto", 0 }; static const char * const x264_motion_est_names[] = { "dia", "hex", "umh", "esa", "tesa", 0 }; +static const char * const x264_rdcmp_names[] = { "ssd", "psy", 0 }; static const char * const x264_overscan_names[] = { "undef", "show", "crop", 0 }; static const char * const x264_vidformat_names[] = { "component", "pal", "ntsc", "secam", "mac", "undef", 0 }; static const char * const x264_fullrange_names[] = { "off", "on", 0 }; @@ -237,6 +240,7 @@ typedef struct x264_param_t int b_fast_pskip; /* early SKIP detection on P-frames */ int b_dct_decimate; /* transform coefficient thresholding on P-frames */ int i_noise_reduction; /* adaptive pseudo-deadzone */ + int i_rdcmp; /* RD comparison metric: X264_CMP_SSD or X264_CMP_PSY */ /* the deadzone size that will be used in luma quantization */ int i_luma_deadzone[2]; /* {inter, intra} */