/******************************************************************************
|
*
|
* Copyright (C) 2018 The Android Open Source Project
|
*
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
* you may not use this file except in compliance with the License.
|
* You may obtain a copy of the License at:
|
*
|
* http://www.apache.org/licenses/LICENSE-2.0
|
*
|
* Unless required by applicable law or agreed to in writing, software
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
* See the License for the specific language governing permissions and
|
* limitations under the License.
|
*
|
*****************************************************************************
|
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
*/
|
|
/*!
|
******************************************************************************
|
* \file ihevce_decomp_pre_intra_pass.c
|
*
|
* \brief
|
* This file contains definitions related to frame decomposition done during
|
* pre intra processing
|
*
|
* \date
|
* 19/02/2013
|
*
|
* \author
|
* Ittiam
|
*
|
* List of Functions
|
* ihevce_intra_populate_mode_bits_cost()
|
* ihevce_8x8_sad_computer()
|
* ihevce_4x4_sad_computer()
|
* ihevce_ed_4x4_find_best_modes()
|
* ihevce_ed_calc_4x4_blk()
|
* ihevce_ed_calc_8x8_blk()
|
* ihevce_ed_calc_incomplete_ctb()
|
* ihevce_cu_level_qp_mod()
|
* ihevce_ed_calc_ctb()
|
* ihevce_ed_frame_init()
|
* ihevce_scale_by_2()
|
* ihevce_decomp_pre_intra_process_row()
|
* ihevce_decomp_pre_intra_process()
|
* ihevce_decomp_pre_intra_get_num_mem_recs()
|
* ihevce_decomp_pre_intra_get_mem_recs()
|
* ihevce_decomp_pre_intra_init()
|
* ihevce_decomp_pre_intra_frame_init()
|
* ihevce_merge_sort()
|
* ihevce_decomp_pre_intra_curr_frame_pre_intra_deinit()
|
*
|
******************************************************************************
|
*/
|
|
/*****************************************************************************/
|
/* File Includes */
|
/*****************************************************************************/
|
/* System include files */
|
#include <stdio.h>
|
#include <string.h>
|
#include <stdlib.h>
|
#include <assert.h>
|
#include <stdarg.h>
|
#include <math.h>
|
#include <limits.h>
|
|
/* User include files */
|
#include "ihevc_typedefs.h"
|
#include "itt_video_api.h"
|
#include "ihevce_api.h"
|
|
#include "rc_cntrl_param.h"
|
#include "rc_frame_info_collector.h"
|
#include "rc_look_ahead_params.h"
|
|
#include "ihevc_defs.h"
|
#include "ihevc_debug.h"
|
#include "ihevc_structs.h"
|
#include "ihevc_platform_macros.h"
|
#include "ihevc_deblk.h"
|
#include "ihevc_itrans_recon.h"
|
#include "ihevc_chroma_itrans_recon.h"
|
#include "ihevc_chroma_intra_pred.h"
|
#include "ihevc_intra_pred.h"
|
#include "ihevc_inter_pred.h"
|
#include "ihevc_mem_fns.h"
|
#include "ihevc_padding.h"
|
#include "ihevc_weighted_pred.h"
|
#include "ihevc_sao.h"
|
#include "ihevc_resi_trans.h"
|
#include "ihevc_quant_iquant_ssd.h"
|
#include "ihevc_cabac_tables.h"
|
|
#include "ihevce_defs.h"
|
#include "ihevce_hle_interface.h"
|
#include "ihevce_lap_enc_structs.h"
|
#include "ihevce_multi_thrd_structs.h"
|
#include "ihevce_multi_thrd_funcs.h"
|
#include "ihevce_me_common_defs.h"
|
#include "ihevce_had_satd.h"
|
#include "ihevce_error_codes.h"
|
#include "ihevce_bitstream.h"
|
#include "ihevce_cabac.h"
|
#include "ihevce_rdoq_macros.h"
|
#include "ihevce_function_selector.h"
|
#include "ihevce_enc_structs.h"
|
#include "ihevce_entropy_structs.h"
|
#include "ihevce_cmn_utils_instr_set_router.h"
|
#include "ihevce_ipe_instr_set_router.h"
|
#include "ihevce_decomp_pre_intra_structs.h"
|
#include "ihevce_decomp_pre_intra_pass.h"
|
#include "ihevce_enc_loop_structs.h"
|
#include "hme_datatype.h"
|
#include "hme_interface.h"
|
#include "hme_common_defs.h"
|
#include "ihevce_global_tables.h"
|
|
/*****************************************************************************/
|
/* Typedefs */
|
/*****************************************************************************/
|
typedef void (*pf_ed_calc_ctb)(
|
ihevce_ed_ctxt_t *ps_ed_ctxt,
|
ihevce_ed_blk_t *ps_ed_ctb,
|
ihevce_ed_ctb_l1_t *ps_ed_ctb_l1,
|
UWORD8 *pu1_src,
|
WORD32 src_stride,
|
WORD32 num_4x4_blks_x,
|
WORD32 num_4x4_blks_y,
|
WORD32 *nbr_flags,
|
WORD32 i4_layer_id,
|
WORD32 row_block_no,
|
WORD32 col_block_no,
|
ihevce_ipe_optimised_function_list_t *ps_ipe_optimised_function_list,
|
ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list);
|
|
/*****************************************************************************/
|
/* Constant Macros */
|
/*****************************************************************************/
|
#define SATD_NOISE_FLOOR_THRESHOLD 16
|
#define MINIMUM_VARIANCE 15
|
#define SCALE_FACTOR_VARIANCE 20
|
#define SCALE_FACTOR_VARIANCE_8x8 60
|
#define MIN_SATD_THRSHLD 0
|
#define MAX_SATD_THRSHLD 64
|
#define SUB_NOISE_THRSHLD 0
|
#define MIN_BLKS 2
|
|
/*****************************************************************************/
|
/* Global variables */
|
/*****************************************************************************/
|
|
/**
|
*****************************************************************************
|
* @brief list of pointers to luma intra pred functions
|
*****************************************************************************
|
*/
|
pf_intra_pred g_apf_lum_ip[NUM_IP_FUNCS];
|
|
/*****************************************************************************/
|
/* Function Definitions */
|
/*****************************************************************************/
|
|
/*!
|
******************************************************************************
|
* \if Function name : ihevce_intra_populate_mode_bits_cost \endif
|
*
|
* \brief: look-up table of cost of signalling an intra mode in the
|
* bitstream
|
*
|
*****************************************************************************
|
*/
|
void ihevce_intra_populate_mode_bits_cost(
|
WORD32 top_intra_mode,
|
WORD32 left_intra_mode,
|
WORD32 available_top,
|
WORD32 available_left,
|
WORD32 cu_pos_y,
|
UWORD16 *mode_bits_cost,
|
WORD32 lambda)
|
{
|
WORD32 i;
|
// 5.5 * lambda
|
UWORD16 five_bits_cost = COMPUTE_RATE_COST_CLIP30(11, lambda, (LAMBDA_Q_SHIFT + 1));
|
|
(void)top_intra_mode;
|
(void)left_intra_mode;
|
(void)available_top;
|
(void)available_left;
|
(void)cu_pos_y;
|
for(i = 0; i < NUM_MODES; i++)
|
{
|
mode_bits_cost[i] = five_bits_cost;
|
}
|
}
|
|
/*!
|
******************************************************************************
|
* \if Function name : ihevce_8x8_sad_computer \endif
|
*
|
* \brief: compute sad between 2 8x8 blocks
|
*
|
*****************************************************************************
|
*/
|
UWORD16
|
ihevce_8x8_sad_computer(UWORD8 *pu1_src, UWORD8 *pu1_pred, WORD32 src_strd, WORD32 pred_strd)
|
{
|
UWORD16 sad = 0;
|
WORD32 i, j;
|
|
for(i = 0; i < 8; i++)
|
{
|
for(j = 0; j < 8; j++)
|
{
|
sad += ABS(*pu1_src - *pu1_pred);
|
pu1_src++;
|
pu1_pred++;
|
}
|
pu1_src = pu1_src + (src_strd - 8);
|
pu1_pred = pu1_pred + (pred_strd - 8);
|
}
|
|
return sad;
|
}
|
|
/*!
|
******************************************************************************
|
* \if Function name : ihevce_4x4_sad_computer \endif
|
*
|
* \brief: compute sad between 2 4x4 blocks
|
*
|
*****************************************************************************
|
*/
|
UWORD16
|
ihevce_4x4_sad_computer(UWORD8 *pu1_src, UWORD8 *pu1_pred, WORD32 src_strd, WORD32 pred_strd)
|
{
|
UWORD16 sad = 0;
|
WORD32 i, j;
|
|
for(i = 0; i < 4; i++)
|
{
|
for(j = 0; j < 4; j++)
|
{
|
sad += ABS(*pu1_src - *pu1_pred);
|
pu1_src++;
|
pu1_pred++;
|
}
|
pu1_src = pu1_src + (src_strd - 4);
|
pu1_pred = pu1_pred + (pred_strd - 4);
|
}
|
|
return sad;
|
}
|
|
/*!
|
******************************************************************************
|
* \if Function name : ihevce_ed_4x4_find_best_modes \endif
|
*
|
* \brief: evaluate input 4x4 block for pre-selected list of angular and normal
|
* intra modes and return best sad, cost
|
*
|
*****************************************************************************
|
*/
|
void ihevce_ed_4x4_find_best_modes(
|
UWORD8 *pu1_src,
|
WORD32 src_stride,
|
UWORD8 *ref,
|
UWORD16 *mode_bits_cost,
|
UWORD8 *pu1_best_modes,
|
WORD32 *pu1_best_sad_costs,
|
WORD32 u1_low_resol,
|
FT_SAD_COMPUTER *pf_4x4_sad_computer)
|
{
|
WORD32 i;
|
UWORD8 mode = 0, best_amode = 0, best_nmode = 0;
|
UWORD8 pred[16];
|
WORD32 sad = 0;
|
WORD32 sad_cost = 0;
|
WORD32 best_asad_cost = 0xFFFFF;
|
WORD32 best_nsad_cost = 0xFFFFF;
|
|
/* If lower layers, l1 or l2, all the 11 modes are evaluated */
|
/* If L0 layer, all modes excluding DC and Planar are evaluated */
|
if(1 == u1_low_resol)
|
i = 0;
|
else
|
i = 2;
|
|
/* Find the best non-angular and angular mode till level 4 */
|
for(; i < 11; i++)
|
{
|
mode = gau1_modes_to_eval[i];
|
g_apf_lum_ip[g_i4_ip_funcs[mode]](&ref[0], 0, &pred[0], 4, 4, mode);
|
sad = pf_4x4_sad_computer(pu1_src, &pred[0], src_stride, 4);
|
sad_cost = sad;
|
sad_cost += mode_bits_cost[mode];
|
if(mode < 2)
|
{
|
if(sad_cost < best_nsad_cost)
|
{
|
best_nmode = mode;
|
best_nsad_cost = sad_cost;
|
}
|
}
|
else
|
{
|
if(sad_cost < best_asad_cost)
|
{
|
best_amode = mode;
|
best_asad_cost = sad_cost;
|
}
|
}
|
}
|
|
pu1_best_modes[0] = best_amode;
|
pu1_best_sad_costs[0] = best_asad_cost;
|
|
/* Accumalate the best non-angular mode and cost for the l1 and l2 layers */
|
if(1 == u1_low_resol)
|
{
|
pu1_best_modes[1] = best_nmode;
|
pu1_best_sad_costs[1] = best_nsad_cost;
|
}
|
}
|
|
/*!
|
******************************************************************************
|
* \if Function name : ihevce_ed_calc_4x4_blk \endif
|
*
|
* \brief: evaluate input 4x4 block for all intra modes and return best sad &
|
* cost
|
*
|
*****************************************************************************
|
*/
|
static void ihevce_ed_calc_4x4_blk(
|
ihevce_ed_blk_t *ps_ed,
|
UWORD8 *pu1_src,
|
WORD32 src_stride,
|
UWORD8 *ref,
|
UWORD16 *mode_bits_cost,
|
WORD32 *sad_ptr,
|
WORD32 *pi4_best_satd,
|
WORD32 i4_quality_preset,
|
WORD32 *pi4_best_sad_cost,
|
ihevce_ipe_optimised_function_list_t *ps_ipe_optimised_function_list)
|
{
|
WORD32 i, i_end;
|
UWORD8 mode, best_amode, best_nmode;
|
UWORD8 pred[16];
|
|
UWORD16 sad;
|
WORD32 sad_cost = 0;
|
WORD32 best_asad_cost = 0xFFFFF;
|
WORD32 best_nsad_cost = 0xFFFFF;
|
|
UWORD8 au1_best_modes[2];
|
WORD32 ai4_best_sad_costs[2];
|
|
/* L1/L2 resolution hence low resolution enable */
|
WORD32 u1_low_resol = 1;
|
|
UWORD8 modes_to_eval[2];
|
|
/* The *pi4_best_satd will be consumed only if current
|
layer has odd number of 4x4 blocks in either x or y
|
direction. But the function hme_derive_num_layers() makes
|
sure that every layer has width and height such that each one
|
is a multiple of 16. Which makes pi4_best_satd useless. Hence
|
feel free to remove pi4_best_satd. Concluded on 29th Aug13 */
|
*pi4_best_satd = -1;
|
ps_ipe_optimised_function_list->pf_ed_4x4_find_best_modes(
|
pu1_src,
|
src_stride,
|
ref,
|
mode_bits_cost,
|
au1_best_modes,
|
ai4_best_sad_costs,
|
u1_low_resol,
|
ps_ipe_optimised_function_list->pf_4x4_sad_computer);
|
|
best_nmode = au1_best_modes[1];
|
best_amode = au1_best_modes[0];
|
best_nsad_cost = ai4_best_sad_costs[1];
|
best_asad_cost = ai4_best_sad_costs[0];
|
|
/* Updation of pi4_best_satd here needed iff the mode given by
|
ihevce_ed_4x4_find_best_modes() comes out to be
|
the best mode at the end of the function */
|
*pi4_best_satd = best_asad_cost - mode_bits_cost[best_amode];
|
|
/* Around best level 4 angular mode, search for best level 2 mode */
|
modes_to_eval[0] = best_amode - 2;
|
modes_to_eval[1] = best_amode + 2;
|
i = 0;
|
i_end = 2;
|
if(best_amode == 2)
|
i = 1;
|
else if(best_amode == 34)
|
i_end = 1;
|
for(; i < i_end; i++)
|
{
|
mode = modes_to_eval[i];
|
g_apf_lum_ip[g_i4_ip_funcs[mode]](&ref[0], 0, &pred[0], 4, 4, mode);
|
sad = ps_ipe_optimised_function_list->pf_4x4_sad_computer(pu1_src, &pred[0], src_stride, 4);
|
sad_cost = sad;
|
sad_cost += mode_bits_cost[mode];
|
if(sad_cost < best_asad_cost)
|
{
|
best_amode = mode;
|
best_asad_cost = sad_cost;
|
*pi4_best_satd = sad;
|
}
|
sad_ptr[mode] = sad;
|
}
|
|
/*To be done : Add a flag here instead of preset condn*/
|
if((i4_quality_preset < IHEVCE_QUALITY_P4))
|
{
|
/* Around best level 2 angular mode, search for best level 1 mode */
|
modes_to_eval[0] = best_amode - 1;
|
modes_to_eval[1] = best_amode + 1;
|
i = 0;
|
i_end = 2;
|
if(best_amode == 2)
|
i = 1;
|
else if(best_amode == 34)
|
i_end = 1;
|
for(; i < i_end; i++)
|
{
|
mode = modes_to_eval[i];
|
g_apf_lum_ip[g_i4_ip_funcs[mode]](&ref[0], 0, &pred[0], 4, 4, mode);
|
sad = ps_ipe_optimised_function_list->pf_4x4_sad_computer(
|
pu1_src, &pred[0], src_stride, 4);
|
sad_cost = sad;
|
sad_cost += mode_bits_cost[mode];
|
if(sad_cost < best_asad_cost)
|
{
|
best_amode = mode;
|
best_asad_cost = sad_cost;
|
*pi4_best_satd = sad;
|
}
|
sad_ptr[mode] = sad;
|
}
|
}
|
|
if(best_asad_cost < best_nsad_cost)
|
{
|
ps_ed->best_mode = best_amode;
|
*pi4_best_sad_cost = best_asad_cost;
|
}
|
else
|
{
|
ps_ed->best_mode = best_nmode;
|
*pi4_best_sad_cost = best_nsad_cost;
|
}
|
ps_ed->intra_or_inter = 0;
|
ps_ed->merge_success = 0;
|
}
|
|
/*!
|
******************************************************************************
|
* \if Function name : ihevce_ed_calc_8x8_blk \endif
|
*
|
* \brief: evaluate input 8x8 block for intra modes basing on the intra mode
|
* decisions made at 4x4 level. This function also makes a decision whether
|
* to split blk in to 4x4 partitions or not.
|
*
|
*****************************************************************************
|
*/
|
static void ihevce_ed_calc_8x8_blk(
|
ihevce_ed_ctxt_t *ps_ed_ctxt,
|
ihevce_ed_blk_t *ps_ed_8x8,
|
UWORD8 *pu1_src,
|
WORD32 src_stride,
|
WORD32 *nbr_flags_ptr,
|
WORD32 *top_intra_mode_ptr,
|
WORD32 *left_intra_mode_ptr,
|
WORD32 cu_pos_y,
|
WORD32 lambda,
|
WORD32 *sad_ptr_8x8,
|
WORD32 *pi4_best_satd,
|
WORD32 i4_layer_id,
|
WORD32 i4_quality_preset,
|
WORD32 i4_slice_type,
|
WORD32 *pi4_best_sad_cost_8x8_l1_ipe,
|
WORD32 *pi4_best_sad_8x8_l1_ipe,
|
WORD32 *pi4_sum_4x4_satd,
|
WORD32 *pi4_min_4x4_satd,
|
ihevce_ipe_optimised_function_list_t *ps_ipe_optimised_function_list,
|
ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list)
|
{
|
WORD32 i, j;
|
WORD32 nbr_flags, nbr_flags_TR;
|
UWORD8 *pu1_src_4x4;
|
WORD32 top_available;
|
WORD32 left_available;
|
ihevce_ed_blk_t *ps_ed_4x4 = ps_ed_8x8;
|
WORD32 top_intra_mode;
|
WORD32 left_intra_mode;
|
WORD32 next_left_intra_mode;
|
WORD32 *sad_ptr = sad_ptr_8x8;
|
UWORD8 *pu1_src_arr[4];
|
WORD32 i4_4x4_best_sad_cost[4];
|
func_selector_t *ps_func_selector = ps_ed_ctxt->ps_func_selector;
|
ihevc_intra_pred_luma_ref_substitution_ft *pf_intra_pred_luma_ref_substitution =
|
ps_func_selector->ihevc_intra_pred_luma_ref_substitution_fptr;
|
|
(void)i4_slice_type;
|
|
/* Compute ref samples for 8x8 merge block */
|
nbr_flags = nbr_flags_ptr[0];
|
nbr_flags_TR = nbr_flags_ptr[1];
|
|
if(CHECK_TR_AVAILABLE(nbr_flags_TR))
|
{
|
SET_TR_AVAILABLE(nbr_flags);
|
}
|
else
|
{
|
SET_TR_UNAVAILABLE(nbr_flags);
|
}
|
|
if(CHECK_BL_AVAILABLE(nbr_flags))
|
{
|
SET_BL_AVAILABLE(nbr_flags);
|
}
|
else
|
{
|
SET_BL_UNAVAILABLE(nbr_flags);
|
}
|
|
/* call the function which populates ref data for intra predicion */
|
pf_intra_pred_luma_ref_substitution(
|
pu1_src - src_stride - 1,
|
pu1_src - src_stride,
|
pu1_src - 1,
|
src_stride,
|
8,
|
nbr_flags,
|
&ps_ed_ctxt->au1_ref_8x8[0][0],
|
0);
|
|
for(i = 0; i < 2; i++)
|
{
|
pu1_src_4x4 = pu1_src + i * 4 * src_stride;
|
cu_pos_y += i * 4;
|
next_left_intra_mode = left_intra_mode_ptr[i];
|
for(j = 0; j < 2; j++)
|
{
|
WORD32 i4_best_satd;
|
pu1_src_arr[i * 2 + j] = pu1_src_4x4;
|
nbr_flags = nbr_flags_ptr[i * 8 + j];
|
top_intra_mode = top_intra_mode_ptr[j];
|
left_intra_mode = next_left_intra_mode;
|
/* call the function which populates ref data for intra predicion */
|
pf_intra_pred_luma_ref_substitution(
|
pu1_src_4x4 - src_stride - 1,
|
pu1_src_4x4 - src_stride,
|
pu1_src_4x4 - 1,
|
src_stride,
|
4,
|
nbr_flags,
|
&ps_ed_ctxt->au1_ref_full_ctb[i * 2 + j][0],
|
0);
|
|
top_available = CHECK_T_AVAILABLE(nbr_flags);
|
left_available = CHECK_L_AVAILABLE(nbr_flags);
|
/* call the function which populates sad cost for all the modes */
|
ihevce_intra_populate_mode_bits_cost(
|
top_intra_mode,
|
left_intra_mode,
|
top_available,
|
left_available,
|
cu_pos_y,
|
&ps_ed_ctxt->au2_mode_bits_cost_full_ctb[i * 2 + j][0],
|
lambda);
|
ihevce_ed_calc_4x4_blk(
|
ps_ed_4x4,
|
pu1_src_4x4,
|
src_stride,
|
&ps_ed_ctxt->au1_ref_full_ctb[i * 2 + j][0],
|
&ps_ed_ctxt->au2_mode_bits_cost_full_ctb[i * 2 + j][0],
|
sad_ptr,
|
&i4_best_satd,
|
i4_quality_preset,
|
&i4_4x4_best_sad_cost[i * 2 + j],
|
ps_ipe_optimised_function_list);
|
|
top_intra_mode_ptr[j] = ps_ed_4x4->best_mode;
|
next_left_intra_mode = ps_ed_4x4->best_mode;
|
pu1_src_4x4 += 4;
|
ps_ed_4x4 += 1;
|
sad_ptr += NUM_MODES;
|
}
|
left_intra_mode_ptr[i] = next_left_intra_mode;
|
}
|
|
/* 8x8 merge */
|
{
|
UWORD8 modes_to_eval[6];
|
WORD32 sad;
|
UWORD8 pred[16];
|
UWORD8 pred_8x8[64] = { 0 };
|
WORD32 merge_success;
|
UWORD8 mode;
|
|
ps_ed_4x4 = ps_ed_8x8;
|
mode = (ps_ed_4x4)->best_mode;
|
|
*pi4_best_satd = -1;
|
|
merge_success =
|
((((ps_ed_4x4)->best_mode == (ps_ed_4x4 + 1)->best_mode) +
|
((ps_ed_4x4)->best_mode == (ps_ed_4x4 + 2)->best_mode) +
|
((ps_ed_4x4)->best_mode == (ps_ed_4x4 + 3)->best_mode)) == 3);
|
|
{
|
WORD32 i4_satd;
|
//UWORD16 au2_4x4_sad_cost_array[4];/*SAD of 4x4 blocks*/
|
UWORD16 u2_sum_best_4x4_sad_cost; /*Sum of 4x4 sad costs*/
|
UWORD16 u2_sum_best_4x4_satd_cost; /*Sum of 4x4 satd costs*/
|
UWORD8 u1_best_8x8_mode; /*8x8 mode.*/
|
UWORD16 u2_best_8x8_cost; /*8x8 Cost. Can store SATD/SAD cost*/
|
WORD32 i4_best_8x8_sad_satd; /* SATD/SAD value of 8x8 block*/
|
UWORD16 au2_8x8_costs[6] = { 0 }; /*Cost of 8x8 block for 6 modes*/
|
UWORD8 u1_cond_4x4_satd; /*condition if 4x4 SATD needs to be done*/
|
UWORD8 u1_cond_8x8_satd; /*condition if 8x8 SATD needs to be done*/
|
UWORD8 u1_good_quality;
|
WORD32 i4_merge_success_stage2;
|
|
/*Initiallization*/
|
*pi4_best_satd = 0;
|
u2_best_8x8_cost = (UWORD16)(-1) /*max value*/;
|
u2_sum_best_4x4_sad_cost = 0;
|
*pi4_sum_4x4_satd = -1;
|
*pi4_min_4x4_satd = 0x7FFFFFFF;
|
i4_best_8x8_sad_satd = 0;
|
u2_sum_best_4x4_satd_cost = 0;
|
u1_best_8x8_mode = ps_ed_4x4->best_mode;
|
|
/*We thought of "replacing" SATDs by SADs for 4x4 vs 8x8 decision
|
for speed improvement, but it gave opposite results. Setting
|
good_quality to 1 in order to throw away the idea of "replacing".*/
|
u1_good_quality = 1;
|
//u1_good_quality = ((i4_quality_preset != IHEVCE_QUALITY_P5)
|
// && (i4_quality_preset != IHEVCE_QUALITY_P4));
|
|
/*Needed to disable some processing based on speed preset*/
|
i4_merge_success_stage2 = 0;
|
|
/*Store SAD cost of 4x4 blocks */
|
for(i = 0; i < 4; i++)
|
{
|
//au2_4x4_sad_cost_array[i] = (ps_ed_4x4 + i)->best_sad_cost;
|
u2_sum_best_4x4_sad_cost +=
|
i4_4x4_best_sad_cost[i]; //(ps_ed_4x4 + i)->best_sad_cost;
|
modes_to_eval[i] = (ps_ed_4x4 + i)->best_mode;
|
/*NOTE_01: i4_4x4_satd is not used anywhere at present.
|
Setting it to zero to avoid ASSERT failure */
|
/*Now taken care of incomplete CTB*/
|
//(ps_ed_4x4 + i)->i4_4x4_satd = 0;
|
}
|
|
/*Calculate SATD/SAd for 4x4 blocks*/
|
/*For (layer_2 && high_speed): No need to get 4x4 SATDs bcoz
|
it won't have any impact on quality but speed will improve.*/
|
u1_cond_4x4_satd = ((1 == i4_layer_id) || (u1_good_quality && (!merge_success)));
|
|
if(u1_cond_4x4_satd)
|
{
|
*pi4_sum_4x4_satd = 0;
|
/*FYI: 1. Level 2 doesn't need the SATD.
|
2. The 4x4 vs. 8x8 decision for high_speed will
|
happen based on SAD. */
|
/*Get SATD for 4x4 blocks */
|
for(i = 0; i < 4; i++)
|
{
|
mode = modes_to_eval[i];
|
g_apf_lum_ip[g_i4_ip_funcs[mode]](
|
&ps_ed_ctxt->au1_ref_full_ctb[i][0], 0, &pred[0], 4, 4, mode);
|
|
i4_satd = ps_cmn_utils_optimised_function_list->pf_HAD_4x4_8bit(
|
pu1_src_arr[i], src_stride, &pred[0], 4, NULL, 0);
|
|
{
|
/*Save 4x4x satd in ed blk struct */
|
(ps_ed_4x4 + i)->i4_4x4_satd = i4_satd;
|
}
|
|
/*(ps_ed_4x4 + i)->i4_4x4_satd = i4_satd; // See NOTE_01*/
|
u2_sum_best_4x4_satd_cost +=
|
((UWORD16)i4_satd + ps_ed_ctxt->au2_mode_bits_cost_full_ctb[i][mode]);
|
*pi4_best_satd += i4_satd;
|
}
|
}
|
/* Not being used in current code */
|
else /* (Level_2 && extreme_speed) */
|
{
|
/******DONT ENTER HERE AT aNY COST***************************/
|
/* Transistor killers lie ahead!!!!!!! */
|
/*This else part is not getting executed as of now*/
|
if(2 != i4_layer_id)
|
ASSERT(0);
|
/*Update values by SAD_cost_array */
|
for(i = 0; i < 4; i++)
|
{
|
mode = modes_to_eval[i];
|
//u2_sum_best_4x4_satd_cost += au2_4x4_sad_cost_array[i];
|
//sad = (WORD32)((ps_ed_4x4 + i)->best_sad_cost - ps_ed_ctxt->au2_mode_bits_cost_full_ctb[i][mode]);
|
sad = (WORD32)(
|
i4_4x4_best_sad_cost[i] - ps_ed_ctxt->au2_mode_bits_cost_full_ctb[i][mode]);
|
*pi4_sum_4x4_satd += sad;
|
/*(ps_ed_4x4 + i)->i4_4x4_satd = sad;// See NOTE_01*/
|
*pi4_best_satd += sad;
|
|
if(*pi4_min_4x4_satd > sad)
|
*pi4_min_4x4_satd = sad;
|
}
|
}
|
if(!merge_success) /*If the modes are not identical*/
|
{
|
UWORD8 i1_start; /* no of modes to evaluate */
|
UWORD8 ai1_modes[6];
|
|
/* Prepare 6 candidates for 8x8 block. Two are DC and planar */
|
ai1_modes[4] = 0;
|
ai1_modes[5] = 1;
|
i1_start = 4;
|
|
/*Assign along with removing duplicates rest 4 candidates. */
|
for(i = 3; i >= 0; i--)
|
{
|
WORD8 i1_fresh_mode_flag = 1;
|
mode = modes_to_eval[i];
|
/*Check if duplicate already exists in ai1_modes*/
|
for(j = i1_start; j < 6; j++)
|
{
|
if(mode == ai1_modes[j])
|
i1_fresh_mode_flag = 0;
|
}
|
if(i1_fresh_mode_flag)
|
{
|
i1_start--;
|
ai1_modes[i1_start] = mode;
|
}
|
}
|
|
/*Calculate SATD/SAD of 8x8 block for all modes*/
|
/*If (u1_good_quality == 0) then SATD gets replaced by SAD*/
|
if(u1_good_quality && (i4_quality_preset <= IHEVCE_QUALITY_P4))
|
{
|
//7.5 * lambda to incorporate transfrom flags
|
u2_sum_best_4x4_satd_cost +=
|
(COMPUTE_RATE_COST_CLIP30(12, lambda, (LAMBDA_Q_SHIFT + 1)));
|
|
/*Loop over all modes for calculating SATD*/
|
for(i = i1_start; i < 6; i++)
|
{
|
mode = ai1_modes[i];
|
g_apf_lum_ip[g_i4_ip_funcs[mode]](
|
&ps_ed_ctxt->au1_ref_8x8[0][0], 0, &pred_8x8[0], 8, 8, mode);
|
|
i4_satd = ps_cmn_utils_optimised_function_list->pf_HAD_8x8_8bit(
|
pu1_src_arr[0], src_stride, &pred_8x8[0], 8, NULL, 0);
|
|
au2_8x8_costs[i] =
|
((UWORD16)i4_satd + ps_ed_ctxt->au2_mode_bits_cost_full_ctb[0][mode]);
|
|
/*Update data correspoinding to least 8x8 cost */
|
if(au2_8x8_costs[i] <= u2_best_8x8_cost)
|
{
|
u2_best_8x8_cost = au2_8x8_costs[i];
|
i4_best_8x8_sad_satd = i4_satd;
|
u1_best_8x8_mode = mode;
|
}
|
}
|
/*8x8 vs 4x4 decision based on SATD values*/
|
if((u2_best_8x8_cost <= u2_sum_best_4x4_satd_cost) || (u2_best_8x8_cost <= 300))
|
{
|
i4_merge_success_stage2 = 1;
|
}
|
|
/* EIID: Early inter-intra decision */
|
/* Find the SAD based cost for 8x8 block for best mode */
|
if(/*(ISLICE != i4_slice_type) && */ (1 == i4_layer_id))
|
{
|
UWORD8 i4_best_8x8_mode = u1_best_8x8_mode;
|
WORD32 i4_best_8x8_sad_curr;
|
|
g_apf_lum_ip[g_i4_ip_funcs[i4_best_8x8_mode]](
|
&ps_ed_ctxt->au1_ref_8x8[0][0], 0, &pred_8x8[0], 8, 8, i4_best_8x8_mode);
|
|
i4_best_8x8_sad_curr = ps_ipe_optimised_function_list->pf_8x8_sad_computer(
|
pu1_src_arr[0], &pred_8x8[0], src_stride, 8);
|
|
//register best sad in the context
|
//ps_ed_8x8->i4_best_sad_8x8_l1_ipe = i4_best_8x8_sad_curr;
|
|
//register the best cost in the context
|
//[0]th index is used since all 4 blocks are having same cost right now
|
//also it doesnt depends on mode. It only depends on the lambda
|
|
*pi4_best_sad_cost_8x8_l1_ipe =
|
i4_best_8x8_sad_curr +
|
ps_ed_ctxt->au2_mode_bits_cost_full_ctb[0][i4_best_8x8_mode];
|
*pi4_best_sad_8x8_l1_ipe = i4_best_8x8_sad_curr;
|
}
|
}
|
else /*If high_speed or extreme speed*/
|
{
|
//7.5 * lambda to incorporate transfrom flags
|
u2_sum_best_4x4_sad_cost +=
|
(COMPUTE_RATE_COST_CLIP30(12, lambda, (LAMBDA_Q_SHIFT + 1)));
|
|
/*Loop over all modes for calculating SAD*/
|
for(i = i1_start; i < 6; i++)
|
{
|
mode = ai1_modes[i];
|
g_apf_lum_ip[g_i4_ip_funcs[mode]](
|
&ps_ed_ctxt->au1_ref_8x8[0][0], 0, &pred_8x8[0], 8, 8, mode);
|
|
sad = ps_ipe_optimised_function_list->pf_8x8_sad_computer(
|
pu1_src_arr[0], &pred_8x8[0], src_stride, 8);
|
|
au2_8x8_costs[i] +=
|
((UWORD16)sad + ps_ed_ctxt->au2_mode_bits_cost_full_ctb[0][mode]);
|
|
/*Find the data correspoinding to least cost */
|
if(au2_8x8_costs[i] <= u2_best_8x8_cost)
|
{
|
u2_best_8x8_cost = au2_8x8_costs[i];
|
i4_best_8x8_sad_satd = sad;
|
u1_best_8x8_mode = mode;
|
}
|
}
|
/*8x8 vs 4x4 decision based on SAD values*/
|
if((u2_best_8x8_cost <= u2_sum_best_4x4_sad_cost) || (u2_best_8x8_cost <= 300))
|
{
|
i4_merge_success_stage2 = 1;
|
}
|
|
/* EIID: Early inter-intra decision */
|
/* Find the SAD based cost for 8x8 block for best mode */
|
if(/*(ISLICE != i4_slice_type) && */ (1 == i4_layer_id))
|
{
|
//UWORD8 i4_best_8x8_mode = u1_best_8x8_mode;
|
WORD32 i4_best_8x8_sad_cost_curr = u2_best_8x8_cost;
|
|
//register best sad in the context
|
//ps_ed_8x8->i4_best_sad_8x8_l1_ipe = i4_best_8x8_sad_curr;
|
|
//register the best cost in the context
|
*pi4_best_sad_cost_8x8_l1_ipe = i4_best_8x8_sad_cost_curr;
|
*pi4_best_sad_8x8_l1_ipe =
|
i4_best_8x8_sad_satd; //i4_best_8x8_sad_cost_curr;
|
}
|
}
|
}
|
|
/***** Modes for 4x4 and 8x8 are decided before this point ****/
|
if(merge_success || i4_merge_success_stage2)
|
{
|
/*FYI: 1. 8x8 SATD is not needed if merge is failed.
|
2. For layer_2: SATD won't be calculated for 8x8. So
|
the best_8x8_cost is SAD-cost. */
|
|
/* Store the 8x8 level data in the first 4x4 block*/
|
ps_ed_4x4->merge_success = 1;
|
ps_ed_4x4->best_merge_mode = u1_best_8x8_mode;
|
/* ps_ed_4x4->best_merge_sad_cost = u2_best_8x8_cost;
|
This data is not getting consumed anywhere at present */
|
|
top_intra_mode_ptr[0] = u1_best_8x8_mode;
|
top_intra_mode_ptr[1] = u1_best_8x8_mode;
|
left_intra_mode_ptr[0] = u1_best_8x8_mode;
|
left_intra_mode_ptr[1] = u1_best_8x8_mode;
|
|
/*If it is layer_1 and high_speed*/
|
u1_cond_8x8_satd =
|
((1 == i4_layer_id) &&
|
(merge_success || ((!u1_good_quality) && i4_merge_success_stage2)));
|
if(u1_cond_8x8_satd)
|
{
|
mode = u1_best_8x8_mode;
|
g_apf_lum_ip[g_i4_ip_funcs[mode]](
|
&ps_ed_ctxt->au1_ref_8x8[0][0], 0, &pred_8x8[0], 8, 8, mode);
|
|
if(i4_quality_preset > IHEVCE_QUALITY_P3)
|
{
|
i4_satd = ps_ipe_optimised_function_list->pf_8x8_sad_computer(
|
pu1_src_arr[0], &pred_8x8[0], src_stride, 8);
|
}
|
else
|
{
|
i4_satd = ps_cmn_utils_optimised_function_list->pf_HAD_8x8_8bit(
|
pu1_src_arr[0], src_stride, &pred_8x8[0], 8, NULL, 0);
|
}
|
/* u2_best_8x8_cost = ((UWORD16)i4_satd + mode_bits_cost[0][mode]);
|
This data is not getting consumed at present */
|
i4_best_8x8_sad_satd = i4_satd;
|
}
|
*pi4_best_satd = i4_best_8x8_sad_satd;
|
|
/* EIID: Early inter-intra decision */
|
/* Find the SAD based cost for 8x8 block for best mode */
|
if(/*(ISLICE != i4_slice_type) && */ (1 == i4_layer_id))
|
{
|
UWORD8 i4_best_8x8_mode = u1_best_8x8_mode;
|
WORD32 i4_best_8x8_sad_curr;
|
|
g_apf_lum_ip[g_i4_ip_funcs[i4_best_8x8_mode]](
|
&ps_ed_ctxt->au1_ref_8x8[0][0], 0, &pred_8x8[0], 8, 8, i4_best_8x8_mode);
|
|
i4_best_8x8_sad_curr = ps_ipe_optimised_function_list->pf_8x8_sad_computer(
|
pu1_src_arr[0], &pred_8x8[0], src_stride, 8);
|
//register best sad in the context
|
//ps_ed_8x8->i4_best_sad_8x8_l1_ipe = i4_best_8x8_sad_curr;
|
|
//register the best cost in the context
|
//[0]th index is used since all 4 blocks are having same cost right now
|
//also it doesnt depends on mode. It only depends on the lambda
|
|
*pi4_best_sad_cost_8x8_l1_ipe =
|
i4_best_8x8_sad_curr +
|
ps_ed_ctxt->au2_mode_bits_cost_full_ctb[0][i4_best_8x8_mode];
|
*pi4_best_sad_8x8_l1_ipe = i4_best_8x8_sad_curr;
|
|
} // EIID ends
|
|
} //if(merge_success || i4_merge_success_stage2)
|
}
|
}
|
}
|
|
/*!
|
******************************************************************************
|
* \if Function name : ihevce_ed_calc_incomplete_ctb \endif
|
*
|
* \brief: performs L1 8x8 and 4x4 intra mode analysis
|
*
|
*****************************************************************************
|
*/
|
void ihevce_ed_calc_incomplete_ctb(
|
ihevce_ed_ctxt_t *ps_ed_ctxt,
|
ihevce_ed_blk_t *ps_ed_ctb,
|
ihevce_ed_ctb_l1_t *ps_ed_ctb_l1,
|
UWORD8 *pu1_src,
|
WORD32 src_stride,
|
WORD32 num_4x4_blks_x,
|
WORD32 num_4x4_blks_y,
|
WORD32 *nbr_flags,
|
WORD32 i4_layer_id,
|
WORD32 i4_row_block_no,
|
WORD32 i4_col_block_no,
|
ihevce_ipe_optimised_function_list_t *ps_ipe_optimised_function_list,
|
ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list)
|
{
|
WORD32 i, j, k;
|
WORD32 z_scan_idx = 0;
|
WORD32 z_scan_act_idx = 0;
|
ihevc_intra_pred_luma_ref_substitution_ft *pf_intra_pred_luma_ref_substitution =
|
ps_ed_ctxt->ps_func_selector->ihevc_intra_pred_luma_ref_substitution_fptr;
|
|
//UWORD8 ref[18];
|
//WORD32 top_intra_modes[20];
|
WORD32 *sad_ptr = &ps_ed_ctxt->sad[0];
|
WORD32 lambda = ps_ed_ctxt->lambda;
|
//UWORD16 mode_bits_cost[NUM_MODES];
|
|
UWORD8 *pu1_src_8x8;
|
ihevce_ed_blk_t *ps_ed_8x8, *ps_ed_4x4;
|
WORD32 *top_intra_mode_ptr;
|
WORD32 *left_intra_mode_ptr = ps_ed_ctxt->left_ctb_intra_modes;
|
WORD32 *nbr_flags_ptr;
|
WORD32 top_intra_mode;
|
WORD32 left_intra_mode;
|
WORD32 next_left_intra_mode;
|
WORD32 nbr_flag = 0;
|
WORD32 top_available;
|
WORD32 left_available;
|
UWORD8 *pu1_src_4x4;
|
WORD32 left_over_4x4_blks;
|
WORD32 i4_incomplete_sum_4x4_satd = 0;
|
WORD32 i4_incomplete_min_4x4_satd = 0x7FFFFFFF;
|
WORD32 i4_best_sad_cost_8x8_l1_ipe, i4_best_sad_8x8_l1_ipe, i4_sum_4x4_satd, i4_min_4x4_satd;
|
|
(void)i4_row_block_no;
|
(void)i4_col_block_no;
|
/*Find the modulated qp of 16*16 at L2 from 8*8 SATDs in L2
|
THis is used as 64*64 Qp in L0*/
|
/*For Incomplete CTB, init all SATD to -1 and then popualate for the complete 8x8 blocks (CU 16 in L0)*/
|
/* Not populated for 4x4 blocks (CU 8 in L0), can be done */
|
/*Also, not 32x32 satd is not populated, as it would correspong to CU 64 and it is not an incomplete CTB */
|
if(i4_layer_id == 1)
|
{
|
WORD32 i4_i;
|
|
for(i4_i = 0; i4_i < 64; i4_i++)
|
{
|
(ps_ed_ctb + i4_i)->i4_4x4_satd = -1;
|
(ps_ed_ctb + i4_i)->i4_4x4_cur_satd = -1;
|
}
|
|
for(i4_i = 0; i4_i < 16; i4_i++)
|
{
|
ps_ed_ctb_l1->i4_sum_4x4_satd[i4_i] = -2;
|
ps_ed_ctb_l1->i4_min_4x4_satd[i4_i] = 0x7FFFFFFF;
|
ps_ed_ctb_l1->i4_8x8_satd[i4_i][0] = -2;
|
ps_ed_ctb_l1->i4_8x8_satd[i4_i][1] = -2;
|
}
|
|
for(i4_i = 0; i4_i < 4; i4_i++)
|
{
|
ps_ed_ctb_l1->i4_16x16_satd[i4_i][0] = -2;
|
ps_ed_ctb_l1->i4_16x16_satd[i4_i][1] = -2;
|
ps_ed_ctb_l1->i4_16x16_satd[i4_i][2] = -2;
|
}
|
ps_ed_ctb_l1->i4_32x32_satd[0][0] = -2;
|
ps_ed_ctb_l1->i4_32x32_satd[0][1] = -2;
|
ps_ed_ctb_l1->i4_32x32_satd[0][2] = -2;
|
|
ps_ed_ctb_l1->i4_32x32_satd[0][3] = -2;
|
|
for(i4_i = 0; i4_i < 16; i4_i++)
|
{
|
ps_ed_ctb_l1->i4_best_satd_8x8[i4_i] = -1;
|
ps_ed_ctb_l1->i4_best_sad_cost_8x8_l1_ipe[i4_i] = -1;
|
ps_ed_ctb_l1->i4_best_sad_8x8_l1_ipe[i4_i] = -1;
|
ps_ed_ctb_l1->i4_best_sad_cost_8x8_l1_me[i4_i] = -1;
|
ps_ed_ctb_l1->i4_sad_cost_me_for_ref[i4_i] = -1;
|
ps_ed_ctb_l1->i4_sad_me_for_ref[i4_i] = -1;
|
ps_ed_ctb_l1->i4_best_sad_8x8_l1_me[i4_i] = -1;
|
|
ps_ed_ctb_l1->i4_best_sad_8x8_l1_me_for_decide[i4_i] = -1;
|
}
|
}
|
/*
|
* src scan happens in raster scan order. ps_ed update happens in z-scan order.
|
*/
|
for(i = 0; i < num_4x4_blks_x; i++)
|
{
|
ps_ed_ctxt->ai4_top_intra_modes_ic_ctb[i] = INTRA_DC;
|
}
|
next_left_intra_mode = left_intra_mode_ptr[0];
|
for(i = 0; i < num_4x4_blks_y / 2; i++)
|
{
|
pu1_src_8x8 = pu1_src + i * 2 * 4 * src_stride;
|
top_intra_mode_ptr = &ps_ed_ctxt->ai4_top_intra_modes_ic_ctb[0];
|
nbr_flags_ptr = &nbr_flags[0] + 2 * 8 * i;
|
|
for(j = 0; j < num_4x4_blks_x / 2; j++)
|
{
|
WORD32 i4_best_satd;
|
// Multiply i by 16 since the
|
// matrix is prepared for ctb_size = 64
|
z_scan_idx = gau1_ctb_raster_to_zscan[i * 2 * 16 + j * 2];
|
z_scan_act_idx = gau1_ctb_raster_to_zscan[i * 16 + j];
|
ASSERT(z_scan_act_idx <= 15);
|
ps_ed_8x8 = ps_ed_ctb + z_scan_idx;
|
|
ihevce_ed_calc_8x8_blk(
|
ps_ed_ctxt,
|
ps_ed_8x8,
|
pu1_src_8x8,
|
src_stride,
|
nbr_flags_ptr,
|
top_intra_mode_ptr,
|
left_intra_mode_ptr,
|
i * 8,
|
lambda,
|
sad_ptr + z_scan_idx * NUM_MODES,
|
&i4_best_satd,
|
i4_layer_id,
|
ps_ed_ctxt->i4_quality_preset,
|
ps_ed_ctxt->i4_slice_type,
|
&i4_best_sad_cost_8x8_l1_ipe,
|
&i4_best_sad_8x8_l1_ipe,
|
&i4_sum_4x4_satd,
|
&i4_min_4x4_satd,
|
ps_ipe_optimised_function_list,
|
ps_cmn_utils_optimised_function_list);
|
|
ASSERT(i4_best_satd >= 0);
|
if(i4_layer_id == 1)
|
{
|
ps_ed_ctb_l1->i4_best_sad_cost_8x8_l1_ipe[z_scan_act_idx] =
|
i4_best_sad_cost_8x8_l1_ipe;
|
ps_ed_ctb_l1->i4_best_sad_8x8_l1_ipe[z_scan_act_idx] = i4_best_sad_8x8_l1_ipe;
|
ps_ed_ctb_l1->i4_best_satd_8x8[z_scan_act_idx] = i4_best_satd;
|
ps_ed_ctxt->i8_sum_best_satd += i4_best_satd;
|
ps_ed_ctxt->i8_sum_sq_best_satd += (i4_best_satd * i4_best_satd);
|
//ps_ed_ctb_l1->i4_sum_4x4_satd[z_scan_act_idx] = i4_sum_4x4_satd;
|
//ps_ed_ctb_l1->i4_min_4x4_satd[z_scan_act_idx] = i4_min_4x4_satd;
|
}
|
|
pu1_src_8x8 += 8;
|
//ps_ed_8x8 += 4;
|
top_intra_mode_ptr += 2;
|
nbr_flags_ptr += 2;
|
}
|
|
next_left_intra_mode = left_intra_mode_ptr[0];
|
left_over_4x4_blks = (num_4x4_blks_x - (2 * (num_4x4_blks_x / 2)));
|
left_over_4x4_blks = left_over_4x4_blks * 2;
|
|
pu1_src_4x4 = pu1_src_8x8;
|
|
i4_incomplete_sum_4x4_satd = 0;
|
i4_incomplete_min_4x4_satd = 0x7FFFFFFF;
|
|
/* For leftover right 4x4 blks (num_4x4_blks_x - 2 *(num_4x4_blks_x/2))*/
|
for(k = 0; k < left_over_4x4_blks; k++)
|
{
|
WORD32 i4_best_satd;
|
WORD32 i4_dummy_sad_cost;
|
// Multiply i by 16 since the
|
// matrix is prepared for ctb_size = 64
|
ASSERT(left_over_4x4_blks == 2);
|
z_scan_idx = gau1_ctb_raster_to_zscan[i * 2 * 16 + k * 16 + j * 2];
|
ps_ed_4x4 = ps_ed_ctb + z_scan_idx;
|
|
top_intra_mode = ps_ed_ctxt->ai4_top_intra_modes_ic_ctb[j];
|
left_intra_mode = next_left_intra_mode;
|
|
nbr_flag = nbr_flags[i * 2 * 8 + k * 8 + j * 2];
|
|
/* call the function which populates ref data for intra predicion */
|
pf_intra_pred_luma_ref_substitution(
|
pu1_src_4x4 - src_stride - 1,
|
pu1_src_4x4 - src_stride,
|
pu1_src_4x4 - 1,
|
src_stride,
|
4,
|
nbr_flag,
|
&ps_ed_ctxt->au1_ref_ic_ctb[0],
|
0);
|
|
top_available = CHECK_T_AVAILABLE(nbr_flag);
|
left_available = CHECK_L_AVAILABLE(nbr_flag);
|
/* call the function which populates sad cost for all the modes */
|
ihevce_intra_populate_mode_bits_cost(
|
top_intra_mode,
|
left_intra_mode,
|
top_available,
|
left_available,
|
i * 4,
|
&ps_ed_ctxt->au2_mode_bits_cost_ic_ctb[0],
|
lambda);
|
|
ihevce_ed_calc_4x4_blk(
|
ps_ed_4x4,
|
pu1_src_4x4,
|
src_stride,
|
&ps_ed_ctxt->au1_ref_ic_ctb[0],
|
&ps_ed_ctxt->au2_mode_bits_cost_ic_ctb[0],
|
sad_ptr + z_scan_idx * NUM_MODES,
|
&i4_best_satd,
|
ps_ed_ctxt->i4_quality_preset,
|
&i4_dummy_sad_cost,
|
ps_ipe_optimised_function_list);
|
|
ASSERT(i4_best_satd >= 0);
|
if(i4_layer_id == 1) //Can we ignore this check?
|
{
|
z_scan_act_idx = gau1_ctb_raster_to_zscan[i * 16 + j];
|
/*Note : The satd population is not populated for last 4*4 block in incomplete CTB */
|
/* Which corresponds to CU 8 in L0 */
|
|
/*MAM_VAR_L1 */
|
i4_incomplete_sum_4x4_satd = i4_incomplete_sum_4x4_satd + i4_best_satd;
|
if(i4_incomplete_min_4x4_satd >= i4_best_satd)
|
i4_incomplete_min_4x4_satd = i4_best_satd;
|
ps_ed_ctxt->i8_sum_best_satd += i4_best_satd;
|
ps_ed_ctxt->i8_sum_sq_best_satd += (i4_best_satd * i4_best_satd);
|
if((k & 1) == 0)
|
{
|
ps_ed_ctb_l1->i4_best_satd_8x8[z_scan_act_idx] = 0;
|
}
|
ps_ed_ctb_l1->i4_best_satd_8x8[z_scan_act_idx] += i4_best_satd;
|
}
|
|
ps_ed_ctxt->ai4_top_intra_modes_ic_ctb[j * 2] = ps_ed_4x4->best_mode;
|
next_left_intra_mode = ps_ed_4x4->best_mode;
|
pu1_src_4x4 += src_stride;
|
left_intra_mode_ptr[k] = next_left_intra_mode;
|
}
|
left_intra_mode_ptr += 2;
|
}
|
|
if(num_4x4_blks_y & 1)
|
{
|
/* For leftover bottom 4x4 blks. (num_4x4_blks_x) */
|
pu1_src_4x4 = pu1_src + i * 2 * 4 * src_stride;
|
//memset(&ps_ed_ctb_l1->i4_best_satd_8x8[i][0],0,4*sizeof(WORD32));
|
for(j = 0; j < num_4x4_blks_x; j++)
|
{
|
WORD32 i4_best_satd;
|
WORD32 i4_dummy_sad_cost;
|
// Multiply i by 16 since the
|
// matrix is prepared for ctb_size = 64
|
z_scan_idx = gau1_ctb_raster_to_zscan[i * 2 * 16 + j];
|
ps_ed_4x4 = ps_ed_ctb + z_scan_idx;
|
|
if((j & 1) == 0)
|
{
|
i4_incomplete_sum_4x4_satd = 0;
|
i4_incomplete_min_4x4_satd = 0x7FFFFFFF;
|
}
|
|
top_intra_mode = ps_ed_ctxt->ai4_top_intra_modes_ic_ctb[j];
|
left_intra_mode = next_left_intra_mode;
|
|
nbr_flag = nbr_flags[i * 2 * 8 + j];
|
|
/* call the function which populates ref data for intra predicion */
|
pf_intra_pred_luma_ref_substitution(
|
pu1_src_4x4 - src_stride - 1,
|
pu1_src_4x4 - src_stride,
|
pu1_src_4x4 - 1,
|
src_stride,
|
4,
|
nbr_flag,
|
&ps_ed_ctxt->au1_ref_ic_ctb[0],
|
0);
|
|
top_available = CHECK_T_AVAILABLE(nbr_flag);
|
left_available = CHECK_L_AVAILABLE(nbr_flag);
|
/* call the function which populates sad cost for all the modes */
|
ihevce_intra_populate_mode_bits_cost(
|
top_intra_mode,
|
left_intra_mode,
|
top_available,
|
left_available,
|
i * 4,
|
&ps_ed_ctxt->au2_mode_bits_cost_ic_ctb[0],
|
lambda);
|
|
ihevce_ed_calc_4x4_blk(
|
ps_ed_4x4,
|
pu1_src_4x4,
|
src_stride,
|
&ps_ed_ctxt->au1_ref_ic_ctb[0],
|
&ps_ed_ctxt->au2_mode_bits_cost_ic_ctb[0],
|
sad_ptr + z_scan_idx * NUM_MODES,
|
&i4_best_satd,
|
ps_ed_ctxt->i4_quality_preset,
|
&i4_dummy_sad_cost,
|
ps_ipe_optimised_function_list);
|
|
/*Note : The satd population is not populated for last 4*4 block in incomplete CTB */
|
/* Which corresponds to CU 8 in L0 */
|
|
/*MAM_VAR_L1 */
|
ASSERT(i4_best_satd >= 0);
|
if(i4_layer_id == 1) //Can we ignore this check?
|
{
|
z_scan_act_idx = gau1_ctb_raster_to_zscan[i * 16 + (j >> 1)];
|
if((j & 1) == 0)
|
{
|
ps_ed_ctb_l1->i4_best_satd_8x8[z_scan_act_idx] = 0;
|
}
|
ps_ed_ctb_l1->i4_best_satd_8x8[z_scan_act_idx] += i4_best_satd;
|
ps_ed_ctxt->i8_sum_best_satd += i4_best_satd;
|
ps_ed_ctxt->i8_sum_sq_best_satd += (i4_best_satd * i4_best_satd);
|
i4_incomplete_sum_4x4_satd = i4_incomplete_sum_4x4_satd + i4_best_satd;
|
if(i4_incomplete_min_4x4_satd >= i4_best_satd)
|
i4_incomplete_min_4x4_satd = i4_best_satd;
|
}
|
|
ps_ed_ctxt->ai4_top_intra_modes_ic_ctb[j] = ps_ed_4x4->best_mode;
|
next_left_intra_mode = ps_ed_4x4->best_mode;
|
pu1_src_4x4 += 4;
|
}
|
}
|
left_intra_mode_ptr[0] = next_left_intra_mode;
|
}
|
|
/*!
|
******************************************************************************
|
* \if Function name : ihevce_cu_level_qp_mod \endif
|
*
|
* \brief: Performs CU level QP modulation
|
*
|
*****************************************************************************
|
*/
|
WORD32 ihevce_cu_level_qp_mod(
|
WORD32 i4_qscale,
|
WORD32 i4_satd,
|
long double ld_curr_frame_log_avg_act,
|
float f_mod_strength,
|
WORD32 *pi4_act_factor,
|
WORD32 *pi4_q_scale_mod,
|
rc_quant_t *ps_rc_quant_ctxt)
|
{
|
WORD32 i4_temp_qscale;
|
WORD32 i4_temp_qp;
|
|
if(i4_satd != -1)
|
{
|
WORD32 i4_loc_satd = i4_satd;
|
if(i4_loc_satd < 1)
|
{
|
i4_loc_satd = 1;
|
}
|
if((WORD32)ld_curr_frame_log_avg_act == 0)
|
{
|
*pi4_act_factor = (1 << (QP_LEVEL_MOD_ACT_FACTOR));
|
}
|
else
|
{
|
UWORD32 u4_log2_sq_cur_satd;
|
ULWORD64 u8_sq_cur_satd;
|
WORD32 qp_offset;
|
|
ASSERT(USE_SQRT_AVG_OF_SATD_SQR);
|
u8_sq_cur_satd = (i4_loc_satd * i4_loc_satd);
|
GET_POS_MSB_64(u4_log2_sq_cur_satd, u8_sq_cur_satd);
|
if(ABS((
|
long double)(((1 << u4_log2_sq_cur_satd) * POW_2_TO_1_BY_4) - ((long double)u8_sq_cur_satd))) >
|
ABS((
|
long double)(((1 << u4_log2_sq_cur_satd) * POW_2_TO_3_BY_4) - ((long double)u8_sq_cur_satd))))
|
{
|
u4_log2_sq_cur_satd += 1;
|
}
|
qp_offset = (WORD32)(
|
f_mod_strength *
|
(float)((long double)u4_log2_sq_cur_satd - ld_curr_frame_log_avg_act));
|
qp_offset = CLIP3(qp_offset, MIN_QP_MOD_OFFSET, MAX_QP_MOD_OFFSET);
|
*pi4_act_factor = (WORD32)(
|
gad_look_up_activity[qp_offset + ABS(MIN_QP_MOD_OFFSET)] *
|
(1 << QP_LEVEL_MOD_ACT_FACTOR));
|
}
|
|
ASSERT(*pi4_act_factor > 0);
|
i4_temp_qscale = ((i4_qscale * (*pi4_act_factor)) + (1 << (QP_LEVEL_MOD_ACT_FACTOR - 1))) >>
|
QP_LEVEL_MOD_ACT_FACTOR;
|
}
|
else
|
{
|
i4_temp_qscale = i4_qscale;
|
*pi4_act_factor = (1 << QP_LEVEL_MOD_ACT_FACTOR);
|
}
|
ASSERT(*pi4_act_factor > 0);
|
|
if(i4_temp_qscale > ps_rc_quant_ctxt->i2_max_qscale)
|
{
|
i4_temp_qscale = ps_rc_quant_ctxt->i2_max_qscale;
|
}
|
else if(i4_temp_qscale < ps_rc_quant_ctxt->i2_min_qscale)
|
{
|
i4_temp_qscale = ps_rc_quant_ctxt->i2_min_qscale;
|
}
|
/*store q scale for stat gen for I frame model*/
|
/*Here activity factor is not modified as the cu qp would be clipped in rd-opt stage*/
|
*pi4_q_scale_mod = i4_temp_qscale;
|
i4_temp_qp = ps_rc_quant_ctxt->pi4_qscale_to_qp[i4_temp_qscale];
|
if(i4_temp_qp > ps_rc_quant_ctxt->i2_max_qp)
|
{
|
i4_temp_qp = ps_rc_quant_ctxt->i2_max_qp;
|
}
|
else if(i4_temp_qp < ps_rc_quant_ctxt->i2_min_qp)
|
{
|
i4_temp_qp = ps_rc_quant_ctxt->i2_min_qp;
|
}
|
return (i4_temp_qp);
|
}
|
|
/*!
|
******************************************************************************
|
* \if Function name : ihevce_ed_calc_ctb \endif
|
*
|
* \brief: performs L1 8x8 and 4x4 intra mode analysis
|
*
|
*****************************************************************************
|
*/
|
void ihevce_ed_calc_ctb(
|
ihevce_ed_ctxt_t *ps_ed_ctxt,
|
ihevce_ed_blk_t *ps_ed_ctb,
|
ihevce_ed_ctb_l1_t *ps_ed_ctb_l1,
|
UWORD8 *pu1_src,
|
WORD32 src_stride,
|
WORD32 num_4x4_blks_x,
|
WORD32 num_4x4_blks_y,
|
WORD32 *nbr_flags,
|
WORD32 i4_layer_id,
|
WORD32 i4_row_block_no,
|
WORD32 i4_col_block_no,
|
ihevce_ipe_optimised_function_list_t *ps_ipe_optimised_function_list,
|
ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list)
|
{
|
WORD32 i, j;
|
WORD32 z_scan_idx = 0;
|
WORD32 z_scan_act_idx = 0;
|
ihevce_ed_blk_t *ps_ed_8x8;
|
UWORD8 *pu1_src_8x8;
|
|
WORD32 top_intra_modes[20];
|
WORD32 *top_intra_mode_ptr;
|
WORD32 *left_intra_mode_ptr = ps_ed_ctxt->left_ctb_intra_modes;
|
|
WORD32 *sad_ptr = &ps_ed_ctxt->sad[0];
|
WORD32 lambda = ps_ed_ctxt->lambda;
|
WORD32 *nbr_flags_ptr;
|
WORD32 i4_best_sad_cost_8x8_l1_ipe, i4_best_sad_8x8_l1_ipe, i4_sum_4x4_satd, i4_min_4x4_satd;
|
|
(void)num_4x4_blks_y;
|
(void)i4_row_block_no;
|
(void)i4_col_block_no;
|
ASSERT(num_4x4_blks_x % 2 == 0);
|
ASSERT(num_4x4_blks_y % 2 == 0);
|
ASSERT((num_4x4_blks_x == 4) || (num_4x4_blks_x == 8));
|
ASSERT((num_4x4_blks_y == 4) || (num_4x4_blks_y == 8));
|
|
if(i4_layer_id == 1)
|
{
|
WORD32 i4_i;
|
|
for(i4_i = 0; i4_i < 64; i4_i++)
|
{
|
(ps_ed_ctb + i4_i)->i4_4x4_satd = -1;
|
(ps_ed_ctb + i4_i)->i4_4x4_cur_satd = -1;
|
}
|
|
for(i4_i = 0; i4_i < 16; i4_i++)
|
{
|
ps_ed_ctb_l1->i4_sum_4x4_satd[i4_i] = -2;
|
ps_ed_ctb_l1->i4_min_4x4_satd[i4_i] = 0x7FFFFFFF;
|
ps_ed_ctb_l1->i4_8x8_satd[i4_i][0] = -2;
|
ps_ed_ctb_l1->i4_8x8_satd[i4_i][1] = -2;
|
}
|
|
for(i4_i = 0; i4_i < 4; i4_i++)
|
{
|
ps_ed_ctb_l1->i4_16x16_satd[i4_i][0] = -2;
|
ps_ed_ctb_l1->i4_16x16_satd[i4_i][1] = -2;
|
ps_ed_ctb_l1->i4_16x16_satd[i4_i][2] = -2;
|
}
|
ps_ed_ctb_l1->i4_32x32_satd[0][0] = -2;
|
ps_ed_ctb_l1->i4_32x32_satd[0][1] = -2;
|
ps_ed_ctb_l1->i4_32x32_satd[0][2] = -2;
|
ps_ed_ctb_l1->i4_32x32_satd[0][3] = -2;
|
for(i4_i = 0; i4_i < 16; i4_i++)
|
{
|
ps_ed_ctb_l1->i4_best_sad_cost_8x8_l1_me[i4_i] = -2;
|
ps_ed_ctb_l1->i4_sad_cost_me_for_ref[i4_i] = -2;
|
ps_ed_ctb_l1->i4_sad_me_for_ref[i4_i] = -2;
|
ps_ed_ctb_l1->i4_best_sad_8x8_l1_me[i4_i] = -2;
|
|
ps_ed_ctb_l1->i4_best_sad_8x8_l1_me_for_decide[i4_i] = -2;
|
|
ps_ed_ctb_l1->i4_best_satd_8x8[i4_i] = -2;
|
ps_ed_ctb_l1->i4_best_sad_cost_8x8_l1_ipe[i4_i] = -2;
|
ps_ed_ctb_l1->i4_best_sad_8x8_l1_ipe[i4_i] = -2;
|
}
|
}
|
/*
|
* src scan happens in raster scan order. ps_ed update happens in z-scan order.
|
*/
|
for(i = 0; i < num_4x4_blks_x; i++)
|
{
|
top_intra_modes[i] = INTRA_DC;
|
}
|
for(i = 0; i < num_4x4_blks_x / 2; i++)
|
{
|
pu1_src_8x8 = pu1_src + i * 2 * 4 * src_stride;
|
top_intra_mode_ptr = &top_intra_modes[0];
|
nbr_flags_ptr = &nbr_flags[0] + 2 * 8 * i;
|
|
for(j = 0; j < num_4x4_blks_x / 2; j++)
|
{
|
WORD32 i4_best_satd;
|
ASSERT(i <= 3);
|
ASSERT(j <= 3);
|
|
// Multiply i by 16 since the
|
// matrix is prepared for ctb_size = 64
|
z_scan_idx = gau1_ctb_raster_to_zscan[i * 2 * 16 + j * 2];
|
z_scan_act_idx = gau1_ctb_raster_to_zscan[i * 16 + j];
|
ASSERT(z_scan_act_idx <= 15);
|
|
ps_ed_8x8 = ps_ed_ctb + z_scan_idx;
|
|
ihevce_ed_calc_8x8_blk(
|
ps_ed_ctxt,
|
ps_ed_8x8,
|
pu1_src_8x8,
|
src_stride,
|
nbr_flags_ptr,
|
top_intra_mode_ptr,
|
left_intra_mode_ptr,
|
i * 8,
|
lambda,
|
sad_ptr + z_scan_idx * NUM_MODES,
|
&i4_best_satd,
|
i4_layer_id,
|
ps_ed_ctxt->i4_quality_preset,
|
ps_ed_ctxt->i4_slice_type,
|
&i4_best_sad_cost_8x8_l1_ipe,
|
&i4_best_sad_8x8_l1_ipe,
|
&i4_sum_4x4_satd,
|
&i4_min_4x4_satd,
|
ps_ipe_optimised_function_list,
|
ps_cmn_utils_optimised_function_list);
|
|
if(i4_layer_id == 1)
|
{
|
ps_ed_ctb_l1->i4_best_sad_cost_8x8_l1_ipe[z_scan_act_idx] =
|
i4_best_sad_cost_8x8_l1_ipe;
|
ps_ed_ctb_l1->i4_best_sad_8x8_l1_ipe[z_scan_act_idx] = i4_best_sad_8x8_l1_ipe;
|
ps_ed_ctb_l1->i4_best_satd_8x8[z_scan_act_idx] = i4_best_satd;
|
ps_ed_ctxt->i8_sum_best_satd += i4_best_satd;
|
ps_ed_ctxt->i8_sum_sq_best_satd += (i4_best_satd * i4_best_satd);
|
//ps_ed_ctb_l1->i4_sum_4x4_satd[z_scan_act_idx] = i4_sum_4x4_satd;
|
//ps_ed_ctb_l1->i4_min_4x4_satd[z_scan_act_idx] = i4_min_4x4_satd;
|
}
|
|
pu1_src_8x8 += 8;
|
//ps_ed_8x8 += 4;
|
top_intra_mode_ptr += 2;
|
nbr_flags_ptr += 2;
|
}
|
left_intra_mode_ptr += 2;
|
}
|
}
|
|
/*!
|
******************************************************************************
|
* \if Function name : ihevce_ed_frame_init \endif
|
*
|
* \brief: Initialize frame context for early decision
|
*
|
*****************************************************************************
|
*/
|
void ihevce_ed_frame_init(void *pv_ed_ctxt, WORD32 i4_layer_no)
|
{
|
ihevce_ed_ctxt_t *ps_ed_ctxt = (ihevce_ed_ctxt_t *)pv_ed_ctxt;
|
|
g_apf_lum_ip[IP_FUNC_MODE_0] = ps_ed_ctxt->ps_func_selector->ihevc_intra_pred_luma_planar_fptr;
|
g_apf_lum_ip[IP_FUNC_MODE_1] = ps_ed_ctxt->ps_func_selector->ihevc_intra_pred_luma_dc_fptr;
|
g_apf_lum_ip[IP_FUNC_MODE_2] = ps_ed_ctxt->ps_func_selector->ihevc_intra_pred_luma_mode2_fptr;
|
g_apf_lum_ip[IP_FUNC_MODE_3TO9] =
|
ps_ed_ctxt->ps_func_selector->ihevc_intra_pred_luma_mode_3_to_9_fptr;
|
g_apf_lum_ip[IP_FUNC_MODE_10] = ps_ed_ctxt->ps_func_selector->ihevc_intra_pred_luma_horz_fptr;
|
g_apf_lum_ip[IP_FUNC_MODE_11TO17] =
|
ps_ed_ctxt->ps_func_selector->ihevc_intra_pred_luma_mode_11_to_17_fptr;
|
g_apf_lum_ip[IP_FUNC_MODE_18_34] =
|
ps_ed_ctxt->ps_func_selector->ihevc_intra_pred_luma_mode_18_34_fptr;
|
g_apf_lum_ip[IP_FUNC_MODE_19TO25] =
|
ps_ed_ctxt->ps_func_selector->ihevc_intra_pred_luma_mode_19_to_25_fptr;
|
g_apf_lum_ip[IP_FUNC_MODE_26] = ps_ed_ctxt->ps_func_selector->ihevc_intra_pred_luma_ver_fptr;
|
g_apf_lum_ip[IP_FUNC_MODE_27TO33] =
|
ps_ed_ctxt->ps_func_selector->ihevc_intra_pred_luma_mode_27_to_33_fptr;
|
|
if(i4_layer_no == 1)
|
{
|
ps_ed_ctxt->i8_sum_best_satd = 0;
|
ps_ed_ctxt->i8_sum_sq_best_satd = 0;
|
}
|
}
|
|
/**
|
********************************************************************************
|
*
|
* @brief downscales by 2 in horz and vertical direction, creates output of
|
* size wd/2 * ht/2
|
*
|
* @param[in] pu1_src : source pointer
|
* @param[in] src_stride : source stride
|
* @param[out] pu1_dst : destination pointer. Starting of a row.
|
* @param[in] dst_stride : destination stride
|
* @param[in] wd : width
|
* @param[in] ht : height
|
* @param[in] pu1_wkg_mem : working memory (atleast of size CEIL16(wd) * ht))
|
* @param[in] ht_offset : height offset of the block to be scaled
|
* @param[in] block_ht : height of the block to be scaled
|
* @param[in] wd_offset : width offset of the block to be scaled
|
* @param[in] block_wd : width of the block to be scaled
|
*
|
* @return void
|
*
|
* @remarks Assumption made block_ht should me multiple of 2. LANCZOS_SCALER
|
*
|
********************************************************************************
|
*/
|
void ihevce_scaling_filter_mxn(
|
UWORD8 *pu1_src,
|
WORD32 src_strd,
|
UWORD8 *pu1_scrtch,
|
WORD32 scrtch_strd,
|
UWORD8 *pu1_dst,
|
WORD32 dst_strd,
|
WORD32 ht,
|
WORD32 wd)
|
{
|
#define FILT_TAP_Q 8
|
#define N_TAPS 7
|
const WORD16 i4_ftaps[N_TAPS] = { -18, 0, 80, 132, 80, 0, -18 };
|
WORD32 i, j;
|
WORD32 tmp;
|
UWORD8 *pu1_src_tmp = pu1_src - 3 * src_strd;
|
UWORD8 *pu1_scrtch_tmp = pu1_scrtch;
|
|
/* horizontal filtering */
|
for(i = -3; i < ht + 2; i++)
|
{
|
for(j = 0; j < wd; j += 2)
|
{
|
tmp = (i4_ftaps[3] * pu1_src_tmp[j] +
|
i4_ftaps[2] * (pu1_src_tmp[j - 1] + pu1_src_tmp[j + 1]) +
|
i4_ftaps[1] * (pu1_src_tmp[j + 2] + pu1_src_tmp[j - 2]) +
|
i4_ftaps[0] * (pu1_src_tmp[j + 3] + pu1_src_tmp[j - 3]) +
|
(1 << (FILT_TAP_Q - 1))) >>
|
FILT_TAP_Q;
|
pu1_scrtch_tmp[j >> 1] = CLIP_U8(tmp);
|
}
|
pu1_scrtch_tmp += scrtch_strd;
|
pu1_src_tmp += src_strd;
|
}
|
/* vertical filtering */
|
pu1_scrtch_tmp = pu1_scrtch + 3 * scrtch_strd;
|
for(i = 0; i < ht; i += 2)
|
{
|
for(j = 0; j < (wd >> 1); j++)
|
{
|
tmp =
|
(i4_ftaps[3] * pu1_scrtch_tmp[j] +
|
i4_ftaps[2] * (pu1_scrtch_tmp[j + scrtch_strd] + pu1_scrtch_tmp[j - scrtch_strd]) +
|
i4_ftaps[1] *
|
(pu1_scrtch_tmp[j + 2 * scrtch_strd] + pu1_scrtch_tmp[j - 2 * scrtch_strd]) +
|
i4_ftaps[0] *
|
(pu1_scrtch_tmp[j + 3 * scrtch_strd] + pu1_scrtch_tmp[j - 3 * scrtch_strd]) +
|
(1 << (FILT_TAP_Q - 1))) >>
|
FILT_TAP_Q;
|
pu1_dst[j] = CLIP_U8(tmp);
|
}
|
pu1_dst += dst_strd;
|
pu1_scrtch_tmp += (scrtch_strd << 1);
|
}
|
}
|
|
void ihevce_scale_by_2(
|
UWORD8 *pu1_src,
|
WORD32 src_strd,
|
UWORD8 *pu1_dst,
|
WORD32 dst_strd,
|
WORD32 wd,
|
WORD32 ht,
|
UWORD8 *pu1_wkg_mem,
|
WORD32 ht_offset,
|
WORD32 block_ht,
|
WORD32 wd_offset,
|
WORD32 block_wd,
|
FT_COPY_2D *pf_copy_2d,
|
FT_SCALING_FILTER_BY_2 *pf_scaling_filter_mxn)
|
{
|
#define N_TAPS 7
|
#define MAX_BLK_SZ (MAX_CTB_SIZE + ((N_TAPS >> 1) << 1))
|
UWORD8 au1_cpy[MAX_BLK_SZ * MAX_BLK_SZ];
|
UWORD32 cpy_strd = MAX_BLK_SZ;
|
UWORD8 *pu1_cpy = au1_cpy + cpy_strd * (N_TAPS >> 1) + (N_TAPS >> 1);
|
|
UWORD8 *pu1_in, *pu1_out;
|
WORD32 in_strd, wkg_mem_strd;
|
|
WORD32 row_start, row_end;
|
WORD32 col_start, col_end;
|
WORD32 i, fun_select;
|
WORD32 ht_tmp, wd_tmp;
|
FT_SCALING_FILTER_BY_2 *ihevce_scaling_filters[2];
|
|
assert((wd & 1) == 0);
|
assert((ht & 1) == 0);
|
assert(block_wd <= MAX_CTB_SIZE);
|
assert(block_ht <= MAX_CTB_SIZE);
|
|
/* function pointers for filtering different dimensions */
|
ihevce_scaling_filters[0] = ihevce_scaling_filter_mxn;
|
ihevce_scaling_filters[1] = pf_scaling_filter_mxn;
|
|
/* handle boundary blks */
|
col_start = (wd_offset < (N_TAPS >> 1)) ? 1 : 0;
|
row_start = (ht_offset < (N_TAPS >> 1)) ? 1 : 0;
|
col_end = ((wd_offset + block_wd) > (wd - (N_TAPS >> 1))) ? 1 : 0;
|
row_end = ((ht_offset + block_ht) > (ht - (N_TAPS >> 1))) ? 1 : 0;
|
if(col_end && (wd % block_wd != 0))
|
{
|
block_wd = (wd % block_wd);
|
}
|
if(row_end && (ht % block_ht != 0))
|
{
|
block_ht = (ht % block_ht);
|
}
|
|
/* boundary blks needs to be padded, copy src to tmp buffer */
|
if(col_start || col_end || row_end || row_start)
|
{
|
UWORD8 *pu1_src_tmp = pu1_src + wd_offset + ht_offset * src_strd;
|
|
pu1_cpy -= (3 * (1 - col_start) + cpy_strd * 3 * (1 - row_start));
|
pu1_src_tmp -= (3 * (1 - col_start) + src_strd * 3 * (1 - row_start));
|
ht_tmp = block_ht + 3 * (1 - row_start) + 3 * (1 - row_end);
|
wd_tmp = block_wd + 3 * (1 - col_start) + 3 * (1 - col_end);
|
pf_copy_2d(pu1_cpy, cpy_strd, pu1_src_tmp, src_strd, wd_tmp, ht_tmp);
|
pu1_in = au1_cpy + cpy_strd * 3 + 3;
|
in_strd = cpy_strd;
|
}
|
else
|
{
|
pu1_in = pu1_src + wd_offset + ht_offset * src_strd;
|
in_strd = src_strd;
|
}
|
|
/*top padding*/
|
if(row_start)
|
{
|
UWORD8 *pu1_cpy_tmp = au1_cpy + cpy_strd * 3;
|
|
pu1_cpy = au1_cpy + cpy_strd * (3 - 1);
|
memcpy(pu1_cpy, pu1_cpy_tmp, block_wd + 6);
|
pu1_cpy -= cpy_strd;
|
memcpy(pu1_cpy, pu1_cpy_tmp, block_wd + 6);
|
pu1_cpy -= cpy_strd;
|
memcpy(pu1_cpy, pu1_cpy_tmp, block_wd + 6);
|
}
|
|
/*bottom padding*/
|
if(row_end)
|
{
|
UWORD8 *pu1_cpy_tmp = au1_cpy + cpy_strd * 3 + (block_ht - 1) * cpy_strd;
|
|
pu1_cpy = pu1_cpy_tmp + cpy_strd;
|
memcpy(pu1_cpy, pu1_cpy_tmp, block_wd + 6);
|
pu1_cpy += cpy_strd;
|
memcpy(pu1_cpy, pu1_cpy_tmp, block_wd + 6);
|
pu1_cpy += cpy_strd;
|
memcpy(pu1_cpy, pu1_cpy_tmp, block_wd + 6);
|
}
|
|
/*left padding*/
|
if(col_start)
|
{
|
UWORD8 *pu1_cpy_tmp = au1_cpy + 3;
|
|
pu1_cpy = au1_cpy;
|
for(i = 0; i < block_ht + 6; i++)
|
{
|
pu1_cpy[0] = pu1_cpy[1] = pu1_cpy[2] = pu1_cpy_tmp[0];
|
pu1_cpy += cpy_strd;
|
pu1_cpy_tmp += cpy_strd;
|
}
|
}
|
|
/*right padding*/
|
if(col_end)
|
{
|
UWORD8 *pu1_cpy_tmp = au1_cpy + 3 + block_wd - 1;
|
|
pu1_cpy = au1_cpy + 3 + block_wd;
|
for(i = 0; i < block_ht + 6; i++)
|
{
|
pu1_cpy[0] = pu1_cpy[1] = pu1_cpy[2] = pu1_cpy_tmp[0];
|
pu1_cpy += cpy_strd;
|
pu1_cpy_tmp += cpy_strd;
|
}
|
}
|
|
wkg_mem_strd = block_wd >> 1;
|
pu1_out = pu1_dst + (wd_offset >> 1);
|
fun_select = (block_wd % 16 == 0);
|
ihevce_scaling_filters[fun_select](
|
pu1_in, in_strd, pu1_wkg_mem, wkg_mem_strd, pu1_out, dst_strd, block_ht, block_wd);
|
|
/* Left padding of 16 for 1st block of every row */
|
if(wd_offset == 0)
|
{
|
UWORD8 u1_val;
|
WORD32 pad_wd = 16;
|
WORD32 pad_ht = block_ht >> 1;
|
UWORD8 *dst = pu1_dst;
|
|
for(i = 0; i < pad_ht; i++)
|
{
|
u1_val = dst[0];
|
memset(&dst[-pad_wd], u1_val, pad_wd);
|
dst += dst_strd;
|
}
|
}
|
|
if(wd == wd_offset + block_wd)
|
{
|
/* Right padding of (16 + (CEIL16(wd/2))-wd/2) for last block of every row */
|
/* Right padding is done only after processing of last block of that row is done*/
|
UWORD8 u1_val;
|
WORD32 pad_wd = 16 + CEIL16((wd >> 1)) - (wd >> 1) + 4;
|
WORD32 pad_ht = block_ht >> 1;
|
UWORD8 *dst = pu1_dst + (wd >> 1) - 1;
|
|
for(i = 0; i < pad_ht; i++)
|
{
|
u1_val = dst[0];
|
memset(&dst[1], u1_val, pad_wd);
|
dst += dst_strd;
|
}
|
|
if(ht_offset == 0)
|
{
|
/* Top padding of 16 is done for 1st row only after we reach end of that row */
|
WORD32 pad_wd = dst_strd;
|
WORD32 pad_ht = 16;
|
UWORD8 *dst = pu1_dst - 16;
|
|
for(i = 1; i <= pad_ht; i++)
|
{
|
memcpy(dst - (i * dst_strd), dst, pad_wd);
|
}
|
}
|
|
/* Bottom padding of (16 + (CEIL16(ht/2)) - ht/2) is done only if we have
|
reached end of frame */
|
if(ht - ht_offset - block_ht == 0)
|
{
|
WORD32 pad_wd = dst_strd;
|
WORD32 pad_ht = 16 + CEIL16((ht >> 1)) - (ht >> 1) + 4;
|
UWORD8 *dst = pu1_dst + (((block_ht >> 1) - 1) * dst_strd) - 16;
|
|
for(i = 1; i <= pad_ht; i++)
|
memcpy(dst + (i * dst_strd), dst, pad_wd);
|
}
|
}
|
}
|
|
/*!
|
******************************************************************************
|
* \if Function name : ihevce_decomp_pre_intra_process_row \endif
|
*
|
* \brief
|
* Row level function which down scales a given row by 2 in horz and
|
* vertical direction creates output of size wd/2 * ht/2.
|
*
|
* @param[in] pu1_src : soource pointer
|
* @param[in] src_stride : source stride
|
* @param[out] pu1_dst : desitnation pointer
|
* @param[in] dst_stride : destination stride
|
* @param[in] layer_wd : layer width
|
* @param[in] layer_ht : layer height
|
* @param[in] ht_offset : height offset of the block to be scaled
|
* @param[in] block_ht : height of the block to be scaled
|
* @param[in] wd_offset : width offset of the block to be scaled
|
* @param[in] block_wd : width of the block to be scaled
|
* @param[in] num_col_blks : number of col blks in that row
|
*
|
* \return None
|
*
|
* @NOTE : When decompositionis done from L1 to L2 pre intra analysis is
|
* done on L1
|
*
|
*****************************************************************************
|
*/
|
void ihevce_decomp_pre_intra_process_row(
|
UWORD8 *pu1_src,
|
WORD32 src_stride,
|
UWORD8 *pu1_dst_decomp,
|
WORD32 dst_stride,
|
WORD32 layer_wd,
|
WORD32 layer_ht,
|
UWORD8 *pu1_wkg_mem,
|
WORD32 ht_offset,
|
WORD32 block_ht,
|
WORD32 block_wd,
|
WORD32 i4_cu_aligned_pic_wd,
|
WORD32 i4_cu_aligned_pic_ht,
|
WORD32 num_col_blks,
|
WORD32 layer_no,
|
ihevce_ed_ctxt_t *ps_ed_ctxt,
|
ihevce_ed_blk_t *ps_ed_row,
|
ihevce_ed_ctb_l1_t *ps_ed_ctb_l1_row,
|
ihevce_8x8_L0_satd_t *ps_layer0_cur_satd,
|
ihevce_8x8_L0_mean_t *ps_layer0_cur_mean,
|
WORD32 num_4x4_blks_ctb_y,
|
WORD32 num_4x4_blks_last_ctb_x,
|
WORD32 skip_decomp,
|
WORD32 skip_pre_intra,
|
WORD32 row_block_no,
|
WORD32 i4_enable_noise_detection,
|
ctb_analyse_t *ps_ctb_analyse,
|
ihevce_ipe_optimised_function_list_t *ps_ipe_optimised_function_list,
|
ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list)
|
{
|
WORD32 col_block_no;
|
|
//ihevce_ed_ctxt_t *ps_ed_ctxt = (ihevce_ed_ctxt_t *)pv_ed_ctxt;
|
UWORD8 *pu1_src_pre_intra = pu1_src + (ht_offset * src_stride);
|
WORD32 num_4x4_blks_in_ctb = block_wd >> 2;
|
//WORD32 nbr_flags[64];
|
WORD32 *nbr_flags_ptr = &ps_ed_ctxt->ai4_nbr_flags[0];
|
WORD32 src_inc_pre_intra = num_4x4_blks_in_ctb * 4;
|
WORD32 inc_ctb = 0;
|
ihevce_ed_blk_t *ps_ed_ctb = ps_ed_row;
|
ihevce_ed_ctb_l1_t *ps_ed_ctb_l1 = ps_ed_ctb_l1_row;
|
WORD32 i, j;
|
WORD32 do_pre_intra_analysis;
|
pf_ed_calc_ctb ed_calc_ctb;
|
ctb_analyse_t *ps_ctb_analyse_curr;
|
|
(void)i4_cu_aligned_pic_wd;
|
(void)i4_cu_aligned_pic_ht;
|
(void)ps_layer0_cur_satd;
|
(void)ps_layer0_cur_mean;
|
(void)i4_enable_noise_detection;
|
/*increment the struct pointer to point to the first CTB of the current row. */
|
ps_ctb_analyse_curr = ps_ctb_analyse + row_block_no * num_col_blks;
|
|
//if((num_4x4_blks_ctb_x == num_4x4_blks_ctb_y) && (num_4x4_blks_in_ctb == num_4x4_blks_ctb_x) )
|
if(num_4x4_blks_in_ctb == num_4x4_blks_ctb_y)
|
{
|
ed_calc_ctb = ihevce_ed_calc_ctb;
|
}
|
else
|
{
|
ed_calc_ctb = ihevce_ed_calc_incomplete_ctb;
|
}
|
|
inc_ctb = num_4x4_blks_in_ctb * num_4x4_blks_in_ctb;
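/* Illustrative sizing (assuming a 64x64 CTB at L0): in layer L1 block_wd is 32,
   so num_4x4_blks_in_ctb = 8 and inc_ctb = 64, i.e. the per-CTB stride into the
   array of 4x4 early-decision blocks. */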
|
|
do_pre_intra_analysis = ((layer_no == 1) || (layer_no == 2)) && (!skip_pre_intra);
|
|
/*
|
* For optimal pre intra analysis, the first block is processed outside
|
* the loop.
|
*/
|
if(!skip_decomp)
|
{
|
ihevce_scale_by_2(
|
pu1_src,
|
src_stride,
|
pu1_dst_decomp,
|
dst_stride,
|
layer_wd,
|
layer_ht,
|
pu1_wkg_mem,
|
ht_offset,
|
block_ht,
|
block_wd * 0,
|
block_wd,
|
ps_cmn_utils_optimised_function_list->pf_copy_2d,
|
ps_ipe_optimised_function_list->pf_scaling_filter_mxn);
|
/* Disable noise detection */
|
ps_ctb_analyse_curr->s_ctb_noise_params.i4_noise_present = 0;
|
|
memset(
|
ps_ctb_analyse_curr->s_ctb_noise_params.au1_is_8x8Blk_noisy,
|
0,
|
sizeof(ps_ctb_analyse_curr->s_ctb_noise_params.au1_is_8x8Blk_noisy));
|
}
|
|
/*
|
* Pre intra analysis for the first ctb.
|
* To analyse any given CTB we need to set the availability flags of the
|
* following neighbouring CTB: BL,L,TL,T,TR.
|
*/
|
if(do_pre_intra_analysis)
|
{
|
/*
|
* At the beginning of ctb row set left intra modes to default value.
|
*/
|
for(j = 0; j < num_4x4_blks_ctb_y; j++)
|
{
|
ps_ed_ctxt->left_ctb_intra_modes[j] = INTRA_DC;
|
}
|
|
/*
|
* Copy the neighbor flags for a general ctb (a ctb inside the frame, not at any corner).
* The table gau4_nbr_flags_8x8_4x4blks is generated for an 8x8 array of 4x4 blocks
* (i.e. a 32x32 block, which corresponds to ctb_size = 64 at L0). The same table holds
* good for smaller 2d arrays of 4x4 blocks (eg 4x4 4x4 blks), but the flags must be
* accessed with a stride of 8 since the table has been generated for an 8x8 array.
* For odd 2d arrays of 4x4 blocks (eg 3x3 4x4 blks) the flags need modification.
* The flags also need modification for corner ctbs.
|
*/
|
memcpy(
|
ps_ed_ctxt->ai4_nbr_flags,
|
gau4_nbr_flags_8x8_4x4blks,
|
sizeof(gau4_nbr_flags_8x8_4x4blks));
|
|
/*
|
* Since this is the first ctb in the ctb row, set left flags unavailable for the 1st CTB col
|
*/
|
for(j = 0; j < num_4x4_blks_ctb_y; j++)
|
{
|
SET_L_UNAVAILABLE(ps_ed_ctxt->ai4_nbr_flags[j * 8]);
|
SET_BL_UNAVAILABLE(ps_ed_ctxt->ai4_nbr_flags[j * 8]);
|
SET_TL_UNAVAILABLE(ps_ed_ctxt->ai4_nbr_flags[j * 8]);
|
}
|
/*
|
* If this is the first ctb row, set top flags unavailable.
|
*/
|
if(ht_offset == 0)
|
{
|
for(j = 0; j < num_4x4_blks_in_ctb; j++)
|
{
|
SET_T_UNAVAILABLE(ps_ed_ctxt->ai4_nbr_flags[j]);
|
SET_TR_UNAVAILABLE(ps_ed_ctxt->ai4_nbr_flags[j]);
|
SET_TL_UNAVAILABLE(ps_ed_ctxt->ai4_nbr_flags[j]);
|
}
|
}
|
|
/* If this is the last ctb row, set BL as not available. */
|
if(ht_offset + block_ht >= layer_ht)
|
{
|
for(j = 0; j < num_4x4_blks_in_ctb; j++)
|
{
|
SET_BL_UNAVAILABLE(ps_ed_ctxt->ai4_nbr_flags[(num_4x4_blks_ctb_y - 1) * 8 + j]);
|
}
|
}
|
col_block_no = 0;
|
/* Call intra analysis for the ctb */
|
ed_calc_ctb(
|
ps_ed_ctxt,
|
ps_ed_ctb,
|
ps_ed_ctb_l1,
|
pu1_src_pre_intra,
|
src_stride,
|
num_4x4_blks_in_ctb,
|
num_4x4_blks_ctb_y,
|
nbr_flags_ptr,
|
layer_no,
|
row_block_no,
|
col_block_no,
|
ps_ipe_optimised_function_list,
|
ps_cmn_utils_optimised_function_list
|
|
);
|
|
pu1_src_pre_intra += src_inc_pre_intra;
|
ps_ed_ctb += inc_ctb;
|
ps_ed_ctb_l1 += 1;
|
/*
|
* For the rest of the ctbs, set left flags available.
|
*/
|
for(j = 0; j < num_4x4_blks_ctb_y; j++)
|
{
|
SET_L_AVAILABLE(ps_ed_ctxt->ai4_nbr_flags[j * 8]);
|
}
|
for(j = 0; j < num_4x4_blks_ctb_y - 1; j++)
|
{
|
SET_BL_AVAILABLE(ps_ed_ctxt->ai4_nbr_flags[j * 8]);
|
SET_TL_AVAILABLE(ps_ed_ctxt->ai4_nbr_flags[(j + 1) * 8]);
|
}
|
if(ht_offset != 0)
|
{
|
SET_TL_AVAILABLE(ps_ed_ctxt->ai4_nbr_flags[0]);
|
}
|
}
|
|
/* The first ctb is processed before the loop.
|
* The last one is processed after the loop.
|
*/
|
for(col_block_no = 1; col_block_no < num_col_blks - 1; col_block_no++)
|
{
|
if(!skip_decomp)
|
{
|
ihevce_scale_by_2(
|
pu1_src,
|
src_stride,
|
pu1_dst_decomp,
|
dst_stride,
|
layer_wd,
|
layer_ht,
|
pu1_wkg_mem,
|
ht_offset,
|
block_ht,
|
block_wd * col_block_no,
|
block_wd,
|
ps_cmn_utils_optimised_function_list->pf_copy_2d,
|
ps_ipe_optimised_function_list->pf_scaling_filter_mxn);
|
/* Disable noise detection */
|
memset(
|
ps_ctb_analyse_curr->s_ctb_noise_params.au1_is_8x8Blk_noisy,
|
0,
|
sizeof(ps_ctb_analyse_curr->s_ctb_noise_params.au1_is_8x8Blk_noisy));
|
|
ps_ctb_analyse_curr->s_ctb_noise_params.i4_noise_present = 0;
|
}
|
|
if(do_pre_intra_analysis)
|
{
|
ed_calc_ctb(
|
ps_ed_ctxt,
|
ps_ed_ctb,
|
ps_ed_ctb_l1,
|
pu1_src_pre_intra,
|
src_stride,
|
num_4x4_blks_in_ctb,
|
num_4x4_blks_ctb_y,
|
nbr_flags_ptr,
|
layer_no,
|
row_block_no,
|
col_block_no,
|
ps_ipe_optimised_function_list,
|
ps_cmn_utils_optimised_function_list);
|
pu1_src_pre_intra += src_inc_pre_intra;
|
ps_ed_ctb += inc_ctb;
|
ps_ed_ctb_l1 += 1;
|
}
|
}
|
|
/* Last ctb in row */
|
if((!skip_decomp) && (col_block_no == (num_col_blks - 1)))
|
{
|
ihevce_scale_by_2(
|
pu1_src,
|
src_stride,
|
pu1_dst_decomp,
|
dst_stride,
|
layer_wd,
|
layer_ht,
|
pu1_wkg_mem,
|
ht_offset,
|
block_ht,
|
block_wd * col_block_no,
|
block_wd,
|
ps_cmn_utils_optimised_function_list->pf_copy_2d,
|
ps_ipe_optimised_function_list->pf_scaling_filter_mxn);
|
{
|
/* Disable noise detection */
|
memset(
|
ps_ctb_analyse_curr->s_ctb_noise_params.au1_is_8x8Blk_noisy,
|
0,
|
sizeof(ps_ctb_analyse_curr->s_ctb_noise_params.au1_is_8x8Blk_noisy));
|
|
ps_ctb_analyse_curr->s_ctb_noise_params.i4_noise_present = 0;
|
}
|
}
|
|
if(do_pre_intra_analysis && (col_block_no == (num_col_blks - 1)))
|
{
|
/*
|
* The last ctb can be complete or incomplete. The complete
|
* ctb is handled in the if and incomplete is handled in the
|
* else case
|
*/
|
//if(num_4x4_blks_last_ctb == num_4x4_blks_in_ctb)
|
if((num_4x4_blks_last_ctb_x == num_4x4_blks_ctb_y) &&
|
(num_4x4_blks_in_ctb == num_4x4_blks_last_ctb_x))
|
{
|
/* Last ctb so set top right not available */
|
SET_TR_UNAVAILABLE(ps_ed_ctxt->ai4_nbr_flags[num_4x4_blks_in_ctb - 1]);
|
|
ed_calc_ctb(
|
ps_ed_ctxt,
|
ps_ed_ctb,
|
ps_ed_ctb_l1,
|
pu1_src_pre_intra,
|
src_stride,
|
num_4x4_blks_in_ctb,
|
num_4x4_blks_in_ctb,
|
nbr_flags_ptr,
|
layer_no,
|
row_block_no,
|
col_block_no,
|
ps_ipe_optimised_function_list,
|
ps_cmn_utils_optimised_function_list);
|
pu1_src_pre_intra += src_inc_pre_intra;
|
ps_ed_ctb += inc_ctb;
|
ps_ed_ctb_l1 += 1;
|
}
|
else
|
{
|
/* Last ctb so set top right not available */
|
for(i = 0; i < num_4x4_blks_ctb_y; i++)
|
{
|
SET_TR_UNAVAILABLE(ps_ed_ctxt->ai4_nbr_flags[i * 8 + num_4x4_blks_in_ctb - 1]);
|
}
|
|
ihevce_ed_calc_incomplete_ctb(
|
ps_ed_ctxt,
|
ps_ed_ctb,
|
ps_ed_ctb_l1,
|
pu1_src_pre_intra,
|
src_stride,
|
num_4x4_blks_last_ctb_x,
|
num_4x4_blks_ctb_y,
|
nbr_flags_ptr,
|
layer_no,
|
row_block_no,
|
col_block_no,
|
ps_ipe_optimised_function_list,
|
ps_cmn_utils_optimised_function_list);
|
}
|
}
|
}
|
|
/*!
|
******************************************************************************
|
* \if Function name : ihevce_decomp_pre_intra_process \endif
|
*
|
* \brief
|
* Frame level function to decompose given layer L0 into coarser layers
|
*
|
* \param[in] pv_ctxt : pointer to master context of decomp_pre_intra module
|
* \param[in] ps_inp : pointer to input yuv buffer (frame buffer)
|
* \param[in] pv_multi_thrd_ctxt : pointer to multithread context
|
* \param[in] thrd_id : thread id
|
*
|
* \return
|
* None
|
*
|
* \author
|
* Ittiam
|
*
|
*****************************************************************************
|
*/
|
void ihevce_decomp_pre_intra_process(
|
void *pv_ctxt,
|
ihevce_lap_output_params_t *ps_lap_out_prms,
|
frm_ctb_ctxt_t *ps_frm_ctb_prms,
|
void *pv_multi_thrd_ctxt,
|
WORD32 thrd_id,
|
WORD32 i4_ping_pong,
|
ihevce_8x8_L0_satd_t *ps_layer0_cur_satd,
|
ihevce_8x8_L0_mean_t *ps_layer0_cur_mean)
|
{
|
WORD32 i4_layer_no;
|
WORD32 i4_num_layers;
|
WORD32 end_of_layer;
|
UWORD8 *pu1_src, *pu1_dst;
|
WORD32 src_stride, dst_stride;
|
WORD32 i4_layer_wd, i4_layer_ht;
|
WORD32 ht_offset, block_ht;
|
WORD32 row_block_no, num_row_blocks;
|
UWORD8 *pu1_wkg_mem;
|
WORD32 block_wd;
|
WORD32 num_col_blks;
|
WORD32 skip_decomp, skip_pre_intra;
|
WORD32 i4_cu_aligned_pic_wd, i4_cu_aligned_pic_ht;
|
ihevce_decomp_pre_intra_master_ctxt_t *ps_master_ctxt =
|
(ihevce_decomp_pre_intra_master_ctxt_t *)pv_ctxt;
|
|
ihevce_decomp_pre_intra_ctxt_t *ps_ctxt =
|
ps_master_ctxt->aps_decomp_pre_intra_thrd_ctxt[thrd_id];
|
multi_thrd_ctxt_t *ps_multi_thrd = (multi_thrd_ctxt_t *)pv_multi_thrd_ctxt;
|
|
ihevce_ed_ctxt_t *ps_ed_ctxt;
|
ihevce_ed_blk_t *ps_ed;
|
ihevce_ed_ctb_l1_t *ps_ed_ctb_l1;
|
WORD32 inc_ctb = 0;
|
WORD32 num_4x4_blks_lyr;
|
|
i4_num_layers = ps_ctxt->i4_num_layers;
|
|
ASSERT(i4_num_layers >= 3);
|
|
/*
|
* Always force minimum layers as 4 so that we would have both l1 and l2
|
* pre intra analysis
|
*/
|
if(i4_num_layers == 3)
|
{
|
i4_num_layers = 4;
|
}
|
|
ps_ctxt->as_layers[0].pu1_inp = (UWORD8 *)ps_lap_out_prms->s_input_buf.pv_y_buf;
|
ps_ctxt->as_layers[0].i4_inp_stride = ps_lap_out_prms->s_input_buf.i4_y_strd;
|
ps_ctxt->as_layers[0].i4_actual_wd = ps_lap_out_prms->s_input_buf.i4_y_wd;
|
ps_ctxt->as_layers[0].i4_actual_ht = ps_lap_out_prms->s_input_buf.i4_y_ht;
|
|
/* ------------ Loop over all the layers --------------- */
|
/* This loop does only decomp for all layers by picking jobs from the job queue */
/* Decomp for all layers will be completed with this for loop */
|
for(i4_layer_no = 0; i4_layer_no < (i4_num_layers - 1); i4_layer_no++)
|
{
|
WORD32 idx = 0;
|
src_stride = ps_ctxt->as_layers[i4_layer_no].i4_inp_stride;
|
pu1_src = ps_ctxt->as_layers[i4_layer_no].pu1_inp;
|
i4_layer_wd = ps_ctxt->as_layers[i4_layer_no].i4_actual_wd;
|
i4_layer_ht = ps_ctxt->as_layers[i4_layer_no].i4_actual_ht;
|
pu1_dst = ps_ctxt->as_layers[i4_layer_no + 1].pu1_inp;
|
dst_stride = ps_ctxt->as_layers[i4_layer_no + 1].i4_inp_stride;
|
block_wd = ps_ctxt->as_layers[i4_layer_no].i4_decomp_blk_wd;
|
block_ht = ps_ctxt->as_layers[i4_layer_no].i4_decomp_blk_ht;
|
num_col_blks = ps_ctxt->as_layers[i4_layer_no].i4_num_col_blks;
|
num_row_blocks = ps_ctxt->as_layers[i4_layer_no].i4_num_row_blks;
|
i4_cu_aligned_pic_wd = ps_frm_ctb_prms->i4_cu_aligned_pic_wd;
|
i4_cu_aligned_pic_ht = ps_frm_ctb_prms->i4_cu_aligned_pic_ht;
|
|
/* register ed_ctxt buffer pointer */
|
//pv_ed_ctxt = &ps_ctxt->as_layers[i4_layer_no].s_early_decision;
|
//ps_ed_ctxt = (ihevce_ed_ctxt_t *)pv_ed_ctxt;
|
//ps_ed = ps_ed_ctxt->ps_ed;
|
|
//pv_ed_ctxt = &ps_ctxt->ps_ed_ctxt;
|
ps_ed_ctxt = ps_ctxt->ps_ed_ctxt;
|
|
/* initialize ed_ctxt here */
|
/* init is moved here since allocation now happens for only one instance,
which is re-used for each layer */
|
ps_ed_ctxt->lambda = ps_ctxt->ai4_lambda[i4_layer_no];
|
ps_ed_ctxt->i4_slice_type = ps_ctxt->i4_slice_type;
|
ps_ed_ctxt->level = ps_ctxt->i4_codec_level;
|
if(0 == i4_layer_no)
|
{
|
ps_ed_ctxt->ps_ed_pic = NULL;
|
ps_ed_ctxt->ps_ed = NULL;
|
ps_ed_ctxt->ps_ed_ctb_l1_pic = NULL;
|
ps_ed_ctxt->ps_ed_ctb_l1 = NULL;
|
}
|
else if(1 == i4_layer_no)
|
{
|
ps_ed_ctxt->ps_ed_pic = ps_ctxt->ps_layer1_buf;
|
ps_ed_ctxt->ps_ed = ps_ctxt->ps_layer1_buf;
|
ps_ed_ctxt->ps_ed_ctb_l1_pic = ps_ctxt->ps_ed_ctb_l1;
|
ps_ed_ctxt->ps_ed_ctb_l1 = ps_ctxt->ps_ed_ctb_l1;
|
ps_ctxt->ps_layer0_cur_satd = NULL;
|
ps_ctxt->ps_layer0_cur_mean = NULL;
|
}
|
else if(2 == i4_layer_no)
|
{
|
ps_ed_ctxt->ps_ed_pic = ps_ctxt->ps_layer2_buf;
|
ps_ed_ctxt->ps_ed = ps_ctxt->ps_layer2_buf;
|
ps_ed_ctxt->ps_ed_ctb_l1_pic = NULL;
|
ps_ed_ctxt->ps_ed_ctb_l1 = NULL;
|
ps_ctxt->ps_layer0_cur_satd = NULL;
|
ps_ctxt->ps_layer0_cur_mean = NULL;
|
}
|
|
/*Calculate the number of 4x4 blocks in a CTB in that layer*/
|
/*Divide block_wd by 4 to get the number of 4x4 blks*/
|
num_4x4_blks_lyr = block_wd >> 2;
|
inc_ctb = num_4x4_blks_lyr * num_4x4_blks_lyr;
|
|
ps_ed = ps_ed_ctxt->ps_ed;
|
ps_ed_ctb_l1 = ps_ed_ctxt->ps_ed_ctb_l1;
|
|
end_of_layer = 0;
|
skip_decomp = 0;
|
skip_pre_intra = 1;
|
//if( i4_layer_no >= ps_ctxt->i4_num_layers)
|
if(i4_layer_no >= (ps_ctxt->i4_num_layers - 1))
|
{
|
skip_decomp = 1;
|
}
|
/* ------------ Loop over all the CTB rows --------------- */
|
while(0 == end_of_layer)
|
{
|
job_queue_t *ps_pre_enc_job;
|
WORD32 num_4x4_blks_ctb_y = 0;
|
WORD32 num_4x4_blks_last_ctb_x = 0;
|
|
/* Get the current row from the job queue */
|
ps_pre_enc_job = (job_queue_t *)ihevce_pre_enc_grp_get_next_job(
|
pv_multi_thrd_ctxt, (DECOMP_JOB_LYR0 + i4_layer_no), 1, i4_ping_pong);
|
|
pu1_wkg_mem = ps_ctxt->pu1_wkg_mem;
|
|
/* If all rows are done, set the end of layer flag to 1 */
|
if(NULL == ps_pre_enc_job)
|
{
|
end_of_layer = 1;
|
}
|
else
|
{
|
/* Obtain the current row's details from the job */
|
row_block_no = ps_pre_enc_job->s_job_info.s_decomp_job_info.i4_vert_unit_row_no;
|
ps_ctxt->as_layers[i4_layer_no].ai4_curr_row_no[idx] = row_block_no;
|
ht_offset = row_block_no * block_ht;
|
|
if(row_block_no < (num_row_blocks))
|
{
|
pu1_dst = ps_ctxt->as_layers[i4_layer_no + 1].pu1_inp +
|
((block_ht >> 1) * dst_stride * row_block_no);
|
|
/*L0 8x8 curr satd for qp mod*/
|
if(i4_layer_no == 0)
|
{
|
ps_ctxt->ps_layer0_cur_satd =
|
ps_layer0_cur_satd + (row_block_no * num_col_blks /*num ctbs*/ *
|
(block_wd >> 3) * (block_ht >> 3));
|
ps_ctxt->ps_layer0_cur_mean =
|
ps_layer0_cur_mean + (row_block_no * num_col_blks /*num ctbs*/ *
|
(block_wd >> 3) * (block_ht >> 3));
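/* (block_wd >> 3) * (block_ht >> 3) is the number of 8x8 blocks per L0 CTB
   (64 for a 64x64 CTB), so the offset above steps over whole CTB rows of the
   per-8x8 SATD/mean entries. */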
|
}
|
|
/* call the row level processing function */
|
ihevce_decomp_pre_intra_process_row(
|
pu1_src,
|
src_stride,
|
pu1_dst,
|
dst_stride,
|
i4_layer_wd,
|
i4_layer_ht,
|
pu1_wkg_mem,
|
ht_offset,
|
block_ht,
|
block_wd,
|
i4_cu_aligned_pic_wd,
|
i4_cu_aligned_pic_ht,
|
num_col_blks,
|
i4_layer_no,
|
ps_ed_ctxt,
|
ps_ed,
|
ps_ed_ctb_l1,
|
ps_ctxt->ps_layer0_cur_satd,
|
ps_ctxt->ps_layer0_cur_mean,
|
num_4x4_blks_ctb_y,
|
num_4x4_blks_last_ctb_x,
|
skip_decomp,
|
skip_pre_intra,
|
row_block_no,
|
ps_ctxt->i4_enable_noise_detection,
|
ps_ctxt->ps_ctb_analyse,
|
&ps_ctxt->s_ipe_optimised_function_list,
|
&ps_ctxt->s_cmn_opt_func);
|
|
/* When decomposition is done from L1 to L2,
pre intra analysis is done on L1 */
|
if(i4_layer_no == 1 || i4_layer_no == 2)
|
{
|
// ps_ed = ps_ed_ctxt->ps_ed +
|
// (row_block_no * inc_ctb * (num_col_blks));
|
}
|
}
|
idx++;
|
/* set the output dependency */
|
ihevce_pre_enc_grp_job_set_out_dep(
|
pv_multi_thrd_ctxt, ps_pre_enc_job, i4_ping_pong);
|
}
|
}
|
ps_ctxt->as_layers[i4_layer_no].i4_num_rows_processed = idx;
|
|
ihevce_ed_frame_init(ps_ed_ctxt, i4_layer_no);
|
|
if((1 == i4_layer_no) && (IHEVCE_QUALITY_P6 == ps_ctxt->i4_quality_preset))
|
{
|
WORD32 vert_ctr, ctb_ctr, i;
|
WORD32 ctb_ctr_blks = ps_ctxt->as_layers[1].i4_num_col_blks;
|
WORD32 vert_ctr_blks = ps_ctxt->as_layers[1].i4_num_row_blks;
|
|
if((ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P6) &&
|
(ps_lap_out_prms->i4_temporal_lyr_id > TEMPORAL_LAYER_DISABLE))
|
{
|
for(vert_ctr = 0; vert_ctr < vert_ctr_blks; vert_ctr++)
|
{
|
ihevce_ed_ctb_l1_t *ps_ed_ctb_row_l1 =
|
ps_ctxt->ps_ed_ctb_l1 + vert_ctr * ps_frm_ctb_prms->i4_num_ctbs_horz;
|
|
for(ctb_ctr = 0; ctb_ctr < ctb_ctr_blks; ctb_ctr++)
|
{
|
ihevce_ed_ctb_l1_t *ps_ed_ctb_curr_l1 = ps_ed_ctb_row_l1 + ctb_ctr;
|
for(i = 0; i < 16; i++)
|
{
|
ps_ed_ctb_curr_l1->i4_best_sad_cost_8x8_l1_ipe[i] = 0x7fffffff;
|
ps_ed_ctb_curr_l1->i4_best_sad_8x8_l1_ipe[i] = 0x7fffffff;
|
}
|
}
|
}
|
}
|
}
|
|
#if DISABLE_L2_IPE_IN_PB_L1_IN_B
|
if(((2 == i4_layer_no) && (ps_lap_out_prms->i4_pic_type == IV_I_FRAME ||
|
ps_lap_out_prms->i4_pic_type == IV_IDR_FRAME)) ||
|
((1 == i4_layer_no) &&
|
(ps_lap_out_prms->i4_temporal_lyr_id <= TEMPORAL_LAYER_DISABLE)) ||
|
((IHEVCE_QUALITY_P6 != ps_ctxt->i4_quality_preset) && (0 != i4_layer_no)))
|
#else
|
if((0 != i4_layer_no) &&
|
(1 != ((IHEVCE_QUALITY_P6 == ps_ctxt->i4_quality_preset) &&
|
(ps_lap_out_prms->i4_temporal_lyr_id > TEMPORAL_LAYER_DISABLE))))
|
#endif
|
{
|
WORD32 i4_num_rows = ps_ctxt->as_layers[i4_layer_no].i4_num_rows_processed;
|
|
src_stride = ps_ctxt->as_layers[i4_layer_no].i4_inp_stride;
|
pu1_src = ps_ctxt->as_layers[i4_layer_no].pu1_inp;
|
i4_layer_wd = ps_ctxt->as_layers[i4_layer_no].i4_actual_wd;
|
i4_layer_ht = ps_ctxt->as_layers[i4_layer_no].i4_actual_ht;
|
pu1_dst = ps_ctxt->as_layers[i4_layer_no + 1].pu1_inp;
|
dst_stride = ps_ctxt->as_layers[i4_layer_no + 1].i4_inp_stride;
|
block_wd = ps_ctxt->as_layers[i4_layer_no].i4_decomp_blk_wd;
|
block_ht = ps_ctxt->as_layers[i4_layer_no].i4_decomp_blk_ht;
|
num_col_blks = ps_ctxt->as_layers[i4_layer_no].i4_num_col_blks;
|
num_row_blocks = ps_ctxt->as_layers[i4_layer_no].i4_num_row_blks;
|
i4_cu_aligned_pic_wd = ps_frm_ctb_prms->i4_cu_aligned_pic_wd;
|
i4_cu_aligned_pic_ht = ps_frm_ctb_prms->i4_cu_aligned_pic_ht;
|
|
/* register ed_ctxt buffer pointer */
|
ps_ed_ctxt = ps_ctxt->ps_ed_ctxt;
|
|
/* initialize ed_ctxt here */
|
/* init is moved here since allocation now happens for only one instance,
which is re-used for each layer */
|
ps_ed_ctxt->lambda = ps_ctxt->ai4_lambda[i4_layer_no];
|
ps_ed_ctxt->i4_slice_type = ps_ctxt->i4_slice_type;
|
ps_ed_ctxt->level = ps_ctxt->i4_codec_level;
|
if(1 == i4_layer_no)
|
{
|
ps_ed_ctxt->ps_ed_pic = ps_ctxt->ps_layer1_buf;
|
ps_ed_ctxt->ps_ed = ps_ctxt->ps_layer1_buf;
|
ps_ed_ctxt->ps_ed_ctb_l1_pic = ps_ctxt->ps_ed_ctb_l1;
|
ps_ed_ctxt->ps_ed_ctb_l1 = ps_ctxt->ps_ed_ctb_l1;
|
ps_ctxt->ps_layer0_cur_satd = NULL;
|
ps_ctxt->ps_layer0_cur_mean = NULL;
|
}
|
else if(2 == i4_layer_no)
|
{
|
ps_ed_ctxt->ps_ed_pic = ps_ctxt->ps_layer2_buf;
|
ps_ed_ctxt->ps_ed = ps_ctxt->ps_layer2_buf;
|
ps_ed_ctxt->ps_ed_ctb_l1_pic = NULL;
|
ps_ed_ctxt->ps_ed_ctb_l1 = NULL;
|
ps_ctxt->ps_layer0_cur_satd = NULL;
|
ps_ctxt->ps_layer0_cur_mean = NULL;
|
}
|
|
/*Calculate the number of 4x4 blocks in a CTB in that layer*/
|
/*Divide block_wd by 4 to get the number of 4x4 blks*/
|
num_4x4_blks_lyr = block_wd >> 2;
|
inc_ctb = num_4x4_blks_lyr * num_4x4_blks_lyr;
|
|
ps_ed = ps_ed_ctxt->ps_ed;
|
ps_ed_ctb_l1 = ps_ed_ctxt->ps_ed_ctb_l1;
|
skip_decomp = 1;
|
skip_pre_intra = 0;
|
for(idx = 0; idx < i4_num_rows; idx++)
|
{
|
WORD32 num_4x4_blks_ctb_y = 0;
|
WORD32 num_4x4_blks_last_ctb_x = 0;
|
|
pu1_wkg_mem = ps_ctxt->pu1_wkg_mem;
|
|
{
|
/* Obtain the current row's details from the job */
|
row_block_no = ps_ctxt->as_layers[i4_layer_no].ai4_curr_row_no[idx];
|
ht_offset = row_block_no * block_ht;
|
|
if(row_block_no < (num_row_blocks))
|
{
|
pu1_dst = ps_ctxt->as_layers[i4_layer_no + 1].pu1_inp +
|
((block_ht >> 1) * dst_stride * row_block_no);
|
|
if(i4_layer_no == 1 || i4_layer_no == 2)
|
{
|
ps_ed = ps_ed_ctxt->ps_ed + (row_block_no * inc_ctb * (num_col_blks));
|
ps_ed_ctb_l1 = ps_ed_ctxt->ps_ed_ctb_l1 + (row_block_no * num_col_blks);
|
|
ps_ed_ctxt->i4_quality_preset = ps_ctxt->i4_quality_preset;
|
num_4x4_blks_ctb_y = block_ht >> 2;
|
num_4x4_blks_last_ctb_x = block_wd >> 2;
|
|
if(row_block_no == num_row_blocks - 1)
|
{
|
if(i4_layer_ht % block_ht)
|
{
|
num_4x4_blks_ctb_y = ((i4_layer_ht % block_ht) + 3) >> 2;
|
}
|
}
|
|
if(i4_layer_wd % block_wd)
|
{
|
num_4x4_blks_last_ctb_x = ((i4_layer_wd % block_wd) + 3) >> 2;
|
}
|
}
|
|
/* call the row level processing function */
|
ihevce_decomp_pre_intra_process_row(
|
pu1_src,
|
src_stride,
|
pu1_dst,
|
dst_stride,
|
i4_layer_wd,
|
i4_layer_ht,
|
pu1_wkg_mem,
|
ht_offset,
|
block_ht,
|
block_wd,
|
i4_cu_aligned_pic_wd,
|
i4_cu_aligned_pic_ht,
|
num_col_blks,
|
i4_layer_no,
|
ps_ed_ctxt,
|
ps_ed,
|
ps_ed_ctb_l1,
|
ps_ctxt->ps_layer0_cur_satd,
|
ps_ctxt->ps_layer0_cur_mean,
|
num_4x4_blks_ctb_y,
|
num_4x4_blks_last_ctb_x,
|
skip_decomp,
|
skip_pre_intra,
|
row_block_no,
|
0,
|
NULL,
|
&ps_ctxt->s_ipe_optimised_function_list,
|
&ps_ctxt->s_cmn_opt_func);
|
}
|
}
|
if(1 == i4_layer_no)
|
{
|
ps_multi_thrd->aai4_l1_pre_intra_done[i4_ping_pong][row_block_no] = 1;
|
}
|
}
|
for(idx = 0; idx < MAX_NUM_CTB_ROWS_FRM; idx++)
|
{
|
ps_ctxt->as_layers[i4_layer_no].ai4_curr_row_no[idx] = -1;
|
}
|
ps_ctxt->as_layers[i4_layer_no].i4_num_rows_processed = 0;
|
}
|
|
#if DISABLE_L2_IPE_IN_PB_L1_IN_B
|
if((IHEVCE_QUALITY_P6 == ps_ctxt->i4_quality_preset) &&
|
(((i4_layer_no == 2) && (ps_lap_out_prms->i4_pic_type == ISLICE)) ||
|
((i4_layer_no == 1) && (ps_lap_out_prms->i4_temporal_lyr_id > TEMPORAL_LAYER_DISABLE))))
|
{
|
WORD32 i4_num_rows = ps_ctxt->as_layers[i4_layer_no].i4_num_rows_processed;
|
if(1 == i4_layer_no)
|
{
|
for(idx = 0; idx < i4_num_rows; idx++)
|
{
|
row_block_no = ps_ctxt->as_layers[i4_layer_no].ai4_curr_row_no[idx];
|
|
{
|
ps_multi_thrd->aai4_l1_pre_intra_done[i4_ping_pong][row_block_no] = 1;
|
}
|
}
|
}
|
for(idx = 0; idx < MAX_NUM_CTB_ROWS_FRM; idx++)
|
{
|
ps_ctxt->as_layers[i4_layer_no].ai4_curr_row_no[idx] = -1;
|
}
|
ps_ctxt->as_layers[i4_layer_no].i4_num_rows_processed = 0;
|
}
|
#else
|
if((i4_layer_no != 0) && ((IHEVCE_QUALITY_P6 == ps_ctxt->i4_quality_preset) &&
|
(ps_lap_out_prms->i4_temporal_lyr_id > TEMPORAL_LAYER_DISABLE)))
|
{
|
WORD32 i4_num_rows = ps_ctxt->as_layers[i4_layer_no].i4_num_rows_processed;
|
for(idx = 0; idx < i4_num_rows; idx++)
|
{
|
row_block_no = ps_ctxt->as_layers[i4_layer_no].ai4_curr_row_no[idx];
|
if(1 == i4_layer_no)
|
{
|
ps_multi_thrd->aai4_l1_pre_intra_done[i4_ping_pong][row_block_no] = 1;
|
}
|
}
|
for(idx = 0; idx < MAX_NUM_CTB_ROWS_FRM; idx++)
|
{
|
ps_ctxt->as_layers[i4_layer_no].ai4_curr_row_no[idx] = -1;
|
}
|
ps_ctxt->as_layers[i4_layer_no].i4_num_rows_processed = 0;
|
}
|
#endif
|
}
|
}
|
|
/*!
|
************************************************************************
|
* \brief
|
* return number of records used by decomp pre intra
|
*
|
************************************************************************
|
*/
|
WORD32 ihevce_decomp_pre_intra_get_num_mem_recs(void)
|
{
|
return (NUM_DECOMP_PRE_INTRA_MEM_RECS);
|
}
|
|
/*!
|
************************************************************************
|
* @brief
|
* return each record attributes of decomp pre intra
|
************************************************************************
|
*/
|
WORD32 ihevce_decomp_pre_intra_get_mem_recs(
|
iv_mem_rec_t *ps_mem_tab, WORD32 i4_num_proc_thrds, WORD32 i4_mem_space)
|
{
|
/* memories should be requested assuming worst case requirements */
|
|
/* Module context structure */
|
ps_mem_tab[DECOMP_PRE_INTRA_CTXT].i4_mem_size = sizeof(ihevce_decomp_pre_intra_master_ctxt_t);
|
ps_mem_tab[DECOMP_PRE_INTRA_CTXT].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
|
ps_mem_tab[DECOMP_PRE_INTRA_CTXT].i4_mem_alignment = 8;
|
|
/* Thread context structure */
|
ps_mem_tab[DECOMP_PRE_INTRA_THRDS_CTXT].i4_mem_size =
|
i4_num_proc_thrds * sizeof(ihevce_decomp_pre_intra_ctxt_t);
|
ps_mem_tab[DECOMP_PRE_INTRA_THRDS_CTXT].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
|
ps_mem_tab[DECOMP_PRE_INTRA_THRDS_CTXT].i4_mem_alignment = 8;
|
|
/* early decision context structure */
|
ps_mem_tab[DECOMP_PRE_INTRA_ED_CTXT].i4_mem_size = i4_num_proc_thrds * sizeof(ihevce_ed_ctxt_t);
|
ps_mem_tab[DECOMP_PRE_INTRA_ED_CTXT].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
|
ps_mem_tab[DECOMP_PRE_INTRA_ED_CTXT].i4_mem_alignment = 8;
|
|
return (NUM_DECOMP_PRE_INTRA_MEM_RECS);
|
}
|
|
/*!
|
************************************************************************
|
* @brief
|
* Init decomp pre intra context
|
************************************************************************
|
*/
|
void *ihevce_decomp_pre_intra_init(
|
iv_mem_rec_t *ps_mem_tab,
|
ihevce_static_cfg_params_t *ps_init_prms,
|
WORD32 i4_num_proc_thrds,
|
func_selector_t *ps_func_selector,
|
WORD32 i4_resolution_id,
|
UWORD8 u1_is_popcnt_available)
|
{
|
ihevce_decomp_pre_intra_master_ctxt_t *ps_master_ctxt;
|
ihevce_decomp_pre_intra_ctxt_t *ps_ctxt;
|
WORD32 thread_no;
|
WORD32 n_tot_layers;
|
WORD32 count;
|
WORD32 a_wd[MAX_NUM_HME_LAYERS], a_ht[MAX_NUM_HME_LAYERS], layer_no;
|
WORD32 a_disp_wd[MAX_NUM_LAYERS], a_disp_ht[MAX_NUM_LAYERS];
|
ihevce_ed_ctxt_t *ps_ed_ctxt;
|
WORD32 min_cu_size;
|
|
/* get the min cu size from config params */
|
min_cu_size = ps_init_prms->s_config_prms.i4_min_log2_cu_size;
|
|
min_cu_size = 1 << min_cu_size;
|
|
/* Get the height and width of each layer */
|
*a_wd = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width +
|
SET_CTB_ALIGN(
|
ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width, min_cu_size);
|
*a_ht =
|
ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height +
|
SET_CTB_ALIGN(
|
ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height, min_cu_size);
|
|
n_tot_layers = hme_derive_num_layers(1, a_wd, a_ht, a_disp_wd, a_disp_ht);
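/* hme_derive_num_layers() (as used here) returns the total number of decomposition
   layers and fills a_wd[] / a_ht[] with the dimensions of each coarser layer, each
   layer being roughly half the previous one in both directions. */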
|
|
/* Decomp state structure */
|
ps_master_ctxt =
|
(ihevce_decomp_pre_intra_master_ctxt_t *)ps_mem_tab[DECOMP_PRE_INTRA_CTXT].pv_base;
|
ps_master_ctxt->i4_num_proc_thrds = i4_num_proc_thrds;
|
|
ps_ctxt = (ihevce_decomp_pre_intra_ctxt_t *)ps_mem_tab[DECOMP_PRE_INTRA_THRDS_CTXT].pv_base;
|
ps_ed_ctxt = (ihevce_ed_ctxt_t *)ps_mem_tab[DECOMP_PRE_INTRA_ED_CTXT].pv_base;
|
|
for(thread_no = 0; thread_no < ps_master_ctxt->i4_num_proc_thrds; thread_no++)
|
{
|
ps_master_ctxt->aps_decomp_pre_intra_thrd_ctxt[thread_no] = ps_ctxt;
|
|
ps_master_ctxt->aps_decomp_pre_intra_thrd_ctxt[thread_no]->i4_num_layers = n_tot_layers;
|
|
ps_master_ctxt->aps_decomp_pre_intra_thrd_ctxt[thread_no]->pu1_wkg_mem =
|
&ps_master_ctxt->aps_decomp_pre_intra_thrd_ctxt[thread_no]->au1_wkg_mem[0];
|
|
ps_master_ctxt->aps_decomp_pre_intra_thrd_ctxt[thread_no]->ps_ed_ctxt = ps_ed_ctxt;
|
|
for(layer_no = 0; layer_no < n_tot_layers; layer_no++)
|
{
|
WORD32 max_ctb_size;
|
WORD32 decomp_blk_ht, decomp_blk_wd;
|
|
ps_ctxt->as_layers[layer_no].i4_actual_wd = a_wd[layer_no];
|
ps_ctxt->as_layers[layer_no].i4_actual_ht = a_ht[layer_no];
|
ps_ctxt->as_layers[layer_no].i4_inp_stride = 0;
|
ps_ctxt->as_layers[layer_no].pu1_inp = NULL;
|
ps_ctxt->as_layers[layer_no].i4_num_rows_processed = 0;
|
|
for(count = 0; count < MAX_NUM_CTB_ROWS_FRM; count++)
|
{
|
ps_ctxt->as_layers[layer_no].ai4_curr_row_no[count] = -1;
|
}
|
if(0 == layer_no)
|
{
|
ps_ctxt->as_layers[layer_no].i4_padded_ht = a_ht[layer_no];
|
ps_ctxt->as_layers[layer_no].i4_padded_wd = a_wd[layer_no];
|
}
|
else
|
{
|
ps_ctxt->as_layers[layer_no].i4_padded_ht = a_ht[layer_no] + 32 + 4;
|
ps_ctxt->as_layers[layer_no].i4_padded_wd = a_wd[layer_no] + 32 + 4;
|
}
|
|
/** If CTB size = 64, decomp_blk_wd = 64 for L0, 32 for L1, 16 for L2, 8 for L3 */
|
max_ctb_size = 1 << ps_init_prms->s_config_prms.i4_max_log2_cu_size;
|
|
ps_ctxt->as_layers[layer_no].i4_decomp_blk_ht = max_ctb_size >> layer_no;
|
ps_ctxt->as_layers[layer_no].i4_decomp_blk_wd = max_ctb_size >> layer_no;
|
|
decomp_blk_ht = ps_ctxt->as_layers[layer_no].i4_decomp_blk_ht;
|
decomp_blk_wd = ps_ctxt->as_layers[layer_no].i4_decomp_blk_wd;
|
|
ps_ctxt->as_layers[layer_no].i4_num_row_blks =
|
((a_ht[layer_no] + (decomp_blk_ht - 1)) / decomp_blk_ht);
|
|
ps_ctxt->as_layers[layer_no].i4_num_col_blks =
|
((a_wd[layer_no] + (decomp_blk_wd - 1)) / decomp_blk_wd);
|
}
|
ps_ed_ctxt->ps_func_selector = ps_func_selector;
|
|
ps_ctxt->i4_quality_preset =
|
ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_quality_preset;
|
|
if(ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P7)
|
{
|
ps_ctxt->i4_quality_preset = IHEVCE_QUALITY_P6;
|
}
|
|
if(ps_init_prms->s_coding_tools_prms.i4_vqet &
|
(1 << BITPOS_IN_VQ_TOGGLE_FOR_CONTROL_TOGGLER))
|
{
|
if(ps_init_prms->s_coding_tools_prms.i4_vqet &
|
(1 << BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_NOISE_PRESERVATION))
|
{
|
ps_ctxt->i4_enable_noise_detection = 1;
|
}
|
else
|
{
|
ps_ctxt->i4_enable_noise_detection = 0;
|
}
|
}
|
else
|
{
|
ps_ctxt->i4_enable_noise_detection = 0;
|
}
|
|
ihevce_cmn_utils_instr_set_router(
|
&ps_ctxt->s_cmn_opt_func, u1_is_popcnt_available, ps_init_prms->e_arch_type);
|
|
ihevce_ipe_instr_set_router(
|
&ps_ctxt->s_ipe_optimised_function_list, ps_init_prms->e_arch_type);
|
|
ps_ctxt++;
|
ps_ed_ctxt++;
|
}
|
/* return the handle to caller */
|
return ((void *)ps_master_ctxt);
|
}
|
|
/*!
|
******************************************************************************
|
* \if Function name : ihevce_decomp_pre_intra_frame_init \endif
|
*
|
* \brief
|
*  Frame initialization for decomp pre intra analysis.
|
*
|
* \param[in] pv_ctxt : pointer to module ctxt
|
* \param[in] ppu1_decomp_lyr_bufs : pointer to array of layer buffer pointers
|
* \param[in] pi4_lyr_buf_stride : pointer to array of layer buffer strides
|
*
|
* \return
|
* None
|
*
|
* \author
|
* Ittiam
|
*
|
*****************************************************************************
|
*/
|
void ihevce_decomp_pre_intra_frame_init(
|
void *pv_ctxt,
|
UWORD8 **ppu1_decomp_lyr_bufs,
|
WORD32 *pi4_lyr_buf_stride,
|
ihevce_ed_blk_t *ps_layer1_buf,
|
ihevce_ed_blk_t *ps_layer2_buf,
|
ihevce_ed_ctb_l1_t *ps_ed_ctb_l1,
|
WORD32 i4_ol_sad_lambda_qf,
|
WORD32 i4_slice_type,
|
ctb_analyse_t *ps_ctb_analyse)
|
{
|
ihevce_decomp_pre_intra_master_ctxt_t *ps_master_ctxt;
|
ihevce_decomp_pre_intra_ctxt_t *ps_ctxt;
|
WORD32 thread_no;
|
|
/* Decomp state structure */
|
ps_master_ctxt = (ihevce_decomp_pre_intra_master_ctxt_t *)pv_ctxt;
|
|
for(thread_no = 0; thread_no < ps_master_ctxt->i4_num_proc_thrds; thread_no++)
|
{
|
WORD32 layer_no;
|
|
ps_ctxt = ps_master_ctxt->aps_decomp_pre_intra_thrd_ctxt[thread_no];
|
|
/* L0 layer (actual input) is registered in process call */
|
for(layer_no = 1; layer_no < ps_ctxt->i4_num_layers; layer_no++)
|
{
|
ps_ctxt->as_layers[layer_no].i4_inp_stride = pi4_lyr_buf_stride[layer_no - 1];
|
ps_ctxt->as_layers[layer_no].pu1_inp = ppu1_decomp_lyr_bufs[layer_no - 1];
|
|
/*Populating the buffer pointers for layer1 and layer2 buffers to store the
structure for each 4x4 block after pre intra analysis on their respective layers*/
|
|
if(layer_no == 1)
|
{
|
WORD32 sad_lambda_l1 = (3 * i4_ol_sad_lambda_qf >> 2);
|
WORD32 temp = 1 << LAMBDA_Q_SHIFT;
|
WORD32 lambda = ((temp) > sad_lambda_l1) ? temp : sad_lambda_l1;
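/* lambda for L1 = max(1 << LAMBDA_Q_SHIFT, (3 * open-loop SAD lambda) / 4) */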
|
//ps_ctxt->as_layers[1].s_early_decision.ps_ed_pic = ps_layer1_buf;
|
//ps_ctxt->as_layers[1].s_early_decision.ps_ed = ps_layer1_buf;
|
ps_ctxt->ps_layer1_buf = ps_layer1_buf;
|
ps_ctxt->ps_ed_ctb_l1 = ps_ed_ctb_l1;
|
ps_ctxt->ai4_lambda[layer_no] = lambda;
|
ps_ctxt->i4_codec_level = 0;
|
ps_ctxt->i4_slice_type = i4_slice_type;
|
}
|
else if(layer_no == 2)
|
{
|
WORD32 sad_lambda_l2 = i4_ol_sad_lambda_qf >> 1;
|
WORD32 temp = 1 << LAMBDA_Q_SHIFT;
|
WORD32 lambda = ((temp) > sad_lambda_l2) ? temp : sad_lambda_l2;
|
|
//ps_ctxt->as_layers[2].s_early_decision.ps_ed_pic = ps_layer2_buf;
|
//ps_ctxt->as_layers[2].s_early_decision.ps_ed = ps_layer2_buf;
|
ps_ctxt->ps_layer2_buf = ps_layer2_buf;
|
//ihevce_ed_frame_init(ps_ctxt->ps_ed_ctxt);
|
ps_ctxt->ai4_lambda[layer_no] = lambda;
|
ps_ctxt->i4_codec_level = 0;
|
ps_ctxt->i4_slice_type = i4_slice_type;
|
}
|
else
|
{
|
//ps_ctxt->as_layers[0].s_early_decision.ps_ed_pic = NULL;
|
//ps_ctxt->as_layers[0].s_early_decision.ps_ed = NULL;
|
//ps_ctxt->ps_layer1_buf = NULL;
|
ps_ctxt->ai4_lambda[layer_no] = -1;
|
ps_ctxt->i4_codec_level = 0;
|
ps_ctxt->i4_slice_type = i4_slice_type;
|
}
|
}
|
|
/* make the ps_ctb_analyse reference a part of the private context */
|
ps_ctxt->ps_ctb_analyse = ps_ctb_analyse;
|
}
|
}
|
|
/**
|
*******************************************************************************
|
*
|
* @brief
|
* Merge Sort function.
|
*
|
* @par Description:
|
* This function sorts the data in the input array in ascending
|
*  order using the merge sort algorithm. Intermediate data obtained during
*  merge sort is stored in the output 2-D array.
|
*
|
* @param[in]
|
* pi4_input_val : Input 1-D array
|
* aai4_output_val: Output 2-D array containing elements sorted in sets of
|
* 4,16,64 etc.
|
* i4_length : length of the array
|
*  i4_ip_sort_level: Input sort level. Specifies the level up to which the array is already
*                    sorted: it should be 1 if the array is unsorted, and 4 if the array is
*                    sorted in sets of 4.
*  i4_op_sort_level: Output sort level. Specifies the level up to which sorting is required.
*                    If it is given as the length of the array, the whole array is sorted.
|
*
|
* @returns
|
*
|
* @remarks
|
* None
|
*
|
*******************************************************************************
|
*/
|
void ihevce_merge_sort(
|
WORD32 *pi4_input_val,
|
WORD32 aai4_output_val[][64],
|
WORD32 i4_length,
|
WORD32 i4_ip_sort_level,
|
WORD32 i4_op_sort_level)
|
{
|
WORD32 i, j, k;
|
WORD32 count, level;
|
WORD32 temp[64];
|
WORD32 *pi4_temp_buf_cpy;
|
WORD32 *pi4_temp = &temp[0];
|
WORD32 calc_level;
|
|
pi4_temp_buf_cpy = pi4_temp;
|
|
GETRANGE(calc_level, i4_op_sort_level / i4_ip_sort_level);
|
|
calc_level = calc_level - 1;
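/* GETRANGE() returns the number of bits needed to represent the ratio, so after
   the decrement calc_level is effectively log2(i4_op_sort_level / i4_ip_sort_level),
   i.e. the number of merge passes required. */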
|
|
/*** This function is written under the assumption that we need only intermediate values of
|
sort in the range of 4,16,64 etc. ***/
|
ASSERT((calc_level % 2) == 0);
|
|
/** Each iteration of this for loop performs one level of merging; one intermediate sorted array is produced every 2 iterations **/
|
for(level = 0; level < calc_level; level++)
|
{
|
/** Merges adjacent sets of elements based on current sort level **/
|
for(count = 0; count < i4_length; (count = count + (i4_ip_sort_level * 2)))
|
{
|
i = 0;
|
j = 0;
|
if(pi4_input_val[i4_ip_sort_level - 1] < pi4_input_val[i4_ip_sort_level])
|
{
|
/*** Early exit: both halves are already sorted and the last element of the first half
     is smaller than the first element of the second half, so the pair is already merged ***/
|
memcpy(&pi4_temp[0], pi4_input_val, sizeof(WORD32) * i4_ip_sort_level * 2);
|
}
|
else
|
{
|
for(k = 0; k < (i4_ip_sort_level * 2); k++)
|
{
|
if((i < i4_ip_sort_level) && (j < i4_ip_sort_level))
|
{
|
if(pi4_input_val[i] > pi4_input_val[j + i4_ip_sort_level])
|
{
|
/** copy to output array **/
|
pi4_temp[k] = pi4_input_val[j + i4_ip_sort_level];
|
j++;
|
}
|
else
|
{
|
/** copy to output array **/
|
pi4_temp[k] = pi4_input_val[i];
|
i++;
|
}
|
}
|
else if(i == i4_ip_sort_level)
|
{
|
/** copy the remaining data to output array **/
|
pi4_temp[k] = pi4_input_val[j + i4_ip_sort_level];
|
j++;
|
}
|
else
|
{
|
/** copy the remaining data to output array **/
|
pi4_temp[k] = pi4_input_val[i];
|
i++;
|
}
|
}
|
}
|
pi4_input_val += (i4_ip_sort_level * 2);
|
pi4_temp += (i4_ip_sort_level * 2);
|
}
|
pi4_input_val = pi4_temp - i4_length;
|
|
if(level % 2)
|
{
|
/** Assign a temp address for storing next sort level output as we will not need this data as output **/
|
pi4_temp = pi4_temp_buf_cpy;
|
}
|
else
|
{
|
/** Assign address for storing the intermediate data into output 2-D array **/
|
pi4_temp = aai4_output_val[level / 2];
|
}
|
i4_ip_sort_level *= 2;
|
}
|
}
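/* A minimal usage sketch (kept under #if 0, illustrative only): sorting 64 L1 4x4
 * SATD values into ascending sets of 4, 16 and 64, mirroring the calls made in
 * ihevce_decomp_pre_intra_curr_frame_pre_intra_deinit() below. Array names here
 * are placeholders. */
#if 0
{
    WORD32 ai4_satd[64]; /* unsorted per-4x4 SATDs of one L1 32x32 block */
    WORD32 aai4_sorted[3][64]; /* [0]: sets of 4, [1]: sets of 16, [2]: fully sorted */

    ihevce_merge_sort(&ai4_satd[0], aai4_sorted, 64, 1, 64);
}
#endif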
|
|
void ihevce_decomp_pre_intra_curr_frame_pre_intra_deinit(
|
void *pv_pre_intra_ctxt,
|
pre_enc_me_ctxt_t *ps_curr_out,
|
WORD32 i4_is_last_thread,
|
frm_ctb_ctxt_t *ps_frm_ctb_prms,
|
WORD32 i4_temporal_lyr_id,
|
WORD32 i4_enable_noise_detection)
|
{
|
ihevce_decomp_pre_intra_master_ctxt_t *ps_pre_intra_master_ctxt =
|
(ihevce_decomp_pre_intra_master_ctxt_t *)pv_pre_intra_ctxt;
|
ihevce_decomp_pre_intra_ctxt_t *ps_pre_intra_ctxt =
|
ps_pre_intra_master_ctxt->aps_decomp_pre_intra_thrd_ctxt[0];
|
|
WORD32 i4_k;
|
WORD32 ctb_ctr, vert_ctr;
|
|
WORD32 ai4_curr_frame_8x8_sum_act[2] = { 0, 0 };
|
LWORD64 ai8_curr_frame_8x8_sum_act_sqr[2] = { 0, 0 };
|
WORD32 ai4_curr_frame_8x8_sum_blks[2] = { 0, 0 };
|
ULWORD64 u8_curr_frame_8x8_sum_act_sqr = 0;
|
|
LWORD64 ai8_curr_frame_16x16_sum_act_sqr[3] = { 0, 0, 0 };
|
WORD32 ai4_curr_frame_16x16_sum_act[3] = { 0, 0, 0 };
|
WORD32 ai4_curr_frame_16x16_sum_blks[3] = { 0, 0, 0 };
|
|
LWORD64 ai8_curr_frame_32x32_sum_act_sqr[3] = { 0, 0, 0 };
|
WORD32 ai4_curr_frame_32x32_sum_act[3] = { 0, 0, 0 };
|
WORD32 ai4_curr_frame_32x32_sum_blks[3] = { 0, 0, 0 };
|
|
(void)i4_temporal_lyr_id;
|
(void)i4_enable_noise_detection;
|
|
if(i4_is_last_thread == 1)
|
{
|
WORD32 i4_slice_type = ps_curr_out->s_slice_hdr.i1_slice_type;
|
//ps_pre_intra_ctxt->i4_slice_type;
|
WORD32 ctb_ctr_blks = ps_pre_intra_ctxt->as_layers[1].i4_num_col_blks;
|
WORD32 vert_ctr_blks = ps_pre_intra_ctxt->as_layers[1].i4_num_row_blks;
|
ihevce_ed_ctb_l1_t *ps_ed_ctb_pic_l1 = ps_curr_out->ps_ed_ctb_l1;
|
WORD32 block_wd = ps_pre_intra_ctxt->as_layers[1].i4_decomp_blk_wd;
|
WORD32 inc_ctb = ((block_wd >> 2) * (block_wd >> 2));
|
ihevce_ed_blk_t *ps_ed_blk_l1 = ps_curr_out->ps_layer1_buf;
|
ihevce_ed_blk_t *ps_ed;
|
WORD32 i, j;
|
WORD32 i4_avg_noise_satd;
|
WORD32 k;
|
WORD32 i4_layer_wd = ps_pre_intra_ctxt->as_layers[1].i4_actual_wd;
|
WORD32 i4_layer_ht = ps_pre_intra_ctxt->as_layers[1].i4_actual_ht;
|
|
/*Calculate min noise threshold */
|
/*Min noise threshold is calculated by taking the average of the lowest 1% of 4x4 satd values in the frame*/
|
//ihevce_ed_ctxt_t *ps_ed_ctxt = ps_pre_intra_ctxt->ps_ed_ctxt;
|
WORD32 i4_min_blk = ((MIN_BLKS * (i4_layer_wd >> 1) * (i4_layer_ht >> 1)) / 100);
|
WORD32 ai4_noise_thr_hstrgm[MAX_SATD_THRSHLD];
|
memset(&ai4_noise_thr_hstrgm[0], 0, (sizeof(WORD32) * MAX_SATD_THRSHLD));
|
ASSERT(!(USE_CUR_L0_SATD && USE_CUR_SATD));
|
for(vert_ctr = 0; vert_ctr < vert_ctr_blks; vert_ctr++)
|
{
|
ps_ed = ps_ed_blk_l1 + (vert_ctr * inc_ctb * (ctb_ctr_blks));
|
for(ctb_ctr = 0; ctb_ctr < ctb_ctr_blks; ctb_ctr++)
|
{
|
/* Populate avg satd to calculate MI and activity factors */
|
for(i = 0; i < 4; i++)
|
{
|
for(j = 0; j < 4; j++)
|
{
|
for(k = 0; k < 4; k++)
|
{
|
if(-1 != (ps_ed + j * 4 + i * 16 + k)->i4_4x4_satd)
|
{
|
WORD32 i4_satd_lim;
|
i4_satd_lim = (ps_ed + j * 4 + i * 16 + k)->i4_4x4_satd;
|
/* Histogram creation for Noise threshold */
|
if(i4_satd_lim < MAX_SATD_THRSHLD)
|
{
|
ai4_noise_thr_hstrgm[i4_satd_lim]++;
|
}
|
}
|
}
|
}
|
}
|
ps_ed += inc_ctb;
|
}
|
}
|
{
|
WORD32 i4_total_blks = 0;
|
LWORD64 i8_acc_satd = 0;
|
for(i = MIN_SATD_THRSHLD; i < MAX_SATD_THRSHLD; i++)
|
{
|
i4_total_blks += ai4_noise_thr_hstrgm[i];
|
i8_acc_satd += (i * ai4_noise_thr_hstrgm[i]);
|
|
if(i4_total_blks > i4_min_blk)
|
break;
|
}
|
if(i4_total_blks < i4_min_blk)
|
{
|
i4_avg_noise_satd = SATD_NOISE_FLOOR_THRESHOLD;
|
}
|
else
|
{
|
i4_avg_noise_satd = (WORD32)(i8_acc_satd + (i4_total_blks >> 1)) / i4_total_blks;
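/* rounded average of the accumulated lowest-SATD values: this is the noise floor estimate */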
|
}
|
}
|
|
ps_curr_out->i4_avg_noise_thrshld_4x4 = i4_avg_noise_satd;
|
|
for(vert_ctr = 0; vert_ctr < vert_ctr_blks; vert_ctr++)
|
{
|
ihevce_ed_ctb_l1_t *ps_ed_ctb_row_l1 =
|
ps_ed_ctb_pic_l1 + vert_ctr * ps_frm_ctb_prms->i4_num_ctbs_horz;
|
ps_ed = ps_ed_blk_l1 + (vert_ctr * inc_ctb * (ctb_ctr_blks));
|
|
for(ctb_ctr = 0; ctb_ctr < ctb_ctr_blks; ctb_ctr++)
|
{
|
/*sum of (sum of L1_4x4 @ L1_8x8) @ L1_16x16 level */
|
WORD32 ai4_sum_sum_4x4_satd_16x16[4] = { 0, 0, 0, 0 };
|
/*min of (sum of L1_4x4 @ L1_8x8) @ L1_16x16 level */
|
WORD32 ai4_min_sum_4x4_satd_16x16[4] = {
|
MAX_32BIT_VAL, MAX_32BIT_VAL, MAX_32BIT_VAL, MAX_32BIT_VAL
|
};
|
/*min of (min of L1_4x4 @ L1_8x8) @ L1_16x16 level */
|
WORD32 ai4_min_min_4x4_satd_16x16[4] = {
|
MAX_32BIT_VAL, MAX_32BIT_VAL, MAX_32BIT_VAL, MAX_32BIT_VAL
|
};
|
WORD32 i4_sum_4x4_satd, i4_min_4x4_satd;
|
ihevce_ed_ctb_l1_t *ps_ed_ctb_curr_l1 = ps_ed_ctb_row_l1 + ctb_ctr;
|
|
WORD32 is_min_block_uncompensated_in_l32x32 = 0;
|
|
/*min of L1_4x4 @ L1_8x8*/
|
WORD32 ai4_min_satd_ctb[MAX_CTB_SIZE];
|
/*** This 2-D array will contain 4x4 satds sorted in ascending order in sets of 4,16,64 ***/
|
/*** For example : '5 10 2 7 6 12 3 1' array input will return '2 5 7 10 1 3 6 12' if sorted in sets of 4 ***/
|
WORD32 aai4_min_4_16_64_satd[3][MAX_CTB_SIZE];
|
|
/*sum of L1_4x4 @ L1_8x8*/
|
WORD32 ai4_sum_satd_ctb[MAX_CTB_SIZE >> 2];
|
/*** This 2-D array will contain 4x4 satds sorted in ascending order in sets of 4,16***/
|
WORD32 aai4_sum_4_16_satd_ctb[2][MAX_CTB_SIZE];
|
|
/* sum of (sum of L1_4x4 @ L1_8x8) @ L1_16x16 */
|
WORD32 ai4_sum_sum_satd_ctb[(MAX_CTB_SIZE >> 2) >> 2];
|
/*L1_32x32 = L0_64x64
|
so in L1_32x32 there are 64 L1_4x4blocks*/
|
for(i = 0; i < MAX_CTB_SIZE; i++)
|
{
|
ai4_min_satd_ctb[i] = -1;
|
}
|
for(j = 0; j < 3; j++)
|
{
|
for(i = 0; i < MAX_CTB_SIZE; i++)
|
{
|
aai4_min_4_16_64_satd[j][i] = -1;
|
}
|
}
|
/*L1_32x32 = L0_64x64
|
so in L1_32x32 there are 16 L1_8x8blocks*/
|
for(i = 0; i < (MAX_CTB_SIZE >> 2); i++)
|
{
|
ai4_sum_satd_ctb[i] = -1;
|
}
|
for(j = 0; j < 2; j++)
|
{
|
for(i = 0; i < (MAX_CTB_SIZE >> 2); i++)
|
{
|
aai4_sum_4_16_satd_ctb[j][i] = -1;
|
}
|
}
|
/*L1_32x32 = L0_64x64
so in L1_32x32 there are 4 L1_16x16 blocks*/
|
for(i = 0; i < ((MAX_CTB_SIZE >> 2) >> 2); i++)
|
{
|
ai4_sum_sum_satd_ctb[i] = 0;
|
}
|
/*Populate sum min 4x4 activity */
|
/*loop for L1_32x32 block*/
|
for(i = 0; i < 4; i++)
|
{
|
/*loop for L1_16x16 block*/
|
for(j = 0; j < 4; j++)
|
{
|
WORD32 i4_sum_satd_dumyy = 0;
|
WORD32 i4_num_satd_blks = 0;
|
/* loop for L1_8x8 block*/
|
for(k = 0; k < 4; k++)
|
{
|
WORD32 i4_satd_lim;
|
i4_satd_lim = (ps_ed + j * 4 + i * 16 + k)->i4_4x4_satd;
|
|
/*complete ctb will not have i4_4x4_satd = -1*/
|
if(-1 != i4_satd_lim)
|
{
|
#if SUB_NOISE_THRSHLD
|
i4_satd_lim = i4_satd_lim - i4_avg_noise_satd;
|
if(i4_satd_lim < 0)
|
{
|
i4_satd_lim = 0;
|
}
|
#else
|
if(i4_satd_lim < i4_avg_noise_satd)
|
{
|
i4_satd_lim = i4_avg_noise_satd;
|
}
|
#endif
|
i4_num_satd_blks++;
|
/*populate 4x4 data to calculate modulation index */
|
(ps_ed + j * 4 + i * 16 + k)->i4_4x4_satd = i4_satd_lim;
|
|
i4_sum_satd_dumyy += i4_satd_lim;
|
ai4_min_satd_ctb[j * 4 + i * 16 + k] = i4_satd_lim;
|
}
|
}
|
if(i4_num_satd_blks != 0)
|
{
|
/*normalise the sum of satd to 4 blocks even if it is an incomplete ctb */
|
i4_sum_satd_dumyy = i4_sum_satd_dumyy * 4 / i4_num_satd_blks;
|
}
|
else
|
{
|
i4_sum_satd_dumyy = -1;
|
}
|
/*sum of L1_4x4 @ L1_8x8block level*/
|
ai4_sum_satd_ctb[j + i * 4] = i4_sum_satd_dumyy;
|
/*sum of L1_8x8 @ L1_16x16block level*/
|
ai4_sum_sum_satd_ctb[i] += i4_sum_satd_dumyy;
|
/*store sum of 4x4 @ L1_8x8block level*/
|
ps_ed_ctb_curr_l1->i4_sum_4x4_satd[i * 4 + j] = i4_sum_satd_dumyy;
|
/*store min of 4x4 @ L1_8x8block level */
|
//ps_ed_ctb_curr_l1->i4_min_4x4_satd[i * 4 + j] = i4_min_satd_dumyy;
|
}
|
}
|
{
|
WORD32 i4_array_length = sizeof(ai4_min_satd_ctb) / sizeof(WORD32);
|
|
/*** This function will sort 64 elements in array ai4_min_satd_ctb in ascending order to ***/
|
/*** 3 arrays in sets of 4,16,64 into the 2-D array aai4_min_4_16_64_satd ***/
|
ihevce_merge_sort(
|
&ai4_min_satd_ctb[0], aai4_min_4_16_64_satd, i4_array_length, 1, 64);
|
|
i4_array_length = sizeof(ai4_sum_satd_ctb) / sizeof(WORD32);
|
|
/*** This function will sort 16 elements in array ai4_sum_satd_ctb in ascending order to ***/
|
/*** 2 arrays in sets of 4,16 into the 2-D array aai4_sum_4_16_satd_ctb ***/
|
ihevce_merge_sort(
|
&ai4_sum_satd_ctb[0], aai4_sum_4_16_satd_ctb, i4_array_length, 1, 16);
|
}
|
|
/*Populate avg satd to calculate MI and activity factors*/
|
for(i = 0; i < 4; i++)
|
{
|
WORD32 is_min_block_uncompensated_in_l116x16 = 0;
|
ps_ed_ctb_curr_l1->i4_16x16_satd[i][0] = -1;
|
ps_ed_ctb_curr_l1->i4_16x16_satd[i][1] = -1;
|
ps_ed_ctb_curr_l1->i4_16x16_satd[i][2] = -1;
|
|
for(j = 0; j < 4; j++)
|
{
|
ps_ed_ctb_curr_l1->i4_min_4x4_satd[i * 4 + j] =
|
aai4_min_4_16_64_satd[0][i * 16 + j * 4 + MEDIAN_CU_TU];
|
/*Accumulate the sum of 8*8 activities in the current layer (16*16 CU in L0)*/
|
i4_sum_4x4_satd = ps_ed_ctb_curr_l1->i4_sum_4x4_satd[i * 4 + j];
|
i4_min_4x4_satd = ps_ed_ctb_curr_l1->i4_min_4x4_satd[i * 4 + j];
|
ps_ed_ctb_curr_l1->i4_8x8_satd[i * 4 + j][0] = -1;
|
ps_ed_ctb_curr_l1->i4_8x8_satd[i * 4 + j][1] = -1;
|
ASSERT(-2 != i4_sum_4x4_satd);
|
|
if((-1 != i4_sum_4x4_satd))
|
{
|
WORD32 not_skipped = 1;
|
|
if((i4_slice_type == ISLICE) || (1 == not_skipped))
|
{
|
is_min_block_uncompensated_in_l116x16 = 1;
|
is_min_block_uncompensated_in_l32x32 = 1;
|
|
u8_curr_frame_8x8_sum_act_sqr +=
|
(i4_sum_4x4_satd * i4_sum_4x4_satd);
|
|
ai4_curr_frame_8x8_sum_act[0] += i4_sum_4x4_satd;
|
ai8_curr_frame_8x8_sum_act_sqr[0] +=
|
(i4_sum_4x4_satd * i4_sum_4x4_satd);
|
ai4_curr_frame_8x8_sum_blks[0] += 1;
|
ai4_curr_frame_8x8_sum_act[1] += i4_min_4x4_satd;
|
ai8_curr_frame_8x8_sum_act_sqr[1] +=
|
(i4_min_4x4_satd * i4_min_4x4_satd);
|
ai4_curr_frame_8x8_sum_blks[1] += 1;
|
}
|
|
ps_ed_ctb_curr_l1->i4_8x8_satd[i * 4 + j][0] = i4_sum_4x4_satd;
|
ps_ed_ctb_curr_l1->i4_8x8_satd[i * 4 + j][1] = i4_min_4x4_satd;
|
}
|
else
|
{
|
ai4_sum_sum_4x4_satd_16x16[i] = MAX_32BIT_VAL;
|
ai4_min_sum_4x4_satd_16x16[i] = MAX_32BIT_VAL;
|
ai4_min_min_4x4_satd_16x16[i] = MAX_32BIT_VAL;
|
}
|
}
|
|
//if(1 == is_min_block_comensated_in_l116x16)
|
{
|
ai4_min_sum_4x4_satd_16x16[i] =
|
aai4_sum_4_16_satd_ctb[0][i * 4 + MEDIAN_CU_TU];
|
ai4_min_min_4x4_satd_16x16[i] =
|
aai4_min_4_16_64_satd[1][i * 16 + MEDIAN_CU_TU_BY_2];
|
|
if(ai4_sum_sum_4x4_satd_16x16[i] != MAX_32BIT_VAL)
|
{
|
ai4_sum_sum_4x4_satd_16x16[i] = 0;
|
for(j = 0; j < 4; j++)
|
{
|
ai4_sum_sum_4x4_satd_16x16[i] +=
|
ps_ed_ctb_curr_l1->i4_sum_4x4_satd[i * 4 + j];
|
}
|
ps_ed_ctb_curr_l1->i4_16x16_satd[i][0] = ai4_sum_sum_4x4_satd_16x16[i];
|
ps_ed_ctb_curr_l1->i4_16x16_satd[i][1] = ai4_min_sum_4x4_satd_16x16[i];
|
ps_ed_ctb_curr_l1->i4_16x16_satd[i][2] = ai4_min_min_4x4_satd_16x16[i];
|
}
|
}
|
if(1 == is_min_block_uncompensated_in_l116x16)
|
{
|
if(MAX_32BIT_VAL != ai4_sum_sum_4x4_satd_16x16[i])
|
{
|
ai4_curr_frame_16x16_sum_act[0] += ai4_sum_sum_4x4_satd_16x16[i];
|
ai8_curr_frame_16x16_sum_act_sqr[0] +=
|
(ai4_sum_sum_4x4_satd_16x16[i] * ai4_sum_sum_4x4_satd_16x16[i]);
|
ai4_curr_frame_16x16_sum_blks[0] += 1;
|
}
|
if(MAX_32BIT_VAL != ai4_min_sum_4x4_satd_16x16[i])
|
{
|
ai4_curr_frame_16x16_sum_act[1] += ai4_min_sum_4x4_satd_16x16[i];
|
ai8_curr_frame_16x16_sum_act_sqr[1] +=
|
(ai4_min_sum_4x4_satd_16x16[i] * ai4_min_sum_4x4_satd_16x16[i]);
|
ai4_curr_frame_16x16_sum_blks[1] += 1;
|
ai4_curr_frame_16x16_sum_act[2] += ai4_min_min_4x4_satd_16x16[i];
|
ai8_curr_frame_16x16_sum_act_sqr[2] +=
|
(ai4_min_min_4x4_satd_16x16[i] * ai4_min_min_4x4_satd_16x16[i]);
|
ai4_curr_frame_16x16_sum_blks[2] += 1;
|
}
|
}
|
}
|
/*32x32*/
|
{
|
ps_ed_ctb_curr_l1->i4_32x32_satd[0][0] = -1;
|
ps_ed_ctb_curr_l1->i4_32x32_satd[0][1] = -1;
|
ps_ed_ctb_curr_l1->i4_32x32_satd[0][2] = -1;
|
ps_ed_ctb_curr_l1->i4_32x32_satd[0][3] = -1;
|
|
if((MAX_32BIT_VAL != ai4_sum_sum_4x4_satd_16x16[0]) ||
|
(MAX_32BIT_VAL != ai4_sum_sum_4x4_satd_16x16[2]) ||
|
(MAX_32BIT_VAL != ai4_sum_sum_4x4_satd_16x16[1]) ||
|
(MAX_32BIT_VAL != ai4_sum_sum_4x4_satd_16x16[3]))
|
{
|
//if(1 == is_min_block_comensated_in_l32x32)
|
{
|
{
|
WORD32 aai4_min_sum_sum_4x4_satd_16x16[1][64];
|
WORD32 i4_array_length =
|
sizeof(ai4_sum_sum_4x4_satd_16x16) / sizeof(WORD32);
|
/*** Sort 4 elements in ascending order ***/
|
ihevce_merge_sort(
|
&ai4_sum_sum_4x4_satd_16x16[0],
|
aai4_min_sum_sum_4x4_satd_16x16,
|
i4_array_length,
|
1,
|
4);
|
|
ps_ed_ctb_curr_l1->i4_32x32_satd[0][0] =
|
aai4_min_sum_sum_4x4_satd_16x16[0][MEDIAN_CU_TU];
|
}
|
{
|
ps_ed_ctb_curr_l1->i4_32x32_satd[0][1] =
|
aai4_sum_4_16_satd_ctb[1][MEDIAN_CU_TU_BY_2];
|
}
|
{
|
ps_ed_ctb_curr_l1->i4_32x32_satd[0][2] =
|
aai4_min_4_16_64_satd[2][MEDIAN_CU_TU_BY_4];
|
}
|
|
/*Sum of all 32x32 activity */
|
ps_ed_ctb_curr_l1->i4_32x32_satd[0][3] = 0;
|
for(j = 0; j < 4; j++)
|
{
|
if(MAX_32BIT_VAL != ai4_sum_sum_4x4_satd_16x16[j])
|
ps_ed_ctb_curr_l1->i4_32x32_satd[0][3] +=
|
ai4_sum_sum_4x4_satd_16x16[j];
|
}
|
|
if(1 == is_min_block_uncompensated_in_l32x32)
|
{
|
/*Accumulate the sum of 32*32 activities in the current layer (64*64 CU in L0)*/
|
if(MAX_32BIT_VAL != ps_ed_ctb_curr_l1->i4_32x32_satd[0][0])
|
{
|
ai4_curr_frame_32x32_sum_act[0] +=
|
ps_ed_ctb_curr_l1->i4_32x32_satd[0][0];
|
ai8_curr_frame_32x32_sum_act_sqr[0] +=
|
(ps_ed_ctb_curr_l1->i4_32x32_satd[0][0] *
|
ps_ed_ctb_curr_l1->i4_32x32_satd[0][0]);
|
ai4_curr_frame_32x32_sum_blks[0] += 1;
|
}
|
|
if(MAX_32BIT_VAL != ps_ed_ctb_curr_l1->i4_32x32_satd[0][1])
|
{
|
ai4_curr_frame_32x32_sum_act[1] +=
|
ps_ed_ctb_curr_l1->i4_32x32_satd[0][1];
|
ai8_curr_frame_32x32_sum_act_sqr[1] +=
|
(ps_ed_ctb_curr_l1->i4_32x32_satd[0][1] *
|
ps_ed_ctb_curr_l1->i4_32x32_satd[0][1]);
|
ai4_curr_frame_32x32_sum_blks[1] += 1;
|
}
|
|
if(MAX_32BIT_VAL != ps_ed_ctb_curr_l1->i4_32x32_satd[0][2])
|
{
|
ai4_curr_frame_32x32_sum_act[2] +=
|
ps_ed_ctb_curr_l1->i4_32x32_satd[0][2];
|
ai8_curr_frame_32x32_sum_act_sqr[2] +=
|
(ps_ed_ctb_curr_l1->i4_32x32_satd[0][2] *
|
ps_ed_ctb_curr_l1->i4_32x32_satd[0][2]);
|
ai4_curr_frame_32x32_sum_blks[2] += 1;
|
}
|
}
|
}
|
}
|
}
|
/*Increment ctb count*/
|
ps_ed += inc_ctb;
|
}
|
}
|
|
/* Spatial Variation and modulation index calculated for the frame */
|
{
|
for(i4_k = 0; i4_k < 2; i4_k++)
|
{
|
/*8x8*/
|
#if USE_SQRT_AVG_OF_SATD_SQR
|
ps_curr_out->i8_curr_frame_8x8_sum_act[i4_k] = ai8_curr_frame_8x8_sum_act_sqr[i4_k];
|
#else
|
ps_curr_out->i8_curr_frame_8x8_sum_act[i4_k] = ai4_curr_frame_8x8_sum_act[i4_k];
|
#endif
|
ps_curr_out->i4_curr_frame_8x8_sum_act_for_strength[i4_k] =
|
ai4_curr_frame_8x8_sum_act[i4_k];
|
ps_curr_out->i4_curr_frame_8x8_num_blks[i4_k] = ai4_curr_frame_8x8_sum_blks[i4_k];
|
ps_curr_out->u8_curr_frame_8x8_sum_act_sqr = u8_curr_frame_8x8_sum_act_sqr;
|
|
/*16x16*/
|
#if USE_SQRT_AVG_OF_SATD_SQR
|
ps_curr_out->i8_curr_frame_16x16_sum_act[i4_k] =
|
ai8_curr_frame_16x16_sum_act_sqr[i4_k];
|
#else
|
ps_curr_out->i8_curr_frame_16x16_sum_act[i4_k] = ai4_curr_frame_16x16_sum_act[i4_k];
|
#endif
|
ps_curr_out->i4_curr_frame_16x16_num_blks[i4_k] =
|
ai4_curr_frame_16x16_sum_blks[i4_k];
|
|
/*32x32*/
|
#if USE_SQRT_AVG_OF_SATD_SQR
|
ps_curr_out->i8_curr_frame_32x32_sum_act[i4_k] =
|
ai8_curr_frame_32x32_sum_act_sqr[i4_k];
|
#else
|
ps_curr_out->i8_curr_frame_32x32_sum_act[i4_k] = ai4_curr_frame_32x32_sum_act[i4_k];
|
#endif
|
ps_curr_out->i4_curr_frame_32x32_num_blks[i4_k] =
|
ai4_curr_frame_32x32_sum_blks[i4_k];
|
}
|
|
/*16x16*/
|
#if USE_SQRT_AVG_OF_SATD_SQR
|
ps_curr_out->i8_curr_frame_16x16_sum_act[2] = ai8_curr_frame_16x16_sum_act_sqr[2];
|
#else
|
ps_curr_out->i8_curr_frame_16x16_sum_act[2] = ai4_curr_frame_16x16_sum_act[2];
|
#endif
|
|
ps_curr_out->i4_curr_frame_16x16_num_blks[2] = ai4_curr_frame_16x16_sum_blks[2];
|
|
/*32x32*/
|
#if USE_SQRT_AVG_OF_SATD_SQR
|
ps_curr_out->i8_curr_frame_32x32_sum_act[2] = ai8_curr_frame_32x32_sum_act_sqr[2];
|
#else
|
ps_curr_out->i8_curr_frame_32x32_sum_act[2] = ai4_curr_frame_32x32_sum_act[2];
|
#endif
|
ps_curr_out->i4_curr_frame_32x32_num_blks[2] = ai4_curr_frame_32x32_sum_blks[2];
|
}
|
}
|
}
|
|
/*!
|
******************************************************************************
|
* \if Function name : ihevce_decomp_pre_intra_get_frame_satd \endif
|
*
|
* \brief
|
*  Accumulates the best SATD from pre intra analysis across all threads and
*  returns it; also outputs the layer 1 width and height
|
*
|
*
|
* \return
|
*  Total accumulated SATD
|
*
|
* \author
|
* Ittiam
|
*
|
*****************************************************************************
|
*/
|
LWORD64 ihevce_decomp_pre_intra_get_frame_satd(void *pv_ctxt, WORD32 *i4_width, WORD32 *i4_hieght)
|
{
|
ihevce_decomp_pre_intra_master_ctxt_t *ps_master_ctxt =
|
(ihevce_decomp_pre_intra_master_ctxt_t *)pv_ctxt;
|
WORD32 i4_i;
|
LWORD64 i8_tot_satd = 0;
|
|
/*accumulate SATD across all threads. note that every thread will enter this function,
hence it must be guaranteed that all threads have completed the pre intra pass by now*/
|
for(i4_i = 0; i4_i < ps_master_ctxt->i4_num_proc_thrds; i4_i++)
|
{
|
ihevce_decomp_pre_intra_ctxt_t *ps_ctxt =
|
ps_master_ctxt->aps_decomp_pre_intra_thrd_ctxt[i4_i];
|
|
//i8_tot_satd += ps_ctxt->as_layers[1].s_early_decision.i8_sum_best_satd;
|
i8_tot_satd += ps_ctxt->ps_ed_ctxt->i8_sum_best_satd;
|
|
*i4_width = ps_ctxt->as_layers[1].i4_actual_wd;
|
*i4_hieght = ps_ctxt->as_layers[1].i4_actual_ht;
|
}
|
|
return i8_tot_satd;
|
}
|
|
LWORD64 ihevce_decomp_pre_intra_get_frame_satd_squared(
|
void *pv_ctxt, WORD32 *i4_width, WORD32 *i4_hieght)
|
{
|
ihevce_decomp_pre_intra_master_ctxt_t *ps_master_ctxt =
|
(ihevce_decomp_pre_intra_master_ctxt_t *)pv_ctxt;
|
WORD32 i4_i;
|
LWORD64 i8_tot_satd = 0;
|
|
/*accumulate SATD across all threads. note that every thread will enter this function,
hence it must be guaranteed that all threads have completed the pre intra pass by now*/
|
for(i4_i = 0; i4_i < ps_master_ctxt->i4_num_proc_thrds; i4_i++)
|
{
|
ihevce_decomp_pre_intra_ctxt_t *ps_ctxt =
|
ps_master_ctxt->aps_decomp_pre_intra_thrd_ctxt[i4_i];
|
|
//i8_tot_satd += ps_ctxt->as_layers[1].s_early_decision.i8_sum_best_satd;
|
i8_tot_satd += (ps_ctxt->ps_ed_ctxt->i8_sum_sq_best_satd);
|
|
*i4_width = ps_ctxt->as_layers[1].i4_actual_wd;
|
*i4_hieght = ps_ctxt->as_layers[1].i4_actual_ht;
|
}
|
|
return i8_tot_satd;
|
}
|