diff options
Diffstat (limited to 'drivers/media/platform/vicodec/codec-fwht.c')
-rw-r--r-- | drivers/media/platform/vicodec/codec-fwht.c | 121 |
1 files changed, 75 insertions, 46 deletions
diff --git a/drivers/media/platform/vicodec/codec-fwht.c b/drivers/media/platform/vicodec/codec-fwht.c index d1d6085da9f1..31faf319e508 100644 --- a/drivers/media/platform/vicodec/codec-fwht.c +++ b/drivers/media/platform/vicodec/codec-fwht.c @@ -46,8 +46,12 @@ static const uint8_t zigzag[64] = { 63, }; - -static int rlc(const s16 *in, __be16 *output, int blocktype) +/* + * noinline_for_stack to work around + * https://bugs.llvm.org/show_bug.cgi?id=38809 + */ +static int noinline_for_stack +rlc(const s16 *in, __be16 *output, int blocktype) { s16 block[8 * 8]; s16 *wp = block; @@ -106,8 +110,8 @@ static int rlc(const s16 *in, __be16 *output, int blocktype) * This function will worst-case increase rlc_in by 65*2 bytes: * one s16 value for the header and 8 * 8 coefficients of type s16. */ -static u16 derlc(const __be16 **rlc_in, s16 *dwht_out, - const __be16 *end_of_input) +static noinline_for_stack u16 +derlc(const __be16 **rlc_in, s16 *dwht_out, const __be16 *end_of_input) { /* header */ const __be16 *input = *rlc_in; @@ -240,8 +244,9 @@ static void dequantize_inter(s16 *coeff) *coeff <<= *quant; } -static void fwht(const u8 *block, s16 *output_block, unsigned int stride, - unsigned int input_step, bool intra) +static void noinline_for_stack fwht(const u8 *block, s16 *output_block, + unsigned int stride, + unsigned int input_step, bool intra) { /* we'll need more than 8 bits for the transformed coefficients */ s32 workspace1[8], workspace2[8]; @@ -373,7 +378,8 @@ static void fwht(const u8 *block, s16 *output_block, unsigned int stride, * Furthermore values can be negative... This is just a version that * works with 16 signed data */ -static void fwht16(const s16 *block, s16 *output_block, int stride, int intra) +static void noinline_for_stack +fwht16(const s16 *block, s16 *output_block, int stride, int intra) { /* we'll need more than 8 bits for the transformed coefficients */ s32 workspace1[8], workspace2[8]; @@ -456,7 +462,8 @@ static void fwht16(const s16 *block, s16 *output_block, int stride, int intra) } } -static void ifwht(const s16 *block, s16 *output_block, int intra) +static noinline_for_stack void +ifwht(const s16 *block, s16 *output_block, int intra) { /* * we'll need more than 8 bits for the transformed coefficients @@ -604,9 +611,9 @@ static int var_inter(const s16 *old, const s16 *new) return ret; } -static int decide_blocktype(const u8 *cur, const u8 *reference, - s16 *deltablock, unsigned int stride, - unsigned int input_step) +static noinline_for_stack int +decide_blocktype(const u8 *cur, const u8 *reference, s16 *deltablock, + unsigned int stride, unsigned int input_step) { s16 tmp[64]; s16 old[64]; @@ -632,12 +639,13 @@ static int decide_blocktype(const u8 *cur, const u8 *reference, return vari <= vard ? IBLOCK : PBLOCK; } -static void fill_decoder_block(u8 *dst, const s16 *input, int stride) +static void fill_decoder_block(u8 *dst, const s16 *input, int stride, + unsigned int dst_step) { int i, j; for (i = 0; i < 8; i++) { - for (j = 0; j < 8; j++, input++, dst++) { + for (j = 0; j < 8; j++, input++, dst += dst_step) { if (*input < 0) *dst = 0; else if (*input > 255) @@ -645,17 +653,19 @@ static void fill_decoder_block(u8 *dst, const s16 *input, int stride) else *dst = *input; } - dst += stride - 8; + dst += stride - (8 * dst_step); } } -static void add_deltas(s16 *deltas, const u8 *ref, int stride) +static void add_deltas(s16 *deltas, const u8 *ref, int stride, + unsigned int ref_step) { int k, l; for (k = 0; k < 8; k++) { for (l = 0; l < 8; l++) { - *deltas += *ref++; + *deltas += *ref; + ref += ref_step; /* * Due to quantizing, it might possible that the * decoded coefficients are slightly out of range @@ -666,7 +676,7 @@ static void add_deltas(s16 *deltas, const u8 *ref, int stride) *deltas = 255; deltas++; } - ref += stride - 8; + ref += stride - (8 * ref_step); } } @@ -711,8 +721,8 @@ static u32 encode_plane(u8 *input, u8 *refp, __be16 **rlco, __be16 *rlco_max, ifwht(cf->de_coeffs, cf->de_fwht, blocktype); if (blocktype == PBLOCK) - add_deltas(cf->de_fwht, refp, 8); - fill_decoder_block(refp, cf->de_fwht, 8); + add_deltas(cf->de_fwht, refp, 8, 1); + fill_decoder_block(refp, cf->de_fwht, 8, 1); } input += 8 * input_step; @@ -821,23 +831,31 @@ u32 fwht_encode_frame(struct fwht_raw_frame *frm, return encoding; } -static bool decode_plane(struct fwht_cframe *cf, const __be16 **rlco, u8 *ref, - u32 height, u32 width, u32 coded_width, +static bool decode_plane(struct fwht_cframe *cf, const __be16 **rlco, + u32 height, u32 width, const u8 *ref, u32 ref_stride, + unsigned int ref_step, u8 *dst, + unsigned int dst_stride, unsigned int dst_step, bool uncompressed, const __be16 *end_of_rlco_buf) { unsigned int copies = 0; s16 copy[8 * 8]; u16 stat; unsigned int i, j; + bool is_intra = !ref; width = round_up(width, 8); height = round_up(height, 8); if (uncompressed) { + int i; + if (end_of_rlco_buf + 1 < *rlco + width * height / 2) return false; - memcpy(ref, *rlco, width * height); - *rlco += width * height / 2; + for (i = 0; i < height; i++) { + memcpy(dst, *rlco, width); + dst += dst_stride; + *rlco += width / 2; + } return true; } @@ -849,15 +867,17 @@ static bool decode_plane(struct fwht_cframe *cf, const __be16 **rlco, u8 *ref, */ for (j = 0; j < height / 8; j++) { for (i = 0; i < width / 8; i++) { - u8 *refp = ref + j * 8 * coded_width + i * 8; + const u8 *refp = ref + j * 8 * ref_stride + + i * 8 * ref_step; + u8 *dstp = dst + j * 8 * dst_stride + i * 8 * dst_step; if (copies) { memcpy(cf->de_fwht, copy, sizeof(copy)); - if (stat & PFRAME_BIT) + if ((stat & PFRAME_BIT) && !is_intra) add_deltas(cf->de_fwht, refp, - coded_width); - fill_decoder_block(refp, cf->de_fwht, - coded_width); + ref_stride, ref_step); + fill_decoder_block(dstp, cf->de_fwht, + dst_stride, dst_step); copies--; continue; } @@ -865,35 +885,41 @@ static bool decode_plane(struct fwht_cframe *cf, const __be16 **rlco, u8 *ref, stat = derlc(rlco, cf->coeffs, end_of_rlco_buf); if (stat & OVERFLOW_BIT) return false; - if (stat & PFRAME_BIT) + if ((stat & PFRAME_BIT) && !is_intra) dequantize_inter(cf->coeffs); else dequantize_intra(cf->coeffs); ifwht(cf->coeffs, cf->de_fwht, - (stat & PFRAME_BIT) ? 0 : 1); + ((stat & PFRAME_BIT) && !is_intra) ? 0 : 1); copies = (stat & DUPS_MASK) >> 1; if (copies) memcpy(copy, cf->de_fwht, sizeof(copy)); - if (stat & PFRAME_BIT) - add_deltas(cf->de_fwht, refp, coded_width); - fill_decoder_block(refp, cf->de_fwht, coded_width); + if ((stat & PFRAME_BIT) && !is_intra) + add_deltas(cf->de_fwht, refp, + ref_stride, ref_step); + fill_decoder_block(dstp, cf->de_fwht, dst_stride, + dst_step); } } return true; } -bool fwht_decode_frame(struct fwht_cframe *cf, struct fwht_raw_frame *ref, - u32 hdr_flags, unsigned int components_num, - unsigned int width, unsigned int height, - unsigned int coded_width) +bool fwht_decode_frame(struct fwht_cframe *cf, u32 hdr_flags, + unsigned int components_num, unsigned int width, + unsigned int height, const struct fwht_raw_frame *ref, + unsigned int ref_stride, unsigned int ref_chroma_stride, + struct fwht_raw_frame *dst, unsigned int dst_stride, + unsigned int dst_chroma_stride) { const __be16 *rlco = cf->rlc_data; const __be16 *end_of_rlco_buf = cf->rlc_data + (cf->size / sizeof(*rlco)) - 1; - if (!decode_plane(cf, &rlco, ref->luma, height, width, coded_width, + if (!decode_plane(cf, &rlco, height, width, ref->luma, ref_stride, + ref->luma_alpha_step, dst->luma, dst_stride, + dst->luma_alpha_step, hdr_flags & FWHT_FL_LUMA_IS_UNCOMPRESSED, end_of_rlco_buf)) return false; @@ -901,27 +927,30 @@ bool fwht_decode_frame(struct fwht_cframe *cf, struct fwht_raw_frame *ref, if (components_num >= 3) { u32 h = height; u32 w = width; - u32 c = coded_width; if (!(hdr_flags & FWHT_FL_CHROMA_FULL_HEIGHT)) h /= 2; - if (!(hdr_flags & FWHT_FL_CHROMA_FULL_WIDTH)) { + if (!(hdr_flags & FWHT_FL_CHROMA_FULL_WIDTH)) w /= 2; - c /= 2; - } - if (!decode_plane(cf, &rlco, ref->cb, h, w, c, + + if (!decode_plane(cf, &rlco, h, w, ref->cb, ref_chroma_stride, + ref->chroma_step, dst->cb, dst_chroma_stride, + dst->chroma_step, hdr_flags & FWHT_FL_CB_IS_UNCOMPRESSED, end_of_rlco_buf)) return false; - if (!decode_plane(cf, &rlco, ref->cr, h, w, c, + if (!decode_plane(cf, &rlco, h, w, ref->cr, ref_chroma_stride, + ref->chroma_step, dst->cr, dst_chroma_stride, + dst->chroma_step, hdr_flags & FWHT_FL_CR_IS_UNCOMPRESSED, end_of_rlco_buf)) return false; } if (components_num == 4) - if (!decode_plane(cf, &rlco, ref->alpha, height, width, - coded_width, + if (!decode_plane(cf, &rlco, height, width, ref->alpha, ref_stride, + ref->luma_alpha_step, dst->alpha, dst_stride, + dst->luma_alpha_step, hdr_flags & FWHT_FL_ALPHA_IS_UNCOMPRESSED, end_of_rlco_buf)) return false; |