/*
 * Convert a packed YUV 4:1:1 image to interleaved 8-bit RGB (plain C path).
 *
 *   YUV     input buffer; the loop bound shows that width * height * 3 / 2
 *           bytes are consumed, i.e. six input bytes per four pixels
 *   RGB     output buffer; three bytes are written per pixel
 *   width   image width in pixels
 *   height  image height in pixels
 *
 * NOTE(review): this listing is an excerpt. The return type, the opening
 * brace, the declaration of i and the statements that load y0..y3, u and v
 * from YUV are not visible. Presumably the luma samples have 16 and the
 * chroma samples 128 subtracted before use -- confirm against the full file.
 */
25 #include <fvutils/color/yuvrgb.h> 26 #include <core/macros.h> 28 #include <fvutils/cpu/mmx.h> 51 yuv411packed_to_rgb_plainc(
const unsigned char *YUV,
unsigned char *RGB,
52 unsigned int width,
unsigned int height)
54 int y0, y1, y2, y3, u, v;
/* Four luma samples share one U/V pair, so each pass emits four pixels. */
56 while (i < (width * height)*3/2) {
/*
 * Pixel 0. The coefficients are scaled by 65536, hence the >> 16:
 *   76284 ~ 1.164, 104595 ~ 1.596, 25625 ~ 0.391, 53281 ~ 0.813,
 *   132252 ~ 2.018.
 * Byte order written: V-weighted channel (red), mixed channel (green),
 * U-weighted channel (blue).
 * clip() is defined elsewhere; presumably it clamps to 0..255 -- confirm.
 */
65 *RGB++ = clip( (76284 * y0 + 104595 * v ) >> 16 );
66 *RGB++ = clip( (76284 * y0 - 25625 * u - 53281 * v ) >> 16 );
67 *RGB++ = clip( (76284 * y0 + 132252 * u ) >> 16 );
/* Pixel 1: same chroma pair, luma y1. */
70 *RGB++ = clip( (76284 * y1 + 104595 * v ) >> 16 );
71 *RGB++ = clip( (76284 * y1 - 25625 * u - 53281 * v ) >> 16 );
72 *RGB++ = clip( (76284 * y1 + 132252 * u ) >> 16 );
/* Pixel 2: same chroma pair, luma y2. */
75 *RGB++ = clip( (76284 * y2 + 104595 * v ) >> 16 );
76 *RGB++ = clip( (76284 * y2 - 25625 * u - 53281 * v ) >> 16 );
77 *RGB++ = clip( (76284 * y2 + 132252 * u ) >> 16 );
/* Pixel 3: same chroma pair, luma y3. */
80 *RGB++ = clip( (76284 * y3 + 104595 * v ) >> 16 );
81 *RGB++ = clip( (76284 * y3 - 25625 * u - 53281 * v ) >> 16 );
82 *RGB++ = clip( (76284 * y3 + 132252 * u ) >> 16 );
/*
 * Convert a planar YUV 4:2:2 image to interleaved 8-bit RGB (plain C path).
 *
 *   planar  input buffer holding the Y plane followed by the U and V planes
 *   RGB     output buffer; three bytes are written per pixel
 *   width   image width in pixels
 *   height  image height in pixels
 *
 * NOTE(review): excerpt only -- the initialisation of yp and the loads of
 * y1, y2, u and v inside the loop are not visible here.
 */
104 yuv422planar_to_rgb_plainc(
const unsigned char *planar,
unsigned char *RGB,
unsigned int width,
unsigned int height)
108 const unsigned char *yp, *up, *vp;
/* U plane starts after width * height luma bytes. */
112 up = planar + (width * height);
/* V plane starts after a U plane of width * height / 2 bytes. */
113 vp = up + (width * height / 2);
/* One iteration per pixel pair: two luma samples share one U/V pair. */
115 for (i = 0; i < (width * height / 2); ++i) {
/*
 * First pixel of the pair. Coefficients are scaled by 65536 (>> 16):
 * 76284 ~ 1.164, 104595 ~ 1.596, 25625 ~ 0.391, 53281 ~ 0.813,
 * 132252 ~ 2.018. Bytes are written in R, G, B order.
 * clip() is defined elsewhere; presumably it clamps to 0..255 -- confirm.
 */
128 *RGB++ = clip( (76284 * y1 + 104595 * v ) >> 16 );
129 *RGB++ = clip( (76284 * y1 - 25625 * u - 53281 * v ) >> 16 );
130 *RGB++ = clip( (76284 * y1 + 132252 * u ) >> 16 );
/* Second pixel of the pair: same chroma, luma y2. */
133 *RGB++ = clip( (76284 * y2 + 104595 * v ) >> 16 );
134 *RGB++ = clip( (76284 * y2 - 25625 * u - 53281 * v ) >> 16 );
135 *RGB++ = clip( (76284 * y2 + 132252 * u ) >> 16 );
/*
 * Convert a packed YUV 4:2:2 image to interleaved 8-bit RGB (plain C path).
 *
 *   YUV     packed input buffer
 *   RGB     output buffer; three bytes are written per pixel
 *   width   image width in pixels
 *   height  image height in pixels
 *
 * NOTE(review): excerpt only -- the statements that read y0, y1, u and v
 * from YUV (and therefore the byte order of the packed input) are not
 * visible here.
 */
158 yuv422packed_to_rgb_plainc(
const unsigned char *YUV,
unsigned char *RGB,
159 const unsigned int width,
const unsigned int height)
/* The counter advances by two: each pass converts a pair of pixels. */
163 for (
unsigned int pixel = 0; pixel < (width * height); pixel += 2) {
/*
 * First pixel of the pair. Coefficients are scaled by 65536 (>> 16):
 * 76284 ~ 1.164, 104595 ~ 1.596, 25625 ~ 0.391, 53281 ~ 0.813,
 * 132252 ~ 2.018. Bytes are written in R, G, B order.
 * clip() is defined elsewhere; presumably it clamps to 0..255 -- confirm.
 */
170 *RGB++ = clip( (76284 * y0 + 104595 * v ) >> 16 );
171 *RGB++ = clip( (76284 * y0 - 25625 * u - 53281 * v ) >> 16 );
172 *RGB++ = clip( (76284 * y0 + 132252 * u ) >> 16 );
/* Second pixel of the pair: same chroma, luma y1. */
175 *RGB++ = clip( (76284 * y1 + 104595 * v ) >> 16 );
176 *RGB++ = clip( (76284 * y1 - 25625 * u - 53281 * v ) >> 16 );
177 *RGB++ = clip( (76284 * y1 + 132252 * u ) >> 16 );
/*
 * Convert a planar YUV 4:2:2 image to interleaved 8-bit BGR (plain C path).
 *
 *   planar  input buffer holding the Y plane followed by the U and V planes
 *   BGR     output buffer; three bytes are written per pixel
 *   width   image width in pixels
 *   height  image height in pixels
 *
 * NOTE(review): excerpt only -- the initialisation of yp and the loads of
 * y1, y2, u and v inside the loop are not visible here.
 */
189 yuv422planar_to_bgr_plainc(
const unsigned char *planar,
unsigned char *BGR,
190 unsigned int width,
unsigned int height)
194 const unsigned char *yp, *up, *vp;
/* U plane starts after width * height luma bytes. */
198 up = planar + (width * height);
/* V plane starts after a U plane of width * height / 2 bytes. */
199 vp = up + (width * height / 2);
/* One iteration per pixel pair: two luma samples share one U/V pair. */
201 for (i = 0; i < (width * height / 2); ++i) {
/*
 * First pixel of the pair. Coefficients are scaled by 65536 (>> 16).
 * The U-weighted term (blue) is written first and the V-weighted term
 * (red) last, giving B, G, R byte order.
 * clip() is defined elsewhere; presumably it clamps to 0..255 -- confirm.
 */
214 *BGR++ = clip( (76284 * y1 + 132252 * u ) >> 16 );
215 *BGR++ = clip( (76284 * y1 - 25625 * u - 53281 * v ) >> 16 );
216 *BGR++ = clip( (76284 * y1 + 104595 * v ) >> 16 );
/* Second pixel of the pair: same chroma, luma y2. */
219 *BGR++ = clip( (76284 * y2 + 132252 * u ) >> 16 );
220 *BGR++ = clip( (76284 * y2 - 25625 * u - 53281 * v ) >> 16 );
221 *BGR++ = clip( (76284 * y2 + 104595 * v ) >> 16 );
/*
 * Convert a planar YUV 4:2:2 image to interleaved RGB with an alpha byte
 * (plain C path).
 *
 *   planar  input buffer holding the Y plane followed by the U and V planes
 *   RGB     output buffer
 *   width   image width in pixels
 *   height  image height in pixels
 *
 * NOTE(review): excerpt only. Just the three colour bytes per pixel are
 * visible; the name suggests a fourth (alpha) byte is written in the lines
 * omitted after each triple (the embedded numbering skips there) -- confirm
 * the value and position of that byte in the full file. The loads of y1,
 * y2, u and v and the setup of yp are likewise not visible.
 */
227 yuv422planar_to_rgb_with_alpha_plainc(
const unsigned char *planar,
unsigned char *RGB,
unsigned int width,
unsigned int height)
231 const unsigned char *yp, *up, *vp;
/* U plane starts after width * height luma bytes. */
235 up = planar + (width * height);
/* V plane starts after a U plane of width * height / 2 bytes. */
236 vp = up + (width * height / 2);
/* One iteration per pixel pair: two luma samples share one U/V pair. */
238 for (i = 0; i < (width * height / 2); ++i) {
/*
 * First pixel of the pair, colour bytes in R, G, B order. Coefficients
 * are scaled by 65536 (>> 16). clip() is defined elsewhere; presumably
 * it clamps to 0..255 -- confirm.
 */
251 *RGB++ = clip( (76284 * y1 + 104595 * v ) >> 16 );
252 *RGB++ = clip( (76284 * y1 - 25625 * u - 53281 * v ) >> 16 );
253 *RGB++ = clip( (76284 * y1 + 132252 * u ) >> 16 );
/* Second pixel of the pair: same chroma, luma y2. */
257 *RGB++ = clip( (76284 * y2 + 104595 * v ) >> 16 );
258 *RGB++ = clip( (76284 * y2 - 25625 * u - 53281 * v ) >> 16 );
259 *RGB++ = clip( (76284 * y2 + 132252 * u ) >> 16 );
/*
 * Convert a planar YUV 4:2:2 image to interleaved BGR with an alpha byte
 * (plain C path).
 *
 *   planar  input buffer holding the Y plane followed by the U and V planes
 *   BGR     output buffer
 *   width   image width in pixels
 *   height  image height in pixels
 *
 * NOTE(review): excerpt only. Just the three colour bytes per pixel are
 * visible; the name suggests a fourth (alpha) byte is written in the lines
 * omitted after each triple (the embedded numbering skips there) -- confirm
 * the value and position of that byte in the full file. The loads of y1,
 * y2, u and v and the setup of yp are likewise not visible.
 */
268 yuv422planar_to_bgr_with_alpha_plainc(
const unsigned char *planar,
unsigned char *BGR,
unsigned int width,
unsigned int height)
272 const unsigned char *yp, *up, *vp;
/* U plane starts after width * height luma bytes. */
276 up = planar + (width * height);
/* V plane starts after a U plane of width * height / 2 bytes. */
277 vp = up + (width * height / 2);
/* One iteration per pixel pair: two luma samples share one U/V pair. */
279 for (i = 0; i < (width * height / 2); ++i) {
/*
 * First pixel of the pair, colour bytes in B, G, R order (U-weighted
 * term first, V-weighted term last). Coefficients are scaled by 65536
 * (>> 16). clip() is defined elsewhere; presumably it clamps to 0..255
 * -- confirm.
 */
292 *BGR++ = clip( (76284 * y1 + 132252 * u ) >> 16 );
293 *BGR++ = clip( (76284 * y1 - 25625 * u - 53281 * v ) >> 16 );
294 *BGR++ = clip( (76284 * y1 + 104595 * v ) >> 16 );
/* Second pixel of the pair: same chroma, luma y2. */
298 *BGR++ = clip( (76284 * y2 + 132252 * u ) >> 16 );
299 *BGR++ = clip( (76284 * y2 - 25625 * u - 53281 * v ) >> 16 );
300 *BGR++ = clip( (76284 * y2 + 104595 * v ) >> 16 );
/*
 * Convert a packed YUV 4:2:2 image to interleaved BGR with an alpha byte
 * (plain C path).
 *
 *   YUV     packed input buffer; the loop bound shows width * height * 2
 *           bytes are consumed (two bytes per pixel)
 *   BGR     output buffer
 *   width   image width in pixels
 *   height  image height in pixels
 *
 * NOTE(review): excerpt only. The declaration of i, the loads of y0, y1, u
 * and v, and any alpha-byte write (suggested by the name and by the skipped
 * numbering after each triple) are not visible here -- confirm in the full
 * file.
 */
309 yuv422packed_to_bgr_with_alpha_plainc(
const unsigned char *YUV,
unsigned char *BGR,
310 unsigned int width,
unsigned int height)
/* i counts input bytes; two pixels are emitted per pass. */
315 while (i < (width * height * 2)) {
/*
 * First pixel of the pair, colour bytes in B, G, R order (U-weighted
 * term first, V-weighted term last). Coefficients are scaled by 65536
 * (>> 16). clip() is defined elsewhere; presumably it clamps to 0..255
 * -- confirm.
 */
322 *BGR++ = clip( (76284 * y0 + 132252 * u ) >> 16 );
323 *BGR++ = clip( (76284 * y0 - 25625 * u - 53281 * v ) >> 16 );
324 *BGR++ = clip( (76284 * y0 + 104595 * v ) >> 16 );
/* Second pixel of the pair: same chroma, luma y1. */
328 *BGR++ = clip( (76284 * y1 + 132252 * u ) >> 16 );
329 *BGR++ = clip( (76284 * y1 - 25625 * u - 53281 * v ) >> 16 );
330 *BGR++ = clip( (76284 * y1 + 104595 * v ) >> 16 );
/*
 * Constant tables for the MMX conversion path.
 *
 * The leading "defined __i386__ ||" fragment belongs to a preprocessor
 * condition whose start is not visible in this excerpt; presumably the
 * tables are only compiled on x86 targets -- confirm.
 *
 * RZ(i)   drops the low (BITRES - RES) bits of i.
 * FOUR(i) builds a four-element initialiser repeating i.
 *
 * Each table therefore holds the same 16-bit value in all four elements,
 * and movq_m2r() elsewhere in this file loads a table into an MMX register
 * in one go. CRV, CBU, CGU, CGV, YMUL, OFF, BITRES and RES are defined in
 * lines not shown here.
 *
 * NOTE(review): RZ does not parenthesise its argument, so it would mis-parse
 * if an argument expanded to an expression using an operator that binds less
 * tightly than >> -- check the definitions of CRV etc.
 */
338 defined __i386__ || \ 359 #define RZ(i) (i >> (BITRES - RES)) 360 #define FOUR(i) {i, i, i, i} 362 __aligned(8) const volatile
unsigned short _const_crvcrv[4] = FOUR(RZ(CRV));
363 __aligned(8) const volatile
unsigned short _const_cbucbu[4] = FOUR(RZ(CBU));
364 __aligned(8) const volatile
unsigned short _const_cgucgu[4] = FOUR(RZ(CGU));
365 __aligned(8) const volatile
unsigned short _const_cgvcgv[4] = FOUR(RZ(CGV));
366 __aligned(8) const volatile
unsigned short _const_ymul [4] = FOUR(RZ(YMUL));
/* Plain values, not passed through RZ: 128 is subtracted from chroma. */
367 __aligned(8) const volatile
unsigned short _const_128 [4] = FOUR(128);
/* Despite the name, the value is RZ(OFF), not a literal 32. */
368 __aligned(8) const volatile
unsigned short _const_32 [4] = FOUR(RZ(OFF));
/* Plain value: 16 is subtracted from luma. */
369 __aligned(8) const volatile
unsigned short _const_16 [4] = FOUR(16);
/*
 * CONST_* macros: each dereferences the matching _const_* array, yielding
 * its first element as the memory operand handed to the *_m2r macros.
 *
 * yuv411planar_to_rgb_mmx: MMX conversion of a planar YUV image, eight
 * pixels per inner-loop pass.
 *
 *   yuv  planar input buffer
 *   rgb  output buffer
 *   w    image width in pixels (the inner loop steps by 8)
 *   h    image height in pixels
 *
 * NOTE(review): excerpt only -- many instructions (register loads, copies,
 * pointer updates) are missing from this listing and the function continues
 * past the last visible line. The comments below assume the usual mmx.h
 * operand order (source first, destination second) and describe only what
 * the visible instructions show.
 */
371 #define CONST_CRVCRV *_const_crvcrv 372 #define CONST_CBUCBU *_const_cbucbu 373 #define CONST_CGUCGU *_const_cgucgu 374 #define CONST_CGVCGV *_const_cgvcgv 375 #define CONST_YMUL *_const_ymul 376 #define CONST_128 *_const_128 377 #define CONST_32 *_const_32 378 #define CONST_16 *_const_16 381 yuv411planar_to_rgb_mmx (
const unsigned char *yuv,
unsigned char *rgb,
382 unsigned int w,
unsigned int h)
385 const unsigned char *yp1, *up, *vp;
/*
 * Two different V-plane offsets are visible: one for a U plane of
 * w * (h / 4) bytes, one for (w / 2) * (h / 2) bytes. Presumably they sit
 * in alternative branches that are not visible here -- confirm which one
 * is active.
 */
391 vp = up + (w * (h / 4));
399 vp = up + ((w / 2) * (h / 2));
/* Row loop, then column loop in steps of eight pixels. */
401 for (yy = 0; yy < h; yy++)
403 for (xx = 0; xx < w; xx += 8)
/* Interleave chroma bytes with mm7 (presumably zeroed earlier -- confirm) to widen them to 16-bit words. */
415 punpcklbw_r2r(mm7, mm2);
416 punpcklbw_r2r(mm7, mm3);
/* Remove the luma offset of 16 from mm0/mm1. */
418 movq_m2r(CONST_16, mm4);
419 psubsw_r2r(mm4, mm0);
420 psubsw_r2r(mm4, mm1);
/* Remove the chroma offset of 128 from mm2/mm3. */
422 movq_m2r(CONST_128, mm5);
423 psubsw_r2r(mm5, mm2);
424 psubsw_r2r(mm5, mm3);
/* Scale luma by the YMUL constant. */
426 movq_m2r(CONST_YMUL, mm4);
427 pmullw_r2r(mm4, mm0);
428 pmullw_r2r(mm4, mm1);
/* Chroma products: mm7 = CRV * mm3, mm6 = CBU * mm2, mm5 = CGU * mm2, mm4 = CGV * mm3. */
430 movq_m2r(CONST_CRVCRV, mm7);
431 pmullw_r2r(mm3, mm7);
433 movq_m2r(CONST_CBUCBU, mm6);
434 pmullw_r2r(mm2, mm6);
436 movq_m2r(CONST_CGUCGU, mm5);
437 pmullw_r2r(mm2, mm5);
439 movq_m2r(CONST_CGVCGV, mm4);
440 pmullw_r2r(mm3, mm4);
/* CRV term combined with luma using saturating adds, then packed to unsigned bytes. */
443 paddsw_r2r(mm7, mm2);
444 paddsw_r2r(mm1, mm7);
448 packuswb_r2r(mm7, mm2);
452 punpckhbw_r2r(mm7, mm2);
453 punpcklbw_r2r(mm3, mm7);
/* CGU and CGV terms subtracted, CONST_32 offset added, then packed to bytes. */
457 psubsw_r2r(mm5, mm3);
458 psubsw_r2r(mm4, mm3);
459 paddsw_m2r(CONST_32, mm3);
462 psubsw_r2r(mm5, mm7);
463 psubsw_r2r(mm4, mm7);
464 paddsw_m2r(CONST_32, mm7);
468 packuswb_r2r(mm7, mm3);
472 punpckhbw_r2r(mm7, mm3);
473 punpcklbw_r2r(mm4, mm7);
/* CBU term and CONST_32 offset added to luma in mm0/mm1, then packed to bytes. */
476 movq_m2r(CONST_32, mm4);
477 paddsw_r2r(mm6, mm0);
478 paddsw_r2r(mm6, mm1);
479 paddsw_r2r(mm4, mm0);
480 paddsw_r2r(mm4, mm1);
483 packuswb_r2r(mm1, mm0);
487 punpckhbw_r2r(mm7, mm0);
488 punpcklbw_r2r(mm5, mm7);
/* Byte- and word-level interleaves that merge the separate channel registers into per-pixel output order. */
495 punpckhbw_r2r(mm3, mm2);
496 punpcklbw_r2r(mm6, mm7);
497 punpckhbw_r2r(mm1, mm0);
498 punpcklbw_r2r(mm1, mm5);
501 punpckhwd_r2r(mm5, mm7);
502 punpcklwd_r2r(mm5, mm1);
505 punpckhwd_r2r(mm0, mm2);
506 punpcklwd_r2r(mm0, mm4);
/*
 * Four 64-bit non-temporal stores at offsets 0, 8, 16 and 24 from dp1. If
 * dp1 is a byte pointer that is 32 bytes for the 8 pixels, i.e. four output
 * bytes per pixel -- confirm against the declaration of dp1.
 */
508 movntq_r2m(mm1, *(dp1));
509 movntq_r2m(mm7, *(dp1 + 8));
510 movntq_r2m(mm4, *(dp1 + 16));
511 movntq_r2m(mm2, *(dp1 + 24));