Fawkes API  Fawkes Development Version
yuvrgb.cpp
1 
2 /****************************************************************************
3  * yuvrgb.cpp - YUV to RGB conversion - specific methods, macros and constants
4  *
5  * Created: Sat Aug 12 15:02:41 2006
6  * based on colorspaces.h from Tue Feb 23 13:49:38 2005
7  * Copyright 2005-2006 Tim Niemueller [www.niemueller.de]
8  *
9  ****************************************************************************/
10 
11 /* This program is free software; you can redistribute it and/or modify
12  * it under the terms of the GNU General Public License as published by
13  * the Free Software Foundation; either version 2 of the License, or
14  * (at your option) any later version. A runtime exception applies to
15  * this software (see LICENSE.GPL_WRE file mentioned below for details).
16  *
17  * This program is distributed in the hope that it will be useful,
18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20  * GNU Library General Public License for more details.
21  *
22  * Read the full text in the LICENSE.GPL_WRE file in the doc directory.
23  */
24 
25 #include <fvutils/color/yuvrgb.h>
26 #include <core/macros.h>
27 
28 #include <fvutils/cpu/mmx.h>
29 
30 namespace firevision {
31 #if 0 /* just to make Emacs auto-indent happy */
32 }
33 #endif
34 
35 /** YUV to RGB Conversion
36  * B = 1.164(Y - 16) + 2.018(U - 128)
37  * G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128)
38  * R = 1.164(Y - 16) + 1.596(V - 128)
39  *
40  * Values have to be clamped to keep them in the [0-255] range.
41  * Rumour has it that the valid range is actually a subset of [0-255] (fourcc.org mentions an RGB range
42  * of [16-235] mentioned) but clamping the values into [0-255] seems to produce acceptable results.
43  * @param YUV unsigned char array that contains the pixels, 4 pixels in 6 byte macro pixel, line after
44  * line
45  * @param RGB where the RGB output will be written to, will have pixel after pixel, 3 bytes per pixel
46  * (thus this is a 24bit RGB with one byte per color) line by line.
47  * @param width Width of the image contained in the YUV buffer
48  * @param height Height of the image contained in the YUV buffer
49  */
50 void
51 yuv411packed_to_rgb_plainc(const unsigned char *YUV, unsigned char *RGB,
52  unsigned int width, unsigned int height)
53 {
54  int y0, y1, y2, y3, u, v;
55  unsigned int i = 0;
56  while (i < (width * height)*3/2) {
57  u = YUV[i++] - 128;
58  y0 = YUV[i++] - 16;
59  y1 = YUV[i++] - 16;
60  v = YUV[i++] - 128;
61  y2 = YUV[i++] - 16;
62  y3 = YUV[i++] - 16;
63 
64  // Set red, green and blue bytes for pixel 0
65  *RGB++ = clip( (76284 * y0 + 104595 * v ) >> 16 );
66  *RGB++ = clip( (76284 * y0 - 25625 * u - 53281 * v ) >> 16 );
67  *RGB++ = clip( (76284 * y0 + 132252 * u ) >> 16 );
68 
69  // Set red, green and blue bytes for pixel 1
70  *RGB++ = clip( (76284 * y1 + 104595 * v ) >> 16 );
71  *RGB++ = clip( (76284 * y1 - 25625 * u - 53281 * v ) >> 16 );
72  *RGB++ = clip( (76284 * y1 + 132252 * u ) >> 16 );
73 
74  // Set red, green and blue bytes for pixel 2
75  *RGB++ = clip( (76284 * y2 + 104595 * v ) >> 16 );
76  *RGB++ = clip( (76284 * y2 - 25625 * u - 53281 * v ) >> 16 );
77  *RGB++ = clip( (76284 * y2 + 132252 * u ) >> 16 );
78 
79  // Set red, green and blue bytes for pixel 3
80  *RGB++ = clip( (76284 * y3 + 104595 * v ) >> 16 );
81  *RGB++ = clip( (76284 * y3 - 25625 * u - 53281 * v ) >> 16 );
82  *RGB++ = clip( (76284 * y3 + 132252 * u ) >> 16 );
83 
84  }
85 }
86 
87 
88 /** YUV to RGB Conversion
89  * B = 1.164(Y - 16) + 2.018(U - 128)
90  * G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128)
91  * R = 1.164(Y - 16) + 1.596(V - 128)
92  *
93  * Values have to be clamped to keep them in the [0-255] range.
94  * Rumour has it that the valid range is actually a subset of [0-255] (fourcc.org mentions an RGB range
95  * of [16-235] mentioned) but clamping the values into [0-255] seems to produce acceptable results.
96  * @param YUV unsigned char array that contains the pixels, 4 pixels in 6 byte macro pixel, line after
97  * line
98  * @param RGB where the RGB output will be written to, will have pixel after pixel, 3 bytes per pixel
99  * (thus this is a 24bit RGB with one byte per color) line by line.
100  * @param width Width of the image contained in the YUV buffer
101  * @param height Height of the image contained in the YUV buffer
102  */
103 void
104 yuv422planar_to_rgb_plainc(const unsigned char *planar, unsigned char *RGB, unsigned int width, unsigned int height)
105 {
106 
107  short y1, y2, u, v;
108  const unsigned char *yp, *up, *vp;
109  unsigned int i;
110 
111  yp = planar;
112  up = planar + (width * height);
113  vp = up + (width * height / 2);
114 
115  for (i = 0; i < (width * height / 2); ++i) {
116 
117  y1 = *yp++;
118  y2 = *yp++;
119  u = *up++;
120  v = *vp++;
121 
122  y1 -= 16;
123  y2 -= 16;
124  u -= 128;
125  v -= 128;
126 
127  // Set red, green and blue bytes for pixel 0
128  *RGB++ = clip( (76284 * y1 + 104595 * v ) >> 16 );
129  *RGB++ = clip( (76284 * y1 - 25625 * u - 53281 * v ) >> 16 );
130  *RGB++ = clip( (76284 * y1 + 132252 * u ) >> 16 );
131 
132  // Set red, green and blue bytes for pixel 1
133  *RGB++ = clip( (76284 * y2 + 104595 * v ) >> 16 );
134  *RGB++ = clip( (76284 * y2 - 25625 * u - 53281 * v ) >> 16 );
135  *RGB++ = clip( (76284 * y2 + 132252 * u ) >> 16 );
136 
137  }
138 }
139 
140 
141 
142 /** YUV to RGB Conversion
143  * B = 1.164(Y - 16) + 2.018(U - 128)
144  * G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128)
145  * R = 1.164(Y - 16) + 1.596(V - 128)
146  *
147  * Values have to be clamped to keep them in the [0-255] range.
148  * Rumour has it that the valid range is actually a subset of [0-255] (fourcc.org mentions an RGB range
149  * of [16-235] mentioned) but clamping the values into [0-255] seems to produce acceptable results.
150  * @param YUV unsigned char array that contains the pixels, 4 pixels in 8 byte macro pixel, line after
151  * line
152  * @param RGB where the RGB output will be written to, will have pixel after pixel, 3 bytes per pixel
153  * (thus this is a 24bit RGB with one byte per color) line by line.
154  * @param width Width of the image contained in the YUV buffer
155  * @param height Height of the image contained in the YUV buffer
156  */
157 void
158 yuv422packed_to_rgb_plainc(const unsigned char *YUV, unsigned char *RGB,
159  const unsigned int width, const unsigned int height)
160 {
161  int y0, y1, u, v;
162  unsigned int i = 0;
163  for (unsigned int pixel = 0; pixel < (width * height); pixel += 2) {
164  u = YUV[i++] - 128;
165  y0 = YUV[i++] - 16;
166  v = YUV[i++] - 128;
167  y1 = YUV[i++] - 16;
168 
169  // Set red, green and blue bytes for pixel 0
170  *RGB++ = clip( (76284 * y0 + 104595 * v ) >> 16 );
171  *RGB++ = clip( (76284 * y0 - 25625 * u - 53281 * v ) >> 16 );
172  *RGB++ = clip( (76284 * y0 + 132252 * u ) >> 16 );
173 
174  // Set red, green and blue bytes for pixel 1
175  *RGB++ = clip( (76284 * y1 + 104595 * v ) >> 16 );
176  *RGB++ = clip( (76284 * y1 - 25625 * u - 53281 * v ) >> 16 );
177  *RGB++ = clip( (76284 * y1 + 132252 * u ) >> 16 );
178  }
179 }
180 
181 /** Convert YUV422 planar to BGR.
182  * Use formula in aforementioned function.
183  * @param YUV YUV422 planar buffer
184  * @param BGR BGR buffer
185  * @param width Width of the image contained in the YUV buffer
186  * @param height Height of the image contained in the YUV buffer
187  */
188 void
189 yuv422planar_to_bgr_plainc(const unsigned char *planar, unsigned char *BGR,
190  unsigned int width, unsigned int height)
191 {
192 
193  short y1, y2, u, v;
194  const unsigned char *yp, *up, *vp;
195  unsigned int i;
196 
197  yp = planar;
198  up = planar + (width * height);
199  vp = up + (width * height / 2);
200 
201  for (i = 0; i < (width * height / 2); ++i) {
202 
203  y1 = *yp++;
204  y2 = *yp++;
205  u = *up++;
206  v = *vp++;
207 
208  y1 -= 16;
209  y2 -= 16;
210  u -= 128;
211  v -= 128;
212 
213  // Set red, green and blue bytes for pixel 0
214  *BGR++ = clip( (76284 * y1 + 132252 * u ) >> 16 );
215  *BGR++ = clip( (76284 * y1 - 25625 * u - 53281 * v ) >> 16 );
216  *BGR++ = clip( (76284 * y1 + 104595 * v ) >> 16 );
217 
218  // Set red, green and blue bytes for pixel 1
219  *BGR++ = clip( (76284 * y2 + 132252 * u ) >> 16 );
220  *BGR++ = clip( (76284 * y2 - 25625 * u - 53281 * v ) >> 16 );
221  *BGR++ = clip( (76284 * y2 + 104595 * v ) >> 16 );
222  }
223 }
224 
225 
226 void
227 yuv422planar_to_rgb_with_alpha_plainc(const unsigned char *planar, unsigned char *RGB, unsigned int width, unsigned int height)
228 {
229 
230  short y1, y2, u, v;
231  const unsigned char *yp, *up, *vp;
232  unsigned int i;
233 
234  yp = planar;
235  up = planar + (width * height);
236  vp = up + (width * height / 2);
237 
238  for (i = 0; i < (width * height / 2); ++i) {
239 
240  y1 = *yp++;
241  y2 = *yp++;
242  u = *up++;
243  v = *vp++;
244 
245  y1 -= 16;
246  y2 -= 16;
247  u -= 128;
248  v -= 128;
249 
250  // Set red, green and blue bytes for pixel 0
251  *RGB++ = clip( (76284 * y1 + 104595 * v ) >> 16 );
252  *RGB++ = clip( (76284 * y1 - 25625 * u - 53281 * v ) >> 16 );
253  *RGB++ = clip( (76284 * y1 + 132252 * u ) >> 16 );
254  *RGB++ = 255;
255 
256  // Set red, green and blue bytes for pixel 1
257  *RGB++ = clip( (76284 * y2 + 104595 * v ) >> 16 );
258  *RGB++ = clip( (76284 * y2 - 25625 * u - 53281 * v ) >> 16 );
259  *RGB++ = clip( (76284 * y2 + 132252 * u ) >> 16 );
260  *RGB++ = 255;
261 
262  }
263 
264 }
265 
266 
267 void
268 yuv422planar_to_bgr_with_alpha_plainc(const unsigned char *planar, unsigned char *BGR, unsigned int width, unsigned int height)
269 {
270 
271  short y1, y2, u, v;
272  const unsigned char *yp, *up, *vp;
273  unsigned int i;
274 
275  yp = planar;
276  up = planar + (width * height);
277  vp = up + (width * height / 2);
278 
279  for (i = 0; i < (width * height / 2); ++i) {
280 
281  y1 = *yp++;
282  y2 = *yp++;
283  u = *up++;
284  v = *vp++;
285 
286  y1 -= 16;
287  y2 -= 16;
288  u -= 128;
289  v -= 128;
290 
291  // Set red, green and blue bytes for pixel 0
292  *BGR++ = clip( (76284 * y1 + 132252 * u ) >> 16 );
293  *BGR++ = clip( (76284 * y1 - 25625 * u - 53281 * v ) >> 16 );
294  *BGR++ = clip( (76284 * y1 + 104595 * v ) >> 16 );
295  *BGR++ = 255;
296 
297  // Set red, green and blue bytes for pixel 1
298  *BGR++ = clip( (76284 * y2 + 132252 * u ) >> 16 );
299  *BGR++ = clip( (76284 * y2 - 25625 * u - 53281 * v ) >> 16 );
300  *BGR++ = clip( (76284 * y2 + 104595 * v ) >> 16 );
301  *BGR++ = 255;
302 
303  }
304 
305 }
306 
307 
308 void
309 yuv422packed_to_bgr_with_alpha_plainc(const unsigned char *YUV, unsigned char *BGR,
310  unsigned int width, unsigned int height)
311 {
312 
313  int y0, y1, u, v;
314  unsigned int i = 0;
315  while (i < (width * height * 2)) {
316  u = YUV[i++] - 128;
317  y0 = YUV[i++] - 16;
318  v = YUV[i++] - 128;
319  y1 = YUV[i++] - 16;
320 
321  // Set red, green and blue bytes for pixel 0
322  *BGR++ = clip( (76284 * y0 + 132252 * u ) >> 16 );
323  *BGR++ = clip( (76284 * y0 - 25625 * u - 53281 * v ) >> 16 );
324  *BGR++ = clip( (76284 * y0 + 104595 * v ) >> 16 );
325  *BGR++ = 255;
326 
327  // Set red, green and blue bytes for pixel 1
328  *BGR++ = clip( (76284 * y1 + 132252 * u ) >> 16 );
329  *BGR++ = clip( (76284 * y1 - 25625 * u - 53281 * v ) >> 16 );
330  *BGR++ = clip( (76284 * y1 + 104595 * v ) >> 16 );
331  *BGR++ = 255;
332 
333  }
334 }
335 
336 
337 #if ( \
338  defined __i386__ || \
339  defined __386__ || \
340  defined __X86__ || \
341  defined _M_IX86 || \
342  defined i386)
343 
344 #define CRV 104595
345 #define CBU 132251
346 #define CGU 25624
347 #define CGV 53280
348 #define YMUL 76283
349 #define OFF 32768
350 #define BITRES 16
351 
352 /* calculation float resolution in bits */
353 /* ie RES = 6 is 10.6 fixed point */
354 /* RES = 8 is 8.8 fixed point */
355 /* RES = 4 is 12.4 fixed point */
356 /* NB: going above 6 will lead to overflow... :( */
357 #define RES 6
358 
359 #define RZ(i) (i >> (BITRES - RES))
360 #define FOUR(i) {i, i, i, i}
361 
362 __aligned(8) const volatile unsigned short _const_crvcrv[4] = FOUR(RZ(CRV));
363 __aligned(8) const volatile unsigned short _const_cbucbu[4] = FOUR(RZ(CBU));
364 __aligned(8) const volatile unsigned short _const_cgucgu[4] = FOUR(RZ(CGU));
365 __aligned(8) const volatile unsigned short _const_cgvcgv[4] = FOUR(RZ(CGV));
366 __aligned(8) const volatile unsigned short _const_ymul [4] = FOUR(RZ(YMUL));
367 __aligned(8) const volatile unsigned short _const_128 [4] = FOUR(128);
368 __aligned(8) const volatile unsigned short _const_32 [4] = FOUR(RZ(OFF));
369 __aligned(8) const volatile unsigned short _const_16 [4] = FOUR(16);
370 
371 #define CONST_CRVCRV *_const_crvcrv
372 #define CONST_CBUCBU *_const_cbucbu
373 #define CONST_CGUCGU *_const_cgucgu
374 #define CONST_CGVCGV *_const_cgvcgv
375 #define CONST_YMUL *_const_ymul
376 #define CONST_128 *_const_128
377 #define CONST_32 *_const_32
378 #define CONST_16 *_const_16
379 
380 void
381 yuv411planar_to_rgb_mmx (const unsigned char *yuv, unsigned char *rgb,
382  unsigned int w, unsigned int h)
383 {
384  unsigned int xx, yy;
385  const unsigned char *yp1, *up, *vp;
386  unsigned char *dp1;
387 
388  /* plane pointers */
389  yp1 = yuv;
390  up = yuv + (w * h);
391  vp = up + (w * (h / 4));
392  /* destination pointers */
393  dp1 = rgb;
394 
395 
396 
397  yp1 = yuv;
398  up = yuv + (w * h);
399  vp = up + ((w / 2) * (h / 2));
400  dp1 = rgb;
401  for (yy = 0; yy < h; yy++)
402  {
403  for (xx = 0; xx < w; xx += 8)
404  {
405  movq_m2r(*yp1, mm0);
406  movq_r2r(mm0, mm1);
407  psrlw_i2r(8, mm0);
408  psllw_i2r(8, mm1);
409  psrlw_i2r(8, mm1);
410 
411  pxor_r2r(mm7, mm7);
412  movd_m2r(*up, mm3);
413  movd_m2r(*vp, mm2);
414 
415  punpcklbw_r2r(mm7, mm2);
416  punpcklbw_r2r(mm7, mm3);
417 
418  movq_m2r(CONST_16, mm4);
419  psubsw_r2r(mm4, mm0);
420  psubsw_r2r(mm4, mm1);
421 
422  movq_m2r(CONST_128, mm5);
423  psubsw_r2r(mm5, mm2);
424  psubsw_r2r(mm5, mm3);
425 
426  movq_m2r(CONST_YMUL, mm4);
427  pmullw_r2r(mm4, mm0);
428  pmullw_r2r(mm4, mm1);
429 
430  movq_m2r(CONST_CRVCRV, mm7);
431  pmullw_r2r(mm3, mm7);
432 
433  movq_m2r(CONST_CBUCBU, mm6);
434  pmullw_r2r(mm2, mm6);
435 
436  movq_m2r(CONST_CGUCGU, mm5);
437  pmullw_r2r(mm2, mm5);
438 
439  movq_m2r(CONST_CGVCGV, mm4);
440  pmullw_r2r(mm3, mm4);
441 
442  movq_r2r(mm0, mm2);
443  paddsw_r2r(mm7, mm2);
444  paddsw_r2r(mm1, mm7);
445 
446  psraw_i2r(RES, mm2);
447  psraw_i2r(RES, mm7);
448  packuswb_r2r(mm7, mm2);
449 
450  pxor_r2r(mm7, mm7);
451  movq_r2r(mm2, mm3);
452  punpckhbw_r2r(mm7, mm2);
453  punpcklbw_r2r(mm3, mm7);
454  por_r2r(mm7, mm2);
455 
456  movq_r2r(mm0, mm3);
457  psubsw_r2r(mm5, mm3);
458  psubsw_r2r(mm4, mm3);
459  paddsw_m2r(CONST_32, mm3);
460 
461  movq_r2r(mm1, mm7);
462  psubsw_r2r(mm5, mm7);
463  psubsw_r2r(mm4, mm7);
464  paddsw_m2r(CONST_32, mm7);
465 
466  psraw_i2r(RES, mm3);
467  psraw_i2r(RES, mm7);
468  packuswb_r2r(mm7, mm3);
469 
470  pxor_r2r(mm7, mm7);
471  movq_r2r(mm3, mm4);
472  punpckhbw_r2r(mm7, mm3);
473  punpcklbw_r2r(mm4, mm7);
474  por_r2r(mm7, mm3);
475 
476  movq_m2r(CONST_32, mm4);
477  paddsw_r2r(mm6, mm0);
478  paddsw_r2r(mm6, mm1);
479  paddsw_r2r(mm4, mm0);
480  paddsw_r2r(mm4, mm1);
481  psraw_i2r(RES, mm0);
482  psraw_i2r(RES, mm1);
483  packuswb_r2r(mm1, mm0);
484 
485  pxor_r2r(mm7, mm7);
486  movq_r2r(mm0, mm5);
487  punpckhbw_r2r(mm7, mm0);
488  punpcklbw_r2r(mm5, mm7);
489  por_r2r(mm7, mm0);
490 
491  pxor_r2r(mm1, mm1);
492  movq_r2r(mm0, mm5);
493  movq_r2r(mm3, mm6);
494  movq_r2r(mm2, mm7);
495  punpckhbw_r2r(mm3, mm2);
496  punpcklbw_r2r(mm6, mm7);
497  punpckhbw_r2r(mm1, mm0);
498  punpcklbw_r2r(mm1, mm5);
499 
500  movq_r2r(mm7, mm1);
501  punpckhwd_r2r(mm5, mm7);
502  punpcklwd_r2r(mm5, mm1);
503 
504  movq_r2r(mm2, mm4);
505  punpckhwd_r2r(mm0, mm2);
506  punpcklwd_r2r(mm0, mm4);
507 
508  movntq_r2m(mm1, *(dp1));
509  movntq_r2m(mm7, *(dp1 + 8));
510  movntq_r2m(mm4, *(dp1 + 16));
511  movntq_r2m(mm2, *(dp1 + 24));
512 
513  yp1 += 8;
514  up += 4;
515  vp += 4;
516  dp1 += 8 * 4;
517  }
518  if (yy & 0x1)
519  {
520  up -= w / 2;
521  vp -= w / 2;
522  }
523  }
524  emms();
525 }
526 #endif
527 
528 } // end namespace firevision