MythTV  0.26-pre
util-opengl.cpp
Go to the documentation of this file.
00001 // -*- Mode: c++ -*-
00002 
00003 #include <stdint.h>
00004 #include <QSize>
00005 #include "compat.h"
00006 #include "util-opengl.h"
00007 
00008 #ifdef MMX
00009 extern "C" {
00010 #include "ffmpeg-mmx.h"
00011 }
00012 
// 64-bit constant with every bit set.  The pack helpers in this file load it
// into mm4/mm7 before interleaving luma bytes into those registers; the
// scalar fallbacks write the literal 255 in the corresponding output slot.
00013 static mmx_t mmx_1s = {0xffffffffffffffffLL};
00014 
// Prepares mm4 (from y1) and mm7 (from y2) for the second half of an
// 8-pixel group on two rows: each register is first filled with mmx_1s and
// then byte-interleaved ("unpack high") with the 8 bytes found at y1 / y2.
// Nothing is stored to memory here; mmx_pack_end is called right after this
// in pack_yv12progressive and consumes mm4/mm7.
// NOTE(review): register roles are read off the macro names (m2r = memory
// to register, last argument is the destination); the macros themselves
// come from ffmpeg-mmx.h -- confirm there.
00015 static inline void mmx_pack_alpha1s_high(uint8_t *y1, uint8_t *y2)
00016 {
00017     movq_m2r (mmx_1s, mm4);       // mm4 = all ones
00018     punpckhbw_m2r (*y1, mm4);     // mix in the upper bytes of row 1 luma
00019     movq_m2r (mmx_1s, mm7);       // mm7 = all ones
00020     punpckhbw_m2r (*y2, mm7);     // mix in the upper bytes of row 2 luma
00021 }
00022 
// Counterpart of mmx_pack_alpha1s_high for the first half of an 8-pixel
// group: mm4 (from y1) and mm7 (from y2) are filled with mmx_1s and then
// byte-interleaved ("unpack low") with the 8 bytes found at y1 / y2.
// Nothing is stored to memory here; mmx_pack_middle is called right after
// this in pack_yv12progressive and consumes mm4/mm7.
// NOTE(review): register roles are read off the macro names (m2r = memory
// to register, last argument is the destination); the macros themselves
// come from ffmpeg-mmx.h -- confirm there.
00023 static inline void mmx_pack_alpha1s_low(uint8_t *y1, uint8_t *y2)
00024 {
00025     movq_m2r (mmx_1s, mm4);       // mm4 = all ones
00026     punpcklbw_m2r (*y1, mm4);     // mix in the lower bytes of row 1 luma
00027     movq_m2r (mmx_1s, mm7);       // mm7 = all ones
00028     punpcklbw_m2r (*y2, mm7);     // mix in the lower bytes of row 2 luma
00029 }
00030 
// Writes the first 16 output bytes of two rows: dest1[0..15] and
// dest2[0..15].
//
// Inputs are taken from registers, not parameters: mm2 and mm3 hold chroma
// (loaded by mmx_pack_chroma in the caller), mm4 and mm7 hold the per-row
// data prepared by mmx_pack_alpha1s_low.  Only mm5 and mm6 appear as
// destinations here, so mm2, mm3, mm4 and mm7 are left for the caller.
// NOTE(review): assumes the r2r/r2m macros take (source, destination) as
// their names suggest -- confirm against ffmpeg-mmx.h.
00031 static inline void mmx_pack_middle(uint8_t *dest1, uint8_t *dest2)
00032 {
00033     movq_r2r (mm3, mm5);              // mm5 = copy of mm3
00034     punpcklbw_r2r (mm2, mm5);         // mm5 = low halves of mm3/mm2 interleaved
00035 
00036     movq_r2r (mm5, mm6);
00037     punpcklbw_r2r (mm4, mm6);         // low half of mm5 mixed with row 1 data
00038     movq_r2m (mm6, *(dest1));         // -> dest1[0..7]
00039 
00040     movq_r2r (mm5, mm6);
00041     punpckhbw_r2r (mm4, mm6);         // high half of mm5 mixed with row 1 data
00042     movq_r2m (mm6, *(dest1 + 8));     // -> dest1[8..15]
00043 
00044     movq_r2r (mm5, mm6);
00045     punpcklbw_r2r (mm7, mm6);         // low half of mm5 mixed with row 2 data
00046     movq_r2m (mm6, *(dest2));         // -> dest2[0..7]
00047 
00048     movq_r2r (mm5, mm6);
00049     punpckhbw_r2r (mm7, mm6);         // high half of mm5 mixed with row 2 data
00050     movq_r2m (mm6, *(dest2 + 8));     // -> dest2[8..15]
00051 }
00052 
// Writes the second 16 output bytes of two rows: dest1[16..31] and
// dest2[16..31].
//
// Inputs are taken from registers: mm2 and mm3 hold chroma, mm4 and mm7 hold
// the per-row data prepared by mmx_pack_alpha1s_high.  Unlike
// mmx_pack_middle this helper uses mm3 itself as a destination (first and
// last unpack), so the chroma in mm3 is gone afterwards; mm6 is scratch.
// NOTE(review): assumes the r2r/r2m macros take (source, destination) as
// their names suggest -- confirm against ffmpeg-mmx.h.
00053 static inline void mmx_pack_end(uint8_t *dest1, uint8_t *dest2)
00054 {
00055     punpckhbw_r2r (mm2, mm3);         // mm3 = high halves of mm3/mm2 interleaved
00056 
00057     movq_r2r (mm3, mm6);
00058     punpcklbw_r2r (mm4, mm6);         // low half of mm3 mixed with row 1 data
00059     movq_r2m (mm6, *(dest1 + 16));    // -> dest1[16..23]
00060 
00061     movq_r2r (mm3, mm6);
00062     punpckhbw_r2r (mm4, mm6);         // high half of mm3 mixed with row 1 data
00063     movq_r2m (mm6, *(dest1 + 24));    // -> dest1[24..31]
00064 
00065     movq_r2r (mm3, mm6);
00066     punpcklbw_r2r (mm7, mm6);         // low half of mm3 mixed with row 2 data
00067     movq_r2m (mm6, *(dest2 + 16));    // -> dest2[16..23]
00068 
00069     punpckhbw_r2r (mm7, mm3);         // last use of mm3, so unpack in place
00070     movq_r2m (mm3, *(dest2 + 24));    // -> dest2[24..31]
00071 }
00072 
// Single-row packer: writes 32 output bytes at dest[0..31] from the 8 bytes
// at y plus the chroma already present in mm2 and mm3 (set up by
// mmx_pack_chroma or by mmx_interp_endu / mmx_interp_endv in the caller).
//
// It performs, for one row, the same sequence that mmx_pack_alpha1s_low +
// mmx_pack_middle + mmx_pack_alpha1s_high + mmx_pack_end perform for two.
// mm3, mm4, mm5 and mm6 are used as destinations, so the caller must reload
// chroma before the next call.
// NOTE(review): assumes the macros take (source, destination) as their
// names suggest -- confirm against ffmpeg-mmx.h.
00073 static inline void mmx_pack_easy(uint8_t *dest, uint8_t *y)
00074 {
00075     movq_m2r (mmx_1s, mm4);           // mm4 = all ones
00076     punpcklbw_m2r (*y, mm4);          // mix in the lower luma bytes
00077 
00078     movq_r2r (mm3, mm5);
00079     punpcklbw_r2r (mm2, mm5);         // mm5 = low halves of mm3/mm2 interleaved
00080 
00081     movq_r2r (mm5, mm6);
00082     punpcklbw_r2r (mm4, mm6);
00083     movq_r2m (mm6, *(dest));          // -> dest[0..7]
00084 
00085     movq_r2r (mm5, mm6);
00086     punpckhbw_r2r (mm4, mm6);
00087     movq_r2m (mm6, *(dest + 8));      // -> dest[8..15]
00088 
00089     movq_m2r (mmx_1s, mm4);           // mm4 = all ones again
00090     punpckhbw_m2r (*y, mm4);          // mix in the upper luma bytes
00091 
00092     punpckhbw_r2r (mm2, mm3);         // mm3 = high halves of mm3/mm2 interleaved
00093 
00094     movq_r2r (mm3, mm6);
00095     punpcklbw_r2r (mm4, mm6);
00096     movq_r2m (mm6, *(dest + 16));     // -> dest[16..23]
00097 
00098     punpckhbw_r2r (mm4, mm3);         // last use of mm3, so unpack in place
00099     movq_r2m (mm3, *(dest + 24));     // -> dest[24..31]
00100 }
00101 
// All-zero 64-bit constant; mmx_interp_start interleaves chroma bytes with
// it before doing 16-bit arithmetic on them.
00102 static mmx_t mmx_0s = {0x0000000000000000LL};
// The value 2 in each of four 16-bit fields; mmx_interp_start adds it just
// before shifting right by 2, mirroring the "+= 2 ... >> 2" in c_interp.
00103 static mmx_t round  = {0x0002000200020002LL};
00104 
// Computes a 3:1 weighted blend of the chroma samples at left and right and
// leaves it in mm4: per 16-bit lane, (3 * left + 2 + right) >> 2.  This is
// the same arithmetic c_interp performs one sample at a time.
// mm5 is used as scratch.  The result is finished by mmx_interp_endu or
// mmx_interp_endv, which copy it into mm2 or mm3.
// NOTE(review): presumably movd loads 4 bytes and the unpack against
// mmx_0s widens them to four 16-bit lanes -- confirm against ffmpeg-mmx.h.
00105 static inline void mmx_interp_start(uint8_t *left, uint8_t *right)
00106 {
00107     movd_m2r  (*left, mm5);
00108     punpcklbw_m2r (mmx_0s, mm5);      // interleave left samples with zero bytes
00109 
00110     movq_r2r  (mm5, mm4);             // mm4 = left
00111     paddw_r2r (mm4, mm4);             // mm4 = 2 * left
00112     paddw_r2r (mm5, mm4);             // mm4 = 3 * left
00113     paddw_m2r (round, mm4);           // + 2 for rounding
00114 
00115     movd_m2r  (*right, mm5);
00116     punpcklbw_m2r (mmx_0s, mm5);      // interleave right samples with zero bytes
00117     paddw_r2r (mm5, mm4);             // mm4 = 3 * left + 2 + right
00118 
00119     psrlw_i2r (2, mm4);               // divide by 4
00120 }
00121 
// Moves the blend left in mm4 by mmx_interp_start into mm2, the register the
// pack helpers read U chroma from.  The copy is shifted left by 8 within
// each 16-bit lane and mm4 is then added back byte-wise, so each lane ends
// up carrying the blended value in both of its bytes.
// NOTE(review): that reading assumes each lane of mm4 holds a value below
// 256, which follows from the blend of two bytes -- confirm.
00122 static inline void mmx_interp_endu(void)
00123 {
00124     movq_r2r  (mm4, mm2);
00125     psllw_i2r (8, mm2);
00126     paddb_r2r (mm4, mm2);
00127 }
00128 
// Same as mmx_interp_endu but targets mm3, the register the pack helpers
// read V chroma from: copy mm4, shift each 16-bit lane left by 8, then add
// mm4 back byte-wise so both bytes of each lane carry the blended value.
// NOTE(review): assumes each lane of mm4 holds a value below 256 -- confirm.
00129 static inline void mmx_interp_endv(void)
00130 {
00131     movq_r2r  (mm4, mm3);
00132     psllw_i2r (8, mm3);
00133     paddb_r2r (mm4, mm3);
00134 }
00135 
// Loads chroma for one 8-pixel group: samples from u go to mm2 and samples
// from v go to mm3, and each register is then interleaved with itself.
// The callers advance u and v by 4 per group, and the scalar fallback uses
// every chroma sample for two adjacent pixels, so the self-interleave
// presumably duplicates each of 4 loaded bytes -- NOTE(review): confirm
// against the movd/punpcklbw definitions in ffmpeg-mmx.h.
00136 static inline void mmx_pack_chroma(uint8_t *u, uint8_t *v)
00137 {
00138     movd_m2r (*u,  mm2);
00139     movd_m2r (*v,  mm3);
00140     punpcklbw_r2r (mm2, mm2);
00141     punpcklbw_r2r (mm3, mm3);
00142 }
00143 #endif // MMX
00144 
// Rounded 3:1 blend of two 8-bit samples: (3 * heavy + light + 2) >> 2.
// The largest possible sum is 3 * 255 + 255 + 2 = 1022, so the shifted
// result always fits in a byte.
static inline uint8_t c_interp_blend(uint8_t heavy, uint8_t light)
{
    return (uint8_t) ((3u * heavy + light + 2u) >> 2);
}

// Scalar chroma interpolation for four consecutive output rows.
//
// a, b, c and d each point at one chroma sample (the caller passes the same
// column of four consecutive chroma rows).  Samples are blended in the
// pairs (a, c) and (b, d):
//
//   dest[0] = blend(a, c)    dest[1] = blend(b, d)
//   dest[2] = blend(c, a)    dest[3] = blend(d, b)
//
// where blend(x, y) weights x three times as heavily as y.  The inputs are
// only read, hence const; dest receives exactly four bytes.
static inline void c_interp(uint8_t *dest, const uint8_t *a, const uint8_t *b,
                            const uint8_t *c, const uint8_t *d)
{
    dest[0] = c_interp_blend(*a, *c);
    dest[1] = c_interp_blend(*b, *d);
    dest[2] = c_interp_blend(*c, *a);
    dest[3] = c_interp_blend(*d, *b);
}
00172 
00173 void pack_yv12progressive(const unsigned char *source,
00174                           const unsigned char *dest,
00175                           const int *offsets, const int *pitches,
00176                           const QSize &size)
00177 {
00178     const int width = size.width();
00179     const int height = size.height();
00180 
00181     if (height % 2 || width % 2)
00182         return;
00183 
00184 #ifdef MMX
00185     int residual  = width % 8;
00186     int mmx_width = width - residual;
00187     int c_start_w = mmx_width;
00188 #else
00189     int residual  = 0;
00190     int c_start_w = 0;
00191 #endif
00192 
00193     uint bgra_width  = width << 2;
00194     uint chroma_width = width >> 1;
00195 
00196     uint y_extra     = (pitches[0] << 1) - width + residual;
00197     uint u_extra     = pitches[1] - chroma_width + (residual >> 1);
00198     uint v_extra     = pitches[2] - chroma_width + (residual >> 1);
00199     uint d_extra     = bgra_width + (residual << 2);
00200 
00201     uint8_t *ypt_1   = (uint8_t *)source + offsets[0];
00202     uint8_t *ypt_2   = ypt_1 + pitches[0];
00203     uint8_t *upt     = (uint8_t *)source + offsets[1];
00204     uint8_t *vpt     = (uint8_t *)source + offsets[2];
00205     uint8_t *dst_1   = (uint8_t *) dest;
00206     uint8_t *dst_2   = dst_1 + bgra_width;
00207 
00208 #ifdef MMX
00209     for (int row = 0; row < height; row += 2)
00210     {
00211         for (int col = 0; col < mmx_width; col += 8)
00212         {
00213             mmx_pack_chroma(upt,  vpt);
00214             mmx_pack_alpha1s_low(ypt_1, ypt_2);
00215             mmx_pack_middle(dst_1, dst_2);
00216             mmx_pack_alpha1s_high(ypt_1, ypt_2);
00217             mmx_pack_end(dst_1, dst_2);
00218 
00219             dst_1 += 32; dst_2 += 32;
00220             ypt_1 += 8;  ypt_2 += 8;
00221             upt   += 4;  vpt   += 4;
00222 
00223         }
00224         ypt_1 += y_extra; ypt_2 += y_extra;
00225         upt   += u_extra; vpt   += v_extra;
00226         dst_1 += d_extra; dst_2 += d_extra;
00227     }
00228 
00229     emms();
00230 
00231     if (residual)
00232     {
00233         y_extra     = (pitches[0] << 1) - width + mmx_width;
00234         u_extra     = pitches[1] - chroma_width + (mmx_width >> 1);
00235         v_extra     = pitches[2] - chroma_width + (mmx_width >> 1);
00236         d_extra     = bgra_width + (mmx_width << 2);
00237 
00238         ypt_1   = (uint8_t *)source + offsets[0] + mmx_width;
00239         ypt_2   = ypt_1 + pitches[0];
00240         upt     = (uint8_t *)source + offsets[1] + (mmx_width>>1);
00241         vpt     = (uint8_t *)source + offsets[2] + (mmx_width>>1);
00242         dst_1   = (uint8_t *) dest + (mmx_width << 2);
00243         dst_2   = dst_1 + bgra_width;
00244     }
00245     else
00246     {
00247         return;
00248     }
00249 #endif //MMX
00250 
00251     for (int row = 0; row < height; row += 2)
00252     {
00253         for (int col = c_start_w; col < width; col += 2)
00254         {
00255             *(dst_1++) = *vpt; *(dst_2++) = *vpt;
00256             *(dst_1++) = 255;  *(dst_2++) = 255;
00257             *(dst_1++) = *upt; *(dst_2++) = *upt;
00258             *(dst_1++) = *(ypt_1++);
00259             *(dst_2++) = *(ypt_2++);
00260 
00261             *(dst_1++) = *vpt; *(dst_2++) = *(vpt++);
00262             *(dst_1++) = 255;  *(dst_2++) = 255;
00263             *(dst_1++) = *upt; *(dst_2++) = *(upt++);
00264             *(dst_1++) = *(ypt_1++);
00265             *(dst_2++) = *(ypt_2++);
00266         }
00267         ypt_1   += y_extra; ypt_2   += y_extra;
00268         upt     += u_extra; vpt     += v_extra;
00269         dst_1   += d_extra; dst_2   += d_extra;
00270     }
00271 }
00272 
00273 void pack_yv12interlaced(const unsigned char *source,
00274                          const unsigned char *dest,
00275                          const int *offsets,
00276                          const int *pitches,
00277                          const QSize &size)
00278 {
00279     int width = size.width();
00280     int height = size.height();
00281 
00282     if (height % 4 || width % 2)
00283         return;
00284 
00285     uint bgra_width  = width << 2;
00286     uint dwrap  = (bgra_width << 2) - bgra_width;
00287     uint chroma_width = width >> 1;
00288     uint ywrap     = (pitches[0] << 1) - width;
00289     uint uwrap     = (pitches[1] << 1) - chroma_width;
00290     uint vwrap     = (pitches[2] << 1) - chroma_width;
00291 
00292     uint8_t *ypt_1   = (uint8_t *)source + offsets[0];
00293     uint8_t *ypt_2   = ypt_1 + pitches[0];
00294     uint8_t *ypt_3   = ypt_1 + (pitches[0] * (height - 2));
00295     uint8_t *ypt_4   = ypt_3 + pitches[0];
00296 
00297     uint8_t *u1     = (uint8_t *)source + offsets[1];
00298     uint8_t *v1     = (uint8_t *)source + offsets[2];
00299     uint8_t *u2     = u1 + pitches[1]; uint8_t *v2     = v1 + pitches[2];
00300     uint8_t *u3     = u1 + (pitches[1] * ((height - 4) >> 1));
00301     uint8_t *v3     = v1 + (pitches[2] * ((height - 4) >> 1));
00302     uint8_t *u4     = u3 + pitches[1]; uint8_t *v4     = v3 + pitches[2];
00303 
00304     uint8_t *dst_1   = (uint8_t *) dest;
00305     uint8_t *dst_2   = dst_1 + bgra_width;
00306     uint8_t *dst_3   = dst_1 + (bgra_width * (height - 2));
00307     uint8_t *dst_4   = dst_3 + bgra_width;
00308 
00309 #ifdef MMX
00310 
00311     if (!(width % 8))
00312     {
00313         // pack first 2 and last 2 rows
00314         for (int col = 0; col < width; col += 8)
00315         {
00316             mmx_pack_chroma(u1, v1);
00317             mmx_pack_easy(dst_1, ypt_1);
00318             mmx_pack_chroma(u2, v2);
00319             mmx_pack_easy(dst_2, ypt_2);
00320             mmx_pack_chroma(u3, v3);
00321             mmx_pack_easy(dst_3, ypt_3);
00322             mmx_pack_chroma(u4, v4);
00323             mmx_pack_easy(dst_4, ypt_4);
00324 
00325             dst_1 += 32; dst_2 += 32; dst_3 += 32; dst_4 += 32;
00326             ypt_1 += 8; ypt_2 += 8; ypt_3 += 8; ypt_4 += 8;
00327             u1   += 4; v1   += 4; u2   += 4; v2   += 4;
00328             u3   += 4; v3   += 4; u4   += 4; v4   += 4;
00329         }
00330 
00331         ypt_1 += ywrap; ypt_2 += ywrap;
00332         dst_1 += bgra_width; dst_2 += bgra_width;
00333 
00334         ypt_3 = ypt_2 + pitches[0];
00335         ypt_4 = ypt_3 + pitches[0];
00336         dst_3 = dst_2 + bgra_width;
00337         dst_4 = dst_3 + bgra_width;
00338 
00339         ywrap = (pitches[0] << 2) - width;
00340 
00341         u1 = (uint8_t *)source + offsets[1];
00342         v1 = (uint8_t *)source + offsets[2];
00343         u2 = u1 + pitches[1]; v2 = v1 + pitches[2];
00344         u3 = u2 + pitches[1]; v3 = v2 + pitches[2];
00345         u4 = u3 + pitches[1]; v4 = v3 + pitches[2];
00346 
00347         height -= 4;
00348 
00349         // pack main body
00350         for (int row = 0 ; row < height; row += 4)
00351         {
00352             for (int col = 0; col < width; col += 8)
00353             {
00354                 mmx_interp_start(u1, u3); mmx_interp_endu();
00355                 mmx_interp_start(v1, v3); mmx_interp_endv();
00356                 mmx_pack_easy(dst_1, ypt_1);
00357 
00358                 mmx_interp_start(u2, u4); mmx_interp_endu();
00359                 mmx_interp_start(v2, v4); mmx_interp_endv();
00360                 mmx_pack_easy(dst_2, ypt_2);
00361 
00362                 mmx_interp_start(u3, u1); mmx_interp_endu();
00363                 mmx_interp_start(v3, v1); mmx_interp_endv();
00364                 mmx_pack_easy(dst_3, ypt_3);
00365 
00366                 mmx_interp_start(u4, u2); mmx_interp_endu();
00367                 mmx_interp_start(v4, v2); mmx_interp_endv();
00368                 mmx_pack_easy(dst_4, ypt_4);
00369 
00370                 dst_1 += 32; dst_2 += 32; dst_3 += 32; dst_4 += 32;
00371                 ypt_1 += 8; ypt_2 += 8; ypt_3 += 8; ypt_4 += 8;
00372                 u1   += 4; u2   += 4; u3   += 4; u4   += 4;
00373                 v1   += 4; v2   += 4; v3   += 4; v4   += 4;
00374             }
00375 
00376             ypt_1 += ywrap; ypt_2 += ywrap; ypt_3 += ywrap; ypt_4 += ywrap;
00377             dst_1 += dwrap; dst_2 += dwrap; dst_3 += dwrap; dst_4 += dwrap;
00378             u1 += uwrap; v1 += vwrap; u2 += uwrap; v2 += vwrap;
00379             u3 += uwrap; v3 += vwrap; u4 += uwrap;v4 += vwrap;
00380         }
00381 
00382         emms();
00383 
00384         return;
00385     }
00386 #endif //MMX
00387 
00388     // pack first 2 and last 2 rows
00389     for (int col = 0; col < width; col += 2)
00390     {
00391         *(dst_1++) = *v1; *(dst_2++) = *v2; *(dst_3++) = *v3; *(dst_4++) = *v4;
00392         *(dst_1++) = 255; *(dst_2++) = 255; *(dst_3++) = 255; *(dst_4++) = 255;
00393         *(dst_1++) = *u1; *(dst_2++) = *u2; *(dst_3++) = *u3; *(dst_4++) = *u4;
00394         *(dst_1++) = *(ypt_1++); *(dst_2++) = *(ypt_2++);
00395         *(dst_3++) = *(ypt_3++); *(dst_4++) = *(ypt_4++);
00396 
00397         *(dst_1++) = *(v1++); *(dst_2++) = *(v2++);
00398         *(dst_3++) = *(v3++); *(dst_4++) = *(v4++);
00399         *(dst_1++) = 255; *(dst_2++) = 255; *(dst_3++) = 255; *(dst_4++) = 255;
00400         *(dst_1++) = *(u1++); *(dst_2++) = *(u2++);
00401         *(dst_3++) = *(u3++); *(dst_4++) = *(u4++);
00402         *(dst_1++) = *(ypt_1++); *(dst_2++) = *(ypt_2++);
00403         *(dst_3++) = *(ypt_3++); *(dst_4++) = *(ypt_4++);
00404     }
00405 
00406     ypt_1 += ywrap; ypt_2 += ywrap;
00407     dst_1 += bgra_width; dst_2 += bgra_width;
00408 
00409     ypt_3 = ypt_2 + pitches[0];
00410     ypt_4 = ypt_3 + pitches[0];
00411     dst_3 = dst_2 + bgra_width;
00412     dst_4 = dst_3 + bgra_width;
00413 
00414     ywrap = (pitches[0] << 2) - width;
00415 
00416     u1 = (uint8_t *)source + offsets[1];
00417     v1 = (uint8_t *)source + offsets[2];
00418     u2 = u1 + pitches[1]; v2 = v1 + pitches[2];
00419     u3 = u2 + pitches[1]; v3 = v2 + pitches[2];
00420     u4 = u3 + pitches[1]; v4 = v3 + pitches[2];
00421 
00422     height -= 4;
00423 
00424     uint8_t v[4], u[4];
00425 
00426     // pack main body
00427     for (int row = 0; row < height; row += 4)
00428     {
00429         for (int col = 0; col < width; col += 2)
00430         {
00431             c_interp(v, v1, v2, v3, v4);
00432             c_interp(u, u1, u2, u3, u4);
00433 
00434             *(dst_1++) = v[0]; *(dst_2++) = v[1];
00435             *(dst_3++) = v[2]; *(dst_4++) = v[3];
00436             *(dst_1++) = 255; *(dst_2++) = 255; *(dst_3++) = 255; *(dst_4++) = 255;
00437             *(dst_1++) = u[0]; *(dst_2++) = u[1];
00438             *(dst_3++) = u[2]; *(dst_4++) = u[3];
00439             *(dst_1++) = *(ypt_1++); *(dst_2++) = *(ypt_2++);
00440             *(dst_3++) = *(ypt_3++); *(dst_4++) = *(ypt_4++);
00441 
00442             *(dst_1++) = v[0]; *(dst_2++) = v[1];
00443             *(dst_3++) = v[2]; *(dst_4++) = v[3];
00444             *(dst_1++) = 255; *(dst_2++) = 255; *(dst_3++) = 255; *(dst_4++) = 255;
00445             *(dst_1++) = u[0]; *(dst_2++) = u[1];
00446             *(dst_3++) = u[2]; *(dst_4++) = u[3];
00447             *(dst_1++) = *(ypt_1++); *(dst_2++) = *(ypt_2++);
00448             *(dst_3++) = *(ypt_3++); *(dst_4++) = *(ypt_4++);
00449 
00450             v1++; v2++; v3++; v4++;
00451             u1++; u2++; u3++; u4++;
00452         }
00453         ypt_1 += ywrap; ypt_2 += ywrap; ypt_3 += ywrap; ypt_4 += ywrap;
00454         u1 += uwrap; u2 += uwrap; u3 += uwrap; u4 += uwrap;
00455         v1 += vwrap; v2 += vwrap; v3 += vwrap; v4 += vwrap;
00456         dst_1 += dwrap; dst_2 += dwrap; dst_3 += dwrap; dst_4 += dwrap;
00457     }
00458 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends