|
MythTV
0.26-pre
|
00001 // -*- Mode: c++ -*- 00002 00003 #include <stdint.h> 00004 #include <QSize> 00005 #include "compat.h" 00006 #include "util-opengl.h" 00007 00008 #ifdef MMX 00009 extern "C" { 00010 #include "ffmpeg-mmx.h" 00011 } 00012 00013 static mmx_t mmx_1s = {0xffffffffffffffffLL}; 00014 00015 static inline void mmx_pack_alpha1s_high(uint8_t *y1, uint8_t *y2) 00016 { 00017 movq_m2r (mmx_1s, mm4); 00018 punpckhbw_m2r (*y1, mm4); 00019 movq_m2r (mmx_1s, mm7); 00020 punpckhbw_m2r (*y2, mm7); 00021 } 00022 00023 static inline void mmx_pack_alpha1s_low(uint8_t *y1, uint8_t *y2) 00024 { 00025 movq_m2r (mmx_1s, mm4); 00026 punpcklbw_m2r (*y1, mm4); 00027 movq_m2r (mmx_1s, mm7); 00028 punpcklbw_m2r (*y2, mm7); 00029 } 00030 00031 static inline void mmx_pack_middle(uint8_t *dest1, uint8_t *dest2) 00032 { 00033 movq_r2r (mm3, mm5); 00034 punpcklbw_r2r (mm2, mm5); 00035 00036 movq_r2r (mm5, mm6); 00037 punpcklbw_r2r (mm4, mm6); 00038 movq_r2m (mm6, *(dest1)); 00039 00040 movq_r2r (mm5, mm6); 00041 punpckhbw_r2r (mm4, mm6); 00042 movq_r2m (mm6, *(dest1 + 8)); 00043 00044 movq_r2r (mm5, mm6); 00045 punpcklbw_r2r (mm7, mm6); 00046 movq_r2m (mm6, *(dest2)); 00047 00048 movq_r2r (mm5, mm6); 00049 punpckhbw_r2r (mm7, mm6); 00050 movq_r2m (mm6, *(dest2 + 8)); 00051 } 00052 00053 static inline void mmx_pack_end(uint8_t *dest1, uint8_t *dest2) 00054 { 00055 punpckhbw_r2r (mm2, mm3); 00056 00057 movq_r2r (mm3, mm6); 00058 punpcklbw_r2r (mm4, mm6); 00059 movq_r2m (mm6, *(dest1 + 16)); 00060 00061 movq_r2r (mm3, mm6); 00062 punpckhbw_r2r (mm4, mm6); 00063 movq_r2m (mm6, *(dest1 + 24)); 00064 00065 movq_r2r (mm3, mm6); 00066 punpcklbw_r2r (mm7, mm6); 00067 movq_r2m (mm6, *(dest2 + 16)); 00068 00069 punpckhbw_r2r (mm7, mm3); 00070 movq_r2m (mm3, *(dest2 + 24)); 00071 } 00072 00073 static inline void mmx_pack_easy(uint8_t *dest, uint8_t *y) 00074 { 00075 movq_m2r (mmx_1s, mm4); 00076 punpcklbw_m2r (*y, mm4); 00077 00078 movq_r2r (mm3, mm5); 00079 punpcklbw_r2r (mm2, mm5); 00080 00081 movq_r2r (mm5, mm6); 00082 punpcklbw_r2r (mm4, mm6); 00083 movq_r2m (mm6, *(dest)); 00084 00085 movq_r2r (mm5, mm6); 00086 punpckhbw_r2r (mm4, mm6); 00087 movq_r2m (mm6, *(dest + 8)); 00088 00089 movq_m2r (mmx_1s, mm4); 00090 punpckhbw_m2r (*y, mm4); 00091 00092 punpckhbw_r2r (mm2, mm3); 00093 00094 movq_r2r (mm3, mm6); 00095 punpcklbw_r2r (mm4, mm6); 00096 movq_r2m (mm6, *(dest + 16)); 00097 00098 punpckhbw_r2r (mm4, mm3); 00099 movq_r2m (mm3, *(dest + 24)); 00100 } 00101 00102 static mmx_t mmx_0s = {0x0000000000000000LL}; 00103 static mmx_t round = {0x0002000200020002LL}; 00104 00105 static inline void mmx_interp_start(uint8_t *left, uint8_t *right) 00106 { 00107 movd_m2r (*left, mm5); 00108 punpcklbw_m2r (mmx_0s, mm5); 00109 00110 movq_r2r (mm5, mm4); 00111 paddw_r2r (mm4, mm4); 00112 paddw_r2r (mm5, mm4); 00113 paddw_m2r (round, mm4); 00114 00115 movd_m2r (*right, mm5); 00116 punpcklbw_m2r (mmx_0s, mm5); 00117 paddw_r2r (mm5, mm4); 00118 00119 psrlw_i2r (2, mm4); 00120 } 00121 00122 static inline void mmx_interp_endu(void) 00123 { 00124 movq_r2r (mm4, mm2); 00125 psllw_i2r (8, mm2); 00126 paddb_r2r (mm4, mm2); 00127 } 00128 00129 static inline void mmx_interp_endv(void) 00130 { 00131 movq_r2r (mm4, mm3); 00132 psllw_i2r (8, mm3); 00133 paddb_r2r (mm4, mm3); 00134 } 00135 00136 static inline void mmx_pack_chroma(uint8_t *u, uint8_t *v) 00137 { 00138 movd_m2r (*u, mm2); 00139 movd_m2r (*v, mm3); 00140 punpcklbw_r2r (mm2, mm2); 00141 punpcklbw_r2r (mm3, mm3); 00142 } 00143 #endif // MMX 00144 00145 static inline void c_interp(uint8_t *dest, uint8_t *a, uint8_t *b, 00146 uint8_t *c, uint8_t *d) 00147 { 00148 unsigned int tmp = (unsigned int) *a; 00149 tmp *= 3; 00150 tmp += 2; 00151 tmp += (unsigned int) *c; 00152 dest[0] = (uint8_t) (tmp >> 2); 00153 00154 tmp = (unsigned int) *b; 00155 tmp *= 3; 00156 tmp += 2; 00157 tmp += (unsigned int) *d; 00158 dest[1] = (uint8_t) (tmp >> 2); 00159 00160 tmp = (unsigned int) *c; 00161 tmp *= 3; 00162 tmp += 2; 00163 tmp += (unsigned int) *a; 00164 dest[2] = (uint8_t) (tmp >> 2); 00165 00166 tmp = (unsigned int) *d; 00167 tmp *= 3; 00168 tmp += 2; 00169 tmp += (unsigned int) *b; 00170 dest[3] = (uint8_t) (tmp >> 2); 00171 } 00172 00173 void pack_yv12progressive(const unsigned char *source, 00174 const unsigned char *dest, 00175 const int *offsets, const int *pitches, 00176 const QSize &size) 00177 { 00178 const int width = size.width(); 00179 const int height = size.height(); 00180 00181 if (height % 2 || width % 2) 00182 return; 00183 00184 #ifdef MMX 00185 int residual = width % 8; 00186 int mmx_width = width - residual; 00187 int c_start_w = mmx_width; 00188 #else 00189 int residual = 0; 00190 int c_start_w = 0; 00191 #endif 00192 00193 uint bgra_width = width << 2; 00194 uint chroma_width = width >> 1; 00195 00196 uint y_extra = (pitches[0] << 1) - width + residual; 00197 uint u_extra = pitches[1] - chroma_width + (residual >> 1); 00198 uint v_extra = pitches[2] - chroma_width + (residual >> 1); 00199 uint d_extra = bgra_width + (residual << 2); 00200 00201 uint8_t *ypt_1 = (uint8_t *)source + offsets[0]; 00202 uint8_t *ypt_2 = ypt_1 + pitches[0]; 00203 uint8_t *upt = (uint8_t *)source + offsets[1]; 00204 uint8_t *vpt = (uint8_t *)source + offsets[2]; 00205 uint8_t *dst_1 = (uint8_t *) dest; 00206 uint8_t *dst_2 = dst_1 + bgra_width; 00207 00208 #ifdef MMX 00209 for (int row = 0; row < height; row += 2) 00210 { 00211 for (int col = 0; col < mmx_width; col += 8) 00212 { 00213 mmx_pack_chroma(upt, vpt); 00214 mmx_pack_alpha1s_low(ypt_1, ypt_2); 00215 mmx_pack_middle(dst_1, dst_2); 00216 mmx_pack_alpha1s_high(ypt_1, ypt_2); 00217 mmx_pack_end(dst_1, dst_2); 00218 00219 dst_1 += 32; dst_2 += 32; 00220 ypt_1 += 8; ypt_2 += 8; 00221 upt += 4; vpt += 4; 00222 00223 } 00224 ypt_1 += y_extra; ypt_2 += y_extra; 00225 upt += u_extra; vpt += v_extra; 00226 dst_1 += d_extra; dst_2 += d_extra; 00227 } 00228 00229 emms(); 00230 00231 if (residual) 00232 { 00233 y_extra = (pitches[0] << 1) - width + mmx_width; 00234 u_extra = pitches[1] - chroma_width + (mmx_width >> 1); 00235 v_extra = pitches[2] - chroma_width + (mmx_width >> 1); 00236 d_extra = bgra_width + (mmx_width << 2); 00237 00238 ypt_1 = (uint8_t *)source + offsets[0] + mmx_width; 00239 ypt_2 = ypt_1 + pitches[0]; 00240 upt = (uint8_t *)source + offsets[1] + (mmx_width>>1); 00241 vpt = (uint8_t *)source + offsets[2] + (mmx_width>>1); 00242 dst_1 = (uint8_t *) dest + (mmx_width << 2); 00243 dst_2 = dst_1 + bgra_width; 00244 } 00245 else 00246 { 00247 return; 00248 } 00249 #endif //MMX 00250 00251 for (int row = 0; row < height; row += 2) 00252 { 00253 for (int col = c_start_w; col < width; col += 2) 00254 { 00255 *(dst_1++) = *vpt; *(dst_2++) = *vpt; 00256 *(dst_1++) = 255; *(dst_2++) = 255; 00257 *(dst_1++) = *upt; *(dst_2++) = *upt; 00258 *(dst_1++) = *(ypt_1++); 00259 *(dst_2++) = *(ypt_2++); 00260 00261 *(dst_1++) = *vpt; *(dst_2++) = *(vpt++); 00262 *(dst_1++) = 255; *(dst_2++) = 255; 00263 *(dst_1++) = *upt; *(dst_2++) = *(upt++); 00264 *(dst_1++) = *(ypt_1++); 00265 *(dst_2++) = *(ypt_2++); 00266 } 00267 ypt_1 += y_extra; ypt_2 += y_extra; 00268 upt += u_extra; vpt += v_extra; 00269 dst_1 += d_extra; dst_2 += d_extra; 00270 } 00271 } 00272 00273 void pack_yv12interlaced(const unsigned char *source, 00274 const unsigned char *dest, 00275 const int *offsets, 00276 const int *pitches, 00277 const QSize &size) 00278 { 00279 int width = size.width(); 00280 int height = size.height(); 00281 00282 if (height % 4 || width % 2) 00283 return; 00284 00285 uint bgra_width = width << 2; 00286 uint dwrap = (bgra_width << 2) - bgra_width; 00287 uint chroma_width = width >> 1; 00288 uint ywrap = (pitches[0] << 1) - width; 00289 uint uwrap = (pitches[1] << 1) - chroma_width; 00290 uint vwrap = (pitches[2] << 1) - chroma_width; 00291 00292 uint8_t *ypt_1 = (uint8_t *)source + offsets[0]; 00293 uint8_t *ypt_2 = ypt_1 + pitches[0]; 00294 uint8_t *ypt_3 = ypt_1 + (pitches[0] * (height - 2)); 00295 uint8_t *ypt_4 = ypt_3 + pitches[0]; 00296 00297 uint8_t *u1 = (uint8_t *)source + offsets[1]; 00298 uint8_t *v1 = (uint8_t *)source + offsets[2]; 00299 uint8_t *u2 = u1 + pitches[1]; uint8_t *v2 = v1 + pitches[2]; 00300 uint8_t *u3 = u1 + (pitches[1] * ((height - 4) >> 1)); 00301 uint8_t *v3 = v1 + (pitches[2] * ((height - 4) >> 1)); 00302 uint8_t *u4 = u3 + pitches[1]; uint8_t *v4 = v3 + pitches[2]; 00303 00304 uint8_t *dst_1 = (uint8_t *) dest; 00305 uint8_t *dst_2 = dst_1 + bgra_width; 00306 uint8_t *dst_3 = dst_1 + (bgra_width * (height - 2)); 00307 uint8_t *dst_4 = dst_3 + bgra_width; 00308 00309 #ifdef MMX 00310 00311 if (!(width % 8)) 00312 { 00313 // pack first 2 and last 2 rows 00314 for (int col = 0; col < width; col += 8) 00315 { 00316 mmx_pack_chroma(u1, v1); 00317 mmx_pack_easy(dst_1, ypt_1); 00318 mmx_pack_chroma(u2, v2); 00319 mmx_pack_easy(dst_2, ypt_2); 00320 mmx_pack_chroma(u3, v3); 00321 mmx_pack_easy(dst_3, ypt_3); 00322 mmx_pack_chroma(u4, v4); 00323 mmx_pack_easy(dst_4, ypt_4); 00324 00325 dst_1 += 32; dst_2 += 32; dst_3 += 32; dst_4 += 32; 00326 ypt_1 += 8; ypt_2 += 8; ypt_3 += 8; ypt_4 += 8; 00327 u1 += 4; v1 += 4; u2 += 4; v2 += 4; 00328 u3 += 4; v3 += 4; u4 += 4; v4 += 4; 00329 } 00330 00331 ypt_1 += ywrap; ypt_2 += ywrap; 00332 dst_1 += bgra_width; dst_2 += bgra_width; 00333 00334 ypt_3 = ypt_2 + pitches[0]; 00335 ypt_4 = ypt_3 + pitches[0]; 00336 dst_3 = dst_2 + bgra_width; 00337 dst_4 = dst_3 + bgra_width; 00338 00339 ywrap = (pitches[0] << 2) - width; 00340 00341 u1 = (uint8_t *)source + offsets[1]; 00342 v1 = (uint8_t *)source + offsets[2]; 00343 u2 = u1 + pitches[1]; v2 = v1 + pitches[2]; 00344 u3 = u2 + pitches[1]; v3 = v2 + pitches[2]; 00345 u4 = u3 + pitches[1]; v4 = v3 + pitches[2]; 00346 00347 height -= 4; 00348 00349 // pack main body 00350 for (int row = 0 ; row < height; row += 4) 00351 { 00352 for (int col = 0; col < width; col += 8) 00353 { 00354 mmx_interp_start(u1, u3); mmx_interp_endu(); 00355 mmx_interp_start(v1, v3); mmx_interp_endv(); 00356 mmx_pack_easy(dst_1, ypt_1); 00357 00358 mmx_interp_start(u2, u4); mmx_interp_endu(); 00359 mmx_interp_start(v2, v4); mmx_interp_endv(); 00360 mmx_pack_easy(dst_2, ypt_2); 00361 00362 mmx_interp_start(u3, u1); mmx_interp_endu(); 00363 mmx_interp_start(v3, v1); mmx_interp_endv(); 00364 mmx_pack_easy(dst_3, ypt_3); 00365 00366 mmx_interp_start(u4, u2); mmx_interp_endu(); 00367 mmx_interp_start(v4, v2); mmx_interp_endv(); 00368 mmx_pack_easy(dst_4, ypt_4); 00369 00370 dst_1 += 32; dst_2 += 32; dst_3 += 32; dst_4 += 32; 00371 ypt_1 += 8; ypt_2 += 8; ypt_3 += 8; ypt_4 += 8; 00372 u1 += 4; u2 += 4; u3 += 4; u4 += 4; 00373 v1 += 4; v2 += 4; v3 += 4; v4 += 4; 00374 } 00375 00376 ypt_1 += ywrap; ypt_2 += ywrap; ypt_3 += ywrap; ypt_4 += ywrap; 00377 dst_1 += dwrap; dst_2 += dwrap; dst_3 += dwrap; dst_4 += dwrap; 00378 u1 += uwrap; v1 += vwrap; u2 += uwrap; v2 += vwrap; 00379 u3 += uwrap; v3 += vwrap; u4 += uwrap;v4 += vwrap; 00380 } 00381 00382 emms(); 00383 00384 return; 00385 } 00386 #endif //MMX 00387 00388 // pack first 2 and last 2 rows 00389 for (int col = 0; col < width; col += 2) 00390 { 00391 *(dst_1++) = *v1; *(dst_2++) = *v2; *(dst_3++) = *v3; *(dst_4++) = *v4; 00392 *(dst_1++) = 255; *(dst_2++) = 255; *(dst_3++) = 255; *(dst_4++) = 255; 00393 *(dst_1++) = *u1; *(dst_2++) = *u2; *(dst_3++) = *u3; *(dst_4++) = *u4; 00394 *(dst_1++) = *(ypt_1++); *(dst_2++) = *(ypt_2++); 00395 *(dst_3++) = *(ypt_3++); *(dst_4++) = *(ypt_4++); 00396 00397 *(dst_1++) = *(v1++); *(dst_2++) = *(v2++); 00398 *(dst_3++) = *(v3++); *(dst_4++) = *(v4++); 00399 *(dst_1++) = 255; *(dst_2++) = 255; *(dst_3++) = 255; *(dst_4++) = 255; 00400 *(dst_1++) = *(u1++); *(dst_2++) = *(u2++); 00401 *(dst_3++) = *(u3++); *(dst_4++) = *(u4++); 00402 *(dst_1++) = *(ypt_1++); *(dst_2++) = *(ypt_2++); 00403 *(dst_3++) = *(ypt_3++); *(dst_4++) = *(ypt_4++); 00404 } 00405 00406 ypt_1 += ywrap; ypt_2 += ywrap; 00407 dst_1 += bgra_width; dst_2 += bgra_width; 00408 00409 ypt_3 = ypt_2 + pitches[0]; 00410 ypt_4 = ypt_3 + pitches[0]; 00411 dst_3 = dst_2 + bgra_width; 00412 dst_4 = dst_3 + bgra_width; 00413 00414 ywrap = (pitches[0] << 2) - width; 00415 00416 u1 = (uint8_t *)source + offsets[1]; 00417 v1 = (uint8_t *)source + offsets[2]; 00418 u2 = u1 + pitches[1]; v2 = v1 + pitches[2]; 00419 u3 = u2 + pitches[1]; v3 = v2 + pitches[2]; 00420 u4 = u3 + pitches[1]; v4 = v3 + pitches[2]; 00421 00422 height -= 4; 00423 00424 uint8_t v[4], u[4]; 00425 00426 // pack main body 00427 for (int row = 0; row < height; row += 4) 00428 { 00429 for (int col = 0; col < width; col += 2) 00430 { 00431 c_interp(v, v1, v2, v3, v4); 00432 c_interp(u, u1, u2, u3, u4); 00433 00434 *(dst_1++) = v[0]; *(dst_2++) = v[1]; 00435 *(dst_3++) = v[2]; *(dst_4++) = v[3]; 00436 *(dst_1++) = 255; *(dst_2++) = 255; *(dst_3++) = 255; *(dst_4++) = 255; 00437 *(dst_1++) = u[0]; *(dst_2++) = u[1]; 00438 *(dst_3++) = u[2]; *(dst_4++) = u[3]; 00439 *(dst_1++) = *(ypt_1++); *(dst_2++) = *(ypt_2++); 00440 *(dst_3++) = *(ypt_3++); *(dst_4++) = *(ypt_4++); 00441 00442 *(dst_1++) = v[0]; *(dst_2++) = v[1]; 00443 *(dst_3++) = v[2]; *(dst_4++) = v[3]; 00444 *(dst_1++) = 255; *(dst_2++) = 255; *(dst_3++) = 255; *(dst_4++) = 255; 00445 *(dst_1++) = u[0]; *(dst_2++) = u[1]; 00446 *(dst_3++) = u[2]; *(dst_4++) = u[3]; 00447 *(dst_1++) = *(ypt_1++); *(dst_2++) = *(ypt_2++); 00448 *(dst_3++) = *(ypt_3++); *(dst_4++) = *(ypt_4++); 00449 00450 v1++; v2++; v3++; v4++; 00451 u1++; u2++; u3++; u4++; 00452 } 00453 ypt_1 += ywrap; ypt_2 += ywrap; ypt_3 += ywrap; ypt_4 += ywrap; 00454 u1 += uwrap; u2 += uwrap; u3 += uwrap; u4 += uwrap; 00455 v1 += vwrap; v2 += vwrap; v3 += vwrap; v4 += vwrap; 00456 dst_1 += dwrap; dst_2 += dwrap; dst_3 += dwrap; dst_4 += dwrap; 00457 } 00458 }
1.7.6.1