|
MythTV
0.26-pre
|
00001 // a linear blending deinterlacer yoinked from the mplayer sources. 00002 00003 #include <stdlib.h> 00004 #include <stdio.h> 00005 00006 #include "mythconfig.h" 00007 #if HAVE_STDINT_H 00008 #include <stdint.h> 00009 #endif 00010 00011 #if HAVE_MMX || HAVE_AMD3DNOW 00012 #include "ffmpeg-mmx.h" 00013 #endif 00014 00015 #include "../mm_arch.h" 00016 #if HAVE_ALTIVEC_H 00017 #include <altivec.h> 00018 #endif 00019 00020 #define PAVGB(a,b) "pavgb " #a ", " #b " \n\t" 00021 #define PAVGUSB(a,b) "pavgusb " #a ", " #b " \n\t" 00022 00023 #include "filter.h" 00024 #include "frame.h" 00025 00026 typedef struct LBFilter 00027 { 00028 VideoFilter vf; 00029 00030 /* functions and variables below here considered "private" */ 00031 int mm_flags; 00032 void (*subfilter)(unsigned char *, int); 00033 TF_STRUCT; 00034 } LBFilter; 00035 00036 void linearBlend(unsigned char *src, int stride); 00037 void linearBlendMMX(unsigned char *src, int stride); 00038 void linearBlend3DNow(unsigned char *src, int stride); 00039 int linearBlendFilterAltivec(VideoFilter *f, VideoFrame *frame, int field); 00040 00041 #if HAVE_ALTIVEC 00042 inline void linearBlendAltivec(unsigned char *src, int stride); 00043 #endif 00044 00045 #ifdef MMX 00046 00047 void linearBlendMMX(unsigned char *src, int stride) 00048 { 00049 // src += 4 * stride; 00050 __asm__ volatile( 00051 "lea (%0, %1), %%"REG_a" \n\t" 00052 "lea (%%"REG_a", %1, 4), %%"REG_d" \n\t" 00053 00054 "movq (%0), %%mm0 \n\t" // L0 00055 "movq (%%"REG_a", %1), %%mm1 \n\t" // L2 00056 PAVGB(%%mm1, %%mm0) // L0+L2 00057 "movq (%%"REG_a"), %%mm2 \n\t" // L1 00058 PAVGB(%%mm2, %%mm0) 00059 "movq %%mm0, (%0) \n\t" 00060 "movq (%%"REG_a", %1, 2), %%mm0 \n\t" // L3 00061 PAVGB(%%mm0, %%mm2) // L1+L3 00062 PAVGB(%%mm1, %%mm2) // 2L2 + L1 + L3 00063 "movq %%mm2, (%%"REG_a") \n\t" 00064 "movq (%0, %1, 4), %%mm2 \n\t" // L4 00065 PAVGB(%%mm2, %%mm1) // L2+L4 00066 PAVGB(%%mm0, %%mm1) // 2L3 + L2 + L4 00067 "movq %%mm1, (%%"REG_a", %1) \n\t" 00068 "movq (%%"REG_d"), %%mm1 \n\t" // L5 00069 PAVGB(%%mm1, %%mm0) // L3+L5 00070 PAVGB(%%mm2, %%mm0) // 2L4 + L3 + L5 00071 "movq %%mm0, (%%"REG_a", %1, 2) \n\t" 00072 "movq (%%"REG_d", %1), %%mm0 \n\t" // L6 00073 PAVGB(%%mm0, %%mm2) // L4+L6 00074 PAVGB(%%mm1, %%mm2) // 2L5 + L4 + L6 00075 "movq %%mm2, (%0, %1, 4) \n\t" 00076 "movq (%%"REG_d", %1, 2), %%mm2 \n\t" // L7 00077 PAVGB(%%mm2, %%mm1) // L5+L7 00078 PAVGB(%%mm0, %%mm1) // 2L6 + L5 + L7 00079 "movq %%mm1, (%%"REG_d") \n\t" 00080 "movq (%0, %1, 8), %%mm1 \n\t" // L8 00081 PAVGB(%%mm1, %%mm0) // L6+L8 00082 PAVGB(%%mm2, %%mm0) // 2L7 + L6 + L8 00083 "movq %%mm0, (%%"REG_d", %1) \n\t" 00084 "movq (%%"REG_d", %1, 4), %%mm0 \n\t" // L9 00085 PAVGB(%%mm0, %%mm2) // L7+L9 00086 PAVGB(%%mm1, %%mm2) // 2L8 + L7 + L9 00087 "movq %%mm2, (%%"REG_d", %1, 2) \n\t" 00088 00089 : : "r" (src), "r" ((long)stride) 00090 : "%"REG_a, "%"REG_d 00091 ); 00092 } 00093 00094 void linearBlend3DNow(unsigned char *src, int stride) 00095 { 00096 // src += 4 * stride; 00097 __asm__ volatile( 00098 "lea (%0, %1), %%"REG_a" \n\t" 00099 "lea (%%"REG_a", %1, 4), %%"REG_d" \n\t" 00100 00101 "movq (%0), %%mm0 \n\t" // L0 00102 "movq (%%"REG_a", %1), %%mm1 \n\t" // L2 00103 PAVGUSB(%%mm1, %%mm0) // L0+L2 00104 "movq (%%"REG_a"), %%mm2 \n\t" // L1 00105 PAVGUSB(%%mm2, %%mm0) 00106 "movq %%mm0, (%0) \n\t" 00107 "movq (%%"REG_a", %1, 2), %%mm0 \n\t" // L3 00108 PAVGUSB(%%mm0, %%mm2) // L1+L3 00109 PAVGUSB(%%mm1, %%mm2) // 2L2 + L1 + L3 00110 "movq %%mm2, (%%"REG_a") \n\t" 00111 "movq (%0, %1, 4), %%mm2 \n\t" // L4 00112 PAVGUSB(%%mm2, %%mm1) // L2+L4 00113 PAVGUSB(%%mm0, %%mm1) // 2L3 + L2 + L4 00114 "movq %%mm1, (%%"REG_a", %1) \n\t" 00115 "movq (%%"REG_d"), %%mm1 \n\t" // L5 00116 PAVGUSB(%%mm1, %%mm0) // L3+L5 00117 PAVGUSB(%%mm2, %%mm0) // 2L4 + L3 + L5 00118 "movq %%mm0, (%%"REG_a", %1, 2) \n\t" 00119 "movq (%%"REG_d", %1), %%mm0 \n\t" // L6 00120 PAVGUSB(%%mm0, %%mm2) // L4+L6 00121 PAVGUSB(%%mm1, %%mm2) // 2L5 + L4 + L6 00122 "movq %%mm2, (%0, %1, 4) \n\t" 00123 "movq (%%"REG_d", %1, 2), %%mm2 \n\t" // L7 00124 PAVGUSB(%%mm2, %%mm1) // L5+L7 00125 PAVGUSB(%%mm0, %%mm1) // 2L6 + L5 + L7 00126 "movq %%mm1, (%%"REG_d") \n\t" 00127 "movq (%0, %1, 8), %%mm1 \n\t" // L8 00128 PAVGUSB(%%mm1, %%mm0) // L6+L8 00129 PAVGUSB(%%mm2, %%mm0) // 2L7 + L6 + L8 00130 "movq %%mm0, (%%"REG_d", %1) \n\t" 00131 "movq (%%"REG_d", %1, 4), %%mm0 \n\t" // L9 00132 PAVGUSB(%%mm0, %%mm2) // L7+L9 00133 PAVGUSB(%%mm1, %%mm2) // 2L8 + L7 + L9 00134 "movq %%mm2, (%%"REG_d", %1, 2) \n\t" 00135 00136 : : "r" (src), "r" ((long)stride) 00137 : "%"REG_a, "%"REG_d 00138 ); 00139 } 00140 00141 #endif 00142 00143 #if HAVE_ALTIVEC 00144 00145 inline void linearBlendAltivec(unsigned char *src, int stride) 00146 { 00147 vector unsigned char a, b, c; 00148 int i; 00149 00150 b = vec_ld(0, src); 00151 c = vec_ld(stride, src); 00152 00153 for (i = 2; i < 10; i++) 00154 { 00155 a = b; 00156 b = c; 00157 c = vec_ld(stride * i, src); 00158 vec_st(vec_avg(vec_avg(a, c), b), stride * (i - 2), src); 00159 } 00160 } 00161 00162 int linearBlendFilterAltivec(VideoFilter *f, VideoFrame *frame, int field) 00163 { 00164 (void)field; 00165 (void)f; 00166 int height = frame->height; 00167 unsigned char *yptr = frame->buf + frame->offsets[0]; 00168 int stride = frame->pitches[0]; 00169 int ymax = height - 8; 00170 int x,y; 00171 unsigned char *src = 0; 00172 unsigned char *uoff = frame->buf + frame->offsets[1]; 00173 unsigned char *voff = frame->buf + frame->offsets[2]; 00174 TF_VARS; 00175 00176 TF_START; 00177 00178 if ((stride & 0xf) || ((unsigned int)yptr & 0xf)) 00179 { 00180 for (y = 0; y < ymax; y += 8) 00181 { 00182 for (x = 0; x < stride; x += 8) 00183 { 00184 src = yptr + x + y * stride; 00185 linearBlend(src, stride); 00186 } 00187 } 00188 } 00189 else 00190 { 00191 src = yptr; 00192 for (y = 0; y < ymax; y += 8) 00193 { 00194 for (x = 0; x < stride; x += 16) 00195 { 00196 linearBlendAltivec(src, stride); 00197 src += 16; 00198 } 00199 src += stride * 7; 00200 } 00201 } 00202 00203 stride = frame->pitches[1]; 00204 ymax = height / 2 - 8; 00205 00206 if ((stride & 0xf) || ((unsigned int)uoff & 0xf)) 00207 { 00208 for (y = 0; y < ymax; y += 8) 00209 { 00210 for (x = 0; x < stride; x += 8) 00211 { 00212 src = uoff + x + y * stride; 00213 linearBlend(src, stride); 00214 00215 src = voff + x + y * stride; 00216 linearBlend(src, stride); 00217 } 00218 } 00219 } 00220 else 00221 { 00222 for (y = 0; y < ymax; y += 8) 00223 { 00224 for (x = 0; x < stride; x += 16) 00225 { 00226 linearBlendAltivec(src, stride); 00227 uoff += 16; 00228 00229 linearBlendAltivec(src, stride); 00230 voff += 16; 00231 } 00232 uoff += stride * 7; 00233 voff += stride * 7; 00234 } 00235 } 00236 00237 TF_END(vf, "LinearBlendAltivec: "); 00238 return 0; 00239 } 00240 00241 #endif /* HAVE_ALTIVEC */ 00242 00243 void linearBlend(unsigned char *src, int stride) 00244 { 00245 int a, b, c, x; 00246 00247 for (x = 0; x < 2; x++) 00248 { 00249 a= *(uint32_t*)&src[stride*0]; 00250 b= *(uint32_t*)&src[stride*1]; 00251 c= *(uint32_t*)&src[stride*2]; 00252 a= (a&c) + (((a^c)&0xFEFEFEFEUL)>>1); 00253 *(uint32_t*)&src[stride*0]= (a|b) - (((a^b)&0xFEFEFEFEUL)>>1); 00254 00255 a= *(uint32_t*)&src[stride*3]; 00256 b= (a&b) + (((a^b)&0xFEFEFEFEUL)>>1); 00257 *(uint32_t*)&src[stride*1]= (c|b) - (((c^b)&0xFEFEFEFEUL)>>1); 00258 00259 b= *(uint32_t*)&src[stride*4]; 00260 c= (b&c) + (((b^c)&0xFEFEFEFEUL)>>1); 00261 *(uint32_t*)&src[stride*2]= (c|a) - (((c^a)&0xFEFEFEFEUL)>>1); 00262 00263 c= *(uint32_t*)&src[stride*5]; 00264 a= (a&c) + (((a^c)&0xFEFEFEFEUL)>>1); 00265 *(uint32_t*)&src[stride*3]= (a|b) - (((a^b)&0xFEFEFEFEUL)>>1); 00266 00267 a= *(uint32_t*)&src[stride*6]; 00268 b= (a&b) + (((a^b)&0xFEFEFEFEUL)>>1); 00269 *(uint32_t*)&src[stride*4]= (c|b) - (((c^b)&0xFEFEFEFEUL)>>1); 00270 00271 b= *(uint32_t*)&src[stride*7]; 00272 c= (b&c) + (((b^c)&0xFEFEFEFEUL)>>1); 00273 *(uint32_t*)&src[stride*5]= (c|a) - (((c^a)&0xFEFEFEFEUL)>>1); 00274 00275 c= *(uint32_t*)&src[stride*8]; 00276 a= (a&c) + (((a^c)&0xFEFEFEFEUL)>>1); 00277 *(uint32_t*)&src[stride*6]= (a|b) - (((a^b)&0xFEFEFEFEUL)>>1); 00278 00279 a= *(uint32_t*)&src[stride*9]; 00280 b= (a&b) + (((a^b)&0xFEFEFEFEUL)>>1); 00281 *(uint32_t*)&src[stride*7]= (c|b) - (((c^b)&0xFEFEFEFEUL)>>1); 00282 00283 src += 4; 00284 } 00285 } 00286 00287 static int linearBlendFilter(VideoFilter *f, VideoFrame *frame, int field) 00288 { 00289 (void)field; 00290 int height = frame->height; 00291 unsigned char *yptr = frame->buf + frame->offsets[0]; 00292 int stride = frame->pitches[0]; 00293 int ymax = height - 8; 00294 int x,y; 00295 unsigned char *src; 00296 unsigned char *uoff = frame->buf + frame->offsets[1]; 00297 unsigned char *voff = frame->buf + frame->offsets[2]; 00298 LBFilter *vf = (LBFilter *)f; 00299 TF_VARS; 00300 00301 TF_START; 00302 00303 for (y = 0; y < ymax; y+=8) 00304 { 00305 for (x = 0; x < stride; x+=8) 00306 { 00307 src = yptr + x + y * stride; 00308 (vf->subfilter)(src, stride); 00309 } 00310 } 00311 00312 stride = frame->pitches[1]; 00313 ymax = height / 2 - 8; 00314 00315 for (y = 0; y < ymax; y += 8) 00316 { 00317 for (x = 0; x < stride; x += 8) 00318 { 00319 src = uoff + x + y * stride; 00320 (vf->subfilter)(src, stride); 00321 00322 src = voff + x + y * stride; 00323 (vf->subfilter)(src, stride); 00324 } 00325 } 00326 00327 #if HAVE_MMX || HAVE_AMD3DNOW 00328 if ((vf->mm_flags & AV_CPU_FLAG_MMX2) || (vf->mm_flags & AV_CPU_FLAG_3DNOW)) 00329 emms(); 00330 #endif 00331 00332 TF_END(vf, "LinearBlend: "); 00333 return 0; 00334 } 00335 00336 static VideoFilter *new_filter(VideoFrameType inpixfmt, 00337 VideoFrameType outpixfmt, 00338 int *width, int *height, char *options, 00339 int threads) 00340 { 00341 LBFilter *filter; 00342 (void)width; 00343 (void)height; 00344 (void)options; 00345 (void)threads; 00346 if (inpixfmt != FMT_YV12 || outpixfmt != FMT_YV12) 00347 return NULL; 00348 00349 filter = malloc(sizeof(LBFilter)); 00350 00351 if (filter == NULL) 00352 { 00353 fprintf(stderr,"Couldn't allocate memory for filter\n"); 00354 return NULL; 00355 } 00356 00357 filter->vf.filter = &linearBlendFilter; 00358 filter->subfilter = &linearBlend; /* Default, non accellerated */ 00359 filter->mm_flags = av_get_cpu_flags(); 00360 if (HAVE_MMX && filter->mm_flags & AV_CPU_FLAG_MMX2) 00361 filter->subfilter = &linearBlendMMX; 00362 else if (HAVE_AMD3DNOW && filter->mm_flags & AV_CPU_FLAG_3DNOW) 00363 filter->subfilter = &linearBlend3DNow; 00364 else if (HAVE_ALTIVEC && filter->mm_flags & AV_CPU_FLAG_ALTIVEC) 00365 filter->vf.filter = &linearBlendFilterAltivec; 00366 00367 filter->vf.cleanup = NULL; 00368 TF_INIT(filter); 00369 return (VideoFilter *)filter; 00370 } 00371 00372 static FmtConv FmtList[] = 00373 { 00374 { FMT_YV12, FMT_YV12 }, 00375 FMT_NULL 00376 }; 00377 00378 ConstFilterInfo filter_table[] = 00379 { 00380 { 00381 filter_init: &new_filter, 00382 name: "linearblend", 00383 descript: "fast blending deinterlace filter", 00384 formats: FmtList, 00385 libname: NULL 00386 }, 00387 FILT_NULL 00388 };
1.7.6.1