MythTV  0.26-pre
filter_linearblend.c
Go to the documentation of this file.
00001 // a linear blending deinterlacer yoinked from the mplayer sources.
00002 
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
00005 
00006 #include "mythconfig.h"
00007 #if HAVE_STDINT_H
00008 #include <stdint.h>
00009 #endif
00010 
00011 #if HAVE_MMX || HAVE_AMD3DNOW
00012 #include "ffmpeg-mmx.h"
00013 #endif
00014 
00015 #include "../mm_arch.h"
00016 #if HAVE_ALTIVEC_H
00017     #include <altivec.h>
00018 #endif
00019 
00020 #define PAVGB(a,b)   "pavgb " #a ", " #b " \n\t"
00021 #define PAVGUSB(a,b) "pavgusb " #a ", " #b " \n\t"
00022 
00023 #include "filter.h"
00024 #include "frame.h"
00025 
00026 typedef struct LBFilter
00027 {
00028     VideoFilter vf;
00029 
00030     /* functions and variables below here considered "private" */
00031     int mm_flags;
00032     void (*subfilter)(unsigned char *, int);
00033     TF_STRUCT;
00034 } LBFilter;
00035 
00036 void linearBlend(unsigned char *src, int stride);
00037 void linearBlendMMX(unsigned char *src, int stride);
00038 void linearBlend3DNow(unsigned char *src, int stride);
00039 int linearBlendFilterAltivec(VideoFilter *f, VideoFrame *frame, int field);
00040 
00041 #if HAVE_ALTIVEC
00042 inline void linearBlendAltivec(unsigned char *src, int stride);
00043 #endif
00044 
00045 #ifdef MMX
00046 
00047 void linearBlendMMX(unsigned char *src, int stride)
00048 {
00049 //  src += 4 * stride;
00050     __asm__ volatile(
00051        "lea (%0, %1), %%"REG_a"                        \n\t"
00052        "lea (%%"REG_a", %1, 4), %%"REG_d"              \n\t"
00053 
00054        "movq (%0), %%mm0                               \n\t" // L0
00055        "movq (%%"REG_a", %1), %%mm1                    \n\t" // L2
00056        PAVGB(%%mm1, %%mm0)                                   // L0+L2
00057        "movq (%%"REG_a"), %%mm2                            \n\t" // L1
00058        PAVGB(%%mm2, %%mm0)
00059        "movq %%mm0, (%0)                               \n\t"
00060        "movq (%%"REG_a", %1, 2), %%mm0                     \n\t" // L3
00061        PAVGB(%%mm0, %%mm2)                                   // L1+L3
00062        PAVGB(%%mm1, %%mm2)                                   // 2L2 + L1 + L3
00063        "movq %%mm2, (%%"REG_a")                            \n\t"
00064        "movq (%0, %1, 4), %%mm2                        \n\t" // L4
00065        PAVGB(%%mm2, %%mm1)                                   // L2+L4
00066        PAVGB(%%mm0, %%mm1)                                   // 2L3 + L2 + L4
00067        "movq %%mm1, (%%"REG_a", %1)                        \n\t"
00068        "movq (%%"REG_d"), %%mm1                            \n\t" // L5
00069        PAVGB(%%mm1, %%mm0)                                   // L3+L5
00070        PAVGB(%%mm2, %%mm0)                                   // 2L4 + L3 + L5
00071        "movq %%mm0, (%%"REG_a", %1, 2)                     \n\t"
00072        "movq (%%"REG_d", %1), %%mm0                        \n\t" // L6
00073        PAVGB(%%mm0, %%mm2)                                   // L4+L6
00074        PAVGB(%%mm1, %%mm2)                                   // 2L5 + L4 + L6
00075        "movq %%mm2, (%0, %1, 4)                        \n\t"
00076        "movq (%%"REG_d", %1, 2), %%mm2                     \n\t" // L7
00077        PAVGB(%%mm2, %%mm1)                                   // L5+L7
00078        PAVGB(%%mm0, %%mm1)                                   // 2L6 + L5 + L7
00079        "movq %%mm1, (%%"REG_d")                            \n\t"
00080        "movq (%0, %1, 8), %%mm1                        \n\t" // L8
00081        PAVGB(%%mm1, %%mm0)                                   // L6+L8
00082        PAVGB(%%mm2, %%mm0)                                   // 2L7 + L6 + L8
00083        "movq %%mm0, (%%"REG_d", %1)                        \n\t"
00084        "movq (%%"REG_d", %1, 4), %%mm0                     \n\t" // L9
00085        PAVGB(%%mm0, %%mm2)                                   // L7+L9
00086        PAVGB(%%mm1, %%mm2)                                   // 2L8 + L7 + L9
00087        "movq %%mm2, (%%"REG_d", %1, 2)                     \n\t"
00088 
00089        : : "r" (src), "r" ((long)stride)
00090        : "%"REG_a, "%"REG_d
00091     );
00092 }
00093 
00094 void linearBlend3DNow(unsigned char *src, int stride)
00095 {
00096 //  src += 4 * stride;
00097     __asm__ volatile(
00098        "lea (%0, %1), %%"REG_a"                           \n\t"
00099        "lea (%%"REG_a", %1, 4), %%"REG_d"                     \n\t"
00100 
00101        "movq (%0), %%mm0                               \n\t" // L0
00102        "movq (%%"REG_a", %1), %%mm1                        \n\t" // L2
00103        PAVGUSB(%%mm1, %%mm0)                                 // L0+L2
00104        "movq (%%"REG_a"), %%mm2                            \n\t" // L1
00105        PAVGUSB(%%mm2, %%mm0)
00106        "movq %%mm0, (%0)                               \n\t"
00107        "movq (%%"REG_a", %1, 2), %%mm0                     \n\t" // L3
00108        PAVGUSB(%%mm0, %%mm2)                                 // L1+L3
00109        PAVGUSB(%%mm1, %%mm2)                                 // 2L2 + L1 + L3
00110        "movq %%mm2, (%%"REG_a")                            \n\t"
00111        "movq (%0, %1, 4), %%mm2                        \n\t" // L4
00112        PAVGUSB(%%mm2, %%mm1)                                 // L2+L4
00113        PAVGUSB(%%mm0, %%mm1)                                 // 2L3 + L2 + L4
00114        "movq %%mm1, (%%"REG_a", %1)                        \n\t"
00115        "movq (%%"REG_d"), %%mm1                            \n\t" // L5
00116        PAVGUSB(%%mm1, %%mm0)                                 // L3+L5
00117        PAVGUSB(%%mm2, %%mm0)                                 // 2L4 + L3 + L5
00118        "movq %%mm0, (%%"REG_a", %1, 2)                     \n\t"
00119        "movq (%%"REG_d", %1), %%mm0                        \n\t" // L6
00120        PAVGUSB(%%mm0, %%mm2)                                 // L4+L6
00121        PAVGUSB(%%mm1, %%mm2)                                 // 2L5 + L4 + L6
00122        "movq %%mm2, (%0, %1, 4)                        \n\t"
00123        "movq (%%"REG_d", %1, 2), %%mm2                     \n\t" // L7
00124        PAVGUSB(%%mm2, %%mm1)                                 // L5+L7
00125        PAVGUSB(%%mm0, %%mm1)                                 // 2L6 + L5 + L7
00126        "movq %%mm1, (%%"REG_d")                            \n\t"
00127        "movq (%0, %1, 8), %%mm1                        \n\t" // L8
00128        PAVGUSB(%%mm1, %%mm0)                                 // L6+L8
00129        PAVGUSB(%%mm2, %%mm0)                                 // 2L7 + L6 + L8
00130        "movq %%mm0, (%%"REG_d", %1)                        \n\t"
00131        "movq (%%"REG_d", %1, 4), %%mm0                     \n\t" // L9
00132        PAVGUSB(%%mm0, %%mm2)                                 // L7+L9
00133        PAVGUSB(%%mm1, %%mm2)                                 // 2L8 + L7 + L9
00134        "movq %%mm2, (%%"REG_d", %1, 2)                     \n\t"
00135 
00136        : : "r" (src), "r" ((long)stride)
00137        : "%"REG_a, "%"REG_d
00138     );
00139 }
00140 
00141 #endif
00142 
00143 #if HAVE_ALTIVEC
00144 
00145 inline void linearBlendAltivec(unsigned char *src, int stride)
00146 {
00147     vector unsigned char a, b, c;
00148     int i;
00149     
00150     b = vec_ld(0, src);
00151     c = vec_ld(stride, src);
00152     
00153     for (i = 2; i < 10; i++)
00154     {
00155         a = b;
00156         b = c;
00157         c = vec_ld(stride * i, src);
00158         vec_st(vec_avg(vec_avg(a, c), b), stride * (i - 2), src);
00159     }
00160 }
00161 
00162 int linearBlendFilterAltivec(VideoFilter *f, VideoFrame *frame, int field)
00163 {
00164     (void)field;
00165     (void)f;
00166     int height = frame->height;
00167     unsigned char *yptr = frame->buf + frame->offsets[0];
00168     int stride = frame->pitches[0];
00169     int ymax = height - 8;
00170     int x,y;
00171     unsigned char *src = 0;
00172     unsigned char *uoff = frame->buf + frame->offsets[1];
00173     unsigned char *voff = frame->buf + frame->offsets[2];
00174     TF_VARS;
00175 
00176     TF_START;
00177  
00178     if ((stride & 0xf) || ((unsigned int)yptr & 0xf))
00179     {
00180         for (y = 0; y < ymax; y += 8)
00181         {  
00182             for (x = 0; x < stride; x += 8)
00183             {
00184                 src = yptr + x + y * stride;  
00185                 linearBlend(src, stride);  
00186             }
00187         }
00188     }
00189     else
00190     {
00191         src = yptr;
00192         for (y = 0; y < ymax; y += 8)
00193         {  
00194             for (x = 0; x < stride; x += 16)
00195             {
00196                 linearBlendAltivec(src, stride);
00197                 src += 16;
00198             }
00199             src += stride * 7;
00200         }
00201     }
00202  
00203     stride = frame->pitches[1];
00204     ymax = height / 2 - 8;
00205   
00206     if ((stride & 0xf) || ((unsigned int)uoff & 0xf))
00207     {
00208         for (y = 0; y < ymax; y += 8)
00209         {
00210             for (x = 0; x < stride; x += 8)
00211             {
00212                 src = uoff + x + y * stride;
00213                 linearBlend(src, stride);
00214            
00215                 src = voff + x + y * stride;
00216                 linearBlend(src, stride);
00217             }
00218         }
00219     }
00220     else
00221     {
00222         for (y = 0; y < ymax; y += 8)
00223         {
00224             for (x = 0; x < stride; x += 16)
00225             {
00226                 linearBlendAltivec(src, stride);
00227                 uoff += 16;
00228            
00229                 linearBlendAltivec(src, stride);
00230                 voff += 16;
00231             }
00232             uoff += stride * 7;
00233             voff += stride * 7;
00234         }
00235     }
00236 
00237     TF_END(vf, "LinearBlendAltivec: ");
00238     return 0;
00239 }
00240 
00241 #endif /* HAVE_ALTIVEC */
00242 
/* Read four packed pixels from p without alignment or aliasing assumptions. */
static inline uint32_t lb_load32(const unsigned char *p)
{
    uint32_t v;

    memcpy(&v, p, sizeof v);
    return v;
}

/* Write four packed pixels to p without alignment or aliasing assumptions. */
static inline void lb_store32(unsigned char *p, uint32_t v)
{
    memcpy(p, &v, sizeof v);
}

/* Per-byte average of four packed pixels, rounding down: (a + b) >> 1. */
static inline uint32_t lb_avg_floor(uint32_t a, uint32_t b)
{
    return (a & b) + (((a ^ b) & UINT32_C(0xFEFEFEFE)) >> 1);
}

/* Per-byte average of four packed pixels, rounding up: (a + b + 1) >> 1. */
static inline uint32_t lb_avg_ceil(uint32_t a, uint32_t b)
{
    return (a | b) - (((a ^ b) & UINT32_C(0xFEFEFEFE)) >> 1);
}

/*
 * Portable C kernel: blend one 8-pixel-wide column in place.
 *
 * Rows 0..9 (relative to src) are read and rows 0..7 are rewritten so that,
 * for every byte,
 *
 *     out[i] = ceil_avg(floor_avg(row[i], row[i + 2]), row[i + 1])
 *
 * The block is handled as two 4-byte-wide columns, four pixels per 32-bit
 * word.  A three-row window slides down each column, so every row is loaded
 * before it is overwritten and only original pixel values feed the result.
 *
 * src    - top-left byte of the block; rows src .. src + 9*stride must be
 *          readable and rows 0..7 writable (8 bytes each).
 * stride - distance in bytes between consecutive rows.
 */
void linearBlend(unsigned char *src, int stride)
{
    int col, row;

    for (col = 0; col < 8; col += 4)
    {
        unsigned char *p = src + col;
        uint32_t prev = lb_load32(p);
        uint32_t cur = lb_load32(p + stride);

        for (row = 0; row < 8; row++)
        {
            uint32_t next = lb_load32(p + stride * (row + 2));

            lb_store32(p + stride * row,
                       lb_avg_ceil(lb_avg_floor(prev, next), cur));

            prev = cur;
            cur = next;
        }
    }
}
00286 
00287 static int linearBlendFilter(VideoFilter *f, VideoFrame *frame, int  field)
00288 {
00289     (void)field;
00290     int height = frame->height;
00291     unsigned char *yptr = frame->buf + frame->offsets[0];
00292     int stride = frame->pitches[0];
00293     int ymax = height - 8;
00294     int x,y;
00295     unsigned char *src;
00296     unsigned char *uoff = frame->buf + frame->offsets[1];
00297     unsigned char *voff = frame->buf + frame->offsets[2];
00298     LBFilter *vf = (LBFilter *)f;
00299     TF_VARS;
00300 
00301     TF_START;
00302 
00303     for (y = 0; y < ymax; y+=8)
00304     {  
00305         for (x = 0; x < stride; x+=8)
00306         {
00307             src = yptr + x + y * stride;
00308             (vf->subfilter)(src, stride);  
00309         }
00310     }
00311  
00312     stride = frame->pitches[1];
00313     ymax = height / 2 - 8;
00314   
00315     for (y = 0; y < ymax; y += 8)
00316     {
00317         for (x = 0; x < stride; x += 8)
00318         {
00319             src = uoff + x + y * stride;
00320             (vf->subfilter)(src, stride);
00321        
00322             src = voff + x + y * stride;
00323             (vf->subfilter)(src, stride);
00324         }
00325     }
00326 
00327 #if HAVE_MMX || HAVE_AMD3DNOW
00328     if ((vf->mm_flags & AV_CPU_FLAG_MMX2) || (vf->mm_flags & AV_CPU_FLAG_3DNOW))
00329         emms();
00330 #endif
00331 
00332     TF_END(vf, "LinearBlend: ");
00333     return 0;
00334 }
00335 
00336 static VideoFilter *new_filter(VideoFrameType inpixfmt,
00337                                VideoFrameType outpixfmt,
00338                                int *width, int *height, char *options,
00339                                int threads)
00340 {
00341     LBFilter *filter;
00342     (void)width;
00343     (void)height;
00344     (void)options;
00345     (void)threads;
00346     if (inpixfmt != FMT_YV12 || outpixfmt != FMT_YV12)
00347         return NULL;
00348 
00349     filter = malloc(sizeof(LBFilter));
00350 
00351     if (filter == NULL)
00352     {
00353         fprintf(stderr,"Couldn't allocate memory for filter\n");
00354         return NULL;
00355     }
00356 
00357     filter->vf.filter = &linearBlendFilter;
00358     filter->subfilter = &linearBlend;    /* Default, non accellerated */
00359     filter->mm_flags = av_get_cpu_flags();
00360     if (HAVE_MMX && filter->mm_flags & AV_CPU_FLAG_MMX2)
00361         filter->subfilter = &linearBlendMMX;
00362     else if (HAVE_AMD3DNOW && filter->mm_flags & AV_CPU_FLAG_3DNOW)
00363         filter->subfilter = &linearBlend3DNow;
00364     else if (HAVE_ALTIVEC && filter->mm_flags & AV_CPU_FLAG_ALTIVEC)
00365         filter->vf.filter = &linearBlendFilterAltivec;
00366 
00367     filter->vf.cleanup = NULL;
00368     TF_INIT(filter);
00369     return (VideoFilter *)filter;
00370 }
00371 
00372 static FmtConv FmtList[] = 
00373 {
00374     { FMT_YV12, FMT_YV12 },
00375     FMT_NULL
00376 };
00377 
00378 ConstFilterInfo filter_table[] = 
00379 {
00380     {
00381         filter_init: &new_filter,
00382         name:       "linearblend",
00383         descript:   "fast blending deinterlace filter",
00384         formats:    FmtList,
00385         libname:    NULL
00386     },
00387     FILT_NULL
00388 };
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends