MythTV  0.26-pre
filter_quickdnr.c
Go to the documentation of this file.
00001 /*
00002  * Quick DNR 0.8
00003  * (C)opyright 2003, Debabrata Banerjee
00004  * GNU GPL 2 or later
00005  *
00006  * Pass options as:
00007  * quickdnr=quality (0-255 scale adjusted)
00008  * quickdnr=Luma_threshold:Chroma_threshold (0-255) for single threshold
00009  * quickdnr=Luma_threshold1:Luma_threshold2:Chroma_threshold1:Chroma_threshold2 for double
00010  *
00011  */
00012 
00013 #include <stdio.h>
00014 
00015 #include "config.h"
00016 #if HAVE_STDINT_H
00017 #include <stdint.h>
00018 #endif
00019 
00020 #include <stdlib.h>
00021 #include <string.h>
00022 
00023 #include "filter.h"
00024 #include "frame.h"
00025 #include "libavutil/mem.h"
00026 #include "libavcodec/dsputil.h"
00027 
00028 #ifdef MMX
00029 #include "ffmpeg-mmx.h"
00030 #endif
00031 
/*
 * Default thresholds for the regular (single-threshold) filter.
 * NOTE(review): these two are not referenced anywhere else in the part of
 * the file visible here -- confirm whether they are still needed.
 */
#define LUMA_THRESHOLD_DEFAULT 15
#define CHROMA_THRESHOLD_DEFAULT 25

/*
 * Default thresholds for the double-thresholded filter; new_filter() starts
 * from these values before parsing any option string.
 */
#define LUMA_THRESHOLD1_DEFAULT 10
#define LUMA_THRESHOLD2_DEFAULT 1
#define CHROMA_THRESHOLD1_DEFAULT 20
#define CHROMA_THRESHOLD2_DEFAULT 2
00041 
00042 //#define QUICKDNR_DEBUG
00043 
00044 //static const char FILTER_NAME[] = "quickdnr";
00045 
00046 typedef struct ThisFilter
00047 {
00048         VideoFilter vf;
00049 
00050         uint64_t Luma_threshold_mask1;
00051         uint64_t Luma_threshold_mask2;
00052         uint64_t Chroma_threshold_mask1;
00053         uint64_t Chroma_threshold_mask2;
00054         uint8_t  Luma_threshold1;
00055         uint8_t  Luma_threshold2;
00056         uint8_t  Chroma_threshold1;
00057         uint8_t  Chroma_threshold2;
00058         uint8_t *average;
00059         int      average_size;
00060         int      offsets[3];
00061         int      pitches[3];
00062 
00063         TF_STRUCT;
00064 
00065 } ThisFilter;
00066 
00067 static int alloc_avg(ThisFilter *filter, int size)
00068 {
00069     if (filter->average_size >= size)
00070         return 1;
00071 
00072     uint8_t *tmp = realloc(filter->average, size);
00073     if (!tmp)
00074     {
00075         fprintf(stderr, "Couldn't allocate memory for DNR buffer\n");
00076         return 0;
00077     }
00078 
00079     filter->average = tmp;
00080     filter->average_size = size;
00081 
00082     return 1;
00083 }
00084 
00085 static int init_avg(ThisFilter *filter, VideoFrame *frame)
00086 {
00087     if (!alloc_avg(filter, frame->size))
00088         return 0;
00089 
00090     if ((filter->offsets[0] != frame->offsets[0]) ||
00091         (filter->offsets[1] != frame->offsets[1]) ||
00092         (filter->offsets[2] != frame->offsets[2]) ||
00093         (filter->pitches[0] != frame->pitches[0]) ||
00094         (filter->pitches[1] != frame->pitches[1]) ||
00095         (filter->pitches[2] != frame->pitches[2]))
00096     {
00097         memcpy(filter->average, frame->buf, frame->size);
00098         memcpy(filter->offsets, frame->offsets, sizeof(int) * 3);
00099         memcpy(filter->pitches, frame->pitches, sizeof(int) * 3);
00100     }
00101 
00102     return 1;
00103 }
00104 
00105 static void init_vars(ThisFilter *tf, VideoFrame *frame,
00106                       int *thr1, int *thr2, int *height,
00107                       uint8_t **avg, uint8_t **buf)
00108 {
00109     thr1[0] = tf->Luma_threshold1;
00110     thr1[1] = tf->Chroma_threshold1;
00111     thr1[2] = tf->Chroma_threshold1;
00112 
00113     thr2[0] = tf->Luma_threshold2;
00114     thr2[1] = tf->Chroma_threshold2;
00115     thr2[2] = tf->Chroma_threshold2;
00116 
00117     height[0] = frame->height;
00118     height[1] = frame->height >> 1;
00119     height[2] = frame->height >> 1;
00120 
00121     avg[0] = tf->average + frame->offsets[0];
00122     avg[1] = tf->average + frame->offsets[1];
00123     avg[2] = tf->average + frame->offsets[2];
00124 
00125     buf[0] = frame->buf + frame->offsets[0];
00126     buf[1] = frame->buf + frame->offsets[1];
00127     buf[2] = frame->buf + frame->offsets[2];
00128 }
00129 
00130 static int quickdnr(VideoFilter *f, VideoFrame *frame, int field)
00131 {
00132     (void)field;
00133     ThisFilter *tf = (ThisFilter *)f;
00134     int thr1[3], thr2[3], height[3];
00135     uint8_t *avg[3], *buf[3];
00136     int i, y;
00137 
00138     TF_VARS;
00139 
00140     TF_START;
00141 
00142     if (!init_avg(tf, frame))
00143         return 0;
00144 
00145     init_vars(tf, frame, thr1, thr2, height, avg, buf);
00146 
00147     for (i = 0; i < 3; i++)
00148     {
00149         int sz = height[i] * frame->pitches[i];
00150         for (y = 0; y < sz; y++)
00151         {
00152             if (abs(avg[i][y] - buf[i][y]) < thr1[i])
00153                 buf[i][y] = avg[i][y] = (avg[i][y] + buf[i][y]) >> 1;
00154             else
00155                 avg[i][y] = buf[i][y];
00156         }
00157     }
00158 
00159     TF_END(tf, "QuickDNR: ");
00160 
00161     return 0;
00162 }
00163 
00164 static int quickdnr2(VideoFilter *f, VideoFrame *frame, int field)
00165 {
00166     (void)field;
00167     ThisFilter *tf = (ThisFilter *)f;
00168     int thr1[3], thr2[3], height[3];
00169     uint8_t *avg[3], *buf[3];
00170     int i, y;
00171 
00172     TF_VARS;
00173 
00174     TF_START;
00175 
00176     if (!init_avg(tf, frame))
00177         return 0;
00178 
00179     init_vars(tf, frame, thr1, thr2, height, avg, buf);
00180 
00181     for (i = 0; i < 3; i++)
00182     {
00183         int sz = height[i] * frame->pitches[i];
00184         for (y = 0; y < sz; y++)
00185         {
00186             int t = abs(avg[i][y] - buf[i][y]);
00187             if (t < thr1[i])
00188             {
00189                 if (t > thr2[i])
00190                     avg[i][y] = (avg[i][y] + buf[i][y]) >> 1;
00191                 buf[i][y] = avg[i][y];
00192             }
00193             else
00194             {
00195                 avg[i][y] = buf[i][y];
00196             }
00197         }
00198     }
00199 
00200     TF_END(tf, "QuickDNR2: ");
00201 
00202     return 0;
00203 }
00204 
00205 #ifdef MMX
00206 
00207 static int quickdnrMMX(VideoFilter *f, VideoFrame *frame, int field)
00208 {
00209     (void)field;
00210     ThisFilter *tf = (ThisFilter *)f;
00211     const uint64_t sign_convert = 0x8080808080808080LL;
00212     int thr1[3], thr2[3], height[3];
00213     uint64_t *avg[3], *buf[3];
00214     int i, y;
00215 
00216     TF_VARS;
00217 
00218     TF_START;
00219 
00220     if (!init_avg(tf, frame))
00221         return 0;
00222 
00223     init_vars(tf, frame, thr1, thr2, height, (uint8_t**) avg, (uint8_t**) buf);
00224 
00225     /*
00226       Removed all the prefetches. These don't do anything when
00227       you are processing an array with sequential accesses because the
00228       processor automatically does a prefetchT0 in these cases. The
00229       instruction is meant to be used to specify a different prefetch
00230       cache level, or to prefetch non-sequental data.
00231 
00232       These prefetches are not available on all MMX processors so if
00233       we wanted to use them we would need to test for a prefetch
00234       capable processor before using them. -- dtk
00235     */
00236 
00237     __asm__ volatile("emms\n\t");
00238 
00239     __asm__ volatile("movq (%0), %%mm4" : : "r" (&sign_convert));
00240 
00241     for (i = 0; i < 3; i++)
00242     {
00243         int sz = (height[i] * frame->pitches[i]) >> 3;
00244 
00245         if (0 == i)
00246             __asm__ volatile("movq (%0), %%mm5" : : "r" (&tf->Luma_threshold_mask1));
00247         else
00248             __asm__ volatile("movq (%0), %%mm5" : : "r" (&tf->Chroma_threshold_mask1));
00249 
00250         for (y = 0; y < sz; y++)
00251         {
00252             __asm__ volatile(
00253             "movq (%0), %%mm0     \n\t" // avg[i]
00254             "movq (%1), %%mm1     \n\t" // buf[i]
00255             "movq %%mm0, %%mm2    \n\t"
00256             "movq %%mm1, %%mm3    \n\t"
00257             "movq %%mm1, %%mm7    \n\t"
00258 
00259             "pcmpgtb %%mm0, %%mm1 \n\t" // 1 if av greater
00260             "psubb %%mm0, %%mm3   \n\t" // mm3=buf-av
00261             "psubb %%mm7, %%mm0   \n\t" // mm0=av-buf
00262             "pand %%mm1, %%mm3    \n\t" // select buf
00263             "pandn %%mm0,%%mm1    \n\t" // select av
00264             "por %%mm1, %%mm3     \n\t" // mm3=abs()
00265 
00266             "paddb %%mm4, %%mm3   \n\t" // hack! No proper unsigned mmx compares!
00267             "pcmpgtb %%mm5, %%mm3 \n\t" // compare buf with mask
00268 
00269             "pavgb %%mm7, %%mm2   \n\t"
00270             "pand %%mm3, %%mm7    \n\t"
00271             "pandn %%mm2,%%mm3    \n\t"
00272             "por %%mm7, %%mm3     \n\t"
00273             "movq %%mm3, (%0)     \n\t"
00274             "movq %%mm3, (%1)     \n\t"
00275             : : "r" (avg[i]), "r" (buf[i])
00276             );
00277             buf[i]++;
00278             avg[i]++;
00279         }
00280     }
00281 
00282     __asm__ volatile("emms\n\t");
00283 
00284     // filter the leftovers from the mmx rutine
00285     for (i = 0; i < 3; i++)
00286     {
00287         int thr1[3], thr2[3], height[3];
00288         uint8_t *avg8[3], *buf8[3];
00289         int end, beg;
00290 
00291         init_vars(tf, frame, thr1, thr2, height, avg8, buf8);
00292 
00293         end = height[i] * frame->pitches[i];
00294         beg = end & ~0x7;
00295 
00296         if (beg == end)
00297             continue;
00298 
00299         for (y = beg; y < end; y++)
00300         {
00301             if (abs(avg8[i][y] - buf8[i][y]) < thr1[i])
00302                 buf8[i][y] = avg8[i][y] = (avg8[i][y] + buf8[i][y]) >> 1;
00303             else
00304                 avg8[i][y] = buf8[i][y];
00305         }
00306     }
00307 
00308     TF_END(tf, "QuickDNRmmx: ");
00309 
00310     return 0;
00311 }
00312 
00313 
00314 static int quickdnr2MMX(VideoFilter *f, VideoFrame *frame, int field)
00315 {
00316     (void)field;
00317     ThisFilter *tf = (ThisFilter *)f;
00318     const uint64_t sign_convert = 0x8080808080808080LL;
00319     int thr1[3], thr2[3], height[3];
00320     uint64_t *avg[3], *buf[3];
00321     int i, y;
00322 
00323     TF_VARS;
00324 
00325     TF_START;
00326 
00327     if (!init_avg(tf, frame))
00328         return 0;
00329 
00330     init_vars(tf, frame, thr1, thr2, height, (uint8_t**) avg, (uint8_t**) buf);
00331 
00332     __asm__ volatile("emms\n\t");
00333 
00334     __asm__ volatile("movq (%0), %%mm4" : : "r" (&sign_convert));
00335 
00336     for (i = 0; i < 3; i++)
00337     {
00338         int sz = (height[i] * frame->pitches[i]) >> 3;
00339 
00340         if (0 == i)
00341             __asm__ volatile("movq (%0), %%mm5" : : "r" (&tf->Luma_threshold_mask1));
00342         else
00343             __asm__ volatile("movq (%0), %%mm5" : : "r" (&tf->Chroma_threshold_mask1));
00344 
00345         for (y = 0; y < sz; y++)
00346         {
00347             uint64_t *mask2 = (0 == i) ?
00348                 &tf->Luma_threshold_mask2 : &tf->Chroma_threshold_mask2;
00349 
00350             __asm__ volatile(
00351                 "movq (%0), %%mm0     \n\t" // avg[i]
00352                 "movq (%1), %%mm1     \n\t" // buf[i]
00353                 "movq %%mm0, %%mm2    \n\t"
00354                 "movq %%mm1, %%mm3    \n\t"
00355                 "movq %%mm1, %%mm6    \n\t"
00356                 "movq %%mm1, %%mm7    \n\t"
00357 
00358                 "pcmpgtb %%mm0, %%mm1 \n\t" // 1 if av greater
00359                 "psubb %%mm0, %%mm3   \n\t" // mm3=buf-av
00360                 "psubb %%mm7, %%mm0   \n\t" // mm0=av-buf
00361                 "pand %%mm1, %%mm3    \n\t" // select buf
00362                 "pandn %%mm0,%%mm1    \n\t" // select av
00363                 "por %%mm1, %%mm3     \n\t" // mm3=abs(buf-av)
00364 
00365                 "paddb %%mm4, %%mm3   \n\t" // hack! No proper unsigned mmx compares!
00366                 "pcmpgtb %%mm5, %%mm3 \n\t" // compare diff with mask
00367 
00368                 "movq %%mm2, %%mm0    \n\t" // reload registers
00369                 "movq %%mm7, %%mm1    \n\t"
00370 
00371                 "pcmpgtb %%mm0, %%mm1 \n\t" // Secondary threshold
00372                 "psubb %%mm0, %%mm6   \n\t"
00373                 "psubb %%mm7, %%mm0   \n\t"
00374                 "pand %%mm1, %%mm6    \n\t"
00375                 "pandn %%mm0,%%mm1    \n\t"
00376                 "por %%mm1, %%mm6     \n\t"
00377 
00378                 "paddb %%mm4, %%mm6   \n\t"
00379                 "pcmpgtb (%2), %%mm6  \n\t"
00380 
00381                 "movq %%mm2, %%mm0    \n\t"
00382 
00383                 "pavgb %%mm7, %%mm2   \n\t"
00384 
00385                 "pand %%mm6, %%mm2    \n\t"
00386                 "pandn %%mm0,%%mm6    \n\t"
00387                 "por %%mm2, %%mm6     \n\t" // Combined new/keep average
00388 
00389                 "pand %%mm3, %%mm7    \n\t"
00390                 "pandn %%mm6,%%mm3    \n\t"
00391                 "por %%mm7, %%mm3     \n\t" // Combined new/keep average
00392 
00393                 "movq %%mm3, (%0)     \n\t"
00394                 "movq %%mm3, (%1)     \n\t"
00395                 : :
00396                 "r" (avg[i]),
00397                 "r" (buf[i]),
00398                 "r" (mask2)
00399                 );
00400             buf[i]++;
00401             avg[i]++;
00402         }
00403     }
00404 
00405     __asm__ volatile("emms\n\t");
00406 
00407     // filter the leftovers from the mmx rutine
00408     for (i = 0; i < 3; i++)
00409     {
00410         int thr1[3], thr2[3], height[3];
00411         uint8_t *avg8[3], *buf8[3];
00412         int end, beg;
00413 
00414         init_vars(tf, frame, thr1, thr2, height, avg8, buf8);
00415 
00416         end = height[i] * frame->pitches[i];
00417         beg = end & ~0x7;
00418 
00419         if (beg == end)
00420             continue;
00421 
00422         for (y = beg; y < end; y++)
00423         {
00424             int t = abs(avg8[i][y] - buf8[i][y]);
00425             if (t < thr1[i])
00426             {
00427                 if (t > thr2[i])
00428                     avg8[i][y] = (avg8[i][y] + buf8[i][y]) >> 1;
00429                 buf8[i][y] = avg8[i][y];
00430             }
00431             else
00432             {
00433                 avg8[i][y] = buf8[i][y];
00434             }
00435         }
00436     }
00437 
00438     TF_END(tf, "QuickDNR2mmx: ");
00439 
00440     return 0;
00441 }
00442 #endif /* MMX */
00443 
00444 static void cleanup(VideoFilter *vf)
00445 {
00446     ThisFilter *tf = (ThisFilter*) vf;
00447 
00448     if (tf->average)
00449         free(tf->average);
00450 }
00451 
00452 static VideoFilter *new_filter(VideoFrameType inpixfmt,
00453                                VideoFrameType outpixfmt,
00454                                int *width, int *height, char *options,
00455                                int threads)
00456 {
00457     unsigned int Param1, Param2, Param3, Param4;
00458     int i, double_threshold = 1;
00459     ThisFilter *filter;
00460 
00461     (void) width;
00462     (void) height;
00463     (void) i;
00464     (void) threads;
00465 
00466     if (inpixfmt != FMT_YV12 || outpixfmt != FMT_YV12)
00467     {
00468         fprintf(stderr, "QuickDNR: attempt to initialize "
00469                 "with unsupported format\n");
00470         return NULL;
00471     }
00472 
00473     filter = malloc(sizeof(ThisFilter));
00474     if (filter == NULL)
00475     {
00476         fprintf(stderr, "Couldn't allocate memory for filter\n");
00477         return NULL;
00478     }
00479 
00480     memset(filter, 0, sizeof(ThisFilter));
00481     filter->vf.cleanup        = &cleanup;
00482     filter->Luma_threshold1   = LUMA_THRESHOLD1_DEFAULT;
00483     filter->Chroma_threshold1 = CHROMA_THRESHOLD1_DEFAULT;
00484     filter->Luma_threshold2   = LUMA_THRESHOLD2_DEFAULT;
00485     filter->Chroma_threshold2 = CHROMA_THRESHOLD2_DEFAULT;
00486     double_threshold          = 1;
00487 
00488     if (options)
00489     {
00490         int ret = sscanf(options, "%20u:%20u:%20u:%20u",
00491                          &Param1, &Param2, &Param3, &Param4);
00492         switch (ret)
00493         {
00494             case 1:
00495                 //These might be better as logarithmic if this gets used a lot.
00496                 filter->Luma_threshold1   = ((uint8_t) Param1) * 40 / 255;
00497                 filter->Luma_threshold2   = ((uint8_t) Param1) * 4/255 > 2 ?
00498                     2 : ((uint8_t) Param1) * 4/255;
00499                 filter->Chroma_threshold1 = ((uint8_t) Param1) * 80 / 255;
00500                 filter->Chroma_threshold2 = ((uint8_t) Param1) * 8/255 > 4 ?
00501                     4 : ((uint8_t) Param1) * 8/255;
00502                 break;
00503 
00504             case 2:
00505                 filter->Luma_threshold1   = (uint8_t) Param1;
00506                 filter->Chroma_threshold1 = (uint8_t) Param2;
00507                 double_threshold = 0;
00508                 break;
00509 
00510             case 4:
00511                 filter->Luma_threshold1   = (uint8_t) Param1;
00512                 filter->Luma_threshold2   = (uint8_t) Param2;
00513                 filter->Chroma_threshold1 = (uint8_t) Param3;
00514                 filter->Chroma_threshold2 = (uint8_t) Param4;
00515                 break;
00516 
00517             default:
00518                 break;
00519         }
00520     }
00521 
00522     filter->vf.filter  = (double_threshold) ? &quickdnr2 : &quickdnr;
00523 
00524 #ifdef MMX
00525     if (av_get_cpu_flags() > AV_CPU_FLAG_MMX2)
00526     {
00527         filter->vf.filter = (double_threshold) ? &quickdnr2MMX : &quickdnrMMX;
00528         for (i = 0; i < 8; i++)
00529         {
00530             // 8 sign-shifted bytes!
00531             filter->Luma_threshold_mask1 =
00532                 (filter->Luma_threshold_mask1 << 8) +
00533                 ((filter->Luma_threshold1 > 0x80) ?
00534                  (filter->Luma_threshold1 - 0x80) :
00535                  (filter->Luma_threshold1 + 0x80));
00536 
00537             filter->Chroma_threshold_mask1 =
00538                 (filter->Chroma_threshold_mask1 << 8) +
00539                 ((filter->Chroma_threshold1 > 0x80) ?
00540                  (filter->Chroma_threshold1 - 0x80) :
00541                  (filter->Chroma_threshold1 + 0x80));
00542 
00543             filter->Luma_threshold_mask2 =
00544                 (filter->Luma_threshold_mask2 << 8) +
00545                 ((filter->Luma_threshold2 > 0x80) ?
00546                  (filter->Luma_threshold2 - 0x80) :
00547                  (filter->Luma_threshold2 + 0x80));
00548 
00549             filter->Chroma_threshold_mask2 =
00550                 (filter->Chroma_threshold_mask2 << 8) +
00551                 ((filter->Chroma_threshold2 > 0x80) ?
00552                  (filter->Chroma_threshold2 - 0x80) :
00553                  (filter->Chroma_threshold2 + 0x80));
00554         }
00555     }
00556 #endif
00557 
00558     TF_INIT(filter);
00559 
00560 #ifdef QUICKDNR_DEBUG
00561     fprintf(stderr, "DNR Loaded: 0x%X Params: %u %u \n"
00562             "Luma1:   %3d 0x%X%X  Luma2:   0x%X%X\n"
00563             "Chroma1: %3d %X%X    Chroma2: 0x%X%X\n",
00564             av_get_cpu_flags(), Param1, Param2, filter->Luma_threshold1,
00565             ((int*)&filter->Luma_threshold_mask1)[1],
00566             ((int*)&filter->Luma_threshold_mask1)[0],
00567             ((int*)&filter->Luma_threshold_mask2)[1],
00568             ((int*)&filter->Luma_threshold_mask2)[0],
00569             filter->Chroma_threshold1,
00570             ((int*)&filter->Chroma_threshold_mask1)[1],
00571             ((int*)&filter->Chroma_threshold_mask1)[0],
00572             ((int*)&filter->Chroma_threshold_mask2)[1],
00573             ((int*)&filter->Chroma_threshold_mask2)[0]
00574         );
00575 
00576     fprintf(stderr, "Options:%d:%d:%d:%d\n",
00577             filter->Luma_threshold1, filter->Luma_threshold2,
00578             filter->Chroma_threshold1, filter->Chroma_threshold2);
00579 #endif
00580 
00581     return (VideoFilter*) filter;
00582 }
00583 
00584 static FmtConv FmtList[] =
00585 {
00586     { FMT_YV12, FMT_YV12 },
00587     FMT_NULL
00588 };
00589 
00590 ConstFilterInfo filter_table[] =
00591 {
00592     {
00593         filter_init: &new_filter,
00594         name:       "quickdnr",
00595         descript:   "removes noise with a fast single/double thresholded average filter",
00596         formats:    FmtList,
00597         libname:    NULL
00598     },
00599     FILT_NULL
00600 };
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends