• Skip to content
  • Skip to link menu
KDE 4.4 API Reference
  • KDE API Reference
  • KDE Support
  • Sitemap
  • Contact Us
 

qimageblitz

convolve.cpp

Go to the documentation of this file.
00001 /* 
00002  Copyright (C) 2004, 2005, 2007 Daniel M. Duley <daniel.duley@verizon.net>
00003 
00004  Redistribution and use in source and binary forms, with or without
00005 modification, are permitted provided that the following conditions
00006 are met:
00007 
00008 1. Redistributions of source code must retain the above copyright
00009    notice, this list of conditions and the following disclaimer.
00010 2. Redistributions in binary form must reproduce the above copyright
00011    notice, this list of conditions and the following disclaimer in the
00012    documentation and/or other materials provided with the distribution.
00013 
00014 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
00015 IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
00016 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
00017 IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
00018 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
00019 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
00020 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
00021 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
00022 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
00023 THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00024 
00025 */
00026 
00027 /*
00028  Portions of this software were originally based on ImageMagick's
00029  algorithms. ImageMagick is copyrighted under the following conditions:
00030 
00031 Copyright (C) 2003 ImageMagick Studio, a non-profit organization dedicated to
00032 making software imaging solutions freely available.
00033 
00034 Permission is hereby granted, free of charge, to any person obtaining a copy
00035 of this software and associated documentation files ("ImageMagick"), to deal
00036 in ImageMagick without restriction, including without limitation the rights
00037 to use, copy, modify, merge, publish, distribute, sublicense,  and/or sell
00038 copies of ImageMagick, and to permit persons to whom the ImageMagick is
00039 furnished to do so, subject to the following conditions:
00040 
00041 The above copyright notice and this permission notice shall be included in all
00042 copies or substantial portions of ImageMagick.
00043 
00044 The software is provided "as is", without warranty of any kind, express or
00045 implied, including but not limited to the warranties of merchantability,
00046 fitness for a particular purpose and noninfringement.  In no event shall
00047 ImageMagick Studio be liable for any claim, damages or other liability,
00048 whether in an action of contract, tort or otherwise, arising from, out of or
00049 in connection with ImageMagick or the use or other dealings in ImageMagick.
00050 
00051 Except as contained in this notice, the name of the ImageMagick Studio shall
00052 not be used in advertising or otherwise to promote the sale, use or other
00053 dealings in ImageMagick without prior written authorization from the
00054 ImageMagick Studio.
00055 */
00056 
00057 #include "qimageblitz.h"
00058 #include "private/blitz_p.h"
00059 #include <config-processor.h>
00060 #include "blitzcpu.h"
00061 #include <cmath>
00062 #include <stdio.h>
00063 #include <stdlib.h>
00064 #include <string.h>
00065 #ifdef HAVE_STDINT_H
00066   #include <stdint.h>
00067 #endif
00068 
00069 #if defined(__i386__) && ( defined(__GNUC__) || defined(__INTEL_COMPILER) )
00070 #  if defined(HAVE_MMX )
00071 #    define USE_MMX_INLINE_ASM
00072 #  endif
00073 #endif
00074 
00075 #define M_EPSILON 1.0e-6
00076 #define M_SQ2PI 2.50662827463100024161235523934010416269302368164062
00077 #ifndef M_PI
00078 #define M_PI 3.14159265358979323846
00079 #endif
00080 
00081 #define RoundToPixel(value) ((unsigned char) ((value) < 0.0 ? 0.0 : \
00082   ((value) > (float) 255) ? (float) 255 : (value)+0.5))
00083 
00084 typedef struct
00085 {
00086     float red, green, blue, alpha;
00087 } FloatPixel;
00088 
00089 
00090 #define CONVOLVE_ACC(weight, pixel) \
00091     r+=((weight))*(qRed((pixel))); g+=((weight))*(qGreen((pixel))); \
00092     b+=((weight))*(qBlue((pixel)));
00093 
00094 
00095 QImage Blitz::convolve(QImage &img, int matrix_size, float *matrix)
00096 {
00097     int i, x, y, w, h, matrix_x, matrix_y;
00098     int edge = matrix_size/2;
00099     QRgb *dest, *src, *s, **scanblock;
00100     float *m, *normalize_matrix, normalize;
00101     bool overflow = false;
00102 
00103     if(!(matrix_size % 2)){
00104         qWarning("Blitz::convolve(): kernel width must be an odd number!");
00105         return(img);
00106     }
00107 
00108     w = img.width();
00109     h = img.height();
00110     if(w < 3 || h < 3){
00111         qWarning("Blitz::convolve(): Image is too small!");
00112         return(img);
00113     }
00114 
00115     if(img.format() == QImage::Format_ARGB32_Premultiplied)
00116         img = img.convertToFormat(QImage::Format_ARGB32);
00117     else if(img.depth() < 32){ 
00118         img = img.convertToFormat(img.hasAlphaChannel() ?
00119                                   QImage::Format_ARGB32 :
00120                                   QImage::Format_RGB32);
00121     }
00122     QImage buffer(w, h, img.format());
00123 
00124     scanblock = new QRgb* [matrix_size];
00125     normalize_matrix = new float[matrix_size*matrix_size];
00126 
00127     // create normalized matrix
00128     normalize = 0.0;
00129     for(i=0; i < matrix_size*matrix_size; ++i)
00130         normalize += matrix[i];
00131     if(std::abs(normalize) <=  M_EPSILON)
00132         normalize = 1.0;
00133     normalize = 1.0/normalize;
00134     for(i=0; i < matrix_size*matrix_size; ++i){
00135         normalize_matrix[i] = normalize*matrix[i];
00136         if(normalize_matrix[i] < 0.0)
00137             overflow = true;
00138     }
00139 
00140     // apply
00141 
00142 #ifdef USE_MMX_INLINE_ASM
00143 #ifdef __GNUC__
00144 #warning Using MMX floating point convolve
00145 #endif
00146     if(BlitzCPUInfo::haveExtension(BlitzCPUInfo::AMD3DNOW) &&
00147        BlitzCPUInfo::haveExtension(BlitzCPUInfo::IntegerSSE)){
00148         //
00149         //
00150         // MMX/3dnow version
00151         //
00152         //
00153 
00154         __asm__ __volatile__
00155             ("pxor %%mm7, %%mm7\n\t" : :); // clear for unpacking
00156         for(y=0; y < h; ++y){
00157             dest = (QRgb *)buffer.scanLine(y);
00158             src = (QRgb *)img.scanLine(y);
00159             // Read in scanlines to pixel neighborhood. If the scanline is outside
00160             // the image use the top or bottom edge.
00161             for(x=y-edge, i=0; x <= y+edge; ++i, ++x){
00162                 scanblock[i] = (QRgb *)
00163                     img.scanLine((x < 0) ? 0 : (x > h-1) ? h-1 : x);
00164             }
00165             // Now we are about to start processing scanlines. First handle the
00166             // part where the pixel neighborhood extends off the left edge.
00167             for(x=0; x-edge < 0 ; ++x){
00168                 m = normalize_matrix;
00169                 __asm__ __volatile__
00170                     ("pxor %%mm0, %%mm0\n\t"
00171                      "pxor %%mm1, %%mm1\n\t"
00172                      : :);
00173                 for(matrix_y = 0; matrix_y < matrix_size; ++matrix_y){
00174                     s = scanblock[matrix_y];
00175                     matrix_x = -edge;
00176                     while(x+matrix_x < 0){
00177                         __asm__ __volatile__
00178                             ("movd (%0), %%mm2\n\t" // mm2: m doubleword
00179                              "punpckldq %%mm2, %%mm2\n\t"
00180                              "movd (%1), %%mm3\n\t" // load pixel
00181                              "punpcklbw %%mm7, %%mm3\n\t"
00182                              "pshufw $0xE4, %%mm3, %%mm4\n\t"
00183                              "punpcklwd %%mm7, %%mm3\n\t" // mm3: BG
00184                              "punpckhwd %%mm7, %%mm4\n\t" // mm4: RA
00185 
00186                              "pi2fd %%mm3, %%mm3\n\t" // convert to fp...
00187                              "pi2fd %%mm4, %%mm4\n\t"
00188                              "pfmul %%mm2, %%mm3\n\t" // ...and multiply
00189                              "pfmul %%mm2, %%mm4\n\t"
00190 
00191                              "pfadd %%mm3, %%mm0\n\t" // add to accumulator
00192                              "pfadd %%mm4, %%mm1\n\t"
00193                              : : "r"(m), "r"(s));
00194                         ++matrix_x; ++m;
00195                     }
00196                     while(matrix_x <= edge){
00197                         __asm__ __volatile__
00198                             ("movd (%0), %%mm2\n\t" // mm2: m doubleword
00199                              "punpckldq %%mm2, %%mm2\n\t"
00200                              "movd (%1), %%mm3\n\t" // load pixel
00201                              "punpcklbw %%mm7, %%mm3\n\t"
00202                              "pshufw $0xE4, %%mm3, %%mm4\n\t"
00203                              "punpcklwd %%mm7, %%mm3\n\t" // mm3: BG
00204                              "punpckhwd %%mm7, %%mm4\n\t" // mm4: RA
00205 
00206                              "pi2fd %%mm3, %%mm3\n\t" // convert to fp...
00207                              "pi2fd %%mm4, %%mm4\n\t"
00208                              "pfmul %%mm2, %%mm3\n\t" // ...and multiply
00209                              "pfmul %%mm2, %%mm4\n\t"
00210 
00211                              "pfadd %%mm3, %%mm0\n\t" // add to accumulator
00212                              "pfadd %%mm4, %%mm1\n\t"
00213                              : : "r"(m), "r"(s));
00214                         ++matrix_x; ++m; ++s;
00215                     }
00216                 }
00217                 __asm__ __volatile__
00218                     ("pf2id %%mm0, %%mm0\n\t"
00219                      "pf2id %%mm1, %%mm1\n\t"
00220 
00221                      "packssdw %%mm0, %%mm1\n\t"
00222                      "pshufw $0x4E, %%mm1, %%mm1\n\t"
00223                      "packuswb %%mm7, %%mm1\n\t"
00224                      "movd %%mm1, (%0)\n\t"
00225                      : : "r"(dest));
00226                 *dest = BlitzPrivate::setAlpha(*dest, qAlpha(*src++));
00227                 ++dest;
00228             }
00229             // Okay, now process the middle part where the entire neighborhood
00230             // is on the image.
00231             for(; x+edge < w; ++x){
00232                 m = normalize_matrix;
00233                 __asm__ __volatile__
00234                     ("pxor %%mm0, %%mm0\n\t"
00235                      "pxor %%mm1, %%mm1\n\t"
00236                      : :);
00237                 for(matrix_y = 0; matrix_y < matrix_size; ++matrix_y){
00238                     s = scanblock[matrix_y] + (x-edge);
00239                     for(matrix_x = -edge; matrix_x <= edge; ++matrix_x, ++m, ++s){
00240                         __asm__ __volatile__
00241                             ("movd (%0), %%mm2\n\t" // mm2: m doubleword
00242                              "punpckldq %%mm2, %%mm2\n\t"
00243                              "movd (%1), %%mm3\n\t" // load pixel
00244                              "punpcklbw %%mm7, %%mm3\n\t"
00245                              "pshufw $0xE4, %%mm3, %%mm4\n\t"
00246                              "punpcklwd %%mm7, %%mm3\n\t" // mm3: BG
00247                              "punpckhwd %%mm7, %%mm4\n\t" // mm4: RA
00248 
00249                              "pi2fd %%mm3, %%mm3\n\t" // convert to fp...
00250                              "pi2fd %%mm4, %%mm4\n\t"
00251                              "pfmul %%mm2, %%mm3\n\t" // ...and multiply
00252                              "pfmul %%mm2, %%mm4\n\t"
00253 
00254                              "pfadd %%mm3, %%mm0\n\t" // add to accumulator
00255                              "pfadd %%mm4, %%mm1\n\t"
00256                              : : "r"(m), "r"(s));
00257                     }
00258                 }
00259                 __asm__ __volatile__
00260                     ("pf2id %%mm0, %%mm0\n\t"
00261                      "pf2id %%mm1, %%mm1\n\t"
00262 
00263                      "packssdw %%mm0, %%mm1\n\t"
00264                      "pshufw $0x4E, %%mm1, %%mm1\n\t"
00265                      "packuswb %%mm7, %%mm1\n\t"
00266                      "movd %%mm1, (%0)\n\t"
00267                      : : "r"(dest));
00268                 *dest = BlitzPrivate::setAlpha(*dest, qAlpha(*src++));
00269                 ++dest;
00270             }
00271             // Finally process the right part where the neighborhood extends off
00272             // the right edge of the image
00273             for(; x < w; ++x){
00274                 m = normalize_matrix;
00275                 __asm__ __volatile__
00276                     ("pxor %%mm0, %%mm0\n\t"
00277                      "pxor %%mm1, %%mm1\n\t"
00278                      : :);
00279                 for(matrix_y = 0; matrix_y < matrix_size; ++matrix_y){
00280                     s = scanblock[matrix_y];
00281                     s += x-edge;
00282                     matrix_x = -edge;
00283                     while(x+matrix_x < w){
00284                         __asm__ __volatile__
00285                             ("movd (%0), %%mm2\n\t" // mm2: m doubleword
00286                              "punpckldq %%mm2, %%mm2\n\t"
00287                              "movd (%1), %%mm3\n\t" // load pixel
00288                              "punpcklbw %%mm7, %%mm3\n\t"
00289                              "pshufw $0xE4, %%mm3, %%mm4\n\t"
00290                              "punpcklwd %%mm7, %%mm3\n\t" // mm3: BG
00291                              "punpckhwd %%mm7, %%mm4\n\t" // mm4: RA
00292 
00293                              "pi2fd %%mm3, %%mm3\n\t" // convert to fp...
00294                              "pi2fd %%mm4, %%mm4\n\t"
00295                              "pfmul %%mm2, %%mm3\n\t" // ...and multiply
00296                              "pfmul %%mm2, %%mm4\n\t"
00297 
00298                              "pfadd %%mm3, %%mm0\n\t" // add to accumulator
00299                              "pfadd %%mm4, %%mm1\n\t"
00300                              : : "r"(m), "r"(s));
00301                         ++matrix_x, ++m, ++s;
00302                     }
00303                     --s;
00304                     while(matrix_x <= edge){
00305                         __asm__ __volatile__
00306                             ("movd (%0), %%mm2\n\t" // mm2: m doubleword
00307                              "punpckldq %%mm2, %%mm2\n\t"
00308                              "movd (%1), %%mm3\n\t" // load pixel
00309                              "punpcklbw %%mm7, %%mm3\n\t"
00310                              "pshufw $0xE4, %%mm3, %%mm4\n\t"
00311                              "punpcklwd %%mm7, %%mm3\n\t" // mm3: BG
00312                              "punpckhwd %%mm7, %%mm4\n\t" // mm4: RA
00313 
00314                              "pi2fd %%mm3, %%mm3\n\t" // convert to fp...
00315                              "pi2fd %%mm4, %%mm4\n\t"
00316                              "pfmul %%mm2, %%mm3\n\t" // ...and multiply
00317                              "pfmul %%mm2, %%mm4\n\t"
00318 
00319                              "pfadd %%mm3, %%mm0\n\t" // add to accumulator
00320                              "pfadd %%mm4, %%mm1\n\t"
00321                              : : "r"(m), "r"(s));
00322                         ++matrix_x, ++m;
00323                     }
00324                 }
00325                 __asm__ __volatile__
00326                     ("pf2id %%mm0, %%mm0\n\t"
00327                      "pf2id %%mm1, %%mm1\n\t"
00328 
00329                      "packssdw %%mm0, %%mm1\n\t"
00330                      "pshufw $0x4E, %%mm1, %%mm1\n\t"
00331                      "packuswb %%mm7, %%mm1\n\t"
00332                      "movd %%mm1, (%0)\n\t"
00333                      : : "r"(dest));
00334                 *dest = BlitzPrivate::setAlpha(*dest, qAlpha(*src++));
00335                 ++dest;
00336             }
00337         }
00338         __asm__ __volatile__ ("emms\n\t" : :);
00339     }
00340     else
00341 #endif
00342     {
00343         //
00344         //
00345         // Non-MMX version
00346         //
00347         //
00348 
00349         float r, g, b;
00350         for(y=0; y < h; ++y){
00351             src = (QRgb *)img.scanLine(y);
00352             dest = (QRgb *)buffer.scanLine(y);
00353             // Read in scanlines to pixel neighborhood. If the scanline is outside
00354             // the image use the top or bottom edge.
00355             for(x=y-edge, i=0; x <= y+edge; ++i, ++x){
00356                 scanblock[i] = (QRgb *)
00357                     img.scanLine((x < 0) ? 0 : (x > h-1) ? h-1 : x);
00358             }
00359             // Now we are about to start processing scanlines. First handle the
00360             // part where the pixel neighborhood extends off the left edge.
00361             for(x=0; x-edge < 0 ; ++x){
00362                 r = g = b = 0.0;
00363                 m = normalize_matrix;
00364                 for(matrix_y = 0; matrix_y < matrix_size; ++matrix_y){
00365                     s = scanblock[matrix_y];
00366                     matrix_x = -edge;
00367                     while(x+matrix_x < 0){
00368                         CONVOLVE_ACC(*m, *s);
00369                         ++matrix_x; ++m;
00370                     }
00371                     while(matrix_x <= edge){
00372                         CONVOLVE_ACC(*m, *s);
00373                         ++matrix_x; ++m; ++s;
00374                     }
00375                 }
00376                 r = r < 0.0 ? 0.0 : r > 255.0 ? 255.0 : r+0.5;
00377                 g = g < 0.0 ? 0.0 : g > 255.0 ? 255.0 : g+0.5;
00378                 b = b < 0.0 ? 0.0 : b > 255.0 ? 255.0 : b+0.5;
00379                 *dest++ = qRgba((unsigned char)r, (unsigned char)g,
00380                                 (unsigned char)b, qAlpha(*src++));
00381             }
00382             // Okay, now process the middle part where the entire neighborhood
00383             // is on the image.
00384             for(; x+edge < w; ++x){
00385                 m = normalize_matrix;
00386                 r = g = b = 0.0;
00387                 for(matrix_y = 0; matrix_y < matrix_size; ++matrix_y){
00388                     s = scanblock[matrix_y] + (x-edge);
00389                     for(matrix_x = -edge; matrix_x <= edge; ++matrix_x, ++m, ++s){
00390                         CONVOLVE_ACC(*m, *s);
00391                     }
00392                 }
00393                 r = r < 0.0 ? 0.0 : r > 255.0 ? 255.0 : r+0.5;
00394                 g = g < 0.0 ? 0.0 : g > 255.0 ? 255.0 : g+0.5;
00395                 b = b < 0.0 ? 0.0 : b > 255.0 ? 255.0 : b+0.5;
00396                 *dest++ = qRgba((unsigned char)r, (unsigned char)g,
00397                                 (unsigned char)b, qAlpha(*src++));
00398             }
00399             // Finally process the right part where the neighborhood extends off
00400             // the right edge of the image
00401             for(; x < w; ++x){
00402                 r = g = b = 0.0;
00403                 m = normalize_matrix;
00404                 for(matrix_y = 0; matrix_y < matrix_size; ++matrix_y){
00405                     s = scanblock[matrix_y];
00406                     s += x-edge;
00407                     matrix_x = -edge;
00408                     while(x+matrix_x < w){
00409                         CONVOLVE_ACC(*m, *s);
00410                         ++matrix_x, ++m, ++s;
00411                     }
00412                     --s;
00413                     while(matrix_x <= edge){
00414                         CONVOLVE_ACC(*m, *s);
00415                         ++matrix_x, ++m;
00416                     }
00417                 }
00418                 r = r < 0.0 ? 0.0 : r > 255.0 ? 255.0 : r+0.5;
00419                 g = g < 0.0 ? 0.0 : g > 255.0 ? 255.0 : g+0.5;
00420                 b = b < 0.0 ? 0.0 : b > 255.0 ? 255.0 : b+0.5;
00421                 *dest++ = qRgba((unsigned char)r, (unsigned char)g,
00422                                 (unsigned char)b, qAlpha(*src++));
00423             }
00424         }
00425     }
00426 
00427     delete[] scanblock;
00428     delete[] normalize_matrix;
00429     return(buffer);
00430 }
00431 
00432 QImage Blitz::convolveInteger(QImage &img, int matrix_size,
00433                               int *matrix, int divisor)
00434 {
00435     int i, x, y, w, h, matrix_x, matrix_y, *m;
00436     int edge = matrix_size/2;
00437     QRgb *dest, *src, *s, **scanblock;
00438 
00439     if(!(matrix_size % 2)){
00440         qWarning("Blitz::convolve(): kernel width must be an odd number!");
00441         return(img);
00442     }
00443 
00444     w = img.width();
00445     h = img.height();
00446     if(w < 3 || h < 3){
00447         qWarning("Blitz::convolve(): Image is too small!");
00448         return(img);
00449     }
00450 
00451     if(img.format() == QImage::Format_ARGB32_Premultiplied)
00452         img = img.convertToFormat(QImage::Format_ARGB32);
00453     else if(img.depth() < 32){ 
00454         img = img.convertToFormat(img.hasAlphaChannel() ?
00455                                   QImage::Format_ARGB32 :
00456                                   QImage::Format_RGB32);
00457     }
00458 
00459     QImage buffer(w, h, img.format());
00460     scanblock = new QRgb* [matrix_size];
00461 
00462     if(!divisor){
00463 #ifdef USE_MMX_INLINE_ASM
00464 #ifdef __GNUC__
00465 #warning Using MMX integer convolve
00466 #endif
00467         if(BlitzCPUInfo::haveExtension(BlitzCPUInfo::MMX)){
00468             //
00469             // No divisor MMX version
00470             //
00471             __asm__ __volatile__
00472                 ("pxor %%mm7, %%mm7\n\t" : :); // clear for unpacking
00473             for(y=0; y < h; ++y){
00474                 src = (QRgb *)img.scanLine(y);
00475                 dest = (QRgb *)buffer.scanLine(y);
00476                 // Read in scanlines to pixel neighborhood. If the scanline is outside
00477                 // the image use the top or bottom edge.
00478                 for(x=y-edge, i=0; x <= y+edge; ++i, ++x){
00479                     scanblock[i] = (QRgb *)
00480                         img.scanLine((x < 0) ? 0 : (x > h-1) ? h-1 : x);
00481                 }
00482                 // Now we are about to start processing scanlines. First handle the
00483                 // part where the pixel neighborhood extends off the left edge.
00484                 for(x=0; x-edge < 0 ; ++x){
00485                     m = matrix;
00486                     __asm__ __volatile__
00487                         ("pxor %%mm0, %%mm0\n\t" : :); // clear acc
00488 
00489                     for(matrix_y = 0; matrix_y < matrix_size; ++matrix_y){
00490                         s = scanblock[matrix_y];
00491                         matrix_x = -edge;
00492                         while(x+matrix_x < 0){
00493                             __asm__ __volatile__
00494                                 ("movd (%0), %%mm1\n\t" // mm1: matrix
00495                                  "punpckldq %%mm1, %%mm1\n\t"
00496                                  "packssdw %%mm1, %%mm1\n\t"
00497                                  "movd (%1), %%mm2\n\t" // mm2: pixel
00498                                  "punpcklbw %%mm7, %%mm2\n\t"
00499                                  "pmullw %%mm1, %%mm2\n\t" // multiply
00500                                  "paddsw %%mm2, %%mm0\n\t" // add to acc
00501                                  : : "r"(m), "r"(s));
00502                             ++matrix_x; ++m;
00503                         }
00504                         while(matrix_x <= edge){
00505                             __asm__ __volatile__
00506                                 ("movd (%0), %%mm1\n\t" // mm1: matrix
00507                                  "punpckldq %%mm1, %%mm1\n\t"
00508                                  "packssdw %%mm1, %%mm1\n\t"
00509                                  "movd (%1), %%mm2\n\t" // mm2: pixel
00510                                  "punpcklbw %%mm7, %%mm2\n\t"
00511                                  "pmullw %%mm1, %%mm2\n\t" // multiply
00512                                  "paddsw %%mm2, %%mm0\n\t" // add to acc
00513                                  : : "r"(m), "r"(s));
00514                             ++matrix_x; ++m; ++s;
00515                         }
00516                     }
00517                     __asm__ __volatile__
00518                         ("packuswb %%mm0, %%mm0\n\t" // pack and write
00519                          "movd %%mm0, (%0)\n\t"
00520                          : : "r"(dest));
00521                     *dest = BlitzPrivate::setAlpha(*dest, qAlpha(*src++));
00522                     ++dest;
00523                 }
00524                 // Okay, now process the middle part where the entire neighborhood
00525                 // is on the image.
00526                 for(; x+edge < w; ++x){
00527                     m = matrix;
00528                     __asm__ __volatile__
00529                         ("pxor %%mm0, %%mm0\n\t" : :); // clear acc
00530                     for(matrix_y = 0; matrix_y < matrix_size; ++matrix_y){
00531                         s = scanblock[matrix_y] + (x-edge);
00532                         for(matrix_x = -edge; matrix_x <= edge; ++matrix_x, ++m, ++s){
00533                             __asm__ __volatile__
00534                                 ("movd (%0), %%mm1\n\t" // mm1: matrix
00535                                  "punpckldq %%mm1, %%mm1\n\t"
00536                                  "packssdw %%mm1, %%mm1\n\t"
00537                                  "movd (%1), %%mm2\n\t" // mm2: pixel
00538                                  "punpcklbw %%mm7, %%mm2\n\t"
00539                                  "pmullw %%mm1, %%mm2\n\t" // multiply
00540                                  "paddsw %%mm2, %%mm0\n\t" // add to acc
00541                                  : : "r"(m), "r"(s));
00542                         }
00543                     }
00544 
00545                     __asm__ __volatile__
00546                         ("packuswb %%mm0, %%mm0\n\t" // pack and write
00547                          "movd %%mm0, (%0)\n\t"
00548                          : : "r"(dest));
00549                     *dest = BlitzPrivate::setAlpha(*dest, qAlpha(*src++));
00550                     ++dest;
00551                 }
00552                 // Finally process the right part where the neighborhood extends off
00553                 // the right edge of the image
00554                 for(; x < w; ++x){
00555                     m = matrix;
00556                     __asm__ __volatile__
00557                         ("pxor %%mm0, %%mm0\n\t" : :); // clear acc
00558                     for(matrix_y = 0; matrix_y < matrix_size; ++matrix_y){
00559                         s = scanblock[matrix_y];
00560                         s += x-edge;
00561                         matrix_x = -edge;
00562                         while(x+matrix_x < w){
00563                             __asm__ __volatile__
00564                                 ("movd (%0), %%mm1\n\t" // mm1: matrix
00565                                  "punpckldq %%mm1, %%mm1\n\t"
00566                                  "packssdw %%mm1, %%mm1\n\t"
00567                                  "movd (%1), %%mm2\n\t" // mm2: pixel
00568                                  "punpcklbw %%mm7, %%mm2\n\t"
00569                                  "pmullw %%mm1, %%mm2\n\t" // multiply
00570                                  "paddsw %%mm2, %%mm0\n\t" // add to acc
00571                                  : : "r"(m), "r"(s));
00572                             ++matrix_x, ++m, ++s;
00573                         }
00574                         --s;
00575                         while(matrix_x <= edge){
00576                             __asm__ __volatile__
00577                                 ("movd (%0), %%mm1\n\t" // mm1: matrix
00578                                  "punpckldq %%mm1, %%mm1\n\t"
00579                                  "packssdw %%mm1, %%mm1\n\t"
00580                                  "movd (%1), %%mm2\n\t" // mm2: pixel
00581                                  "punpcklbw %%mm7, %%mm2\n\t"
00582                                  "pmullw %%mm1, %%mm2\n\t" // multiply
00583                                  "paddsw %%mm2, %%mm0\n\t" // add to acc
00584                                  : : "r"(m), "r"(s));
00585                             ++matrix_x, ++m;
00586                         }
00587                     }
00588                     __asm__ __volatile__
00589                         ("packuswb %%mm0, %%mm0\n\t" // pack and write
00590                          "movd %%mm0, (%0)\n\t"
00591                          : : "r"(dest));
00592                     *dest = BlitzPrivate::setAlpha(*dest, qAlpha(*src++));
00593                     ++dest;
00594                 }
00595             }
00596             __asm__ __volatile__ ("emms\n\t" : :);
00597         }
00598         else
00599 #endif
00600         {
00601             //
00602             // No divisor non-MMX version
00603             //
00604             int r, g, b;
00605             for(y=0; y < h; ++y){
00606                 src = (QRgb *)img.scanLine(y);
00607                 dest = (QRgb *)buffer.scanLine(y);
00608                 // Read in scanlines to pixel neighborhood. If the scanline is outside
00609                 // the image use the top or bottom edge.
00610                 for(x=y-edge, i=0; x <= y+edge; ++i, ++x){
00611                     scanblock[i] = (QRgb *)
00612                         img.scanLine((x < 0) ? 0 : (x > h-1) ? h-1 : x);
00613                 }
00614                 // Now we are about to start processing scanlines. First handle the
00615                 // part where the pixel neighborhood extends off the left edge.
00616                 for(x=0; x-edge < 0 ; ++x){
00617                     r = g = b = 0;
00618                     m = matrix;
00619                     for(matrix_y = 0; matrix_y < matrix_size; ++matrix_y){
00620                         s = scanblock[matrix_y];
00621                         matrix_x = -edge;
00622                         while(x+matrix_x < 0){
00623                             CONVOLVE_ACC(*m, *s);
00624                             ++matrix_x; ++m;
00625                         }
00626                         while(matrix_x <= edge){
00627                             CONVOLVE_ACC(*m, *s);
00628                             ++matrix_x; ++m; ++s;
00629                         }
00630                     }
00631                     *dest++ = qRgba((unsigned char)qBound(0, r, 255),
00632                                     (unsigned char)qBound(0, g, 255),
00633                                     (unsigned char)qBound(0, b, 255),
00634                                     qAlpha(*src++));
00635                 }
00636                 // Okay, now process the middle part where the entire neighborhood
00637                 // is on the image.
00638                 for(; x+edge < w; ++x){
00639                     r = g = b = 0;
00640                     m = matrix;
00641                     for(matrix_y = 0; matrix_y < matrix_size; ++matrix_y){
00642                         s = scanblock[matrix_y] + (x-edge);
00643                         for(matrix_x = -edge; matrix_x <= edge; ++matrix_x, ++m, ++s){
00644                             CONVOLVE_ACC(*m, *s);
00645                         }
00646                     }
00647                     *dest++ = qRgba((unsigned char)qBound(0, r, 255),
00648                                     (unsigned char)qBound(0, g, 255),
00649                                     (unsigned char)qBound(0, b, 255),
00650                                     qAlpha(*src++));
00651                 }
00652                 // Finally process the right part where the neighborhood extends off
00653                 // the right edge of the image
00654                 for(; x < w; ++x){
00655                     r = g = b = 0;
00656                     m = matrix;
00657                     for(matrix_y = 0; matrix_y < matrix_size; ++matrix_y){
00658                         s = scanblock[matrix_y];
00659                         s += x-edge;
00660                         matrix_x = -edge;
00661                         while(x+matrix_x < w){
00662                             CONVOLVE_ACC(*m, *s);
00663                             ++matrix_x, ++m, ++s;
00664                         }
00665                         --s;
00666                         while(matrix_x <= edge){
00667                             CONVOLVE_ACC(*m, *s);
00668                             ++matrix_x, ++m;
00669                         }
00670                     }
00671                     *dest++ = qRgba((unsigned char)qBound(0, r, 255),
00672                                     (unsigned char)qBound(0, g, 255),
00673                                     (unsigned char)qBound(0, b, 255),
00674                                     qAlpha(*src++));
00675                 }
00676             }
00677         }
00678     }
00679     else{
00680 #ifdef USE_MMX_INLINE_ASM
00681         // FIXME: TODO
00682         if(/*BlitzCPUInfo::haveExtension(BlitzCPUInfo::AMD3DNOW)*/false){
00683             //
00684             // Divisor 3dnow version
00685             //
00686             __asm__ __volatile__
00687                 ("pxor %%mm7, %%mm7\n\t" : :); // clear for unpacking
00688 
00689             for(y=0; y < h; ++y){
00690                 src = (QRgb *)img.scanLine(y);
00691                 dest = (QRgb *)buffer.scanLine(y);
00692                 // Read in scanlines to pixel neighborhood. If the scanline is outside
00693                 // the image use the top or bottom edge.
00694                 for(x=y-edge, i=0; x <= y+edge; ++i, ++x){
00695                     scanblock[i] = (QRgb *)
00696                         img.scanLine((x < 0) ? 0 : (x > h-1) ? h-1 : x);
00697                 }
00698                 // Now we are about to start processing scanlines. First handle the
00699                 // part where the pixel neighborhood extends off the left edge.
00700                 for(x=0; x-edge < 0 ; ++x){
00701                     m = matrix;
00702                     __asm__ __volatile__
00703                         ("pxor %%mm0, %%mm0\n\t" : :); // clear acc
00704 
00705                     for(matrix_y = 0; matrix_y < matrix_size; ++matrix_y){
00706                         s = scanblock[matrix_y];
00707                         matrix_x = -edge;
00708                         while(x+matrix_x < 0){
00709                             __asm__ __volatile__
00710                                 ("movd (%0), %%mm1\n\t" // mm1: matrix
00711                                  "punpckldq %%mm1, %%mm1\n\t"
00712                                  "packssdw %%mm1, %%mm1\n\t"
00713                                  "movd (%1), %%mm2\n\t" // mm2: pixel
00714                                  "punpcklbw %%mm7, %%mm2\n\t"
00715                                  "pmullw %%mm1, %%mm2\n\t" // multiply
00716                                  "paddsw %%mm2, %%mm0\n\t" // add to acc
00717                                  : : "r"(m), "r"(s));
00718                             ++matrix_x; ++m;
00719                         }
00720                         while(matrix_x <= edge){
00721                             __asm__ __volatile__
00722                                 ("movd (%0), %%mm1\n\t" // mm1: matrix
00723                                  "punpckldq %%mm1, %%mm1\n\t"
00724                                  "packssdw %%mm1, %%mm1\n\t"
00725                                  "movd (%1), %%mm2\n\t" // mm2: pixel
00726                                  "punpcklbw %%mm7, %%mm2\n\t"
00727                                  "pmullw %%mm1, %%mm2\n\t" // multiply
00728                                  "paddsw %%mm2, %%mm0\n\t" // add to acc
00729                                  : : "r"(m), "r"(s));
00730                             ++matrix_x; ++m; ++s;
00731                         }
00732                     }
00733                     __asm__ __volatile__
00734                         ("packuswb %%mm0, %%mm0\n\t" // pack and write
00735                          "movd %%mm0, (%0)\n\t"
00736                          : : "r"(dest));
00737                     *dest = BlitzPrivate::setAlpha(*dest, qAlpha(*src++));
00738                     ++dest;
00739                 }
00740                 // Okay, now process the middle part where the entire neighborhood
00741                 // is on the image.
00742                 for(; x+edge < w; ++x){
00743                     m = matrix;
00744                     __asm__ __volatile__
00745                         ("pxor %%mm0, %%mm0\n\t" : :); // clear acc
00746                     for(matrix_y = 0; matrix_y < matrix_size; ++matrix_y){
00747                         s = scanblock[matrix_y] + (x-edge);
00748                         for(matrix_x = -edge; matrix_x <= edge; ++matrix_x, ++m, ++s){
00749                             __asm__ __volatile__
00750                                 ("movd (%0), %%mm1\n\t" // mm1: matrix
00751                                  "punpckldq %%mm1, %%mm1\n\t"
00752                                  "packssdw %%mm1, %%mm1\n\t"
00753                                  "movd (%1), %%mm2\n\t" // mm2: pixel
00754                                  "punpcklbw %%mm7, %%mm2\n\t"
00755                                  "pmullw %%mm1, %%mm2\n\t" // multiply
00756                                  "paddsw %%mm2, %%mm0\n\t" // add to acc
00757                                  : : "r"(m), "r"(s));
00758                         }
00759                     }
00760 
00761                     __asm__ __volatile__
00762                         ("packuswb %%mm0, %%mm0\n\t" // pack and write
00763                          "movd %%mm0, (%0)\n\t"
00764                          : : "r"(dest));
00765                     *dest = BlitzPrivate::setAlpha(*dest, qAlpha(*src++));
00766                     ++dest;
00767                 }
00768                 // Finally process the right part where the neighborhood extends off
00769                 // the right edge of the image
00770                 for(; x < w; ++x){
00771                     m = matrix;
00772                     __asm__ __volatile__
00773                         ("pxor %%mm0, %%mm0\n\t" : :); // clear acc
00774                     for(matrix_y = 0; matrix_y < matrix_size; ++matrix_y){
00775                         s = scanblock[matrix_y];
00776                         s += x-edge;
00777                         matrix_x = -edge;
00778                         while(x+matrix_x < w){
00779                             __asm__ __volatile__
00780                                 ("movd (%0), %%mm1\n\t" // mm1: matrix
00781                                  "punpckldq %%mm1, %%mm1\n\t"
00782                                  "packssdw %%mm1, %%mm1\n\t"
00783                                  "movd (%1), %%mm2\n\t" // mm2: pixel
00784                                  "punpcklbw %%mm7, %%mm2\n\t"
00785                                  "pmullw %%mm1, %%mm2\n\t" // multiply
00786                                  "paddsw %%mm2, %%mm0\n\t" // add to acc
00787                                  : : "r"(m), "r"(s));
00788                             ++matrix_x, ++m, ++s;
00789                         }
00790                         --s;
00791                         while(matrix_x <= edge){
00792                             __asm__ __volatile__
00793                                 ("movd (%0), %%mm1\n\t" // mm1: matrix
00794                                  "punpckldq %%mm1, %%mm1\n\t"
00795                                  "packssdw %%mm1, %%mm1\n\t"
00796                                  "movd (%1), %%mm2\n\t" // mm2: pixel
00797                                  "punpcklbw %%mm7, %%mm2\n\t"
00798                                  "pmullw %%mm1, %%mm2\n\t" // multiply
00799                                  "paddsw %%mm2, %%mm0\n\t" // add to acc
00800                                  : : "r"(m), "r"(s));
00801                             ++matrix_x, ++m;
00802                         }
00803                     }
00804                     __asm__ __volatile__
00805                         ("packuswb %%mm0, %%mm0\n\t" // pack and write
00806                          "movd %%mm0, (%0)\n\t"
00807                          : : "r"(dest));
00808                     *dest = BlitzPrivate::setAlpha(*dest, qAlpha(*src++));
00809                     ++dest;
00810                 }
00811             }
00812             __asm__ __volatile__ ("emms\n\t" : :);
00813         }
00814         else
00815 #endif
00816         {
00817             //
00818             // Divisor, no 3dnow
00819             //
00820             int r, g, b;
00821             for(y=0; y < h; ++y){
00822                 src = (QRgb *)img.scanLine(y);
00823                 dest = (QRgb *)buffer.scanLine(y);
00824                 // Read in scanlines to pixel neighborhood. If the scanline is outside
00825                 // the image use the top or bottom edge.
00826                 for(x=y-edge, i=0; x <= y+edge; ++i, ++x){
00827                     scanblock[i] = (QRgb *)
00828                         img.scanLine((x < 0) ? 0 : (x > h-1) ? h-1 : x);
00829                 }
00830                 // Now we are about to start processing scanlines. First handle the
00831                 // part where the pixel neighborhood extends off the left edge.
00832                 for(x=0; x-edge < 0 ; ++x){
00833                     r = g = b = 0;
00834                     m = matrix;
00835                     for(matrix_y = 0; matrix_y < matrix_size; ++matrix_y){
00836                         s = scanblock[matrix_y];
00837                         matrix_x = -edge;
00838                         while(x+matrix_x < 0){
00839                             CONVOLVE_ACC(*m, *s);
00840                             ++matrix_x; ++m;
00841                         }
00842                         while(matrix_x <= edge){
00843                             CONVOLVE_ACC(*m, *s);
00844                             ++matrix_x; ++m; ++s;
00845                         }
00846                     }
00847                     r /= divisor; g /= divisor; b /= divisor;
00848                     *dest++ = qRgba((unsigned char)qBound(0, r, 255),
00849                                     (unsigned char)qBound(0, g, 255),
00850                                     (unsigned char)qBound(0, b, 255),
00851                                     qAlpha(*src++));
00852                 }
00853                 // Okay, now process the middle part where the entire neighborhood
00854                 // is on the image.
00855                 for(; x+edge < w; ++x){
00856                     r = g = b = 0;
00857                     m = matrix;
00858                     for(matrix_y = 0; matrix_y < matrix_size; ++matrix_y){
00859                         s = scanblock[matrix_y] + (x-edge);
00860                         for(matrix_x = -edge; matrix_x <= edge; ++matrix_x, ++m, ++s){
00861                             CONVOLVE_ACC(*m, *s);
00862                         }
00863                     }
00864                     r /= divisor; g /= divisor; b /= divisor;
00865                     *dest++ = qRgba((unsigned char)qBound(0, r, 255),
00866                                     (unsigned char)qBound(0, g, 255),
00867                                     (unsigned char)qBound(0, b, 255),
00868                                     qAlpha(*src++));
00869                 }
00870                 // Finally process the right part where the neighborhood extends off
00871                 // the right edge of the image
00872                 for(; x < w; ++x){
00873                     r = g = b = 0;
00874                     m = matrix;
00875                     for(matrix_y = 0; matrix_y < matrix_size; ++matrix_y){
00876                         s = scanblock[matrix_y];
00877                         s += x-edge;
00878                         matrix_x = -edge;
00879                         while(x+matrix_x < w){
00880                             CONVOLVE_ACC(*m, *s);
00881                             ++matrix_x, ++m, ++s;
00882                         }
00883                         --s;
00884                         while(matrix_x <= edge){
00885                             CONVOLVE_ACC(*m, *s);
00886                             ++matrix_x, ++m;
00887                         }
00888                     }
00889                     r /= divisor; g /= divisor; b /= divisor;
00890                     *dest++ = qRgba((unsigned char)qBound(0, r, 255),
00891                                     (unsigned char)qBound(0, g, 255),
00892                                     (unsigned char)qBound(0, b, 255),
00893                                     qAlpha(*src++));
00894                 }
00895             }
00896         }
00897     }
00898 
00899     delete[] scanblock;
00900     return(buffer);
00901 }
00902 
00903 int BlitzPrivate::defaultConvolveMatrixSize(float radius, float sigma,
00904                                             bool quality)
00905 {
00906     int i, matrix_size;
00907     float normalize, value;
00908     float sigma2 = sigma*sigma*2.0;
00909     float sigmaSQ2PI = M_SQ2PI*sigma;
00910     int max = quality ? 65535 : 255;
00911 
00912     if(sigma == 0.0){
00913         qWarning("Blitz::convolve(): Zero sigma is invalid!");
00914         return(5);
00915     }
00916 
00917     if(radius > 0.0)
00918         return((int)(2.0*std::ceil(radius)+1.0));
00919 
00920     matrix_size = 5;
00921     do{
00922         normalize = 0.0;
00923         for(i=(-matrix_size/2); i <= (matrix_size/2); ++i)
00924             normalize += std::exp(-((float) i*i)/sigma2) / sigmaSQ2PI;
00925         i = matrix_size/2;
00926         value = std::exp(-((float) i*i)/sigma2) / sigmaSQ2PI / normalize;
00927         matrix_size += 2;
00928     } while((int)(max*value) > 0);
00929 
00930     matrix_size-=4;
00931     return(matrix_size);
00932 }
00933 
00934 float* BlitzPrivate::getBlurKernel(int &kernel_width, float sigma)
00935 {
00936 #define KernelRank 3
00937 
00938     float alpha, normalize, *kernel;
00939     int bias;
00940     long i;
00941 
00942     if(sigma == 0.0)
00943         return(false);
00944     if(kernel_width == 0)
00945         kernel_width = 3;
00946 
00947     kernel = new float[kernel_width+1];
00948     memset(kernel, 0, (kernel_width+1)*sizeof(float));
00949     bias = KernelRank*kernel_width/2;
00950     for(i=(-bias); i <= bias; ++i){
00951         alpha = std::exp(-((float) i*i)/(2.0*KernelRank*KernelRank*sigma*sigma));
00952         kernel[(i+bias)/KernelRank] += alpha/(M_SQ2PI*sigma);
00953     }
00954 
00955     normalize = 0;
00956     for(i=0; i < kernel_width; ++i)
00957         normalize += kernel[i];
00958     for(i=0; i < kernel_width; ++i)
00959         kernel[i] /= normalize;
00960     return(kernel);
00961 }
00962 
00963 void BlitzPrivate::blurScanLine(float *kernel, int kern_width,
00964                                 QRgb *source, QRgb *destination,
00965                                 int columns, int offset)
00966 {
00967     FloatPixel aggregate, zero;
00968     float scale, *k;
00969     QRgb *src, *dest;
00970     int i, x;
00971 
00972 #ifdef USE_MMX_INLINE_ASM
00973 #ifdef __GNUC__
00974 #warning Using MMX gaussian blur
00975 #endif
00976     //
00977     //
00978     // MMX Version
00979     //
00980     //
00981 
00982     if(BlitzCPUInfo::haveExtension(BlitzCPUInfo::AMD3DNOW) &&
00983        BlitzCPUInfo::haveExtension(BlitzCPUInfo::IntegerSSE)){
00984         if(kern_width > columns){
00985             memset(&zero, 0, sizeof(FloatPixel));
00986             for(dest=destination, x=0; x < columns; ++x, dest+=offset){
00987                 aggregate = zero;
00988                 scale = 0.0;
00989                 k = kernel;
00990                 src = source;
00991                 for(i=0; i < columns; ++k, src+=offset){
00992                     if((i >= (x-kern_width/2)) && (i <= (x+kern_width/2))){
00993                         aggregate.red += (*k)*qRed(*src);
00994                         aggregate.green += (*k)*qGreen(*src);
00995                         aggregate.blue += (*k)*qBlue(*src);
00996                         aggregate.alpha += (*k)*qAlpha(*src);
00997                     }
00998 
00999                     if(((i+kern_width/2-x) >= 0) && ((i+kern_width/2-x) < kern_width))
01000                         scale += kernel[i+kern_width/2-x];
01001                 }
01002                 scale = 1.0/scale;
01003                 *dest = qRgba((unsigned char)(scale*(aggregate.red+0.5)),
01004                               (unsigned char)(scale*(aggregate.green+0.5)),
01005                               (unsigned char)(scale*(aggregate.blue+0.5)),
01006                               (unsigned char)(scale*(aggregate.alpha+0.5)));
01007             }
01008             return;
01009         }
01010 
01011         // blur
01012         __asm__ __volatile__ ("pxor %%mm7, %%mm7\n\t" : : ); // for unpacking
01013         for(dest=destination, x=0; x < kern_width/2; ++x, dest+=offset){
01014             k = kernel+kern_width/2-x;
01015             src = source;
01016             __asm__ __volatile__ ("pxor %%mm0, %%mm0\n\t" // mm0: BG acc
01017                                   "pxor %%mm1, %%mm1\n\t" // mm1: RA acc
01018                                   "pxor %%mm6, %%mm6\n\t" // mm6: scale
01019                                   : : );
01020             for(i=kern_width/2-x; i < kern_width; ++i, ++k, src+=offset){
01021                 __asm__ __volatile__
01022                     ("movd (%0), %%mm2\n\t" // mm2: k doubleword
01023                      "punpckldq %%mm2, %%mm2\n\t"
01024                      "pfadd %%mm2, %%mm6\n\t" // add to scale
01025 
01026                      "movd (%1), %%mm3\n\t" // load pixel
01027                      "punpcklbw %%mm7, %%mm3\n\t"
01028                      "pshufw $0xE4, %%mm3, %%mm4\n\t"
01029                      "punpcklwd %%mm7, %%mm3\n\t" // mm3: BG
01030                      "punpckhwd %%mm7, %%mm4\n\t" // mm4: RA
01031 
01032                      "pi2fd %%mm3, %%mm3\n\t" // convert to fp...
01033                      "pi2fd %%mm4, %%mm4\n\t"
01034                      "pfmul %%mm2, %%mm3\n\t" // ...and multiply
01035                      "pfmul %%mm2, %%mm4\n\t"
01036 
01037                      "pfadd %%mm3, %%mm0\n\t" // add to accumulator
01038                      "pfadd %%mm4, %%mm1\n\t"
01039                      : : "r"(k), "r"(src));
01040             }
01041             __asm__ __volatile__
01042                 ("pfrcp %%mm6, %%mm5\n\t" // reciprocal
01043                  "pfrcpit1 %%mm5, %%mm6\n\t" // expand to 24bit
01044                  "pfrcpit2 %%mm5, %%mm6\n\t"
01045 
01046                  "pfmul %%mm6, %%mm0\n\t" // multiply
01047                  "pfmul %%mm6, %%mm1\n\t"
01048 
01049                  "pf2id %%mm0, %%mm0\n\t" // and write
01050                  "pf2id %%mm1, %%mm1\n\t"
01051                  "packssdw %%mm0, %%mm1\n\t"
01052                  "pshufw $0x4E, %%mm1, %%mm1\n\t"
01053                  "packuswb %%mm7, %%mm1\n\t"
01054                  "movd %%mm1, (%0)\n\t"
01055                  : : "r"(dest));
01056         }
01057         for(; x < (columns-kern_width/2); ++x, dest+=offset){
01058             __asm__ __volatile__ ("pxor %%mm0, %%mm0\n\t" // mm0: BG acc
01059                                   "pxor %%mm1, %%mm1\n\t" // mm1: RA acc
01060                                   : : );
01061             k = kernel;
01062             src = source+((x-kern_width/2)*offset);
01063             for(i=0; i < kern_width; ++i, ++k, src+=offset){
01064                 __asm__ __volatile__
01065                     ("movd (%0), %%mm2\n\t" // mm2: k doubleword
01066                      "punpckldq %%mm2, %%mm2\n\t"
01067 
01068                      "movd (%1), %%mm3\n\t" // load pixel
01069                      "punpcklbw %%mm7, %%mm3\n\t"
01070                      "pshufw $0xE4, %%mm3, %%mm4\n\t"
01071                      "punpcklwd %%mm7, %%mm3\n\t" // mm3: BG
01072                      "punpckhwd %%mm7, %%mm4\n\t" // mm4: RA
01073 
01074                      "pi2fd %%mm3, %%mm3\n\t" // convert to fp...
01075                      "pi2fd %%mm4, %%mm4\n\t"
01076                      "pfmul %%mm2, %%mm3\n\t" // ...and multiply
01077                      "pfmul %%mm2, %%mm4\n\t"
01078 
01079                      "pfadd %%mm3, %%mm0\n\t" // add to accumulator
01080                      "pfadd %%mm4, %%mm1\n\t"
01081                      : : "r"(k), "r"(src));
01082             }
01083             __asm__ __volatile__
01084                 ("pf2id %%mm0, %%mm0\n\t" // write
01085                  "pf2id %%mm1, %%mm1\n\t"
01086                  "packssdw %%mm0, %%mm1\n\t"
01087                  "pshufw $0x4E, %%mm1, %%mm1\n\t"
01088                  "packuswb %%mm7, %%mm1\n\t"
01089                  "movd %%mm1, (%0)\n\t"
01090                  : : "r"(dest));
01091         }
01092         for(; x < columns; ++x, dest+=offset){
01093             k = kernel;
01094             src = source+((x-kern_width/2)*offset);
01095             __asm__ __volatile__ ("pxor %%mm0, %%mm0\n\t" // mm0: BG acc
01096                                   "pxor %%mm1, %%mm1\n\t" // mm1: RA acc
01097                                   "pxor %%mm6, %%mm6\n\t" // mm6: scale
01098                                   : : );
01099             for(i=0; i < (columns-x+kern_width/2); ++i, ++k, src+=offset){
01100                 __asm__ __volatile__
01101                     ("movd (%0), %%mm2\n\t" // mm2: k doubleword
01102                      "punpckldq %%mm2, %%mm2\n\t"
01103                      "pfadd %%mm2, %%mm6\n\t" // add to scale
01104 
01105                      "movd (%1), %%mm3\n\t" // load pixel
01106                      "punpcklbw %%mm7, %%mm3\n\t"
01107                      "pshufw $0xE4, %%mm3, %%mm4\n\t"
01108                      "punpcklwd %%mm7, %%mm3\n\t" // mm3: BG
01109                      "punpckhwd %%mm7, %%mm4\n\t" // mm4: RA
01110 
01111                      "pi2fd %%mm3, %%mm3\n\t" // convert to fp...
01112                      "pi2fd %%mm4, %%mm4\n\t"
01113                      "pfmul %%mm2, %%mm3\n\t" // ...and multiply
01114                      "pfmul %%mm2, %%mm4\n\t"
01115 
01116                      "pfadd %%mm3, %%mm0\n\t" // add to accumulator
01117                      "pfadd %%mm4, %%mm1\n\t"
01118                      : : "r"(k), "r"(src));
01119             }
01120             __asm__ __volatile__
01121                 ("pfrcp %%mm6, %%mm5\n\t" // reciprocal
01122                  "pfrcpit1 %%mm5, %%mm6\n\t" // expand to 24bit
01123                  "pfrcpit2 %%mm5, %%mm6\n\t"
01124 
01125                  "pfmul %%mm6, %%mm0\n\t" // multiply
01126                  "pfmul %%mm6, %%mm1\n\t"
01127 
01128                  "pf2id %%mm0, %%mm0\n\t" // and write
01129                  "pf2id %%mm1, %%mm1\n\t"
01130                  "packssdw %%mm0, %%mm1\n\t"
01131                  "pshufw $0x4E, %%mm1, %%mm1\n\t"
01132                  "packuswb %%mm7, %%mm1\n\t"
01133                  "movd %%mm1, (%0)\n\t"
01134                  : : "r"(dest));
01135         }
01136 
01137         __asm__ __volatile__ ("emms\n\t" : :);
01138     }
01139     else
01140 #endif
01141     {
01142         //
01143         //
01144         // Non-MMX version
01145         //
01146         //
01147 
01148         memset(&zero, 0, sizeof(FloatPixel));
01149         if(kern_width > columns){
01150             for(dest=destination, x=0; x < columns; ++x, dest+=offset){
01151                 aggregate = zero;
01152                 scale = 0.0;
01153                 k = kernel;
01154                 src = source;
01155                 for(i=0; i < columns; ++k, src+=offset){
01156                     if((i >= (x-kern_width/2)) && (i <= (x+kern_width/2))){
01157                         aggregate.red += (*k)*qRed(*src);
01158                         aggregate.green += (*k)*qGreen(*src);
01159                         aggregate.blue += (*k)*qBlue(*src);
01160                         aggregate.alpha += (*k)*qAlpha(*src);
01161                     }
01162 
01163                     if(((i+kern_width/2-x) >= 0) && ((i+kern_width/2-x) < kern_width))
01164                         scale += kernel[i+kern_width/2-x];
01165                 }
01166                 scale = 1.0/scale;
01167                 *dest = qRgba((unsigned char)(scale*(aggregate.red+0.5)),
01168                               (unsigned char)(scale*(aggregate.green+0.5)),
01169                               (unsigned char)(scale*(aggregate.blue+0.5)),
01170                               (unsigned char)(scale*(aggregate.alpha+0.5)));
01171             }
01172             return;
01173         }
01174 
01175         // blur
01176         for(dest=destination, x=0; x < kern_width/2; ++x, dest+=offset){
01177             aggregate = zero; // put this stuff in loop initializer once tested
01178             scale = 0.0;
01179             k = kernel+kern_width/2-x;
01180             src = source;
01181             for(i=kern_width/2-x; i < kern_width; ++i, ++k, src+=offset){
01182                 aggregate.red += (*k)*qRed(*src);
01183                 aggregate.green += (*k)*qGreen(*src);
01184                 aggregate.blue += (*k)*qBlue(*src);
01185                 aggregate.alpha += (*k)*qAlpha(*src);
01186                 scale += (*k);
01187             }
01188             scale = 1.0/scale;
01189             *dest = qRgba((unsigned char)(scale*(aggregate.red+0.5)),
01190                           (unsigned char)(scale*(aggregate.green+0.5)),
01191                           (unsigned char)(scale*(aggregate.blue+0.5)),
01192                           (unsigned char)(scale*(aggregate.alpha+0.5)));
01193         }
01194         for(; x < (columns-kern_width/2); ++x, dest+=offset){
01195             aggregate = zero;
01196             k = kernel;
01197             src = source+((x-kern_width/2)*offset);
01198             for(i=0; i < kern_width; ++i, ++k, src+=offset){
01199                 aggregate.red += (*k)*qRed(*src);
01200                 aggregate.green += (*k)*qGreen(*src);
01201                 aggregate.blue += (*k)*qBlue(*src);
01202                 aggregate.alpha += (*k)*qAlpha(*src);
01203             }
01204             *dest = qRgba((unsigned char)(aggregate.red+0.5),
01205                           (unsigned char)(aggregate.green+0.5),
01206                           (unsigned char)(aggregate.blue+0.5),
01207                           (unsigned char)(aggregate.alpha+0.5));
01208         }
01209         for(; x < columns; ++x, dest+=offset){
01210             aggregate = zero;
01211             scale = 0;
01212             k = kernel;
01213             src = source+((x-kern_width/2)*offset);
01214             for(i=0; i < (columns-x+kern_width/2); ++i, ++k, src+=offset){
01215                 aggregate.red += (*k)*qRed(*src);
01216                 aggregate.green += (*k)*qGreen(*src);
01217                 aggregate.blue += (*k)*qBlue(*src);
01218                 aggregate.alpha += (*k)*qAlpha(*src);
01219                 scale += (*k);
01220             }
01221             scale = 1.0/scale;
01222             *dest = qRgba((unsigned char)(scale*(aggregate.red+0.5)),
01223                           (unsigned char)(scale*(aggregate.green+0.5)),
01224                           (unsigned char)(scale*(aggregate.blue+0.5)),
01225                           (unsigned char)(scale*(aggregate.alpha+0.5)));
01226         }
01227     }
01228 }
01229 
01230 QImage Blitz::gaussianBlur(QImage &img, float radius, float sigma)
01231 {
01232     int kern_width, x, y, w, h;
01233     QRgb *src;
01234     float *k = NULL;
01235 
01236     if(sigma == 0.0){
01237         qWarning("Blitz::gaussianBlur(): Zero sigma is not valid!");
01238         return(img);
01239     }
01240 
01241     // figure out optimal kernel width
01242     if(radius > 0){
01243         kern_width = (int)(2*std::ceil(radius)+1);
01244         k = BlitzPrivate::getBlurKernel(kern_width, sigma);
01245     }
01246     else{
01247         float *last_kernel = NULL;
01248         kern_width = 3;
01249         k = BlitzPrivate::getBlurKernel(kern_width, sigma);
01250         while((long)(255*k[0]) > 0){
01251             if(last_kernel != NULL)
01252                 delete[] last_kernel;
01253             last_kernel = k;
01254             kern_width += 2;
01255             k = BlitzPrivate::getBlurKernel(kern_width, sigma);
01256         }
01257         if(last_kernel != NULL){
01258             delete[] k;
01259             kern_width -= 2;
01260             k = last_kernel;
01261         }
01262     }
01263 
01264     if(kern_width < 3){
01265         qWarning("Blitz::gaussianBlur(): radius is too small!");
01266         return(img);
01267     }
01268 
01269     // allocate destination image
01270     w = img.width();
01271     h = img.height();
01272     if(img.format() == QImage::Format_ARGB32_Premultiplied)
01273         img = img.convertToFormat(QImage::Format_ARGB32);
01274     else if(img.depth() < 32){
01275         img = img.convertToFormat(img.hasAlphaChannel() ?
01276                                   QImage::Format_ARGB32 :
01277                                   QImage::Format_RGB32);
01278     }
01279     QImage buffer(w, h, img.format());
01280 
01281     //blur image rows
01282     for(y=0; y < h; ++y)
01283         BlitzPrivate::blurScanLine(k, kern_width, (QRgb *)img.scanLine(y),
01284                                    (QRgb *)buffer.scanLine(y), img.width(), 1);
01285 
01286     // blur image columns
01287     src = (QRgb *)buffer.scanLine(0);
01288     for(x=0; x < w; ++x)
01289         BlitzPrivate::blurScanLine(k, kern_width, src+x, src+x, img.height(),
01290                                    img.width());
01291     // finish up
01292     delete[] k;
01293     return(buffer);
01294 }
01295 
01296 QImage Blitz::gaussianSharpen(QImage &img, float radius, float sigma,
01297                               EffectQuality quality)
01298 {
01299     if(sigma == 0.0){
01300         qWarning("Blitz::gaussianSharpen(): Zero sigma is invalid!");
01301         return(img);
01302     }
01303 
01304     int matrix_size = BlitzPrivate::defaultConvolveMatrixSize(radius, sigma,
01305                                                               quality == High);
01306     int len = matrix_size*matrix_size;
01307     float alpha, *matrix = new float[len];
01308     float sigma2 = sigma*sigma*2.0;
01309     float sigmaPI2 = 2.0*M_PI*sigma*sigma;
01310 
01311     int half = matrix_size/2;
01312     int x, y, i=0, j=half;
01313     float normalize=0.0;
01314     for(y=(-half); y <= half; ++y, --j){
01315         for(x=(-half); x <= half; ++x, ++i){
01316             alpha = std::exp(-((float)x*x+y*y)/sigma2);
01317             matrix[i] = alpha/sigmaPI2;
01318             normalize += matrix[i];
01319         }
01320     }
01321 
01322     matrix[i/2]=(-2.0)*normalize;
01323     QImage result(convolve(img, matrix_size, matrix));
01324     delete[] matrix;
01325     return(result);
01326 }
01327 
01328 QImage Blitz::antialias(QImage &img)
01329 {
01330     // nice little example method
01331     int matrix[] = {
01332         1, 2, 1,
01333         2, 8, 2,
01334         1, 2, 1
01335     };
01336     return(convolveInteger(img, 3, matrix, 20));
01337 }
01338 
01339 QImage Blitz::emboss(QImage &img, float radius, float sigma,
01340                      EffectQuality quality)
01341 {
01342     if(sigma == 0.0){
01343         qWarning("Blitz::emboss(): Zero sigma is invalid!");
01344         return(img);
01345     }
01346 
01347     int matrix_size = BlitzPrivate::defaultConvolveMatrixSize(radius, sigma,
01348                                                               quality == High);
01349     int len = matrix_size*matrix_size;
01350 
01351     float alpha, *matrix = new float[len];
01352     float sigma2 = sigma*sigma*2.0;
01353     float sigmaPI2 = 2.0*M_PI*sigma*sigma;
01354 
01355     int half = matrix_size/2;
01356     int x, y, i=0, j=half;
01357     for(y=(-half); y <= half; ++y, --j){
01358         for(x=(-half); x <= half; ++x, ++i){
01359             alpha = std::exp(-((float)x*x+y*y)/sigma2);
01360             matrix[i]=((x < 0) || (y < 0) ? -8.0 : 8.0)*alpha/sigmaPI2;
01361             if(x == j)
01362                 matrix[i]=0.0;
01363         }
01364     }
01365     QImage result(convolve(img, matrix_size, matrix));
01366     delete[] matrix;
01367     equalize(result);
01368     return(result);
01369 }
01370 
01371 QImage Blitz::convolveEdge(QImage &img, float radius,
01372                            EffectQuality quality)
01373 {
01374     int i, matrix_size = BlitzPrivate::defaultConvolveMatrixSize(radius, 0.5,
01375                                                                  quality == High);
01376     int len = matrix_size*matrix_size;
01377     int *matrix = new int[len];
01378     for(i=0; i < len; ++i)
01379         matrix[i] = -1;
01380     matrix[i/2] = len-1;
01381 
01382     QImage result(convolveInteger(img, matrix_size, matrix, 0));
01383     delete[] matrix;
01384     return(result);
01385 }
01386 
01387 

qimageblitz

Skip menu "qimageblitz"
  • Main Page
  • Namespace List
  • Alphabetical List
  • Class List
  • File List
  • Namespace Members
  • Class Members

KDE Support

Skip menu "KDE Support"
  • akonadi
  • Decibel
  • grantlee
  • kdewin
  • phonon
  •     Backend
  • polkit-qt
  • qca
  • qimageblitz
  • soprano
  • strigi
  •     searchclient
  •     streamanalyzer
  •     streams
Generated for KDE Support by doxygen 1.5.9-20090814
This website is maintained by Adriaan de Groot and Allen Winter.
KDE® and the K Desktop Environment® logo are registered trademarks of KDE e.V. | Legal