kimageeffect.cpp

00001 /* This file is part of the KDE libraries
00002     Copyright (C) 1998, 1999, 2001, 2002 Daniel M. Duley <mosfet@kde.org>
00003     (C) 1998, 1999 Christian Tibirna <ctibirna@total.net>
00004     (C) 1998, 1999 Dirk Mueller <mueller@kde.org>
00005     (C) 1999 Geert Jansen <g.t.jansen@stud.tue.nl>
00006     (C) 2000 Josef Weidendorfer <weidendo@in.tum.de>
00007     (C) 2004 Zack Rusin <zack@kde.org>
00008 
00009 Redistribution and use in source and binary forms, with or without
00010 modification, are permitted provided that the following conditions
00011 are met:
00012 
00013 1. Redistributions of source code must retain the above copyright
00014    notice, this list of conditions and the following disclaimer.
00015 2. Redistributions in binary form must reproduce the above copyright
00016    notice, this list of conditions and the following disclaimer in the
00017    documentation and/or other materials provided with the distribution.
00018 
00019 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
00020 IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
00021 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
00022 IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
00023 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
00024 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
00025 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
00026 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
00027 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
00028 THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00029 
00030 */
00031 
00032 // $Id$
00033 
00034 #include <math.h>
00035 #include <assert.h>
00036 
00037 #include <qimage.h>
00038 #include <stdlib.h>
00039 #include <iostream>
00040 
00041 #include "kimageeffect.h"
00042 #include "kcpuinfo.h"
00043 
00044 #include <config.h>
00045 
00046 #if 0
00047 //disabled until #74478 fixed.
00048 
00049 #if defined(__i386__) && ( defined(__GNUC__) || defined(__INTEL_COMPILER) )
00050 #  if defined( HAVE_X86_MMX )
00051 #    define USE_MMX_INLINE_ASM
00052 #  endif
00053 #  if defined( HAVE_X86_SSE2 )
00054 #    define USE_SSE2_INLINE_ASM
00055 #  endif
00056 #endif
00057 
00058 #endif
00059 //======================================================================
00060 //
00061 // Utility stuff for effects ported from ImageMagick to QImage
00062 //
00063 //======================================================================
// Largest value of a single 8-bit colour channel.
#define MaxRGB 255L
// Converts an angle in degrees to radians. Uses the file's own MagickPI
// constant rather than M_PI, which is a POSIX extension and is not
// guaranteed to be provided by <math.h> on every toolchain.
#define DegreesToRadians(x) ((x)*MagickPI/180.0)
// sqrt(2*pi)
#define MagickSQ2PI 2.50662827463100024161235523934010416269302368164062
// Small tolerance used to guard against division by (nearly) zero.
#define MagickEpsilon  1.0e-12
// pi
#define MagickPI  3.14159265358979323846264338327950288419716939937510
// Modulo that always yields a result in [0, y) for positive y, including
// negative x (the built-in % would give a non-positive result there).
// NOTE: both arguments are evaluated more than once; do not pass
// expressions with side effects.
#define MOD(x, y) ((x) < 0 ? ((y) - 1 - ((y) - 1 - (x)) % (y)) : (x) % (y))
00070 
// Macro spelling of fxClamp(), kept for callers that use the upper-case form.
#define FXCLAMP(x,low,high) fxClamp(x,low,high)

// Restricts x to the closed interval [low, high].
// Returns a reference to low when x < low, to high when x > high, and to x
// itself otherwise; no copy of the chosen argument is made.
template<class T>
inline const T& fxClamp( const T& x, const T& low, const T& high )
{
    return ( x < low ) ? low
         : ( x > high ) ? high
         : x;
}
00084 
00085 static inline unsigned int intensityValue(unsigned int color)
00086 {
00087     return((unsigned int)((0.299*qRed(color) +
00088                            0.587*qGreen(color) +
00089                            0.1140000000000001*qBlue(color))));
00090 }
00091 
// Releases a malloc()-family allocation through a pointer-to-pointer and
// resets the caller's pointer to NULL. A pointer that is already NULL is
// left alone; passing a NULL `memory` itself trips the assertion.
template<typename T>
static inline void liberateMemory(T **memory)
{
    assert(memory != NULL);
    if(*memory != NULL) {
        free((char*)*memory);
        *memory = NULL;
    }
}
00100 
// One pixel's worth of channel values held as doubles, in the order
// red, green, blue, alpha.
struct double_packet
{
    double red, green, blue, alpha;
};
00108 
// One pixel's worth of channel values held as unsigned 16-bit-or-wider
// shorts, in the order red, green, blue, alpha.
struct short_packet
{
    unsigned short red, green, blue, alpha;
};
00116 
00117 
00118 //======================================================================
00119 //
00120 // Gradient effects
00121 //
00122 //======================================================================
00123 
00124 QImage KImageEffect::gradient(const QSize &size, const QColor &ca,
00125     const QColor &cb, GradientType eff, int ncols)
00126 {
00127     int rDiff, gDiff, bDiff;
00128     int rca, gca, bca, rcb, gcb, bcb;
00129 
00130     QImage image(size, 32);
00131 
00132     if (size.width() == 0 || size.height() == 0) {
00133 #ifndef NDEBUG
00134       std::cerr << "WARNING: KImageEffect::gradient: invalid image" << std::endl;
00135 #endif
00136       return image;
00137     }
00138 
00139     register int x, y;
00140 
00141     rDiff = (rcb = cb.red())   - (rca = ca.red());
00142     gDiff = (gcb = cb.green()) - (gca = ca.green());
00143     bDiff = (bcb = cb.blue())  - (bca = ca.blue());
00144 
00145     if( eff == VerticalGradient || eff == HorizontalGradient ){
00146 
00147         uint *p;
00148         uint rgb;
00149 
00150         register int rl = rca << 16;
00151         register int gl = gca << 16;
00152         register int bl = bca << 16;
00153 
00154         if( eff == VerticalGradient ) {
00155 
00156             int rcdelta = ((1<<16) / size.height()) * rDiff;
00157             int gcdelta = ((1<<16) / size.height()) * gDiff;
00158             int bcdelta = ((1<<16) / size.height()) * bDiff;
00159 
00160             for ( y = 0; y < size.height(); y++ ) {
00161                 p = (uint *) image.scanLine(y);
00162 
00163                 rl += rcdelta;
00164                 gl += gcdelta;
00165                 bl += bcdelta;
00166 
00167                 rgb = qRgb( (rl>>16), (gl>>16), (bl>>16) );
00168 
00169                 for( x = 0; x < size.width(); x++ ) {
00170                     *p = rgb;
00171                     p++;
00172                 }
00173             }
00174 
00175         }
00176         else {                  // must be HorizontalGradient
00177 
00178             unsigned int *o_src = (unsigned int *)image.scanLine(0);
00179             unsigned int *src = o_src;
00180 
00181             int rcdelta = ((1<<16) / size.width()) * rDiff;
00182             int gcdelta = ((1<<16) / size.width()) * gDiff;
00183             int bcdelta = ((1<<16) / size.width()) * bDiff;
00184 
00185             for( x = 0; x < size.width(); x++) {
00186 
00187                 rl += rcdelta;
00188                 gl += gcdelta;
00189                 bl += bcdelta;
00190 
00191                 *src++ = qRgb( (rl>>16), (gl>>16), (bl>>16));
00192             }
00193 
00194             src = o_src;
00195 
00196             // Believe it or not, manually copying in a for loop is faster
00197             // than calling memcpy for each scanline (on the order of ms...).
00198             // I think this is due to the function call overhead (mosfet).
00199 
00200             for (y = 1; y < size.height(); ++y) {
00201 
00202                 p = (unsigned int *)image.scanLine(y);
00203                 src = o_src;
00204                 for(x=0; x < size.width(); ++x)
00205                     *p++ = *src++;
00206             }
00207         }
00208     }
00209 
00210     else {
00211 
00212         float rfd, gfd, bfd;
00213         float rd = rca, gd = gca, bd = bca;
00214 
00215         unsigned char *xtable[3];
00216         unsigned char *ytable[3];
00217 
00218         unsigned int w = size.width(), h = size.height();
00219         xtable[0] = new unsigned char[w];
00220         xtable[1] = new unsigned char[w];
00221         xtable[2] = new unsigned char[w];
00222         ytable[0] = new unsigned char[h];
00223         ytable[1] = new unsigned char[h];
00224         ytable[2] = new unsigned char[h];
00225         w*=2, h*=2;
00226 
00227         if ( eff == DiagonalGradient || eff == CrossDiagonalGradient) {
00228             // Diagonal dgradient code inspired by BlackBox (mosfet)
00229             // BlackBox dgradient is (C) Brad Hughes, <bhughes@tcac.net> and
00230             // Mike Cole <mike@mydot.com>.
00231 
00232             rfd = (float)rDiff/w;
00233             gfd = (float)gDiff/w;
00234             bfd = (float)bDiff/w;
00235 
00236             int dir;
00237             for (x = 0; x < size.width(); x++, rd+=rfd, gd+=gfd, bd+=bfd) {
00238                 dir = eff == DiagonalGradient? x : size.width() - x - 1;
00239                 xtable[0][dir] = (unsigned char) rd;
00240                 xtable[1][dir] = (unsigned char) gd;
00241                 xtable[2][dir] = (unsigned char) bd;
00242             }
00243             rfd = (float)rDiff/h;
00244             gfd = (float)gDiff/h;
00245             bfd = (float)bDiff/h;
00246             rd = gd = bd = 0;
00247             for (y = 0; y < size.height(); y++, rd+=rfd, gd+=gfd, bd+=bfd) {
00248                 ytable[0][y] = (unsigned char) rd;
00249                 ytable[1][y] = (unsigned char) gd;
00250                 ytable[2][y] = (unsigned char) bd;
00251             }
00252 
00253             for (y = 0; y < size.height(); y++) {
00254                 unsigned int *scanline = (unsigned int *)image.scanLine(y);
00255                 for (x = 0; x < size.width(); x++) {
00256                     scanline[x] = qRgb(xtable[0][x] + ytable[0][y],
00257                                        xtable[1][x] + ytable[1][y],
00258                                        xtable[2][x] + ytable[2][y]);
00259                 }
00260             }
00261         }
00262 
00263         else if (eff == RectangleGradient ||
00264                  eff == PyramidGradient ||
00265                  eff == PipeCrossGradient ||
00266                  eff == EllipticGradient)
00267         {
00268             int rSign = rDiff>0? 1: -1;
00269             int gSign = gDiff>0? 1: -1;
00270             int bSign = bDiff>0? 1: -1;
00271 
00272             rfd = (float)rDiff / size.width();
00273             gfd = (float)gDiff / size.width();
00274             bfd = (float)bDiff / size.width();
00275 
00276             rd = (float)rDiff/2;
00277             gd = (float)gDiff/2;
00278             bd = (float)bDiff/2;
00279 
00280             for (x = 0; x < size.width(); x++, rd-=rfd, gd-=gfd, bd-=bfd)
00281             {
00282                 xtable[0][x] = (unsigned char) abs((int)rd);
00283                 xtable[1][x] = (unsigned char) abs((int)gd);
00284                 xtable[2][x] = (unsigned char) abs((int)bd);
00285             }
00286 
00287             rfd = (float)rDiff/size.height();
00288             gfd = (float)gDiff/size.height();
00289             bfd = (float)bDiff/size.height();
00290 
00291             rd = (float)rDiff/2;
00292             gd = (float)gDiff/2;
00293             bd = (float)bDiff/2;
00294 
00295             for (y = 0; y < size.height(); y++, rd-=rfd, gd-=gfd, bd-=bfd)
00296             {
00297                 ytable[0][y] = (unsigned char) abs((int)rd);
00298                 ytable[1][y] = (unsigned char) abs((int)gd);
00299                 ytable[2][y] = (unsigned char) abs((int)bd);
00300             }
00301 
00302             int h = (size.height()+1)>>1;
00303             for (y = 0; y < h; y++) {
00304                 unsigned int *sl1 = (unsigned int *)image.scanLine(y);
00305                 unsigned int *sl2 = (unsigned int *)image.scanLine(QMAX(size.height()-y-1, y));
00306 
00307                 int w = (size.width()+1)>>1;
00308                 int x2 = size.width()-1;
00309 
00310                 for (x = 0; x < w; x++, x2--) {
00311             unsigned int rgb = 0;
00312                     if (eff == PyramidGradient) {
00313                         rgb = qRgb(rcb-rSign*(xtable[0][x]+ytable[0][y]),
00314                                    gcb-gSign*(xtable[1][x]+ytable[1][y]),
00315                                    bcb-bSign*(xtable[2][x]+ytable[2][y]));
00316                     }
00317                     if (eff == RectangleGradient) {
00318                         rgb = qRgb(rcb - rSign *
00319                                    QMAX(xtable[0][x], ytable[0][y]) * 2,
00320                                    gcb - gSign *
00321                                    QMAX(xtable[1][x], ytable[1][y]) * 2,
00322                                    bcb - bSign *
00323                                    QMAX(xtable[2][x], ytable[2][y]) * 2);
00324                     }
00325                     if (eff == PipeCrossGradient) {
00326                         rgb = qRgb(rcb - rSign *
00327                                    QMIN(xtable[0][x], ytable[0][y]) * 2,
00328                                    gcb - gSign *
00329                                    QMIN(xtable[1][x], ytable[1][y]) * 2,
00330                                    bcb - bSign *
00331                                    QMIN(xtable[2][x], ytable[2][y]) * 2);
00332                     }
00333                     if (eff == EllipticGradient) {
00334                         rgb = qRgb(rcb - rSign *
00335                                    (int)sqrt((xtable[0][x]*xtable[0][x] +
00336                                               ytable[0][y]*ytable[0][y])*2.0),
00337                                    gcb - gSign *
00338                                    (int)sqrt((xtable[1][x]*xtable[1][x] +
00339                                               ytable[1][y]*ytable[1][y])*2.0),
00340                                    bcb - bSign *
00341                                    (int)sqrt((xtable[2][x]*xtable[2][x] +
00342                                               ytable[2][y]*ytable[2][y])*2.0));
00343                     }
00344 
00345                     sl1[x] = sl2[x] = rgb;
00346                     sl1[x2] = sl2[x2] = rgb;
00347                 }
00348             }
00349         }
00350 
00351         delete [] xtable[0];
00352         delete [] xtable[1];
00353         delete [] xtable[2];
00354         delete [] ytable[0];
00355         delete [] ytable[1];
00356         delete [] ytable[2];
00357     }
00358 
00359     // dither if necessary
00360     if (ncols && (QPixmap::defaultDepth() < 15 )) {
00361     if ( ncols < 2 || ncols > 256 )
00362         ncols = 3;
00363     QColor *dPal = new QColor[ncols];
00364     for (int i=0; i<ncols; i++) {
00365         dPal[i].setRgb ( rca + rDiff * i / ( ncols - 1 ),
00366                  gca + gDiff * i / ( ncols - 1 ),
00367                  bca + bDiff * i / ( ncols - 1 ) );
00368     }
00369         dither(image, dPal, ncols);
00370         delete [] dPal;
00371     }
00372 
00373     return image;
00374 }
00375 
00376 
00377 // -----------------------------------------------------------------------------
00378 
00379 //CT this was (before Dirk A. Mueller's speedup changes)
00380 //   merely the same code as in the above method, but it's supposedly
00381 //   way less performant since it introduces a lot of supplementary tests
00382 //   and simple math operations for the calculation of the balance.
00383 //      (surprisingly, it isn't less performant, on the contrary :-)
00384 //   Yes, I could have merged them, but then the excellent performance of
00385 //   the balanced code would suffer with no other gain than a mere
00386 //   source code and byte code size economy.
00387 
00388 QImage KImageEffect::unbalancedGradient(const QSize &size, const QColor &ca,
00389     const QColor &cb, GradientType eff, int xfactor, int yfactor,
00390     int ncols)
00391 {
00392     int dir; // general parameter used for direction switches
00393 
00394     bool _xanti = false , _yanti = false;
00395 
00396     if (xfactor < 0) _xanti = true; // negative on X direction
00397     if (yfactor < 0) _yanti = true; // negative on Y direction
00398 
00399     xfactor = abs(xfactor);
00400     yfactor = abs(yfactor);
00401 
00402     if (!xfactor) xfactor = 1;
00403     if (!yfactor) yfactor = 1;
00404 
00405     if (xfactor > 200 ) xfactor = 200;
00406     if (yfactor > 200 ) yfactor = 200;
00407 
00408 
00409     //    float xbal = xfactor/5000.;
00410     //    float ybal = yfactor/5000.;
00411     float xbal = xfactor/30./size.width();
00412     float ybal = yfactor/30./size.height();
00413     float rat;
00414 
00415     int rDiff, gDiff, bDiff;
00416     int rca, gca, bca, rcb, gcb, bcb;
00417 
00418     QImage image(size, 32);
00419 
00420     if (size.width() == 0 || size.height() == 0) {
00421 #ifndef NDEBUG
00422       std::cerr << "WARNING: KImageEffect::unbalancedGradient : invalid image\n";
00423 #endif
00424       return image;
00425     }
00426 
00427     register int x, y;
00428     unsigned int *scanline;
00429 
00430     rDiff = (rcb = cb.red())   - (rca = ca.red());
00431     gDiff = (gcb = cb.green()) - (gca = ca.green());
00432     bDiff = (bcb = cb.blue())  - (bca = ca.blue());
00433 
00434     if( eff == VerticalGradient || eff == HorizontalGradient){
00435         QColor cRow;
00436 
00437         uint *p;
00438         uint rgbRow;
00439 
00440     if( eff == VerticalGradient) {
00441       for ( y = 0; y < size.height(); y++ ) {
00442         dir = _yanti ? y : size.height() - 1 - y;
00443             p = (uint *) image.scanLine(dir);
00444             rat =  1 - exp( - (float)y  * ybal );
00445 
00446             cRow.setRgb( rcb - (int) ( rDiff * rat ),
00447                          gcb - (int) ( gDiff * rat ),
00448                          bcb - (int) ( bDiff * rat ) );
00449 
00450             rgbRow = cRow.rgb();
00451 
00452             for( x = 0; x < size.width(); x++ ) {
00453           *p = rgbRow;
00454           p++;
00455             }
00456       }
00457     }
00458     else {
00459 
00460       unsigned int *src = (unsigned int *)image.scanLine(0);
00461       for(x = 0; x < size.width(); x++ )
00462           {
00463           dir = _xanti ? x : size.width() - 1 - x;
00464           rat = 1 - exp( - (float)x  * xbal );
00465 
00466               src[dir] = qRgb(rcb - (int) ( rDiff * rat ),
00467                               gcb - (int) ( gDiff * rat ),
00468                               bcb - (int) ( bDiff * rat ));
00469           }
00470 
00471       // Believe it or not, manually copying in a for loop is faster
00472       // than calling memcpy for each scanline (on the order of ms...).
00473       // I think this is due to the function call overhead (mosfet).
00474 
00475       for(y = 1; y < size.height(); ++y)
00476           {
00477           scanline = (unsigned int *)image.scanLine(y);
00478           for(x=0; x < size.width(); ++x)
00479                   scanline[x] = src[x];
00480           }
00481     }
00482     }
00483 
00484     else {
00485       int w=size.width(), h=size.height();
00486 
00487       unsigned char *xtable[3];
00488       unsigned char *ytable[3];
00489       xtable[0] = new unsigned char[w];
00490       xtable[1] = new unsigned char[w];
00491       xtable[2] = new unsigned char[w];
00492       ytable[0] = new unsigned char[h];
00493       ytable[1] = new unsigned char[h];
00494       ytable[2] = new unsigned char[h];
00495 
00496       if ( eff == DiagonalGradient || eff == CrossDiagonalGradient)
00497       {
00498       for (x = 0; x < w; x++) {
00499               dir = _xanti ? x : w - 1 - x;
00500               rat = 1 - exp( - (float)x * xbal );
00501 
00502               xtable[0][dir] = (unsigned char) ( rDiff/2 * rat );
00503               xtable[1][dir] = (unsigned char) ( gDiff/2 * rat );
00504               xtable[2][dir] = (unsigned char) ( bDiff/2 * rat );
00505           }
00506 
00507       for (y = 0; y < h; y++) {
00508               dir = _yanti ? y : h - 1 - y;
00509               rat =  1 - exp( - (float)y  * ybal );
00510 
00511               ytable[0][dir] = (unsigned char) ( rDiff/2 * rat );
00512               ytable[1][dir] = (unsigned char) ( gDiff/2 * rat );
00513               ytable[2][dir] = (unsigned char) ( bDiff/2 * rat );
00514           }
00515 
00516       for (y = 0; y < h; y++) {
00517               unsigned int *scanline = (unsigned int *)image.scanLine(y);
00518               for (x = 0; x < w; x++) {
00519                   scanline[x] = qRgb(rcb - (xtable[0][x] + ytable[0][y]),
00520                                      gcb - (xtable[1][x] + ytable[1][y]),
00521                                      bcb - (xtable[2][x] + ytable[2][y]));
00522               }
00523           }
00524       }
00525 
00526       else if (eff == RectangleGradient ||
00527                eff == PyramidGradient ||
00528                eff == PipeCrossGradient ||
00529                eff == EllipticGradient)
00530       {
00531           int rSign = rDiff>0? 1: -1;
00532           int gSign = gDiff>0? 1: -1;
00533           int bSign = bDiff>0? 1: -1;
00534 
00535           for (x = 0; x < w; x++)
00536           {
00537               dir = _xanti ? x : w - 1 - x;
00538               rat =  1 - exp( - (float)x * xbal );
00539 
00540               xtable[0][dir] = (unsigned char) abs((int)(rDiff*(0.5-rat)));
00541               xtable[1][dir] = (unsigned char) abs((int)(gDiff*(0.5-rat)));
00542               xtable[2][dir] = (unsigned char) abs((int)(bDiff*(0.5-rat)));
00543           }
00544 
00545           for (y = 0; y < h; y++)
00546           {
00547               dir = _yanti ? y : h - 1 - y;
00548 
00549               rat =  1 - exp( - (float)y * ybal );
00550 
00551               ytable[0][dir] = (unsigned char) abs((int)(rDiff*(0.5-rat)));
00552               ytable[1][dir] = (unsigned char) abs((int)(gDiff*(0.5-rat)));
00553               ytable[2][dir] = (unsigned char) abs((int)(bDiff*(0.5-rat)));
00554           }
00555 
00556           for (y = 0; y < h; y++) {
00557               unsigned int *scanline = (unsigned int *)image.scanLine(y);
00558               for (x = 0; x < w; x++) {
00559                   if (eff == PyramidGradient)
00560                   {
00561                       scanline[x] = qRgb(rcb-rSign*(xtable[0][x]+ytable[0][y]),
00562                                          gcb-gSign*(xtable[1][x]+ytable[1][y]),
00563                                          bcb-bSign*(xtable[2][x]+ytable[2][y]));
00564                   }
00565                   else if (eff == RectangleGradient)
00566                   {
00567                       scanline[x] = qRgb(rcb - rSign *
00568                                          QMAX(xtable[0][x], ytable[0][y]) * 2,
00569                                          gcb - gSign *
00570                                          QMAX(xtable[1][x], ytable[1][y]) * 2,
00571                                          bcb - bSign *
00572                                          QMAX(xtable[2][x], ytable[2][y]) * 2);
00573                   }
00574                   else if (eff == PipeCrossGradient)
00575                   {
00576                       scanline[x] = qRgb(rcb - rSign *
00577                                          QMIN(xtable[0][x], ytable[0][y]) * 2,
00578                                          gcb - gSign *
00579                                          QMIN(xtable[1][x], ytable[1][y]) * 2,
00580                                          bcb - bSign *
00581                                          QMIN(xtable[2][x], ytable[2][y]) * 2);
00582                   }
00583                   else if (eff == EllipticGradient)
00584                   {
00585                       scanline[x] = qRgb(rcb - rSign *
00586                                          (int)sqrt((xtable[0][x]*xtable[0][x] +
00587                                                     ytable[0][y]*ytable[0][y])*2.0),
00588                                          gcb - gSign *
00589                                          (int)sqrt((xtable[1][x]*xtable[1][x] +
00590                                                     ytable[1][y]*ytable[1][y])*2.0),
00591                                          bcb - bSign *
00592                                          (int)sqrt((xtable[2][x]*xtable[2][x] +
00593                                                     ytable[2][y]*ytable[2][y])*2.0));
00594                   }
00595               }
00596           }
00597       }
00598 
00599       if (ncols && (QPixmap::defaultDepth() < 15 )) {
00600           if ( ncols < 2 || ncols > 256 )
00601               ncols = 3;
00602           QColor *dPal = new QColor[ncols];
00603           for (int i=0; i<ncols; i++) {
00604               dPal[i].setRgb ( rca + rDiff * i / ( ncols - 1 ),
00605                                gca + gDiff * i / ( ncols - 1 ),
00606                                bca + bDiff * i / ( ncols - 1 ) );
00607           }
00608           dither(image, dPal, ncols);
00609           delete [] dPal;
00610       }
00611 
00612       delete [] xtable[0];
00613       delete [] xtable[1];
00614       delete [] xtable[2];
00615       delete [] ytable[0];
00616       delete [] ytable[1];
00617       delete [] ytable[2];
00618 
00619     }
00620 
00621     return image;
00622 }
00623 
00627 namespace {
00628 
00629 struct KIE4Pack
00630 {
00631     Q_UINT16 data[4];
00632 };
00633 
00634 struct KIE8Pack
00635 {
00636     Q_UINT16 data[8];
00637 };
00638 
00639 }
00640 
00641 //======================================================================
00642 //
00643 // Intensity effects
00644 //
00645 //======================================================================
00646 
00647 
00648 /* This builds a 256 byte unsigned char lookup table with all
00649  * the possible percent values prior to applying the effect, then uses
00650  * integer math for the pixels. For any image larger than 9x9 this will be
00651  * less expensive than doing a float operation on the 3 color components of
00652  * each pixel. (mosfet)
00653  */
00654 QImage& KImageEffect::intensity(QImage &image, float percent)
00655 {
00656     if (image.width() == 0 || image.height() == 0) {
00657 #ifndef NDEBUG
00658       std::cerr << "WARNING: KImageEffect::intensity : invalid image\n";
00659 #endif
00660       return image;
00661     }
00662 
00663     int segColors = image.depth() > 8 ? 256 : image.numColors();
00664     int pixels = image.depth() > 8 ? image.width()*image.height() :
00665                  image.numColors();
00666     unsigned int *data = image.depth() > 8 ? (unsigned int *)image.bits() :
00667                          (unsigned int *)image.colorTable();
00668 
00669     bool brighten = (percent >= 0);
00670     if(percent < 0)
00671         percent = -percent;
00672 
00673 #ifdef USE_MMX_INLINE_ASM
00674     bool haveMMX = KCPUInfo::haveExtension( KCPUInfo::IntelMMX );
00675 
00676     if(haveMMX)
00677     {
00678         Q_UINT16 p = Q_UINT16(256.0f*(percent));
00679         KIE4Pack mult = {{p,p,p,0}};
00680 
00681         __asm__ __volatile__(
00682         "pxor %%mm7, %%mm7\n\t"                // zero mm7 for unpacking
00683         "movq  (%0), %%mm6\n\t"                // copy intensity change to mm6
00684         : : "r"(&mult), "m"(mult));
00685 
00686         unsigned int rem = pixels % 4;
00687         pixels -= rem;
00688         Q_UINT32 *end = ( data + pixels );
00689 
00690         if (brighten)
00691         {
00692             while ( data != end ) {
00693                 __asm__ __volatile__(
00694                 "movq       (%0), %%mm0\n\t"
00695                 "movq      8(%0), %%mm4\n\t"   // copy 4 pixels of data to mm0 and mm4
00696                 "movq      %%mm0, %%mm1\n\t"
00697                 "movq      %%mm0, %%mm3\n\t"
00698                 "movq      %%mm4, %%mm5\n\t"   // copy to registers for unpacking
00699                 "punpcklbw %%mm7, %%mm0\n\t"
00700                 "punpckhbw %%mm7, %%mm1\n\t"   // unpack the two pixels from mm0
00701                 "pmullw    %%mm6, %%mm0\n\t"
00702                 "punpcklbw %%mm7, %%mm4\n\t"
00703                 "pmullw    %%mm6, %%mm1\n\t"   // multiply by intensity*256
00704                 "psrlw        $8, %%mm0\n\t"   // divide by 256
00705                 "pmullw    %%mm6, %%mm4\n\t"
00706                 "psrlw        $8, %%mm1\n\t"
00707                 "psrlw        $8, %%mm4\n\t"
00708                 "packuswb  %%mm1, %%mm0\n\t"   // pack solution into mm0. saturates at 255
00709                 "movq      %%mm5, %%mm1\n\t"
00710 
00711                 "punpckhbw %%mm7, %%mm1\n\t"   // unpack 4th pixel in mm1
00712 
00713                 "pmullw    %%mm6, %%mm1\n\t"
00714                 "paddusb   %%mm3, %%mm0\n\t"   // add intesity result to original of mm0
00715                 "psrlw        $8, %%mm1\n\t"
00716                 "packuswb  %%mm1, %%mm4\n\t"   // pack upper two pixels into mm4
00717 
00718                 "movq      %%mm0, (%0)\n\t"    // rewrite to memory lower two pixels
00719                 "paddusb   %%mm5, %%mm4\n\t"
00720                 "movq      %%mm4, 8(%0)\n\t"   // rewrite upper two pixels
00721                 : : "r"(data) );
00722                 data += 4;
00723             }
00724 
00725             end += rem;
00726             while ( data != end ) {
00727                 __asm__ __volatile__(
00728                 "movd       (%0), %%mm0\n\t"   // repeat above but for
00729                 "punpcklbw %%mm7, %%mm0\n\t"   // one pixel at a time
00730                 "movq      %%mm0, %%mm3\n\t"
00731                 "pmullw    %%mm6, %%mm0\n\t"
00732                 "psrlw        $8, %%mm0\n\t"
00733                 "paddw     %%mm3, %%mm0\n\t"
00734                 "packuswb  %%mm0, %%mm0\n\t"
00735                 "movd      %%mm0, (%0)\n\t"
00736                 : : "r"(data) );
00737         data++;
00738             }
00739         }
00740         else
00741         {
00742             while ( data != end ) {
00743                 __asm__ __volatile__(
00744                 "movq       (%0), %%mm0\n\t"
00745                 "movq      8(%0), %%mm4\n\t"
00746                 "movq      %%mm0, %%mm1\n\t"
00747                 "movq      %%mm0, %%mm3\n\t"
00748 
00749                 "movq      %%mm4, %%mm5\n\t"
00750 
00751                 "punpcklbw %%mm7, %%mm0\n\t"
00752                 "punpckhbw %%mm7, %%mm1\n\t"
00753                 "pmullw    %%mm6, %%mm0\n\t"
00754                 "punpcklbw %%mm7, %%mm4\n\t"
00755                 "pmullw    %%mm6, %%mm1\n\t"
00756                 "psrlw        $8, %%mm0\n\t"
00757                 "pmullw    %%mm6, %%mm4\n\t"
00758                 "psrlw        $8, %%mm1\n\t"
00759                 "psrlw        $8, %%mm4\n\t"
00760                 "packuswb  %%mm1, %%mm0\n\t"
00761                 "movq      %%mm5, %%mm1\n\t"
00762 
00763                 "punpckhbw %%mm7, %%mm1\n\t"
00764 
00765                 "pmullw    %%mm6, %%mm1\n\t"
00766                 "psubusb   %%mm0, %%mm3\n\t"   // subtract darkening amount
00767                 "psrlw        $8, %%mm1\n\t"
00768                 "packuswb  %%mm1, %%mm4\n\t"
00769 
00770                 "movq      %%mm3, (%0)\n\t"
00771                 "psubusb   %%mm4, %%mm5\n\t"   // only change for this version is
00772                 "movq      %%mm5, 8(%0)\n\t"   // subtraction here as we are darkening image
00773                 : : "r"(data) );
00774                 data += 4;
00775             }
00776 
00777             end += rem;
00778             while ( data != end ) {
00779                 __asm__ __volatile__(
00780                 "movd       (%0), %%mm0\n\t"
00781                 "punpcklbw %%mm7, %%mm0\n\t"
00782                 "movq      %%mm0, %%mm3\n\t"
00783                 "pmullw    %%mm6, %%mm0\n\t"
00784                 "psrlw        $8, %%mm0\n\t"
00785                 "psubusw   %%mm0, %%mm3\n\t"
00786                 "packuswb  %%mm3, %%mm3\n\t"
00787                 "movd      %%mm3, (%0)\n\t"
00788                 : : "r"(data) );
00789                 data++;
00790             }
00791         }
00792         __asm__ __volatile__("emms");          // clear mmx state
00793     }
00794     else
00795 #endif // USE_MMX_INLINE_ASM
00796     {
00797         unsigned char *segTbl = new unsigned char[segColors];
00798         int tmp;
00799         if(brighten){ // keep overflow check out of loops
00800             for(int i=0; i < segColors; ++i){
00801                 tmp = (int)(i*percent);
00802                 if(tmp > 255)
00803                     tmp = 255;
00804                 segTbl[i] = tmp;
00805             }
00806         }
00807         else{
00808             for(int i=0; i < segColors; ++i){
00809                 tmp = (int)(i*percent);
00810                 if(tmp < 0)
00811                     tmp = 0;
00812                  segTbl[i] = tmp;
00813             }
00814         }
00815 
00816         if(brighten){ // same here
00817             for(int i=0; i < pixels; ++i){
00818                 int r = qRed(data[i]);
00819                 int g = qGreen(data[i]);
00820                 int b = qBlue(data[i]);
00821                 int a = qAlpha(data[i]);
00822                 r = r + segTbl[r] > 255 ? 255 : r + segTbl[r];
00823                 g = g + segTbl[g] > 255 ? 255 : g + segTbl[g];
00824                 b = b + segTbl[b] > 255 ? 255 : b + segTbl[b];
00825                 data[i] = qRgba(r, g, b,a);
00826             }
00827         }
00828         else{
00829             for(int i=0; i < pixels; ++i){
00830                 int r = qRed(data[i]);
00831                 int g = qGreen(data[i]);
00832                 int b = qBlue(data[i]);
00833                 int a = qAlpha(data[i]);
00834                 r = r - segTbl[r] < 0 ? 0 : r - segTbl[r];
00835                 g = g - segTbl[g] < 0 ? 0 : g - segTbl[g];
00836                 b = b - segTbl[b] < 0 ? 0 : b - segTbl[b];
00837                 data[i] = qRgba(r, g, b, a);
00838             }
00839         }
00840         delete [] segTbl;
00841     }
00842 
00843     return image;
00844 }
00845 
00846 QImage& KImageEffect::channelIntensity(QImage &image, float percent,
00847                                        RGBComponent channel)
00848 {
00849     if (image.width() == 0 || image.height() == 0) {
00850 #ifndef NDEBUG
00851       std::cerr << "WARNING: KImageEffect::channelIntensity : invalid image\n";
00852 #endif
00853       return image;
00854     }
00855 
00856     int segColors = image.depth() > 8 ? 256 : image.numColors();
00857     unsigned char *segTbl = new unsigned char[segColors];
00858     int pixels = image.depth() > 8 ? image.width()*image.height() :
00859         image.numColors();
00860     unsigned int *data = image.depth() > 8 ? (unsigned int *)image.bits() :
00861         (unsigned int *)image.colorTable();
00862     bool brighten = (percent >= 0);
00863     if(percent < 0)
00864         percent = -percent;
00865 
00866     if(brighten){ // keep overflow check out of loops
00867         for(int i=0; i < segColors; ++i){
00868             int tmp = (int)(i*percent);
00869             if(tmp > 255)
00870                 tmp = 255;
00871             segTbl[i] = tmp;
00872         }
00873     }
00874     else{
00875         for(int i=0; i < segColors; ++i){
00876             int tmp = (int)(i*percent);
00877             if(tmp < 0)
00878                 tmp = 0;
00879             segTbl[i] = tmp;
00880         }
00881     }
00882 
00883     if(brighten){ // same here
00884         if(channel == Red){ // and here ;-)
00885             for(int i=0; i < pixels; ++i){
00886                 int c = qRed(data[i]);
00887                 c = c + segTbl[c] > 255 ? 255 : c + segTbl[c];
00888                 data[i] = qRgba(c, qGreen(data[i]), qBlue(data[i]), qAlpha(data[i]));
00889             }
00890         }
00891         else if(channel == Green){
00892             for(int i=0; i < pixels; ++i){
00893                 int c = qGreen(data[i]);
00894                 c = c + segTbl[c] > 255 ? 255 : c + segTbl[c];
00895                 data[i] = qRgba(qRed(data[i]), c, qBlue(data[i]), qAlpha(data[i]));
00896             }
00897         }
00898         else{
00899             for(int i=0; i < pixels; ++i){
00900                 int c = qBlue(data[i]);
00901                 c = c + segTbl[c] > 255 ? 255 : c + segTbl[c];
00902                 data[i] = qRgba(qRed(data[i]), qGreen(data[i]), c, qAlpha(data[i]));
00903             }
00904         }
00905 
00906     }
00907     else{
00908         if(channel == Red){
00909             for(int i=0; i < pixels; ++i){
00910                 int c = qRed(data[i]);
00911                 c = c - segTbl[c] < 0 ? 0 : c - segTbl[c];
00912                 data[i] = qRgba(c, qGreen(data[i]), qBlue(data[i]), qAlpha(data[i]));
00913             }
00914         }
00915         else if(channel == Green){
00916             for(int i=0; i < pixels; ++i){
00917                 int c = qGreen(data[i]);
00918                 c = c - segTbl[c] < 0 ? 0 : c - segTbl[c];
00919                 data[i] = qRgba(qRed(data[i]), c, qBlue(data[i]), qAlpha(data[i]));
00920             }
00921         }
00922         else{
00923             for(int i=0; i < pixels; ++i){
00924                 int c = qBlue(data[i]);
00925                 c = c - segTbl[c] < 0 ? 0 : c - segTbl[c];
00926                 data[i] = qRgba(qRed(data[i]), qGreen(data[i]), c, qAlpha(data[i]));
00927             }
00928         }
00929     }
00930     delete [] segTbl;
00931 
00932     return image;
00933 }
00934 
00935 // Modulate an image with an RBG channel of another image
00936 //
00937 QImage& KImageEffect::modulate(QImage &image, QImage &modImage, bool reverse,
00938     ModulationType type, int factor, RGBComponent channel)
00939 {
00940     if (image.width() == 0 || image.height() == 0 ||
00941         modImage.width() == 0 || modImage.height() == 0) {
00942 #ifndef NDEBUG
00943       std::cerr << "WARNING: KImageEffect::modulate : invalid image\n";
00944 #endif
00945       return image;
00946     }
00947 
00948     int r, g, b, h, s, v, a;
00949     QColor clr;
00950     int mod=0;
00951     unsigned int x1, x2, y1, y2;
00952     register int x, y;
00953 
00954     // for image, we handle only depth 32
00955     if (image.depth()<32) image = image.convertDepth(32);
00956 
00957     // for modImage, we handle depth 8 and 32
00958     if (modImage.depth()<8) modImage = modImage.convertDepth(8);
00959 
00960     unsigned int *colorTable2 = (modImage.depth()==8) ?
00961                  modImage.colorTable():0;
00962     unsigned int *data1, *data2;
00963     unsigned char *data2b;
00964     unsigned int color1, color2;
00965 
00966     x1 = image.width();    y1 = image.height();
00967     x2 = modImage.width(); y2 = modImage.height();
00968 
00969     for (y = 0; y < (int)y1; y++) {
00970         data1 =  (unsigned int *) image.scanLine(y);
00971     data2 =  (unsigned int *) modImage.scanLine( y%y2 );
00972     data2b = (unsigned char *) modImage.scanLine( y%y2 );
00973 
00974     x=0;
00975     while(x < (int)x1) {
00976       color2 = (colorTable2) ? colorTable2[*data2b] : *data2;
00977       if (reverse) {
00978           color1 = color2;
00979           color2 = *data1;
00980       }
00981       else
00982           color1 = *data1;
00983 
00984       if (type == Intensity || type == Contrast) {
00985               r = qRed(color1);
00986           g = qGreen(color1);
00987           b = qBlue(color1);
00988           if (channel != All) {
00989                 mod = (channel == Red) ? qRed(color2) :
00990             (channel == Green) ? qGreen(color2) :
00991                 (channel == Blue) ? qBlue(color2) :
00992             (channel == Gray) ? qGray(color2) : 0;
00993             mod = mod*factor/50;
00994           }
00995 
00996           if (type == Intensity) {
00997             if (channel == All) {
00998               r += r * factor/50 * qRed(color2)/256;
00999               g += g * factor/50 * qGreen(color2)/256;
01000               b += b * factor/50 * qBlue(color2)/256;
01001             }
01002             else {
01003               r += r * mod/256;
01004               g += g * mod/256;
01005               b += b * mod/256;
01006             }
01007           }
01008           else { // Contrast
01009             if (channel == All) {
01010           r += (r-128) * factor/50 * qRed(color2)/128;
01011               g += (g-128) * factor/50 * qGreen(color2)/128;
01012               b += (b-128) * factor/50 * qBlue(color2)/128;
01013             }
01014             else {
01015               r += (r-128) * mod/128;
01016               g += (g-128) * mod/128;
01017               b += (b-128) * mod/128;
01018             }
01019           }
01020 
01021           if (r<0) r=0; if (r>255) r=255;
01022           if (g<0) g=0; if (g>255) g=255;
01023           if (b<0) b=0; if (b>255) b=255;
01024           a = qAlpha(*data1);
01025           *data1 = qRgba(r, g, b, a);
01026       }
01027       else if (type == Saturation || type == HueShift) {
01028           clr.setRgb(color1);
01029           clr.hsv(&h, &s, &v);
01030               mod = (channel == Red) ? qRed(color2) :
01031             (channel == Green) ? qGreen(color2) :
01032                 (channel == Blue) ? qBlue(color2) :
01033             (channel == Gray) ? qGray(color2) : 0;
01034           mod = mod*factor/50;
01035 
01036           if (type == Saturation) {
01037           s -= s * mod/256;
01038           if (s<0) s=0; if (s>255) s=255;
01039           }
01040           else { // HueShift
01041             h += mod;
01042         while(h<0) h+=360;
01043         h %= 360;
01044           }
01045 
01046           clr.setHsv(h, s, v);
01047           a = qAlpha(*data1);
01048           *data1 = clr.rgb() | ((uint)(a & 0xff) << 24);
01049       }
01050       data1++; data2++; data2b++; x++;
01051       if ( (x%x2) ==0) { data2 -= x2; data2b -= x2; }
01052         }
01053     }
01054     return image;
01055 }
01056 
01057 
01058 
01059 //======================================================================
01060 //
01061 // Blend effects
01062 //
01063 //======================================================================
01064 
01065 
01066 // Nice and fast direct pixel manipulation
01067 QImage& KImageEffect::blend(const QColor& clr, QImage& dst, float opacity)
01068 {
01069     if (dst.width() <= 0 || dst.height() <= 0)
01070         return dst;
01071 
01072     if (opacity < 0.0 || opacity > 1.0) {
01073 #ifndef NDEBUG
01074         std::cerr << "WARNING: KImageEffect::blend : invalid opacity. Range [0, 1]\n";
01075 #endif
01076         return dst;
01077     }
01078 
01079     if (dst.depth() != 32)
01080         dst = dst.convertDepth(32);
01081 
01082     int pixels = dst.width() * dst.height();
01083 
01084 #ifdef USE_SSE2_INLINE_ASM
01085     if ( KCPUInfo::haveExtension( KCPUInfo::IntelSSE2 ) && pixels > 16 ) {
01086         Q_UINT16 alpha = Q_UINT16( ( 1.0 - opacity ) * 256.0 );
01087 
01088         KIE8Pack packedalpha = { { alpha, alpha, alpha, 256,
01089                                    alpha, alpha, alpha, 256 } };
01090 
01091         Q_UINT16 red   = Q_UINT16( clr.red()   * 256 * opacity );
01092         Q_UINT16 green = Q_UINT16( clr.green() * 256 * opacity );
01093         Q_UINT16 blue  = Q_UINT16( clr.blue()  * 256 * opacity );
01094 
01095         KIE8Pack packedcolor = { { blue, green, red, 0,
01096                                    blue, green, red, 0 } };
01097 
01098         // Prepare the XMM5, XMM6 and XMM7 registers for unpacking and blending
01099         __asm__ __volatile__(
01100         "pxor        %%xmm7,  %%xmm7\n\t" // Zero out XMM7 for unpacking
01101         "movdqu        (%0),  %%xmm6\n\t" // Set up (1 - alpha) * 256 in XMM6
01102         "movdqu        (%1),  %%xmm5\n\t" // Set up color * alpha * 256 in XMM5
01103         : : "r"(&packedalpha), "r"(&packedcolor),
01104             "m"(packedcolor),  "m"(packedalpha) );
01105 
01106         Q_UINT32 *data = reinterpret_cast<Q_UINT32*>( dst.bits() );
01107 
01108         // Check how many pixels we need to process to achieve 16 byte alignment
01109         int offset = (16 - (Q_UINT32( data ) & 0x0f)) / 4;
01110 
01111         // The main loop processes 8 pixels / iteration
01112         int remainder = (pixels - offset) % 8;
01113         pixels -= remainder;
01114 
01115         // Alignment loop
01116         for ( int i = 0; i < offset; i++ ) {
01117             __asm__ __volatile__(
01118             "movd         (%0,%1,4),      %%xmm0\n\t"  // Load one pixel to XMM1
01119             "punpcklbw       %%xmm7,      %%xmm0\n\t"  // Unpack the pixel
01120             "pmullw          %%xmm6,      %%xmm0\n\t"  // Multiply the pixel with (1 - alpha) * 256
01121             "paddw           %%xmm5,      %%xmm0\n\t"  // Add color * alpha * 256 to the result
01122             "psrlw               $8,      %%xmm0\n\t"  // Divide by 256
01123             "packuswb        %%xmm1,      %%xmm0\n\t"  // Pack the pixel to a dword
01124             "movd            %%xmm0,   (%0,%1,4)\n\t"  // Write the pixel to the image
01125             : : "r"(data), "r"(i) );
01126         }
01127 
01128         // Main loop
01129         for ( int i = offset; i < pixels; i += 8 ) {
01130             __asm__ __volatile(
01131             // Load 8 pixels to XMM registers 1 - 4
01132             "movq         (%0,%1,4),      %%xmm0\n\t"  // Load pixels 1 and 2 to XMM1
01133             "movq        8(%0,%1,4),      %%xmm1\n\t"  // Load pixels 3 and 4 to XMM2
01134             "movq       16(%0,%1,4),      %%xmm2\n\t"  // Load pixels 5 and 6 to XMM3
01135             "movq       24(%0,%1,4),      %%xmm3\n\t"  // Load pixels 7 and 8 to XMM4
01136 
01137             // Prefetch the pixels for next iteration
01138             "prefetchnta 32(%0,%1,4)            \n\t"
01139 
01140             // Blend pixels 1 and 2
01141             "punpcklbw       %%xmm7,      %%xmm0\n\t"  // Unpack the pixels
01142             "pmullw          %%xmm6,      %%xmm0\n\t"  // Multiply the pixels with (1 - alpha) * 256
01143             "paddw           %%xmm5,      %%xmm0\n\t"  // Add color * alpha * 256 to the result
01144             "psrlw               $8,      %%xmm0\n\t"  // Divide by 256
01145 
01146             // Blend pixels 3 and 4
01147             "punpcklbw       %%xmm7,      %%xmm1\n\t"  // Unpack the pixels
01148             "pmullw          %%xmm6,      %%xmm1\n\t"  // Multiply the pixels with (1 - alpha) * 256
01149             "paddw           %%xmm5,      %%xmm1\n\t"  // Add color * alpha * 256 to the result
01150             "psrlw               $8,      %%xmm1\n\t"  // Divide by 256
01151 
01152             // Blend pixels 5 and 6
01153             "punpcklbw       %%xmm7,      %%xmm2\n\t"  // Unpack the pixels
01154             "pmullw          %%xmm6,      %%xmm2\n\t"  // Multiply the pixels with (1 - alpha) * 256
01155             "paddw           %%xmm5,      %%xmm2\n\t"  // Add color * alpha * 256 to the result
01156             "psrlw               $8,      %%xmm2\n\t"  // Divide by 256
01157 
01158             // Blend pixels 7 and 8
01159             "punpcklbw       %%xmm7,      %%xmm3\n\t"  // Unpack the pixels
01160             "pmullw          %%xmm6,      %%xmm3\n\t"  // Multiply the pixels with (1 - alpha) * 256
01161             "paddw           %%xmm5,      %%xmm3\n\t"  // Add color * alpha * 256 to the result
01162             "psrlw               $8,      %%xmm3\n\t"  // Divide by 256
01163 
01164             // Pack the pixels into 2 double quadwords
01165             "packuswb        %%xmm1,      %%xmm0\n\t"  // Pack pixels 1 - 4 to a double qword
01166             "packuswb        %%xmm3,      %%xmm2\n\t"  // Pack pixles 5 - 8 to a double qword
01167 
01168             // Write the pixels back to the image
01169             "movdqa          %%xmm0,   (%0,%1,4)\n\t"  // Store pixels 1 - 4
01170             "movdqa          %%xmm2, 16(%0,%1,4)\n\t"  // Store pixels 5 - 8
01171             : : "r"(data), "r"(i) );
01172         }
01173 
01174         // Cleanup loop
01175         for ( int i = pixels; i < pixels + remainder; i++ ) {
01176             __asm__ __volatile__(
01177             "movd         (%0,%1,4),      %%xmm0\n\t"  // Load one pixel to XMM1
01178             "punpcklbw       %%xmm7,      %%xmm0\n\t"  // Unpack the pixel
01179             "pmullw          %%xmm6,      %%xmm0\n\t"  // Multiply the pixel with (1 - alpha) * 256
01180             "paddw           %%xmm5,      %%xmm0\n\t"  // Add color * alpha * 256 to the result
01181             "psrlw               $8,      %%xmm0\n\t"  // Divide by 256
01182             "packuswb        %%xmm1,      %%xmm0\n\t"  // Pack the pixel to a dword
01183             "movd            %%xmm0,   (%0,%1,4)\n\t"  // Write the pixel to the image
01184             : : "r"(data), "r"(i) );
01185         }
01186     } else
01187 #endif
01188 
01189 #ifdef USE_MMX_INLINE_ASM
01190     if ( KCPUInfo::haveExtension( KCPUInfo::IntelMMX ) && pixels > 1 ) {
01191         Q_UINT16 alpha = Q_UINT16( ( 1.0 - opacity ) * 256.0 );
01192         KIE4Pack packedalpha = { { alpha, alpha, alpha, 256 } };
01193 
01194         Q_UINT16 red   = Q_UINT16( clr.red()   * 256 * opacity );
01195         Q_UINT16 green = Q_UINT16( clr.green() * 256 * opacity );
01196         Q_UINT16 blue  = Q_UINT16( clr.blue()  * 256 * opacity );
01197 
01198         KIE4Pack packedcolor = { { blue, green, red, 0 } };
01199 
01200         __asm__ __volatile__(
01201         "pxor        %%mm7,    %%mm7\n\t"       // Zero out MM7 for unpacking
01202         "movq         (%0),    %%mm6\n\t"       // Set up (1 - alpha) * 256 in MM6
01203         "movq         (%1),    %%mm5\n\t"       // Set up color * alpha * 256 in MM5
01204         : : "r"(&packedalpha), "r"(&packedcolor),