Ticket #8951: wince-scaler.patch

File wince-scaler.patch, 8.1 KB (added by fingolfin, 15 years ago)
  • backends/platform/wince/CEScaler.cpp

     
    2525#include "graphics/scaler/intern.h"
    2626#include "CEScaler.h"
    2727
    28 int redblueMasks[] = { 0x7C1F, 0xF81F };
    29 int greenMasks[] = { 0x03E0, 0x07E0 };
    30 
    31 static int maskUsed;
    32 
    33 void initCEScaler(void) {
    34         if (gBitFormat == 555)
    35                 maskUsed = 0;
    36         else
    37                 maskUsed = 1;
    38 }
    39 
    40 // FIXME: Fingolfin says: The following interpolation code is a lot slower than it needs
    41 // to be. The reason: Using the value of a global variable to index two global arrays is
    42 // extremely difficult if not impossible for the compiler to optimize. At the very least,
    43 // the two arrays should be 'static const', but even then, memory access is required.
    44 // To avoid this, one could use the techniques used by our other scalers. See also the
    45 // interpolate functions in graphics/scaler/intern.h.
    46 // Even if those can't be used directly for some reasons (e.g. the compiler has problems
    47 // with templates), then still the *techniques* could and should be used. I would expect
    48 // that this way, even the C version of PocketPCPortrait() should get a big speed boost.
    49 
    50 static inline uint16 CEinterpolate16_4(uint16 p1, uint16 p2, uint16 p3, uint16 p4)
    51 {
    52         return ((((p1 & redblueMasks[maskUsed]) + (p2 & redblueMasks[maskUsed]) + (p3 & redblueMasks[maskUsed]) + (p4 & redblueMasks[maskUsed])) / 4) & redblueMasks[maskUsed]) |
    53                ((((p1 & greenMasks[maskUsed]) + (p2 & greenMasks[maskUsed]) + (p3 & greenMasks[maskUsed]) + (p4 & greenMasks[maskUsed])) / 4) & greenMasks[maskUsed]);
    54 }
    55 
    56 static inline uint16 CEinterpolate16_2(uint16 p1, int w1, uint16 p2, int w2) {
    57         return ((((p1 & redblueMasks[maskUsed]) * w1 + (p2 & redblueMasks[maskUsed]) * w2) / (w1 + w2)) & redblueMasks[maskUsed]) |
    58                ((((p1 & greenMasks[maskUsed]) * w1 + (p2 & greenMasks[maskUsed]) * w2) / (w1 + w2)) & greenMasks[maskUsed]);
    59 }
    60 
    61 void PocketPCPortrait(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) {
     28template<int bitFormat>
     29void PocketPCPortraitTemplate(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) {
    6230        uint8 *work;
    6331        int i;
    6432
     
    7341                        uint16 color3 = *(((const uint16 *)srcPtr) + (i + 2));
    7442                        uint16 color4 = *(((const uint16 *)srcPtr) + (i + 3));
    7543
    76                         *(((uint16 *)work) + 0) = CEinterpolate16_2(color1, 3, color2, 1);
    77                         *(((uint16 *)work) + 1) = CEinterpolate16_2(color2, 1, color3, 1);
    78                         *(((uint16 *)work) + 2) = CEinterpolate16_2(color3, 1, color4, 3);
     44                        *(((uint16 *)work) + 0) = interpolate32_3_1<bitFormat>(color1, color2);
     45                        *(((uint16 *)work) + 1) = interpolate32_1_1<bitFormat>(color2, color3);
     46                        *(((uint16 *)work) + 2) = interpolate32_3_1<bitFormat>(color4, color3);
    7947
    8048                        work += 3 * sizeof(uint16);
    8149                }
     
    8351                dstPtr += dstPitch;
    8452        }
    8553}
     54MAKE_WRAPPER(PocketPCPortrait)
    8655
    87 // FIXME: Fingolfin says: Please document this function. What does it compute? How
    88 // does it differ from the code in aspect.cpp ? It would be nice to speed up this function
    89 // here using the ideas and tricks from aspect.cpp and the comment above, as right now, it
    90 // is rather hard for the compiler to optimize this code properly.
     56// FIXME: Fingolfin says: Please document this function. What does it compute?
     57// How does it differ from the code in aspect.cpp ? Is it faster than the code
     58// in aspect.cpp, or vice versa? Maybe this could just be replaced by aspect.cpp?
    9159void PocketPCLandscapeAspect(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) {
    9260
     61        const int redblueMasks[] = { 0x7C1F, 0xF81F };
     62        const int greenMasks[] = { 0x03E0, 0x07E0 };
     63        const int maskUsed = (gBitFormat == 565);
     64
    9365#define RB(x) ((x & redblueMasks[maskUsed])<<8)
    9466#define G(x)  ((x & greenMasks[maskUsed])<<3)
    9567
     
    150122}
    151123#endif
    152124
    153 void PocketPCHalf(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) {
    154 #ifdef ARM
    155         PocketPCHalfARM(srcPtr, srcPitch, dstPtr, dstPitch, width, height, redbluegreenMasks[maskUsed],roundingconstants[maskUsed]);
    156 #else
     125template<int bitFormat>
     126void PocketPCHalfTemplate(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) {
    157127        uint8 *work;
    158128        int i;
    159129        uint16 srcPitch16 = (uint16)(srcPitch / sizeof(uint16));
     
    168138                        uint16 color2 = *(((const uint16 *)srcPtr) + (i + 1));
    169139                        uint16 color3 = *(((const uint16 *)srcPtr) + (i + srcPitch16));
    170140                        uint16 color4 = *(((const uint16 *)srcPtr) + (i + srcPitch16 + 1));
    171                         *(((uint16 *)work) + 0) = CEinterpolate16_4(color1, color2, color3, color4);
     141                        *(((uint16 *)work) + 0) = interpolate32_1_1_1_1<bitFormat>(color1, color2, color3, color4);
    172142
    173143                        work += sizeof(uint16);
    174144                }
    175145                srcPtr += 2 * srcPitch;
    176146                dstPtr += dstPitch;
    177147        }
     148}
     149
     150void PocketPCHalf(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) {
     151#ifdef ARM
     152        int maskUsed = (gBitFormat == 565);
     153        PocketPCHalfARM(srcPtr, srcPitch, dstPtr, dstPitch, width, height, redbluegreenMasks[maskUsed],roundingconstants[maskUsed]);
     154#else
     155        if (gBitFormat == 565)
     156                PocketPCHalfTemplate<565>(srcPtr, srcPitch, dstPtr, dstPitch, width, height);
     157        else
     158                PocketPCHalfTemplate<565>(srcPtr, srcPitch, dstPtr, dstPitch, width, height);
    178159#endif
    179160}
    180161
    181 
    182 void PocketPCHalfZoom(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) {
     162template<int bitFormat>
     163void PocketPCHalfZoomTemplate(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) {
    183164        uint8 *work;
    184165        int i;
    185166        uint16 srcPitch16 = (uint16)(srcPitch / sizeof(uint16));
     
    191172                i = 0;
    192173                work = dstPtr;
    193174
    194                 for (int i=0; i<width; i+=2) {
     175                for (int i = 0; i < width; i += 2) {
    195176                        uint16 color1 = *(((const uint16 *)srcPtr) + i);
    196177                        uint16 color2 = *(((const uint16 *)srcPtr) + (i + 1));
    197                         *(((uint16 *)work) + 0) = CEinterpolate16_2(color1, 1, color2, 1);
     178                        *(((uint16 *)work) + 0) = interpolate32_1_1<bitFormat>(color1, color2);
    198179
    199180                        work += sizeof(uint16);
    200181                }
     
    202183                dstPtr += dstPitch;
    203184        }
    204185}
     186MAKE_WRAPPER(PocketPCHalfZoom)
    205187
    206 void SmartphoneLandscape(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) {
     188template<int bitFormat>
     189void SmartphoneLandscapeTemplate(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) {
    207190        uint8 *work;
    208191        int i;
    209192        int line = 0;
     
    212195                i = 0;
    213196                work = dstPtr;
    214197
    215                 for (int i=0; i<width; i+=3) {
     198                for (int i = 0; i < width; i += 3) {
    216199                        // Filter 2/3
    217200                        uint16 color1 = *(((const uint16 *)srcPtr) + i);
    218201                        uint16 color2 = *(((const uint16 *)srcPtr) + (i + 1));
    219202                        uint16 color3 = *(((const uint16 *)srcPtr) + (i + 2));
    220203
    221                         *(((uint16 *)work) + 0) = CEinterpolate16_2(color1, 3, color2, 1);
    222                         *(((uint16 *)work) + 1) = CEinterpolate16_2(color2, 1, color3, 1);
     204                        *(((uint16 *)work) + 0) = interpolate32_3_1<bitFormat>(color1, color2);
     205                        *(((uint16 *)work) + 1) = interpolate32_3_1<bitFormat>(color3, color2);
    223206
    224207                        work += 2 * sizeof(uint16);
    225208                }
     
    233216                }
    234217        }
    235218}
     219MAKE_WRAPPER(SmartphoneLandscape)
  • backends/platform/wince/wince-sdl.cpp

     
    13981398                InitScalers(555);
    13991399        else
    14001400                InitScalers(565);
    1401         initCEScaler();
    14021401
    14031402        // Need some extra bytes around when using 2xSaI
    14041403        _tmpscreen = SDL_CreateRGBSurface(SDL_SWSURFACE, _videoMode.screenWidth + 3, _videoMode.screenHeight + 3, 16, _hwscreen->format->Rmask, _hwscreen->format->Gmask, _hwscreen->format->Bmask, _hwscreen->format->Amask);
  • backends/platform/wince/CEScaler.h

     
    3939DECLARE_SCALER(SmartphoneLandscape);
    4040//#endif
    4141
    42 void initCEScaler(void);
    43 
    4442#endif