Ticket #8706: DXAOptimisations2

File DXAOptimisations2, 8.0 KB (added by SF/robinwatts, 17 years ago)

Second version of the DXA optimisations patch.

Line 
1Index: graphics/dxa_player.cpp
2===================================================================
3--- graphics/dxa_player.cpp (revision 27747)
4+++ graphics/dxa_player.cpp (working copy)
5@@ -42,6 +42,12 @@
6 _scaledBuffer = 0;
7 _drawBuffer = 0;
8
9+ _inBuffer = 0;
10+ _inBufferSize = 0;
11+
12+ _decompBuffer = 0;
13+ _decompBufferSize = 0;
14+
15 _width = 0;
16 _height = 0;
17
18@@ -129,6 +135,7 @@
19 debug(2, "flags 0x0%x framesCount %d width %d height %d rate %d ticks %d", flags, _framesCount, _width, _height, _framesPerSec, _frameTicks);
20
21 _frameSize = _width * _height;
22+ _decompBufferSize = _frameSize;
23 _frameBuffer1 = (uint8 *)malloc(_frameSize);
24 _frameBuffer2 = (uint8 *)malloc(_frameSize);
25 if (!_frameBuffer1 || !_frameBuffer2)
26@@ -157,6 +164,8 @@
27 free(_frameBuffer1);
28 free(_frameBuffer2);
29 free(_scaledBuffer);
30+ free(_inBuffer);
31+ free(_decompBuffer);
32 }
33
34 void DXAPlayer::copyFrameToBuffer(byte *dst, uint x, uint y, uint pitch) {
35@@ -175,44 +184,43 @@
36
37 void DXAPlayer::decodeZlib(byte *data, int size, int totalSize) {
38 #ifdef USE_ZLIB
39- byte *temp = (byte *)malloc(size);
40- if (temp) {
41- memcpy(temp, data, size);
42-
43- z_stream d_stream;
44- d_stream.zalloc = (alloc_func)0;
45- d_stream.zfree = (free_func)0;
46- d_stream.opaque = (voidpf)0;
47- d_stream.next_in = temp;
48- d_stream.avail_in = size;
49- d_stream.total_in = size;
50- d_stream.next_out = data;
51- d_stream.avail_out = totalSize;
52- inflateInit(&d_stream);
53- inflate(&d_stream, Z_FINISH);
54- inflateEnd(&d_stream);
55- free(temp);
56- }
57+ z_stream d_stream;
58+ d_stream.zalloc = (alloc_func)0;
59+ d_stream.zfree = (free_func)0;
60+ d_stream.opaque = (voidpf)0;
61+ d_stream.next_in = _inBuffer;
62+ d_stream.avail_in = size;
63+ d_stream.total_in = size;
64+ d_stream.next_out = data;
65+ d_stream.avail_out = totalSize;
66+ inflateInit(&d_stream);
67+ inflate(&d_stream, Z_FINISH);
68+ inflateEnd(&d_stream);
69 #endif
70 }
71
72 #define BLOCKW 4
73 #define BLOCKH 4
74
75-void DXAPlayer::decode12(byte *data, int size, int totalSize) {
76+void DXAPlayer::decode12(int size) {
77 #ifdef USE_ZLIB
78+ if (_decompBuffer == NULL) {
79+ free(_decompBuffer);
80+ _decompBuffer = (byte *)malloc(_decompBufferSize);
81+ if (_decompBuffer == NULL)
82+ error("Error allocating decomp buffer (size %d)", _decompBufferSize);
83+ }
84 /* decompress the input data */
85- decodeZlib(data, size, totalSize);
86+ decodeZlib(_decompBuffer, size, _decompBufferSize);
87
88- byte *dat = data;
89- byte *frame2 = (byte *)malloc(totalSize);
90+ byte *dat = _decompBuffer;
91
92- memcpy(frame2, _frameBuffer1, totalSize);
93+ memcpy(_frameBuffer2, _frameBuffer1, _frameSize);
94
95 for (int by = 0; by < _height; by += BLOCKH) {
96 for (int bx = 0; bx < _width; bx += BLOCKW) {
97 byte type = *dat++;
98- byte *b2 = frame2 + bx + by * _width;
99+ byte *b2 = _frameBuffer1 + bx + by * _width;
100
101 switch (type) {
102 case 0:
103@@ -276,7 +284,7 @@
104 int my = mbyte & 0x07;
105 if (mbyte & 0x08)
106 my = -my;
107- byte *b1 = _frameBuffer1 + (bx+mx) + (by+my) * _width;
108+ byte *b1 = _frameBuffer2 + (bx+mx) + (by+my) * _width;
109 for (int yc = 0; yc < BLOCKH; yc++) {
110 memcpy(b2, b1, BLOCKW);
111 b1 += _width;
112@@ -291,30 +299,33 @@
113 }
114 }
115 }
116-
117- memcpy(data, frame2, totalSize);
118- free(frame2);
119 #endif
120 }
121
122-void DXAPlayer::decode13(byte *data, int size, int totalSize) {
123+void DXAPlayer::decode13(int size) {
124 #ifdef USE_ZLIB
125 uint8 *codeBuf, *dataBuf, *motBuf, *maskBuf;
126
127+ if (_decompBuffer == NULL) {
128+ free(_decompBuffer);
129+ _decompBuffer = (byte *)malloc(_decompBufferSize);
130+ if (_decompBuffer == NULL)
131+ error("Error allocating decomp buffer (size %d)", _decompBufferSize);
132+ }
133+
134 /* decompress the input data */
135- decodeZlib(data, size, totalSize);
136+ decodeZlib(_decompBuffer, size, _decompBufferSize);
137
138- uint8 *frame2 = (uint8*)malloc(totalSize);
139- memcpy(frame2, _frameBuffer1, totalSize);
140+ memcpy(_frameBuffer2, _frameBuffer1, _frameSize);
141
142 int codeSize = _width * _curHeight / 16;
143 int dataSize, motSize, maskSize;
144
145- dataSize = READ_BE_UINT32(&data[0]);
146- motSize = READ_BE_UINT32(&data[4]);
147- maskSize = READ_BE_UINT32(&data[8]);
148+ dataSize = READ_BE_UINT32(&_decompBuffer[0]);
149+ motSize = READ_BE_UINT32(&_decompBuffer[4]);
150+ maskSize = READ_BE_UINT32(&_decompBuffer[8]);
151
152- codeBuf = &data[12];
153+ codeBuf = &_decompBuffer[12];
154 dataBuf = &codeBuf[codeSize];
155 motBuf = &dataBuf[dataSize];
156 maskBuf = &motBuf[motSize];
157@@ -322,7 +333,7 @@
158 for (int by = 0; by < _curHeight; by += BLOCKH) {
159 for (int bx = 0; bx < _width; bx += BLOCKW) {
160 uint8 type = *codeBuf++;
161- uint8 *b2 = (uint8*)frame2 + bx + by * _width;
162+ uint8 *b2 = (uint8*)_frameBuffer1 + bx + by * _width;
163
164 switch (type) {
165 case 0:
166@@ -373,7 +384,7 @@
167 if (mbyte & 0x08)
168 my = -my;
169
170- uint8 *b1 = (uint8*)_frameBuffer1 + (bx+mx) + (by+my) * _width;
171+ uint8 *b1 = (uint8*)_frameBuffer2 + (bx+mx) + (by+my) * _width;
172 for (int yc = 0; yc < BLOCKH; yc++) {
173 memcpy(b2, b1, BLOCKW);
174 b1 += _width;
175@@ -389,7 +400,7 @@
176
177 for (int subBlock = 0; subBlock < 4; subBlock++) {
178 int sx = bx + subX[subBlock], sy = by + subY[subBlock];
179- b2 = (uint8*)frame2 + sx + sy * _width;
180+ b2 = (uint8*)_frameBuffer1 + sx + sy * _width;
181 switch (subMask & 0xC0) {
182 // 00: skip
183 case 0x00:
184@@ -417,7 +428,7 @@
185 if (mbyte & 0x08)
186 my = -my;
187
188- uint8 *b1 = (uint8*)_frameBuffer1 + (sx+mx) + (sy+my) * _width;
189+ uint8 *b1 = (uint8*)_frameBuffer2 + (sx+mx) + (sy+my) * _width;
190 for (int yc = 0; yc < BLOCKH / 2; yc++) {
191 memcpy(b2, b1, BLOCKW / 2);
192 b1 += _width;
193@@ -476,9 +487,6 @@
194 }
195 }
196 }
197-
198- memcpy(data, frame2, totalSize);
199- free(frame2);
200 #endif
201 }
202
203@@ -486,6 +494,23 @@
204 uint32 tag;
205
206 tag = _fd->readUint32BE();
207+ if (tag == MKID_BE('MAXD')) {
208+ // The MAXD tag specifies the maximum decompression buffer
209+ // size we will require. It should only occur once in a file,
210+ // at the start, and it lowers our previous estimate, which
211+ // is _frameSize.
212+ uint32 size = _fd->readUint32BE();
213+
214+ // A larger size should never happen, but cope with it just in case
215+ if ((_decompBuffer != NULL) && (size > _decompBufferSize)) {
216+ free(_decompBuffer);
217+ _decompBuffer = NULL;
218+ }
219+ _decompBufferSize = size;
220+
221+ // Read the next tag
222+ tag = _fd->readUint32BE();
223+ }
224 if (tag == MKID_BE('CMAP')) {
225 byte rgb[768];
226
227@@ -498,25 +523,34 @@
228 byte type = _fd->readByte();
229 uint32 size = _fd->readUint32BE();
230
231- _fd->read(_frameBuffer2, size);
232+ if ((_inBuffer == NULL) || (_inBufferSize < size)) {
233+ free(_inBuffer);
234+ _inBuffer = (byte *)malloc(size);
235+ if (_inBuffer == NULL)
236+ error("Error allocating input buffer (size %d)", size);
237+ _inBufferSize = size;
238+ }
239
240+ _fd->read(_inBuffer, size);
241+
242 switch (type) {
243 case 2:
244+ decodeZlib(_frameBuffer1, size, _frameSize);
245+ break;
246 case 3:
247 decodeZlib(_frameBuffer2, size, _frameSize);
248 break;
249 case 12:
250- decode12(_frameBuffer2, size, _frameSize);
251+ decode12(size);
252 break;
253 case 13:
254- decode13(_frameBuffer2, size, _frameSize);
255+ decode13(size);
256 break;
257 default:
258 error("decodeFrame: Unknown compression type %d", type);
259 }
260- if (type == 2 || type == 4 || type == 12 || type == 13) {
261- memcpy(_frameBuffer1, _frameBuffer2, _frameSize);
262- } else {
263+
264+ if (type != 2 && type != 12 && type != 13) {
265 for (int j = 0; j < _curHeight; ++j) {
266 for (int i = 0; i < _width; ++i) {
267 const int offs = j * _width + i;
268Index: graphics/dxa_player.h
269===================================================================
270--- graphics/dxa_player.h (revision 27747)
271+++ graphics/dxa_player.h (working copy)
272@@ -47,6 +47,10 @@
273 byte *_frameBuffer2;
274 byte *_scaledBuffer;
275 byte *_drawBuffer;
276+ byte *_inBuffer;
277+ uint32 _inBufferSize;
278+ byte *_decompBuffer;
279+ uint32 _decompBufferSize;
280 uint16 _width;
281 uint16 _height, _curHeight;
282 uint16 _framesCount;
283@@ -121,10 +125,10 @@
284 void decodeNextFrame();
285
286 void decodeZlib(byte *data, int size, int totalSize);
287- void decode12(byte *data, int size, int totalSize);
288- void decode13(byte *data, int size, int totalSize);
289+ void decode12(int size);
290+ void decode13(int size);
291 };
292-
293+
294 } // End of namespace Graphics
295
296 #endif