Ticket #8701: ARM_Smush_Diffs

File ARM_Smush_Diffs, 11.1 KB (added by SF/robinwatts, 17 years ago)

ARM asm version of Smush codec.

Line 
1Index: engines/scumm/smush/codec47ARM.s
2===================================================================
3--- engines/scumm/smush/codec47ARM.s (revision 0)
4+++ engines/scumm/smush/codec47ARM.s (revision 0)
5@@ -0,0 +1,372 @@
6+@ ScummVM - Graphic Adventure Engine
7+@
8+@ ScummVM is the legal property of its developers, whose names
9+@ are too numerous to list here. Please refer to the COPYRIGHT
10+@ file distributed with this source distribution.
11+@
12+@ This program is free software; you can redistribute it and/or
13+@ modify it under the terms of the GNU General Public License
14+@ as published by the Free Software Foundation; either version 2
15+@ of the License, or (at your option) any later version.
16+@
17+@ This program is distributed in the hope that it will be useful,
18+@ but WITHOUT ANY WARRANTY; without even the implied warranty of
19+@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20+@ GNU General Public License for more details.
21+@
22+@ You should have received a copy of the GNU General Public License
23+@ along with this program; if not, write to the Free Software
24+@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
25+@
26+@ $URL:$
27+@ $Id:$
28+@
29+@ @author Robin Watts (robin@wss.co.uk)
30+@
31+@ This file provides an ARM optimised version of sections of codec47.cpp.
32+@ The algorithm is essentially the same as that within codec47.cpp
33+@ so to understand this file you should understand codec47.cpp first.
34+
35+ .text
36+
37+ .global ARM_Smush_decode2
38+
39+ARM_Smush_decode2:
40+ @ r0 = dst
41+ @ r1 = src
42+ @ r2 = width
43+ @ r3 = height
44+ @ <> = param (on the stack; loaded into r4 below)
45+ @ <> = _table
46+ @ <> = _tableBig
47+ @ <> = _offset1
48+ @ <> = _offset2
49+ @ <> = _tableSmall
50+ STMFD r13!,{r2,r4-r11,R14} @ 10 words pushed, so the stacked args start at [r13,#(9+1)*4]
51+
52+ LDR r4,[r13,#(9+1)*4] @ r4 = param
53+ @ stall
54+ @ stall
55+ SUB r4,r4,#0xF8 @ bias r4 so that [r4,code] reads param[code-0xF8]
56+
57+ @ r0 = dst
58+ @ r1 = _d_src
59+ @ r2 = _d_pitch
60+ @ r3 = height
61+ @ r4 = _paramPtr (param - 0xF8)
62+ ADD r7,r2,#7 @ r7 = bw = (width+7)>>3 (completed by the next line)
63+ MOV r7,r7,LSR #3
64+y_loop:
65+x_loop:
66+ @ LEVEL 1: decode one 8x8 block at r0
67+ LDRB r6,[r1],#1 @ r6 = code = *_d_src++
68+ @ stall
69+ @ stall
70+ CMP r6,#0xF8
71+ BLT level1codeSMALL @ code < 0xF8
72+ CMP r6,#0xFC
73+ BLT level1codeMID @ 0xF8 <= code < 0xFC
74+ BEQ level1codeFC @ code == 0xFC
75+ CMP r6,#0xFE
76+ BGT level1codeFF @ code == 0xFF
77+ BEQ level1codeFE @ code == 0xFE
78+level1codeFD:
79+ LDRB r6,[r1],#1 @ r6 = tmp = *_d_src++
80+ LDR r8,[r13,#(9+1+2)*4] @ r8 = _tableBig
81+ @ stall
82+ ADD r12,r6,r6,LSL #1 @ r12= tmp*3
83+ ADD r6,r6,r12,LSL #5 @ r6 = tmp*97
84+ ADD r8,r8,r6,LSL #2 @ r8 = tmp_ptr = _tableBig + tmp*388
85+ LDRB r9,[r8,#384] @ r9 = l = tmp_ptr[384]
86+ LDRB r6,[r1],#1 @ r6 = val = *_d_src++
87+ ADD r12,r8,#384 @ r12= &tmp_ptr[384]
88+ @ Guard for l == 0: the loop below tests at the bottom, so it would otherwise store once.
89+ CMP r9,#0
90+ BEQ level1codeFD_over1
91+level1codeFD_loop1:
92+ LDRB r10,[r8],#1 @ r10 = low byte of the 16-bit offset
93+ LDRB r11,[r8],#1 @ r11 = high byte of the 16-bit offset
94+ SUBS r9,r9,#1
95+ ADD r10,r10,r0
96+ STRB r6,[r10,r11,LSL #8] @ *(_d_dst + (*tmp_ptr2++)) = val
97+ BGT level1codeFD_loop1
98+level1codeFD_over1:
99+ LDRB r9,[r12,#1] @ r9 = l = tmp_ptr[385]
100+ LDRB r6,[r1],#1 @ r6 = val = *_d_src++
101+ SUB r12,r12,#384-128 @ r12= &tmp_ptr[128]
102+ @ Guard for l == 0: the loop below tests at the bottom, so it would otherwise store once.
103+ CMP r9,#0
104+ BEQ level1codeFD_over2
105+level1codeFD_loop2:
106+ LDRB r10,[r12],#1 @ r10 = low byte of the 16-bit offset
107+ LDRB r11,[r12],#1 @ r11 = high byte of the 16-bit offset
108+ SUBS r9,r9,#1
109+ ADD r10,r10,r0
110+ STRB r6,[r10,r11,LSL #8] @ *(_d_dst + (*tmp_ptr2++)) = val
111+ BGT level1codeFD_loop2
112+level1codeFD_over2:
113+level1_end:
114+
115+ ADD r0,r0,#8 @ step to the next 8x8 block in this row
116+ SUBS r7,r7,#1
117+ BGT x_loop
118+
119+ ADD r7,r2,#7 @ reload r7 = bw for the next row of blocks
120+ MOV r7,r7,LSR #3
121+ ADD r0,r0,r2,LSL #3 @ r0 += 8*_d_pitch
122+ SUB r0,r0,r7,LSL #3 @ r0 -= 8*bw, undoing the x_loop advance
123+ SUBS r3,r3,#8 @ height -= 8 (one row of 8x8 blocks done)
124+ BGT y_loop @ loop back
125+
126+ LDMFD r13!,{r2,r4-r11,PC} @ restore the saved registers and return
127+
128+level1codeSMALL:
129+ LDR r8,[r13,#(9+1+1)*4] @ r8 = _table
130+ LDR r9,[r13,#(9+1+3)*4] @ r9 = _offset1
131+ MOV r6,r6,LSL #1 @ r6 = code<<1 (byte offset of a 16-bit entry)
132+ LDRSH r8,[r8,r6] @ r8 = _table[code], sign-extended
133+level1codeFC:
134+ @ EQ => FC (flags are still those of the dispatch compare; not EQ when falling in from SMALL)
135+ LDREQ r9,[r13,#(9+1+4)*4] @ r9 = _offset2
136+ MOVEQ r8,#0 @ FC has no _table term
137+ SUB r11,r2,#7 @ r11 = _d_pitch-7
138+ ADD r9,r9,r0 @ r9 = _d_dst+_offset
139+ ADD r8,r8,r9 @ r8 = _d_dst+_table[code]+_offset
140+ @ r8 = &_dst[tmp2], the source of the 8x8 copy
141+ MOV r12,#8 @ r12 = rows remaining
142+level1codeSMALL_loop:
143+ LDRB r5, [r8],#1 @ r5 = source byte 0 of this row
144+ LDRB r6, [r8],#1 @ r6 = source byte 1
145+ LDRB r9, [r8],#1 @ r9 = source byte 2
146+ LDRB r10,[r8],#1 @ r10 = source byte 3
147+ STRB r5, [r0],#1 @ d_dst[0] = r5
148+ STRB r6, [r0],#1 @ d_dst[1] = r6
149+ STRB r9, [r0],#1 @ d_dst[2] = r9
150+ STRB r10,[r0],#1 @ d_dst[3] = r10
151+ LDRB r5, [r8],#1 @ r5 = source byte 4
152+ LDRB r6, [r8],#1 @ r6 = source byte 5
153+ LDRB r9, [r8],#1 @ r9 = source byte 6
154+ LDRB r10,[r8],r11 @ r10 = source byte 7; r8 moves to the start of the next source row
155+ STRB r5, [r0],#1 @ d_dst[4] = r5
156+ STRB r6, [r0],#1 @ d_dst[5] = r6
157+ STRB r9, [r0],#1 @ d_dst[6] = r9
158+ STRB r10,[r0],r11 @ d_dst[7] = r10; d_dst moves to the start of the next row
159+ SUBS r12,r12,#1
160+ BGT level1codeSMALL_loop
161+ SUB r0,r0,r2,LSL #3 @ revert d_dst: r0 -= 8*_d_pitch
162+ B level1_end
163+
164+level1codeMID:
165+ @ LT => F8<=code<FC case
166+ @ EQ => FE case
167+ LDRB r6,[r4,r6] @ r6 = t = _paramPtr[code] (flags untouched, still LT)
168+level1codeFE:
169+ LDREQB r6,[r1],#1 @ FE only: r6 = t = *_d_src++
170+ MOV r12,#8 @ r12 = rows remaining
171+ SUB r11,r2,#7 @ r11 = _d_pitch-7
172+level1codeMID_loop:
173+ STRB r6,[r0],#1 @ fill the 8 bytes of this row with t
174+ STRB r6,[r0],#1
175+ STRB r6,[r0],#1
176+ STRB r6,[r0],#1
177+ STRB r6,[r0],#1
178+ STRB r6,[r0],#1
179+ STRB r6,[r0],#1
180+ STRB r6,[r0],r11 @ eighth byte; r0 moves to the start of the next row
181+ SUBS r12,r12,#1
182+ BGT level1codeMID_loop
183+ SUB r0,r0,r2,LSL #3 @ revert d_dst: r0 -= 8*_d_pitch
184+ B level1_end
185+
186+level1codeFF:
187+ BL level2 @ 4x4 block at d_dst
188+ ADD r0,r0,#4
189+ BL level2 @ 4x4 block at d_dst+4
190+ ADD r0,r0,r2,LSL #2 @ r0 += 4*_d_pitch
191+ SUB r0,r0,#4
192+ BL level2 @ 4x4 block at d_dst+4*_d_pitch
193+ ADD r0,r0,#4
194+ BL level2 @ 4x4 block at d_dst+4*_d_pitch+4
195+ SUB r0,r0,#4
196+ SUB r0,r0,r2,LSL #2 @ r0 is back at the start of the 8x8 block
197+ B level1_end
198+
199+level2:
200+ @ r0 = _d_dst
201+ @ r1 = _d_src
202+ @ r2 = _d_pitch
203+ @ r3 = PRESERVE
204+ @ r4 = _paramPtr (param - 0xF8)
205+ @ r7 = PRESERVE
206+ @ r14= return address
207+ LDRB r6,[r1],#1 @ r6 = code = *_d_src++
208+ @ stall
209+ @ stall
210+ CMP r6,#0xF8
211+ BLT level2codeSMALL @ code < 0xF8
212+ CMP r6,#0xFC
213+ BLT level2codeMID @ 0xF8 <= code < 0xFC
214+ BEQ level2codeFC @ code == 0xFC
215+ CMP r6,#0xFE
216+ BGT level2codeFF @ code == 0xFF
217+ BEQ level2codeFE @ code == 0xFE
218+level2codeFD:
219+ LDRB r6,[r1],#1 @ r6 = tmp = *_d_src++
220+ LDR r8,[r13,#(9+1+5)*4] @ r8 = _tableSmall
221+ @ stall
222+ @ stall
223+ ADD r8,r8,r6,LSL #7 @ r8 = tmp_ptr = _tableSmall + tmp*128
224+ LDRB r9,[r8,#96] @ r9 = l = tmp_ptr[96]
225+ LDRB r6,[r1],#1 @ r6 = val = *_d_src++
226+ ADD r12,r8,#32 @ r12 = tmp_ptr + 32
227+ @ Guard for l == 0: the loop below tests at the bottom, so it would otherwise store once.
228+ CMP r9,#0
229+ BEQ level2codeFD_over1
230+level2codeFD_loop1:
231+ LDRB r10,[r8],#1 @ r10 = low byte of the 16-bit offset
232+ LDRB r11,[r8],#1 @ r11 = high byte of the 16-bit offset
233+ SUBS r9,r9,#1
234+ ADD r10,r10,r0
235+ STRB r6,[r10,r11,LSL #8] @ *(_d_dst + (*tmp_ptr2++)) = val
236+ BGT level2codeFD_loop1
237+level2codeFD_over1:
238+ LDRB r9,[r12,#97-32] @ r9 = l = tmp_ptr[97]
239+ LDRB r6,[r1],#1 @ r6 = val = *_d_src++
240+ @ Guard for l == 0: return straight away instead of entering the bottom-tested loop.
241+ CMP r9,#0
242+ MOVEQ PC,R14
243+level2codeFD_loop2:
244+ LDRB r10,[r12],#1 @ r10 = low byte of the 16-bit offset
245+ LDRB r11,[r12],#1 @ r11 = high byte of the 16-bit offset
246+ SUBS r9,r9,#1
247+ ADD r10,r10,r0
248+ STRB r6,[r10,r11,LSL #8] @ *(_d_dst + (*tmp_ptr2++)) = val
249+ BGT level2codeFD_loop2
250+
251+ MOV PC,R14 @ return to the caller
252+
253+level2codeSMALL:
254+ LDR r8,[r13,#(9+1+1)*4] @ r8 = _table
255+ LDR r9,[r13,#(9+1+3)*4] @ r9 = _offset1
256+ MOV r6,r6,LSL #1 @ r6 = code<<1 (byte offset of a 16-bit entry)
257+ LDRSH r8,[r8,r6] @ r8 = _table[code], sign-extended
258+level2codeFC:
259+ @ EQ => FC (flags are still those of the dispatch compare; not EQ when falling in from SMALL)
260+ LDREQ r9,[r13,#(9+1+4)*4] @ r9 = _offset2
261+ MOVEQ r8,#0 @ FC has no _table term
262+ SUB r11,r2,#3 @ r11 = _d_pitch-3
263+ ADD r9,r9,r0 @ r9 = _d_dst+_offset
264+ ADD r8,r8,r9 @ r8 = _d_dst+_table[code]+_offset
265+ @ r8 = &_dst[tmp2], the source of the 4x4 copy
266+ MOV r12,#4 @ r12 = rows remaining
267+level2codeSMALL_loop:
268+ LDRB r5, [r8],#1 @ r5 = source byte 0 of this row
269+ LDRB r6, [r8],#1 @ r6 = source byte 1
270+ LDRB r9, [r8],#1 @ r9 = source byte 2
271+ LDRB r10,[r8],r11 @ r10 = source byte 3; r8 moves to the start of the next source row
272+ STRB r5, [r0],#1 @ d_dst[0] = r5
273+ STRB r6, [r0],#1 @ d_dst[1] = r6
274+ STRB r9, [r0],#1 @ d_dst[2] = r9
275+ STRB r10,[r0],r11 @ d_dst[3] = r10; d_dst moves to the start of the next row
276+ SUBS r12,r12,#1
277+ BGT level2codeSMALL_loop
278+ SUB r0,r0,r2,LSL #2 @ revert d_dst: r0 -= 4*_d_pitch
279+ MOV PC,R14
280+
281+level2codeMID:
282+ @ LT => F8<=code<FC case
283+ @ EQ => FE case
284+ LDRB r6,[r4,r6] @ r6 = t = _paramPtr[code] (flags untouched, still LT)
285+level2codeFE:
286+ LDREQB r6,[r1],#1 @ FE only: r6 = t = *_d_src++
287+ MOV r12,#4 @ r12 = rows remaining
288+ SUB r11,r2,#3 @ r11 = _d_pitch-3
289+level2codeMID_loop:
290+ STRB r6,[r0],#1 @ fill the 4 bytes of this row with t
291+ STRB r6,[r0],#1
292+ STRB r6,[r0],#1
293+ STRB r6,[r0],r11 @ fourth byte; r0 moves to the start of the next row
294+ SUBS r12,r12,#1
295+ BGT level2codeMID_loop
296+ SUB r0,r0,r2,LSL #2 @ revert d_dst: r0 -= 4*_d_pitch
297+ MOV PC,R14
298+
299+level2codeFF:
300+ MOV r5,r14 @ keep the return address in r5; the BLs below overwrite r14
301+ BL level3 @ 2x2 block at d_dst
302+ ADD r0,r0,#2
303+ BL level3 @ 2x2 block at d_dst+2
304+ ADD r0,r0,r2,LSL #1 @ r0 += 2*_d_pitch
305+ SUB r0,r0,#2
306+ BL level3 @ 2x2 block at d_dst+2*_d_pitch
307+ ADD r0,r0,#2
308+ BL level3 @ 2x2 block at d_dst+2*_d_pitch+2
309+ SUB r0,r0,#2
310+ SUB r0,r0,r2,LSL #1 @ r0 is back at the start of the 4x4 block
311+ MOV PC,R5 @ return through the address saved in r5
312+
313+level3:
314+ @ r0 = _d_dst
315+ @ r1 = _d_src
316+ @ r2 = _d_pitch
317+ @ r3 = PRESERVE
318+ @ r4 = _paramPtr (param - 0xF8)
319+ @ r5 = preserve
320+ @ r7 = PRESERVE
321+ @ r14= return address
322+ LDRB r6,[r1],#1 @ r6 = code = *_d_src++
323+ @ stall
324+ @ stall
325+ CMP r6,#0xF8
326+ BLT level3codeSMALL @ code < 0xF8
327+ CMP r6,#0xFC
328+ BLT level3codeMID @ 0xF8 <= code < 0xFC
329+ BEQ level3codeFC @ code == 0xFC
330+ CMP r6,#0xFE
331+ BGT level3codeFF @ code == 0xFF; FD (LT) and FE (EQ) fall through
332+level3codeFE:
333+ LDREQB r6,[r1],#1 @ FE only: r6 = t = *_d_src++ (FD must not consume a byte here)
334+level3codeMID:
335+ @ LT => F8<=code<FC case, or FD (NOTE(review): confirm FD takes this path in codec47.cpp)
336+ @ EQ => FE case
337+ LDRLTB r6,[r4,r6] @ r6 = t = _paramPtr[code]
338+ @ stall
339+ @ stall
340+ STRB r6,[r0,#1] @ d_dst[1] = t
341+ STRB r6,[r0],r2 @ d_dst[0] = t; r0 += _d_pitch
342+ STRB r6,[r0,#1] @ d_dst[_d_pitch+1] = t
343+ STRB r6,[r0],-r2 @ d_dst[_d_pitch] = t; r0 -= _d_pitch (restored)
344+ MOV PC,R14
345+
346+level3codeFF:
347+ LDRB r6,[r1],#1 @ r6 = first of four literal bytes from _d_src
348+ LDRB r9,[r1],#1 @ r9 = second
349+ LDRB r10,[r1],#1 @ r10 = third
350+ LDRB r11,[r1],#1 @ r11 = fourth
351+ STRB r9, [r0,#1] @ d_dst[1] = r9
352+ STRB r6, [r0],r2 @ d_dst[0] = r6; r0 += _d_pitch
353+ STRB r11,[r0,#1] @ d_dst[_d_pitch+1] = r11
354+ STRB r10,[r0],-r2 @ d_dst[_d_pitch] = r10; r0 -= _d_pitch (restored)
355+ MOV PC,R14
356+
357+level3codeSMALL:
358+ LDR r8,[r13,#(9+1+1)*4] @ r8 = _table
359+ LDR r9,[r13,#(9+1+3)*4] @ r9 = _offset1
360+ MOV r6,r6,LSL #1 @ r6 = code<<1 (byte offset of a 16-bit entry)
361+ LDRSH r8,[r8,r6] @ r8 = _table[code], sign-extended
362+level3codeFC:
363+ @ EQ => FC (flags are still those of the dispatch compare; not EQ when falling in from SMALL)
364+ LDREQ r9,[r13,#(9+1+4)*4] @ r9 = _offset2
365+ MOVEQ r8,#0 @ FC has no _table term
366+ ADD r9,r9,r0 @ r9 = _d_dst+_offset
367+ ADD r8,r8,r9 @ r8 = _d_dst+_table[code]+_offset
368+ @ r8 = &_dst[tmp2], the source of the 2x2 copy
369+ LDRB r6, [r8,#1] @ r6 = d_dst[tmp2+1]
370+ LDRB r9, [r8],r2 @ r9 = d_dst[tmp2+0]; r8 += _d_pitch
371+ LDRB r10,[r8,#1] @ r10= d_dst[tmp2+_d_pitch+1]
372+ LDRB r11,[r8],-r2 @ r11= d_dst[tmp2+_d_pitch]; r8 -= _d_pitch
373+ STRB r6, [r0,#1] @ d_dst[1 ] = r6
374+ STRB r9, [r0],r2 @ d_dst[0 ] = r9; r0 += _d_pitch
375+ STRB r10,[r0,#1] @ d_dst[_d_pitch+1] = r10
376+ STRB r11,[r0],-r2 @ d_dst[_d_pitch ] = r11; r0 -= _d_pitch (restored)
377+ MOV PC,R14
378
379
380Index: engines/scumm/smush/codec47.cpp
381===================================================================
382--- engines/scumm/smush/codec47.cpp (revision 27676)
383+++ engines/scumm/smush/codec47.cpp (working copy)
384@@ -342,6 +342,24 @@
385 } while (c < 32768);
386 }
387
388+#ifdef USE_ARM_SMUSH
389+
390+extern "C" void ARM_Smush_decode2( byte *dst, // r0 in codec47ARM.s
391+ const byte *src, // r1
392+ int width, // r2
393+ int height, // r3
394+ const byte *param_ptr, // this and the following arguments are read from the stack by the ARM code
395+ int16 *_table,
396+ byte *_tableBig,
397+ int32 offset1,
398+ int32 offset2,
399+ byte *_tableSmall);
400+
401+#define decode2(DST,SRC,WIDTH,HEIGHT,PARAM) \
402+ ARM_Smush_decode2(DST,SRC,WIDTH,HEIGHT,PARAM,_table,_tableBig, \
403+ _offset1,_offset2,_tableSmall)
404+
405+#else
406 void Codec47Decoder::level3(byte *d_dst) {
407 int32 tmp;
408 byte code = *_d_src++;
409@@ -503,6 +521,7 @@
410 dst += next_line;
411 } while (--bh);
412 }
413+#endif
414
415 Codec47Decoder::Codec47Decoder(int width, int height) {
416 _width = width;
417Index: engines/scumm/module.mk
418===================================================================
419--- engines/scumm/module.mk (revision 27676)
420+++ engines/scumm/module.mk (working copy)
421@@ -82,8 +82,14 @@
422 smush/saud_channel.o \
423 smush/smush_mixer.o \
424 smush/smush_font.o
425+
426+ifdef USE_ARM_SMUSH
427+MODULE_OBJS += \
428+ smush/codec47ARM.o
429 endif
430
431+endif
432+
433 ifndef DISABLE_HE
434 MODULE_OBJS += \
435 he/animation_he.o \