Ticket #9090: ARMscaler.s

File ARMscaler.s, 6.1 KB (added by SF/lubomyr_ua, 15 years ago)
@ ScummVM Scumm Interpreter
@ Copyright (C) 2007 The ScummVM project
@
@ This program is free software; you can redistribute it and/or
@ modify it under the terms of the GNU General Public License
@ as published by the Free Software Foundation; either version 2
@ of the License, or (at your option) any later version.
@
@ This program is distributed in the hope that it will be useful,
@ but WITHOUT ANY WARRANTY; without even the implied warranty of
@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
@ GNU General Public License for more details.
@
@ You should have received a copy of the GNU General Public License
@ along with this program; if not, write to the Free Software
@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
@
@ $URL: https://scummvm.svn.sourceforge.net/svnroot/scummvm/scummvm/trunk/backends/platform/wince/ARMscaler.s $
@ $Id: ARMscaler.s 29306 2007-10-28 18:46:48Z knakos $
@
@ @author Robin Watts (robin@wss.co.uk)

	.text

	.global	PocketPCHalfARM

@ ARM implementation of the PocketPCHalf scaler.
@ Scales a width x height block of 16bpp pixels from srcPtr to
@ dstPtr. srcPitch and dstPitch identify how to reach subsequent
@ lines. redblueMask and round allow one routine to handle both
@ 565 and 555 formats.
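@
@ For reference, a rough C sketch of how one output pixel is formed
@ (an illustration of the technique, not the shipped C scaler; A, B,
@ C, D are the four 16bpp source pixels of a 2x2 block):
@
@   uint32 a = (A | (A << 16)) & redblueMask;  /* 0 | g | 0 | b | 0 | r */
@   uint32 b = (B | (B << 16)) & redblueMask;
@   uint32 c = (C | (C << 16)) & redblueMask;
@   uint32 d = (D | (D << 16)) & redblueMask;
@   uint32 sum = ((a + b + c + d + round) >> 2) & redblueMask;
@   uint16 out = (uint16)(sum | (sum >> 16)); /* repack to 16bpp */
@
@ Widening each pixel to (p | p << 16) and masking leaves every colour
@ channel in its own field with spare headroom, so four pixels can be
@ summed without one channel carrying into the next. For 565 data,
@ redblueMask would be 0x07E0F81F; round carries the value 2 in each
@ channel position so that the >>2 rounds to nearest.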
PocketPCHalfARM:
	@ r0 = srcPtr
	@ r1 = srcPitch
	@ r2 = dstPtr
	@ r3 = dstPitch
	MOV	r12,r13
	STMFD	r13!,{r4-r11,r14}
	LDMIA	r12,{r4-r7}
	@ r4 = width
	@ r5 = height
	@ r6 = redblueMask
	@ r7 = round
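	@ Only the first four arguments arrive in r0-r3 (per the ATPCS);
	@ the LDMIA above fetches the remaining four from the caller's
	@ stack through r12, which saved the incoming stack pointer.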

	SUB	r3,r3,r4		@ dstPitch -= width
	SUBS	r5,r5,#2		@ while ((height -= 2) >= 0)
	BLT	end
height_loop:

	SUBS	r11,r4,#8		@ r11 = width_minus_8
	BLT	thin

width_loop:
	@ unroll loop 4 times here
	LDRH	r8,[r0],r1		@ r8 = A = srcPtr[0]
	LDRH	r9,[r0],#2		@ r9 = C = srcPtr[srcPitch]
	LDRH	r12,[r0],-r1		@ r12= D = srcPtr[srcPitch+2]
	LDRH	r14,[r0],#2		@ r14= B = srcPtr[2]

	ORR	r8, r8, r8, LSL #16	@ r8 = b | g | r | b | g | r
	ORR	r9, r9, r9, LSL #16	@ r9 = b | g | r | b | g | r
	ORR	r12,r12,r12,LSL #16	@ r12= b | g | r | b | g | r
	ORR	r14,r14,r14,LSL #16	@ r14= b | g | r | b | g | r
	AND	r8, r8, r6		@ r8 = 0 | g | 0 | b | 0 | r
	AND	r9, r9, r6		@ r9 = 0 | g | 0 | b | 0 | r
	AND	r12,r12,r6		@ r12= 0 | g | 0 | b | 0 | r
	AND	r14,r14,r6		@ r14= 0 | g | 0 | b | 0 | r
	ADD	r8, r8, r9
	ADD	r8, r8, r12
	ADD	r8, r8, r14
	ADD	r8, r8, r7		@ r8 = summed pixels + rounding
	AND	r8, r6, r8, LSR #2	@ r8 = 0 | g | 0 | b | 0 | r
	ORR	r10,r8, r8, LSR #16	@ r10= 0 | g | 0 | b | g | r
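	@ The four post-indexed LDRHs above walk the 2x2 source block
	@ (reading srcPtr[0], srcPtr[srcPitch], srcPtr[srcPitch+2],
	@ srcPtr[2]), leaving r0 a net 4 bytes (2 source pixels) on,
	@ ready for the next output pixel.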

	LDRH	r8,[r0],r1		@ r8 = A = srcPtr[0]
	LDRH	r9,[r0],#2		@ r9 = C = srcPtr[srcPitch]
	LDRH	r12,[r0],-r1		@ r12= D = srcPtr[srcPitch+2]
	LDRH	r14,[r0],#2		@ r14= B = srcPtr[2]

	STRH	r10,[r2],#2		@ *dstPtr++
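	@ Note: the four loads above start the next output pixel before the
	@ previous result is stored, helping to hide load-use latency.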

	ORR	r8, r8, r8, LSL #16	@ r8 = b | g | r | b | g | r
	ORR	r9, r9, r9, LSL #16	@ r9 = b | g | r | b | g | r
	ORR	r12,r12,r12,LSL #16	@ r12= b | g | r | b | g | r
	ORR	r14,r14,r14,LSL #16	@ r14= b | g | r | b | g | r
	AND	r8, r8, r6		@ r8 = 0 | g | 0 | b | 0 | r
	AND	r9, r9, r6		@ r9 = 0 | g | 0 | b | 0 | r
	AND	r12,r12,r6		@ r12= 0 | g | 0 | b | 0 | r
	AND	r14,r14,r6		@ r14= 0 | g | 0 | b | 0 | r
	ADD	r8, r8, r9
	ADD	r8, r8, r12
	ADD	r8, r8, r14
	ADD	r8, r8, r7		@ r8 = summed pixels + rounding
	AND	r8, r6, r8, LSR #2	@ r8 = 0 | g | 0 | b | 0 | r
	ORR	r10,r8, r8, LSR #16	@ r10= 0 | g | 0 | b | g | r

	LDRH	r8,[r0],r1		@ r8 = A = srcPtr[0]
	LDRH	r9,[r0],#2		@ r9 = C = srcPtr[srcPitch]
	LDRH	r12,[r0],-r1		@ r12= D = srcPtr[srcPitch+2]
	LDRH	r14,[r0],#2		@ r14= B = srcPtr[2]

	STRH	r10,[r2],#2		@ *dstPtr++

	ORR	r8, r8, r8, LSL #16	@ r8 = b | g | r | b | g | r
	ORR	r9, r9, r9, LSL #16	@ r9 = b | g | r | b | g | r
	ORR	r12,r12,r12,LSL #16	@ r12= b | g | r | b | g | r
	ORR	r14,r14,r14,LSL #16	@ r14= b | g | r | b | g | r
	AND	r8, r8, r6		@ r8 = 0 | g | 0 | b | 0 | r
	AND	r9, r9, r6		@ r9 = 0 | g | 0 | b | 0 | r
	AND	r12,r12,r6		@ r12= 0 | g | 0 | b | 0 | r
	AND	r14,r14,r6		@ r14= 0 | g | 0 | b | 0 | r
	ADD	r8, r8, r9
	ADD	r8, r8, r12
	ADD	r8, r8, r14
	ADD	r8, r8, r7		@ r8 = summed pixels + rounding
	AND	r8, r6, r8, LSR #2	@ r8 = 0 | g | 0 | b | 0 | r
	ORR	r10,r8, r8, LSR #16	@ r10= 0 | g | 0 | b | g | r

	LDRH	r8,[r0],r1		@ r8 = A = srcPtr[0]
	LDRH	r9,[r0],#2		@ r9 = C = srcPtr[srcPitch]
	LDRH	r12,[r0],-r1		@ r12= D = srcPtr[srcPitch+2]
	LDRH	r14,[r0],#2		@ r14= B = srcPtr[2]

	STRH	r10,[r2],#2		@ *dstPtr++

	ORR	r8, r8, r8, LSL #16	@ r8 = b | g | r | b | g | r
	ORR	r9, r9, r9, LSL #16	@ r9 = b | g | r | b | g | r
	ORR	r12,r12,r12,LSL #16	@ r12= b | g | r | b | g | r
	ORR	r14,r14,r14,LSL #16	@ r14= b | g | r | b | g | r
	AND	r8, r8, r6		@ r8 = 0 | g | 0 | b | 0 | r
	AND	r9, r9, r6		@ r9 = 0 | g | 0 | b | 0 | r
	AND	r12,r12,r6		@ r12= 0 | g | 0 | b | 0 | r
	AND	r14,r14,r6		@ r14= 0 | g | 0 | b | 0 | r
	ADD	r8, r8, r9
	ADD	r8, r8, r12
	ADD	r8, r8, r14
	ADD	r8, r8, r7		@ r8 = summed pixels + rounding
	AND	r8, r6, r8, LSR #2	@ r8 = 0 | g | 0 | b | 0 | r
	ORR	r10,r8, r8, LSR #16	@ r10= 0 | g | 0 | b | g | r

	STRH	r10,[r2],#2		@ *dstPtr++

	SUBS	r11,r11,#8		@ width_minus_8 -= 8
	BGE	width_loop		@ (width_minus_8 >= 0) => do 8+ more

thin:
	ADDS	r11,r11,#8		@ r11 = remaining width (source pixels)
	BEQ	width_end		@ if no more left to do, then bail
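	@ The unrolled loop handles 8 source pixels (4 output pixels) per
	@ pass; this tail loop finishes any remaining width, consuming two
	@ source pixels (one output pixel) per iteration.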
thin_lp:
	@ single output pixels done in this bit
	LDRH	r8,[r0],r1		@ r8 = A = srcPtr[0]
	LDRH	r9,[r0],#2		@ r9 = C = srcPtr[srcPitch]
	LDRH	r12,[r0],-r1		@ r12= D = srcPtr[srcPitch+2]
	LDRH	r14,[r0],#2		@ r14= B = srcPtr[2]

	ORR	r8, r8, r8, LSL #16	@ r8 = b | g | r | b | g | r
	ORR	r9, r9, r9, LSL #16	@ r9 = b | g | r | b | g | r
	ORR	r12,r12,r12,LSL #16	@ r12= b | g | r | b | g | r
	ORR	r14,r14,r14,LSL #16	@ r14= b | g | r | b | g | r
	AND	r8, r8, r6		@ r8 = 0 | g | 0 | b | 0 | r
	AND	r9, r9, r6		@ r9 = 0 | g | 0 | b | 0 | r
	AND	r12,r12,r6		@ r12= 0 | g | 0 | b | 0 | r
	AND	r14,r14,r6		@ r14= 0 | g | 0 | b | 0 | r
	ADD	r8, r8, r9
	ADD	r8, r8, r12
	ADD	r8, r8, r14
	ADD	r8, r8, r7		@ r8 = summed pixels + rounding
	AND	r8, r6, r8, LSR #2	@ r8 = 0 | g | 0 | b | 0 | r
	ORR	r8, r8, r8, LSR #16	@ r8 = 0 | g | 0 | b | g | r

	STRH	r8,[r2],#2		@ *dstPtr++

	SUBS	r11,r11,#2		@ each output pixel consumes 2 source pixels
	BGT	thin_lp
width_end:
	ADD	r2,r2,r3		@ dstPtr += dstPitch
	ADD	r0,r0,r1,LSL #1		@ srcPtr += 2*srcPitch
	SUB	r0,r0,r4,LSL #1		@ srcPtr -= 2*width
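	@ dstPtr has already advanced by width bytes across the row, so
	@ adding the adjusted r3 (dstPitch - width) lands on the next
	@ output row; srcPtr is stepped to the start of the next source
	@ row pair.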

	SUBS	r5,r5,#2		@ while ((height -= 2) >= 0)
	BGE	height_loop

end:
	LDMFD	r13!,{r4-r11,PC}
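
@ A possible C-side declaration and 565 call site (a sketch assuming
@ round simply places the value 2 at each channel offset, 0, 11 and
@ 21, of the split-channel word; the real WinCE backend caller and
@ its exact constants may differ):
@
@   extern "C" void PocketPCHalfARM(const void *srcPtr, uint32 srcPitch,
@                                   void *dstPtr, uint32 dstPitch,
@                                   int width, int height,
@                                   uint32 redblueMask, uint32 round);
@
@   PocketPCHalfARM(src, srcPitch, dst, dstPitch, w, h,
@                   0x07E0F81F,   /* 565: green split into the top half */
@                   0x00401002);  /* 2 at channel offsets 0, 11, 21 */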