/* -----------------------------------------------------------------------
   unix64.S - Copyright (c) 2002 Bo Thorsen <bo@suse.de>

   x86-64 Foreign Function Interface

   Permission is hereby granted, free of charge, to any person obtaining
   a copy of this software and associated documentation files (the
   ``Software''), to deal in the Software without restriction, including
   without limitation the rights to use, copy, modify, merge, publish,
   distribute, sublicense, and/or sell copies of the Software, and to
   permit persons to whom the Software is furnished to do so, subject to
   the following conditions:

   The above copyright notice and this permission notice shall be included
   in all copies or substantial portions of the Software.

   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS
   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
   IN NO EVENT SHALL CYGNUS SOLUTIONS BE LIABLE FOR ANY CLAIM, DAMAGES OR
   OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
   ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
   OTHER DEALINGS IN THE SOFTWARE.
   ----------------------------------------------------------------------- */
25 |
|
---|
26 | #ifdef __x86_64__
|
---|
27 | #define LIBFFI_ASM
|
---|
28 | #include <ffi.h>
|
---|
29 |
|
---|
/* Read-only, NUL-terminated message strings.  Neither label is
   referenced by the code visible in this file.  */
	.section	.rodata
.LC0:
	.asciz	"asm in progress %lld\n"
.LC1:
	.asciz	"asm in progress\n"
/* -----------------------------------------------------------------------
   ffi_call_UNIX64 -- marshal arguments via a helper, call FN, and hand
   the raw return registers to a second helper.

   ABI:  System V AMD64, AT&T syntax.
   In:   %rdi = address of ffi_prep_args
         %rsi = address of ffi_fill_return_value
         %rdx = ecif
         %rcx = cif->bytes (only the low 32 bits are used)
         %r8  = ecif.rvalue
         %r9  = fn, the function to call

   Frame (relative to %rbp): the six incoming arguments are saved at
   -8 .. -48.  Below that the routine reserves cif->bytes plus a 176-byte
   register image that ffi_prep_args fills in:

         0 ..  40   %rdi %rsi %rdx %rcx %r8 %r9   (8 bytes each)
        48 .. 160   %xmm0 .. %xmm7                (16 bytes each)
       176 ..       arguments passed on the stack

   After the call a 64-byte block is built for ffi_fill_return_value:

         0  %rax     8  %rdx     16  %xmm0     32  %xmm1     48  %st(0)
   ----------------------------------------------------------------------- */
	.text
	.align 2
	.globl ffi_call_UNIX64
	.type ffi_call_UNIX64,@function

ffi_call_UNIX64:
.LFB1:
	pushq	%rbp
.LCFI0:
	movq	%rsp, %rbp
.LCFI1:
	/* Save all arguments.  */
	subq	$48, %rsp
.LCFI2:
	movq	%rdi, -8(%rbp)		/* ffi_prep_args */
	movq	%rsi, -16(%rbp)		/* ffi_fill_return_value */
	movq	%rdx, -24(%rbp)		/* ecif */
	movq	%rcx, -32(%rbp)		/* cif->bytes */
	movq	%r8, -40(%rbp)		/* ecif.rvalue */
	movq	%r9, -48(%rbp)		/* fn */

	/* Make room for all of the new args and the register args.  The
	   32-bit add also clears the upper half of %rcx, so only the low
	   32 bits of cif->bytes take part in the subtraction.  */
	addl	$176, %ecx
.LCFI3:
	subq	%rcx, %rsp
	/* The movaps loads below and the call both need %rsp to be a
	   multiple of 16.  That already holds when cif->bytes is a multiple
	   of 16 (then this is a no-op); otherwise round down, which only
	   adds unused slack above the stack arguments.  */
	andq	$-16, %rsp
.LCFI4:
	/* Setup the call to ffi_prep_args.  */
	movq	%rdi, %rax		/* &ffi_prep_args */
	movq	%rsp, %rdi		/* stackLayout */
	movq	%rdx, %rsi		/* ecif */
	call	*%rax			/* ffi_prep_args(stackLayout, ecif); */

	/* ffi_prep_args has put all the register contents into the
	   stackLayout struct.  Now put the register values in place.  */
	movq	(%rsp), %rdi
	movq	8(%rsp), %rsi
	movq	16(%rsp), %rdx
	movq	24(%rsp), %rcx
	movq	32(%rsp), %r8
	movq	40(%rsp), %r9
	movaps	48(%rsp), %xmm0
	movaps	64(%rsp), %xmm1
	movaps	80(%rsp), %xmm2
	movaps	96(%rsp), %xmm3
	movaps	112(%rsp), %xmm4
	movaps	128(%rsp), %xmm5
	movaps	144(%rsp), %xmm6
	movaps	160(%rsp), %xmm7

	/* Remove space for stackLayout so stack arguments are placed
	   correctly for the call.  */
.LCFI5:
	addq	$176, %rsp
.LCFI6:
	/* A variadic callee reads %al as an upper bound on the number of
	   vector registers that carry arguments.  All eight were loaded
	   above, so 8 is always a valid bound; a non-variadic callee
	   ignores the value.  */
	movl	$8, %eax

	/* Call the user function.  */
	call	*-48(%rbp)

	/* Make stack space for the return_value struct.  */
	subq	$64, %rsp

	/* Fill in all potential return values to this struct.  */
	movq	%rax, (%rsp)
	movq	%rdx, 8(%rsp)
	movaps	%xmm0, 16(%rsp)
	movaps	%xmm1, 32(%rsp)
	/* NOTE(review): this pops %st(0) unconditionally, including when
	   the callee did not return a value on the x87 stack.  */
	fstpt	48(%rsp)

	/* Now call ffi_fill_return_value.  */
	movq	%rsp, %rdi		/* struct return_value */
	movq	-24(%rbp), %rsi		/* ecif */
	movq	-16(%rbp), %rax		/* &ffi_fill_return_value */
	call	*%rax			/* call it */

	/* And the work is done.  */
	leave
	ret
.LFE1:
.ffi_call_UNIX64_end:
	.size ffi_call_UNIX64,.ffi_call_UNIX64_end-ffi_call_UNIX64
114 |
|
---|
/* float2sse -- store the whole of %xmm0 (16 bytes) at the address in %rdi.

   ABI:   System V AMD64.
   In:    %xmm0 = value to store (a float in the low 32 bits, presumably;
                  C prototype assumed to be
                  void float2sse (float, __int128_t *) -- confirm
                  against the caller)
          %rdi  = destination; must be 16-byte aligned (movaps)
   Out:   nothing
   Clobb: none  */
	.text
	.align 2
	.globl float2sse
	.type float2sse,@function
float2sse:
	movaps	%xmm0, (%rdi)
	ret
	.size float2sse, .-float2sse
123 |
|
---|
/* floatfloat2sse -- copy 8 bytes (two packed floats) from the address in
   %rdi into a 16-byte slot at the address in %rsi.

   ABI:   System V AMD64.
   In:    %rdi = source, 8 bytes are read
          %rsi = destination; must be 16-byte aligned (movaps)
   Out:   nothing; the upper 8 bytes of the destination are written as
          zero because the 64-bit load clears the upper half of %xmm0
   Clobb: %xmm0  */
	.align 2
	.globl floatfloat2sse
	.type floatfloat2sse,@function
floatfloat2sse:
	movq	(%rdi), %xmm0		/* low 64 bits = the two floats */
	movaps	%xmm0, (%rsi)
	ret
	.size floatfloat2sse, .-floatfloat2sse
132 |
|
---|
/* double2sse -- store the whole of %xmm0 (16 bytes) at the address in %rdi.

   ABI:   System V AMD64.
   In:    %xmm0 = value to store (a double in the low 64 bits, presumably;
                  C prototype assumed to be
                  void double2sse (double, __int128_t *) -- confirm
                  against the caller)
          %rdi  = destination; must be 16-byte aligned (movaps)
   Out:   nothing
   Clobb: none  */
	.align 2
	.globl double2sse
	.type double2sse,@function
double2sse:
	movaps	%xmm0, (%rdi)
	ret
	.size double2sse, .-double2sse
140 |
|
---|
/* sse2float -- load 16 bytes from the address in %rdi into %xmm0.

   ABI:   System V AMD64.
   In:    %rdi  = source; must be 16-byte aligned (movaps)
   Out:   %xmm0 = the 16 bytes read; a C caller declaring the result as
                  float sees the low 32 bits (prototype assumed to be
                  float sse2float (__int128_t *) -- confirm against the
                  caller)
   Clobb: %xmm0  */
	.align 2
	.globl sse2float
	.type sse2float,@function
sse2float:
	movaps	(%rdi), %xmm0
	ret
	.size sse2float, .-sse2float
148 |
|
---|
/* sse2double -- load 16 bytes from the address in %rdi into %xmm0.

   ABI:   System V AMD64.
   In:    %rdi  = source; must be 16-byte aligned (movaps)
   Out:   %xmm0 = the 16 bytes read; a C caller declaring the result as
                  double sees the low 64 bits (prototype assumed to be
                  double sse2double (__int128_t *) -- confirm against
                  the caller)
   Clobb: %xmm0  */
	.align 2
	.globl sse2double
	.type sse2double,@function
sse2double:
	movaps	(%rdi), %xmm0
	ret
	.size sse2double, .-sse2double
156 |
|
---|
/* sse2floatfloat -- copy the low 8 bytes (two packed floats) of a 16-byte
   slot at the address in %rdi to the address in %rsi.

   ABI:   System V AMD64.
   In:    %rdi = source, 16 bytes are read; must be 16-byte aligned
                 (movaps)
          %rsi = destination, 8 bytes are written
   Out:   nothing
   Clobb: %xmm0  */
	.align 2
	.globl sse2floatfloat
	.type sse2floatfloat,@function
sse2floatfloat:
	movaps	(%rdi), %xmm0
	movq	%xmm0, (%rsi)		/* store only the low 64 bits */
	ret
	.size sse2floatfloat, .-sse2floatfloat
165 |
|
---|
/* -----------------------------------------------------------------------
   ffi_closure_UNIX64 -- closure entry point.  Spills every argument
   register, builds a va_list-shaped descriptor over the spill area, calls
   ffi_closure_UNIX64_inner, and then moves the result from a stack
   buffer into the return register selected by the type code that the
   inner function returns in %eax.

   ABI:  System V AMD64, AT&T syntax.
   In:   %r10                     forwarded unchanged as the first
                                  argument of ffi_closure_UNIX64_inner
                                  (presumably the closure pointer set up
                                  by the trampoline -- confirm against
                                  ffi_prep_closure)
         %rdi .. %r9, %xmm0-7     the caller's register arguments
         16(%rbp) and up          the caller's stack arguments

   Frame (relative to %rbp, 240 bytes):
      -128 ..  -16   %xmm0 .. %xmm7                (16 bytes each)
      -176 .. -136   %rdi %rsi %rdx %rcx %r8 %r9   (8 bytes each)
      -224           gp_offset          = 0
      -220           fp_offset          = 48
      -216           overflow_arg_area  = %rbp + 16
      -208           reg_save_area      = %rbp - 176
      -240           16-byte return buffer (== %rsp)
   ----------------------------------------------------------------------- */
	.align 2
	.globl ffi_closure_UNIX64
	.type ffi_closure_UNIX64,@function

ffi_closure_UNIX64:
.LFB2:
	pushq	%rbp
.LCFI10:
	movq	%rsp, %rbp
.LCFI11:
	subq	$240, %rsp
.LCFI12:
	/* Spill the six integer argument registers.  */
	movq	%rdi, -176(%rbp)
	movq	%rsi, -168(%rbp)
	movq	%rdx, -160(%rbp)
	movq	%rcx, -152(%rbp)
	movq	%r8, -144(%rbp)
	movq	%r9, -136(%rbp)
	/* FIXME: We can avoid all this stashing of XMM registers by
	   (in ffi_prep_closure) computing the number of
	   floating-point args and moving it into %rax before calling
	   this function.  Once this is done, uncomment the next few
	   lines and only the essential XMM registers will be written
	   to memory.  This is a significant saving.  */
/*	movzbl	%al, %eax  */
/*	movq	%rax, %rdx */
/*	leaq	0(,%rdx,4), %rax */
/*	leaq	2f(%rip), %rdx */
/*	subq	%rax, %rdx */
	leaq	-1(%rbp), %rax
/*	jmp	*%rdx */
	/* With %rax = %rbp - 1 these land at %rbp-16 .. %rbp-128.  */
	movaps	%xmm7, -15(%rax)
	movaps	%xmm6, -31(%rax)
	movaps	%xmm5, -47(%rax)
	movaps	%xmm4, -63(%rax)
	movaps	%xmm3, -79(%rax)
	movaps	%xmm2, -95(%rax)
	movaps	%xmm1, -111(%rax)
	movaps	%xmm0, -127(%rax)
2:
	/* Describe the spilled registers and the caller's stack arguments
	   in the four-field layout of the System V va_list.  */
	movl	$0, -224(%rbp)		/* gp_offset */
	movl	$48, -220(%rbp)		/* fp_offset: XMM area follows 6*8 bytes */
	leaq	16(%rbp), %rax		/* skip saved %rbp and return address */
	movq	%rax, -216(%rbp)	/* overflow_arg_area */
	leaq	-176(%rbp), %rdx
	movq	%rdx, -208(%rbp)	/* reg_save_area */

	/* ffi_closure_UNIX64_inner (%r10, &descriptor, return buffer)  */
	leaq	-224(%rbp), %rsi
	movq	%r10, %rdi
	movq	%rsp, %rdx		/* return buffer at %rbp-240 */
	call	ffi_closure_UNIX64_inner@PLT

	/* %eax holds the type code of the result; pick the register class
	   the ABI uses to return that type.  */
	cmpl	$FFI_TYPE_FLOAT, %eax
	je	.Lclosure_ret_sse
	cmpl	$FFI_TYPE_DOUBLE, %eax
	je	.Lclosure_ret_sse
	cmpl	$FFI_TYPE_LONGDOUBLE, %eax
	je	.Lclosure_ret_x87
	cmpl	$FFI_TYPE_STRUCT, %eax
	je	.Lclosure_ret_struct

	/* Everything else: first 8 bytes of the buffer in %rax.  */
	movq	-240(%rbp), %rax
	leave
	ret

.Lclosure_ret_sse:
	/* float and double are returned in %xmm0.  */
	movaps	-240(%rbp), %xmm0
	leave
	ret

.Lclosure_ret_x87:
	/* long double is returned in %st(0), not in an SSE register.
	   Assumes the buffer holds the value in x87 extended format, as
	   a C store through a long double pointer produces -- confirm
	   against ffi_closure_UNIX64_inner.  */
	fldt	-240(%rbp)
	leave
	ret

.Lclosure_ret_struct:
	/* No return register is loaded for structures.  */
	leave
	ret
.LFE2:
	.size ffi_closure_UNIX64, .-ffi_closure_UNIX64
239 |
|
---|
/* Hand-written call-frame information.  One CIE (.Lframe0) is shared by
   two FDEs: the first covers .LFB1 .. .LFE1 (ffi_call_UNIX64), the second
   covers .LFB2 .. .LFE2 (ffi_closure_UNIX64).  Each FDE describes only
   the "pushq %rbp; movq %rsp, %rbp" prologue.  */
	.section	.eh_frame,"a",@progbits
.Lframe0:
	.long	.LECIE1-.LSCIE1		/* CIE length */
.LSCIE1:
	.long	0			/* CIE identifier tag */
	.byte	1			/* CIE version */
	.asciz	"zR"			/* CIE augmentation */
	.uleb128 1			/* code alignment factor */
	.sleb128 -8			/* data alignment factor */
	.byte	16			/* return address column */
	.uleb128 1			/* augmentation data size */
	.byte	0x1b			/* FDE encoding: pcrel, sdata4 */
	.byte	0x0c			/* DW_CFA_def_cfa */
	.uleb128 7			/*   register 7 */
	.uleb128 8			/*   offset 8 */
	.byte	0x90			/* DW_CFA_offset, column 16 */
	.uleb128 1			/*   at cfa-8 */
	.p2align 3
.LECIE1:
.LSFDE1:
	.long	.LEFDE1-.LASFDE1	/* FDE length */
.LASFDE1:
	.long	.LASFDE1-.Lframe0	/* FDE CIE offset */

	.long	.LFB1-.			/* FDE initial location */
	.long	.LFE1-.LFB1		/* FDE address range */
	.uleb128 0			/* augmentation size */
	.byte	0x04			/* DW_CFA_advance_loc4 */
	.long	.LCFI0-.LFB1
	.byte	0x0e			/* DW_CFA_def_cfa_offset */
	.uleb128 16
	.byte	0x86			/* DW_CFA_offset: r6 at cfa-16 */
	.uleb128 2
	.byte	0x04			/* DW_CFA_advance_loc4 */
	.long	.LCFI1-.LCFI0
	.byte	0x86			/* DW_CFA_offset: r6 at cfa-16 */
	.uleb128 2
	.byte	0x0d			/* DW_CFA_def_cfa_reg: r6 */
	.uleb128 6
	.p2align 3
.LEFDE1:
.LSFDE3:
	.long	.LEFDE3-.LASFDE3	/* FDE length */
.LASFDE3:
	.long	.LASFDE3-.Lframe0	/* FDE CIE offset */

	.long	.LFB2-.			/* FDE initial location */
	.long	.LFE2-.LFB2		/* FDE address range */
	.uleb128 0			/* augmentation size */
	.byte	0x04			/* DW_CFA_advance_loc4 */
	.long	.LCFI10-.LFB2
	.byte	0x0e			/* DW_CFA_def_cfa_offset */
	.uleb128 16
	.byte	0x86			/* DW_CFA_offset, column 6 */
	.uleb128 2
	.byte	0x04			/* DW_CFA_advance_loc4 */
	.long	.LCFI11-.LCFI10
	.byte	0x0d			/* DW_CFA_def_cfa_register */
	.uleb128 6
	.p2align 3
.LEFDE3:
301 |
|
---|
302 | #endif /* __x86_64__ */
|
---|