//*******************************//
// emx support functions for gcc //
//*******************************//

#include <emx/asm386.h>

		.text

#ifdef L_alloca
		.globl __alloca

// __alloca -- carve EAX bytes out of the stack of the caller and read one
// byte in every 4 KB step of the newly exposed area, walking downwards.
//
// IN:	EAX = stack space to allocate (rounded to 4 boundary by GCC)
// OUT:	ESP adjusted down by EAX, stack probed
// NOTE: Never call this from C!
//	 The routine does not return with ret: it reloads the return
//	 address into EAX and jumps through it, with ESP already set to
//	 its final value.
// CHG:	EAX (ECX is used as a work register but is restored)

		ALIGN
__alloca:
		pushl	%ecx			/* save work registers */
		movl	%esp,%ecx		/* ECX -> saved ECX; 4(ECX) = return address */
		negl	%eax			/* EAX = -size */
		leal	4+4(%esp,%eax),%esp	/* ESP = (ESP on entry) + 4 - size: skip the return address slot, then allocate */
		leal	4+4(%ecx),%eax		/* EAX = ESP of the caller before the call = start address for probing */
		ALIGN
/* Probe loop: presumably needed so that stack guard pages are hit one */
/* after another instead of being jumped over -- TODO confirm against  */
/* the emx runtime.                                                    */
L1:		subl	$0x1000,%eax		/* step down */
		cmpl	%esp,%eax
		jb	L2			/* stop once EAX is below the new ESP (unsigned) */
		testb	%al,(%eax)		/* probe stack */
		jmp	L1
/* The saved ECX and the return address now lie above the new ESP when */
/* size > 0, so they are fetched through ECX rather than popped.       */
L2:		movl	4(%ecx),%eax		/* return address */
		movl	(%ecx),%ecx		/* pop ECX */
		EPILOGUE_NO_RET(_alloca)	/* macro from emx/asm386.h; its expansion is not visible here */
		jmp	*%eax			/* return */

#endif // L_alloca

//------------------------------------------------------------------------------
// division of 64-bit numbers
//
// Original idea by Eberhard Mattes:
//
// There's a better solution: set x':=x, y':=y. Shift right both x' and
// y' until x' < 2^32. Then divide x' by y'. The result can be off by -1
// due to the approximation. This can be checked by multiplying the
// result by y and comparing to x-y. This verification step is not
// required for x < 2^32.
//
// later (A.Z.) -
//
// There is an even better solution: since the i386+ can divide a 64-bit
// integer by a 32-bit integer, we should shift both operands right until
// the divisor (i.e. y' in the terms above) is < 2^32. We can do this in
// one step by using the i386+ bit-scan instruction (bsr). If the divisor
// is initially less than 2^32, we first check for overflow (i.e. whether
// the result would be greater than or equal to 2^32); if it would overflow
// we proceed by the well-known scheme (described below), otherwise we
// simply divide.
//
// even more later (A.Z.) -
//
// hmm... after looking into libgcc2.c I see that it uses absolutely the
// same idea :-) anyway, this asm implementation is shorter and faster
//------------------------------------------------------------------------------
// Here is how we`ll do 64-by-32 bit division (Eberhard Mattes):
//
// Divide 64-bit number by 32-bit number
//
// 2^32 a + b	      a    2^32 (a mod c) + b
// ---------- = 2^32 --- + ------------------	(integer division)
//	c	      c 	  c
//
// (2^32 a + b) mod c = (2^32 (a mod c) + b) mod c
//
// Proof:
//
// Divide 2^32 a + b by c by successive subtraction (k steps) of 2^32 c.
//
// 2^32 a + b		 2^32 a + b - 2^32 k c		  2^32 (a-kc) + b
// ---------- = 2^32 k + --------------------- = 2^32 k + ---------------
//     c			    c				 c
//
// For k = [a/c], the inequality 0 <= a-kc < c is true. Then, a-kc = a mod c.
// For this k, the final division cannot overflow.
//------------------------------------------------------------------------------

#ifdef L_udivdi3

// UDItype __udivdi3 (UDItype n, UDItype d)
//
// Unsigned 64-bit division, arguments on the stack.
//
// IN:	n = first 64-bit stack argument, d = second 64-bit stack argument
// OUT:	EDX:EAX = n / d (truncated)
// CHG:	EAX, ECX, EDX, flags (EBX, ESI and EDI are saved and restored)
// NOTE: d = 0 is not checked for; it ends up in divl with a zero divisor.
//
// Three cases, selected by the position of the top set bit of hi(d):
//   hi(d) = 0		one or two 64-by-32 divl steps
//   bit 63 of d set	the quotient can only be 0 or 1
//   otherwise		shift n and d right until d fits in 32 bits,
//			divide, then correct the estimate by at most 1

		.globl	___udivdi3

#define n 8+4(%esp)
#define d 8+12(%esp)

		ALIGN
___udivdi3:
		pushl	%ebx
		pushl	%esi
		movl	n, %eax
		movl	4+n, %edx		// edx:eax = n
		movl	d, %ebx
		movl	4+d, %esi		// esi:ebx = d
		bsrl	%esi, %ecx		// ecx = index of top set bit of hi(d)
		jnz	Lshiftcl		// ZF clear: hi(d) is not zero

// We are here when d is less than 2^32 (so esi = 0)
		cmpl	%ebx, %edx		// edx < ebx?
		jb	Ldiv			// Yes, do a straight div

// If we are here, the result of division is a 64-bit integer
// A 64-bit integer can result ONLY when (n >> 32) >= d
		movl	%edx, %eax
		xorl	%edx, %edx
		divl	%ebx			// eax = hi(n) / d, edx = hi(n) mod d
		movl	%eax, %esi		// high-order 32 bits of result
		movl	n, %eax 		// remainder of prev. div. in %edx
Ldiv:		divl	%ebx
		movl	%esi, %edx		// edx:eax = result
		popl	%esi
		popl	%ebx
		ret

Lshiftcl:
// If we are here, d is bigger or equal than 2^32
// When cl is 31 (bit 63 of d set), we have result = n >= d ? 1 : 0
		cmpb	$0x1f, %cl
		jb	Lless31
// Compare n with d BEFORE the result registers are loaded: the 64-bit
// subtraction leaves CF = 1 exactly when n < d.
		subl	%ebx, %eax
		sbbl	%esi, %edx		// CF = (n < d)
		movl	$1, %eax		// mov does not touch CF
		movl	$0, %edx		// hi(result) = 0; xorl would destroy CF
		sbbl	$0, %eax		// eax = 1 - CF
		popl	%esi
		popl	%ebx
		ret

Lless31:
		pushl	%edi

// Now we have to shift both n and d right cl times (cl = 1..31)
		incb	%cl
		xorl	%edi, %edi
		shrdl	%cl, %ebx, %edi 	// edi = discarded bits from d
		shrdl	%cl, %esi, %ebx 	// now esi is free
// Dirty trick: we should clear esi now, but rely on fact that esi >> cl = 0
		shrdl	%cl, %eax, %esi 	// esi = discarded bits from n
		shrdl	%cl, %edx, %eax 	// shift edx:eax right
		shrl	%cl, %edx

// Now ebx contains d >> cl (top bit set), and edx:eax contains n >> cl
		divl	%ebx

// The estimate in eax is either exact or one too big. It is too big
// exactly when result*edi is STRICTLY bigger than remainder:esi
// (both sides carry the same scale factor); equality means that the
// estimate divides n exactly and must be kept.
		movl	%eax, %ebx		// Duplicate result into ebx
		movl	%edx, %ecx		// Duplicate remainder into ecx
		mull	%edi			// edx:eax = result * discarded bits of d
		cmpl	%ecx, %edx		// compare the high words first
		ja	Ldecres
		jb	Lresok
		cmpl	%esi, %eax		// high words equal: low words decide
		jbe	Lresok

Ldecres:	decl	%ebx
Lresok: 	xorl	%edx, %edx		// edx:eax = result
		movl	%ebx, %eax
		popl	%edi
		popl	%esi
		popl	%ebx
		ret

#endif // L_udivdi3

#ifdef L_umoddi3

// UDItype __umoddi3 (UDItype n, UDItype d)
//
// Unsigned 64-bit remainder, arguments on the stack.
//
// IN:	n = first 64-bit stack argument, d = second 64-bit stack argument
// OUT:	EDX:EAX = n mod d
// CHG:	EAX, ECX, EDX, flags (EBX, ESI and EDI are saved and restored)
// NOTE: d = 0 is not checked for; it ends up in divl with a zero divisor.
//
// Three cases, selected by the position of the top set bit of hi(d):
//   hi(d) = 0		one or two 64-by-32 divl steps, remainder < 2^32
//   bit 63 of d set	the remainder is n - d if n >= d, else n
//   otherwise		shift n and d right until d fits in 32 bits,
//			divide, rebuild the remainder from the discarded
//			bits and shift it back into place

		.globl	___umoddi3

#define n 8+4(%esp)
#define d 8+12(%esp)

		ALIGN
___umoddi3:
		pushl	%ebx
		pushl	%esi
		movl	n, %eax
		movl	4+n, %edx		// edx:eax = n
		movl	d, %ebx
		movl	4+d, %esi		// esi:ebx = d
		bsrl	%esi, %ecx		// ecx = index of top set bit of hi(d)
		jnz	Lshiftcl		// ZF clear: hi(d) is not zero

// We are here when d is less than 2^32
		cmpl	%ebx, %edx		// edx < ebx?
		jb	Ldiv			// Yes, do a straight div

// If we are here, the quotient would not fit into 32 bits, so reduce
// the high word first: this happens ONLY when (n >> 32) >= d
		movl	%edx, %eax
		xorl	%edx, %edx
		divl	%ebx			// quotient discarded, edx = hi(n) mod d
		movl	n, %eax 	// remainder of prev. div. in %edx
Ldiv:		divl	%ebx
		movl	%edx, %eax	// lo(remainder) = edx
		xorl	%edx, %edx	// hi(remainder) = 0
		popl	%esi
		popl	%ebx
		ret

Lshiftcl:
// If we are here, d is bigger or equal than 2^32
// When cl is 31, we have result = n >= d ? n - d : n
		cmpb	$0x1f,%cl
                jb	Lless31
                subl	%ebx, %eax
                sbbl	%esi, %edx		// edx:eax = n - d, CF = (n < d)
                jnc	Lexit			// no borrow: n - d is the answer
		movl	n, %eax
		movl	4+n, %edx		// edx:eax = n
Lexit:  	popl	%esi
		popl	%ebx
		ret

Lless31:
		pushl   %edi
		pushl	%ecx			// keep the bsr result for the final shift
// From here on the n and d macros no longer match the stack layout
// (two more words were pushed); they are not used again below.

// Now we have to shift both n and d right cl times (cl = 1..31)
		incb	%cl
		xorl	%edi, %edi
		shrdl	%cl, %ebx, %edi // edi = discarded bits from d
		shrdl	%cl, %esi, %ebx	// now esi is free
// Dirty trick: we should clear esi now, but rely on fact that esi >> cl = 0
		shrdl	%cl, %eax, %esi	// esi = discarded bits from n
		shrdl	%cl, %edx, %eax	// shift edx:eax right
		shrl	%cl, %edx

// Now ebx contains d >> cl, and edx:eax contains n >> cl
		divl	%ebx

// Now compute remainder as (ecx:esi - result*edi) >> (32 - cl),
// where ecx is the remainder of the divl above
		movl    %edx, %ecx	// Duplicate remainder into ecx
		mull	%edi		// edx:eax = result * discarded bits of d

		subl	%esi, %eax
		sbbl	%ecx, %edx	// Compute result*edi - ecx:esi
                jb	Lnegok		// remainder is bigger than result*edi
                jnz	Lnotz		// high word not zero: difference > 0

                testl	%eax, %eax
                jz	Lnormalize	// modulo = 0

// The difference is positive, i.e. the quotient estimate was one too
// big. ebx:edi holds d at the same scale as the difference.
Lnotz:          subl	%edi, %eax	// Adjust modulo as if result
		sbbl	%ebx, %edx	// was decremented by one

// edx:eax holds minus the scaled remainder: negate the 64-bit value
Lnegok:		notl	%edx
		negl	%eax
		sbbl	$-1, %edx

// Undo the scaling: shift right by 31 - bsr result = 32 - cl
Lnormalize:	popl	%ecx
		xorb	$0x1f, %cl
		shrdl	%cl, %edx, %eax
		shrl	%cl, %edx

		popl    %edi
		popl	%esi
		popl	%ebx
		ret

#endif // L_umoddi3

//**********************************************************//
//							    //
// Signed division/modulo uses unsigned versions of div/mod //
//							    //
//**********************************************************//

#ifdef L_divdi3

// DItype __divdi3 (DItype n, DItype d)
//
// Signed 64-bit division built on top of ___udivdi3.
//
//   n >= 0 and d >= 0	tail-jump to ___udivdi3 with the arguments as is
//   n <  0 and d <  0	negate both arguments where they lie on the
//			stack, then tail-jump to ___udivdi3
//   signs differ	divide the magnitudes through a real call and
//			negate the quotient
//
// OUT:	EDX:EAX = n / d
// CHG:	EAX, ECX, EDX, flags; the argument slots are overwritten when
//	both arguments are negative

		.globl	___divdi3

#define n 4+4(%esp)
#define d 4+12(%esp)

		ALIGN
___divdi3:
		pushl	%esi

		movl	4+d, %esi		// esi = hi(d)
		movl	4+n, %edx		// edx = hi(n)
		movl	%esi, %eax
		xorl	%edx, %eax		// SF set when the signs differ
		js	Lmixed_signs

		testl	%edx, %edx
		jns	Lunsigned_tail		// both are non-negative

// Both are negative: -n / -d = n / d, so flip them in place
		notl	4+d
		negl	d
		sbbl	$-1, 4+d

		notl	4+n
		negl	n
		sbbl	$-1, 4+n

// Stack is exactly as on entry, so ___udivdi3 returns to our caller
Lunsigned_tail:	popl	%esi
		jmp	___udivdi3

Lmixed_signs:	movl	d, %ecx			// esi:ecx = d
		movl	n, %eax			// edx:eax = n

// Exactly one operand is negative here; make that one positive
		testl	%edx, %edx
		js	Lflip_n

		notl	%esi			// n >= 0, hence d < 0
		negl	%ecx
		sbbl	$-1, %esi
		jmp	Ldo_call

Lflip_n:	notl	%edx			// n < 0, hence d >= 0
		negl	%eax
		sbbl	$-1, %edx

Ldo_call:	pushl	%esi			// second argument: magnitude of d
		pushl	%ecx
		pushl	%edx			// first argument: magnitude of n
		pushl	%eax
		call	___udivdi3
		addl	$2*8, %esp		// drop both 64-bit arguments

		notl	%edx			// quotient of mixed signs is negative
		negl	%eax
		sbbl	$-1, %edx

		popl	%esi
		ret
#endif // L_divdi3

#ifdef L_moddi3

// DItype __moddi3 (DItype n, DItype d)
//
// Signed 64-bit remainder built on top of ___umoddi3. The result takes
// the sign of n; the sign of d only matters in that its magnitude is used.
//
//   n >= 0	make d non-negative on the stack if needed, then tail-jump
//		to ___umoddi3
//   n <  0	take the remainder of the magnitudes through a real call
//		and negate it
//
// OUT:	EDX:EAX = n mod d
// CHG:	EAX, ECX, EDX, flags; the slot of d is overwritten when n >= 0
//	and d < 0

		.globl  ___moddi3

#define n 4+4(%esp)
#define d 4+12(%esp)

		ALIGN
___moddi3:
		pushl	%esi

		movl	4+n, %edx		// edx = hi(n)
		movl	4+d, %esi		// esi = hi(d)
		testl	%edx, %edx
		js	Lneg_dividend

		testl	%esi, %esi
		jns	Lunsigned_tail		// n >= 0 and d >= 0

		notl	4+d			// n >= 0, d < 0: flip d in place
		negl	d
		sbbl	$-1, 4+d

// Stack is exactly as on entry, so ___umoddi3 returns to our caller
Lunsigned_tail:	popl	%esi
		jmp	___umoddi3

Lneg_dividend:	movl	n, %eax			// edx:eax = n, known to be negative
		movl	d, %ecx			// esi:ecx = d

		notl	%edx			// edx:eax = magnitude of n
		negl	%eax
		sbbl	$-1, %edx

		testl	%esi, %esi
		jns	Labs_d_done
		notl	%esi			// esi:ecx = magnitude of d
		negl	%ecx
		sbbl	$-1, %esi
Labs_d_done:
		pushl	%esi			// second argument: magnitude of d
		pushl	%ecx
		pushl	%edx			// first argument: magnitude of n
		pushl	%eax
		call	___umoddi3
		addl	$2*8, %esp		// drop both 64-bit arguments

		notl	%edx			// remainder follows the sign of n
		negl	%eax
		sbbl	$-1, %edx

		popl	%esi
		ret

#endif // L_moddi3
