/****************************************************************
*																*
*		Add on some more complex functions.  These are mostly used to find	*
*	different constants to be used in various expansions of trig and exp		*
*	functions.  Need to combine with polynomials of floats to create all terms	*
*	of a full expansion using Chebyshev polynomials and reducing to standard	*
*	polynomials.													*
*					Author = Mike Rosing							*
*					 date   = May 1, 2000							*
*																*
****************************************************************/

#include "bigfloat.h"
#include "multipoly.h"

void split( FLOAT *x, FLOAT *intprt, FLOAT *frac);

extern RAMDATA ram_block[];

MULTIPOLY	twoxcoef;		/*  2^x expansion series coefficients  */
MULTIPOLY	coscoef;		/*  cos(x)  expansion series coefficients  */
FLOAT		P2;			/*  PI/2  */
FLOAT		ln2;			/*  ln(2)	*/

/*  compute pi to 250 bits  or so.  
	Uses formula arcsin(1/2) = pi/6 = pi/2 - 1 - sum(
		1*3*5*...*(2k-1))/(2^3k (2k+1) k!)
*/
void calcpi( FLOAT *pi)
{
	FLOAT tn, constant;
	int i;
	
	null( pi);
	int_to_float( 1, &tn);
	tn.expnt = -2;
	int_to_float( 3, &constant);
	divide( &tn, &constant, &tn);
	add( pi, &tn, pi);
	for( i=2; i<123; i++)
	{
		int_to_float( 2*i-1, &constant);
		multiply(&constant, &constant, &constant);
		multiply( &constant, &tn, &tn);
		int_to_float( 2*i+1, &constant);
		divide( &tn, &constant, &tn);
		int_to_float( i, &constant);
		divide( &tn, &constant, &tn);
		tn.expnt -= 3;
		add( &tn, pi, pi);
	}
	int_to_float( 1, &constant);
	add( &constant, pi, pi);
	int_to_float( 3, &constant);
	multiply( &constant, pi, pi);
}

/*  Output of above routine is:
tn.expnt = -256
pi = 
E +3.14159265358979323846264338327950288419716939937\
	510582097494459230781640628585258
pi.expnt = 2
pi.mntsa.e[] = {
	 0x1d89cd8c,  0x105df53,  0x4533e63a,  0x94812704,  
	 0xc06e0e68,  0x62633145,  0x10b4611a,  0x6487ed51 
                     }
*/

/*  compute ln(2) to 256 bits.
	ln(2) = 2 sum( 1/( (2k+1) * 3^(2k+1) )
	which converges in about 70+ terms.
*/

void calcln2( FLOAT *ln2)
{
	int	k, epsilon, startxp;
	FLOAT	tk, constant, nine;
	
	null( ln2);
	int_to_float( 3, &constant);
	reciprical( &constant, &tk);	// gives me t0
	startxp = tk.expnt;
	multiply( &tk, &tk, &nine);	// additional 3^2k term
	epsilon = 1;
	k = 0;
	
	while( epsilon > -256)
	{
		add( &tk, ln2, ln2);
		int_to_float( 2*k+1, &constant);
		multiply( &constant, &tk, &tk);
		int_to_float( 2*k+3, &constant);
		divide( &tk, &constant, &tk);
		multiply( &nine, &tk, &tk);
		epsilon = tk.expnt - startxp;
		k++;
	}
	ln2->expnt++;		// final multiply by 2
}

/*  compute y = x^k
	where k is a signed integer in range +/- 2^31 and x, y are FLOAT.
	Copied from complex version.
	Returns 1 if ok, 0 if x = 0 and k<0
*/

int intpwr( FLOAT *x, int k, FLOAT *y)
{
	int signflag, n;
	FLOAT z, t;
	
/*  initialize Knuth's algorithm A pg 442 V2  */

	copy( x, &z);
	if( k<0)
	{
		signflag = -1;
		n = -k;
	}
	else
	{
		signflag = 0;
		n = k;
	}
	int_to_float( 1, &t);
	while( n)
	{
		if( n & 1) multiply( &t, &z, &t);
		multiply( &z, &z, &z);
		n >>= 1;
	}
	if( signflag) return reciprical( &t, y);
	copy( &t, y);
	return 1;
}

/*  This routine computes bessel functions for real arguments
	for nth order to accuracy of 256 bits.  Accuracy is easy to 
	change, assuming storage chages.  Purpose is coefficients
	for exp and trig functions approximated by chebyshev
	polynomials.
	Enter with type = +1 for In(x) and type = -1 for Jn(x),
		FLOAT pointing to x, integer n
	Returns with Float y = Jn(x) or In(x).
	uses |n|
*/

void bessel( int type, int n, FLOAT *x, FLOAT *y)
{
	FLOAT	z2, z4, constant, t1, sum;
	int		startxp, epsilon, j, k;
	
	if( n<0) n = -n;
	copy( x, &z2);
	z2.expnt--;		// divide input by 2
	multiply( &z2, &z2, &z4);		// z^2/4
	if( type < 0) negate( &z4);
	int_to_float( 1, &t1);
	j = n;			// compute 1/n!
	while( j>1)
	{
		int_to_float( j, &constant);
		multiply( &constant, &t1, &t1);
		j--;
	}
	reciprical( &t1, &sum);
	startxp = sum.expnt;
	copy( &sum, &t1);
	
/*  when next term exponent is 256 less than starting
	exponent, we have more bits of accuracy.
	j and k increment by 1 each time to create factorial
	terms 1/k! and 1/(n+j)!
*/

	j = n;
	k = 0;
	epsilon = 1;
	while( epsilon > -250)
	{
		j++;
		k++;
		int_to_float( j, &constant);
		divide( &t1, &constant, &t1);
		multiply( &z4, &t1, &t1);
		int_to_float( k, &constant);
		divide( &t1, &constant, &t1);
		add( &t1, &sum, &sum);
		epsilon = t1.expnt - startxp;
	}
	intpwr( &z2, n, &t1);
	multiply( &t1, &sum, y);
}

/* create table of chebyshev polynomials.
	Enter with maximum degree desired and array large
	enough to hold the MULTIPOLY data pointers.
	Returns array of pointers filled and maximum
	degree successfully created.
	
	T(n,x) = 2xT(n-1, x) - T(n-2, x)
*/

int gen_chebyshev( MULTIPOLY *chebary, int degree)
{
	int 		numgen;
	FLOAT	*fptr;
	MULTIPOLY twox, temp;
	
	if( degree < 0) return 0;
		
	twox.degree = 1;
	if( !get_space( &twox)) return 0;
	fptr = Address(twox);
	null( fptr);
	fptr++;
	int_to_float( 2, fptr);
	
	numgen = 0;
	chebary[0].degree = 0;
	if( !get_space( chebary))  goto chebdie;
	fptr = Address( chebary[0]);
	int_to_float( 1, fptr);
	numgen++;
	if (degree < 1) return 1;
	chebary[1].degree = 1;
	if( !get_space( &chebary[1])) goto chebdie;
	fptr = Address( chebary[1]);
	null( fptr);
	fptr++;
	int_to_float( 1, fptr);
	numgen++;
	
	while( numgen <= degree)
	{
		chebary[numgen].degree = numgen;
		if( !get_space( &chebary[numgen])) break;
		multi_mul( twox, chebary[numgen - 1], &temp);
		multi_sub( temp, chebary[numgen - 2], &chebary[numgen]);
		free_space( &temp);
		numgen++;
	}
chebdie:
	free_space( &twox);
	return numgen;
}

/*  compute coefficients of 2^x using Chebyshev polynomials.
	
	2^x  =  I(o, ln(2)) + sum{ 2*I(n, ln(2))*T(n, x)} n = 1... oo
	
	Enter with pointer to table of cheybshev polynomials, 
	maximum degree of approximation and pointer to where
	you want result.
	Returns with power series in x of above, and max degree
	actually computed.
*/

int calc_2x_coef( MULTIPOLY *cheb,  int maxdegree, 
				MULTIPOLY *twoxcoef)
{
	INDEX		i, j;
	FLOAT		ibesl, *iptr;
	MULTIPOLY	tnterm, sum;
	
	char		test[32];
	
	calcln2( &ln2);
	
	sum.degree = 0;
	if( !get_space( &sum))
	{
		printf(" no space left, calc_2x_coef \n");
		return 0;
	}
	iptr = Address(sum);
	bessel (+1, 0, &ln2, iptr);
	for( i=1; i<=maxdegree; i++)
	{
		bessel( +1, i, &ln2, &ibesl);
		ibesl.expnt++;
		multi_dup( cheb[i], &tnterm);
		for( j = (i&1); j<=i; j += 2)
		{
			iptr = Address( tnterm) + j;
			multiply( &ibesl, iptr, iptr);
		}
		if( !multi_add( tnterm, sum, &sum)) break;
		free_space( &tnterm);
	}
	if( i< maxdegree)  free_space( &tnterm);
	else i = maxdegree;
	multi_dup( sum, twoxcoef);
	free_space( &sum);
	return (i);
}

/*  compute coefficients of cos(x*PI/2) using Chebyshev polynomials.
	
	cos(x*P2)  =   J(0, P2) + 2*sum{(-1)^n*J(2n, P2)*T(2n+1, x)} n = 1... oo
	
	Enter with pointer to table of cheybshev polynomials, 
	maximum degree of approximation and pointer to where
	you want result.
	Returns with power series in x^2 of above, and max degree
	actually computed.  cos( x*PI/2) = sum{ a_j * (x^2)^j}.
	
	NOTE:  All formulas in all the books I found are WRONG.  None have
	the (-1)^n factor, but they act like it's there when constructing
	the polynomial.
*/

int calc_cos_coef( MULTIPOLY *cheb,  int maxdegree, 
				MULTIPOLY *coscoef)
{
	INDEX		i, j, k;
	FLOAT		jbesl, *jptr, *kptr;
	MULTIPOLY	tnterm, sum;
	
	char		test[32];

	calcpi( &P2);
	P2.expnt--;
	sum.degree = 0;
	if( !get_space( &sum))
	{
		printf(" no space left, calc_cos_coef \n");
		return 0;
	}
	jptr = Address(sum);
	bessel(-1, 0, &P2, jptr);
	for( i=1; i<=maxdegree/2; i++)
	{
		k = 2*i;
		bessel( -1, k, &P2, &jbesl);
		jbesl.expnt++;
		if( i&1) negate( &jbesl);
		multi_dup( cheb[k], &tnterm);
		jptr = Address( tnterm);
		for( j = 0; j<=k; j += 2)
		{
			multiply( &jbesl, jptr, jptr);
			jptr += 2;
		}
		if( !multi_add( tnterm, sum, &sum)) break;
		free_space( &tnterm);
	}
	if( i< maxdegree/2)  
	{
		free_space( &tnterm);
		i = 2*i;
	}
	else i = maxdegree;

/*  convert to polynomial in x^2  */

	j = sum.degree/2;
	coscoef->degree = j;
	if( !get_space(coscoef))
	{
		printf(" no space for cosine.\n");
		return 0;
	}
	for( k=0; k<=j; k++)
	{
		kptr = AddressOf( coscoef) + k;
		jptr = Address( sum) + 2*k;
		copy( jptr, kptr);
	}
	free_space( &sum);
	return (i);
}

/*  evaluate simple polynomial.
	Input:  MULTIPOLY coefficients, pointer to x, pointer to y
	Output:  y = F(x)
	y can equal x
*/

void polyeval( MULTIPOLY coef, FLOAT *x, FLOAT *y)
{
	INDEX 	i;
	FLOAT	sum, *cof;
	
	null( &sum);
	for( i=coef.degree; i>0; i--)
	{
		cof = Address( coef) + i;
		add( cof, &sum, &sum);
		multiply( x, &sum, &sum);
	}
	cof = Address( coef);
	add( cof, &sum, y);
}

/*  compute 2^x for x in the range -1 ... 1.
	Most inputs will be in range +/- .5 ... 1 but this routine could handle
	unnormalized inputs.
	Enter with pointer to input and storage for output
	Returns y = 2^x ( works in place, both pointers can be the same)
*/

void twoexp( FLOAT *x, FLOAT *y)
{
	polyeval( twoxcoef, x, y);
}

/*  compute cos( x) for x in range +/- PI/2
	As above, works in place.
	This is a *core* routine, no range checking!
*/

void corecos(FLOAT *x, FLOAT *y)
{
	FLOAT	x2;
	
	divide( x, &P2, &x2);
	multiply( &x2, &x2, &x2);
	polyeval( coscoef, &x2, y);
}

/*  convert a float to a long.  Overflow is max
	possible result.
*/

int	float_to_int( FLOAT *f)
{
	FLOAT	dummy, intprt;
	int		value;
	
	if( f->expnt < 1) return 0;
	if( f->expnt > 31)
	{
		if( f->mntsa.e[MS_MNTSA] & SIGN_BIT)
			return SIGN_BIT;
		return ~SIGN_BIT;
	}
	split( f, &intprt, &dummy);
	if( intprt.mntsa.e[MS_MNTSA] & SIGN_BIT)
	{
		negate( &intprt);
		value = -(intprt.mntsa.e[MS_MNTSA] >> ( 31 - intprt.expnt));
	}
	else	value = intprt.mntsa.e[MS_MNTSA] >> ( 31 - intprt.expnt);
	return value;
}
	
/*  compute e^x for any x.  |x| > 2^32/ln(2) will overflow
	and return max possible value and 0.
	Otherwise returns y = exp(x) and 1.
	works in place.
*/

int exp( FLOAT *x, FLOAT *y)
{
	FLOAT	z, xp;
	long		xpnt;
	INDEX	i;
	
/*  convert to base 2  */

	divide( x, &ln2, &z);
	
/*  check range is possible to do  */

	if (z.expnt > 32)
	{
		if( x->mntsa.e[MS_MNTSA] & SIGN_BIT)
			null(y);
		else
		{
			OPLOOP(i) y->mntsa.e[MS_MNTSA] = ~0;
			y->mntsa.e[MS_MNTSA] >>= 1;
			y->expnt = ~0 >> 1;
		}
		return 0;
	}
	
/*  we can perform operation, send z mods 2 to core */

	split(&z, &xp, &z);
	xpnt = float_to_int( &xp);
	twoexp( &z, y);
	
/*  next add xpnt to exponent of y  */

	y->expnt +=  xpnt;
	return 1;
}

/*  split a FLOAT into its integer and fractional parts  */

void split( FLOAT *x, FLOAT *intprt, FLOAT *frac)
{
	FLOAT	fracpart;
	INDEX	i, signflag;
	ELEMENT	mask, xpchk;

/*  if number < 1, return just a fraction  */
	
	if( x->expnt <= 0)
	{
		null( intprt);
		copy( x, frac);
		return;
	}

/*  zero out 31 bits of ms word, then one block at a time */

	copy( x, &fracpart);
	signflag = 0;
	if( fracpart.mntsa.e[MS_MNTSA] & SIGN_BIT)
	{
		negate( &fracpart);
		signflag = 1;
	}
	i = MS_MNTSA;
	xpchk = fracpart.expnt;
	if( xpchk > 31)
	{
		fracpart.mntsa.e[i] = 0;
		i--;
		xpchk -= 31;
	}
	while( (xpchk > 32) && ( i>0 ))
	{
		fracpart.mntsa.e[i] = 0;
		i--;
		xpchk -= 32;
	}

/*  next zero out the remaining integer bits in a left over word  */

	if( i != MS_MNTSA) mask = ~0UL >> xpchk;
	else 	mask = ( ~0UL >> (xpchk+1));
	fracpart.mntsa.e[i] &= mask;
	if( signflag) negate( &fracpart);
	normal( &fracpart);
	subtract( x, &fracpart, intprt);
	copy( &fracpart, frac);
}

/*  compute cosine(x) for any x.
	x values larger than PI*2^200 will be in gross error, so watch out!
	works in place, returns y = cos(x)
*/

void cosine( FLOAT *x, FLOAT *y)
{
	FLOAT	z, PI, dummy, PI3;
	int		cmpr;
	
/*  create 2*PI  */

	copy( &P2, &PI);
	PI.expnt += 2;

/*  check range of input and force modulo 2PI operation  */

	cmpr = compare( x, &PI);
	if( cmpr > 0 )
	{
		divide( x, &PI, &z);
		split( &z, &dummy, &z);
		multiply( &PI, &z, &z);
	}
	else copy( x, &z);
	if( z.mntsa.e[MS_MNTSA] & SIGN_BIT) negate( &z);

/*  z is now in range 0...2PI.  Now convert to range of core cos */

	cmpr = compare( &z, &P2);
	if( cmpr <= 0)
	{
		corecos( &z, y);
		return;
	}
	PI.expnt--;
	add( &PI, &P2, &PI3);	// 3 PI/2
	cmpr = compare( &z, &PI3);
	if( cmpr > 0)
	{
		PI.expnt++;
		subtract( &PI, &z, &z);	// 2PI - x
		corecos( &z, y);
		return;
	}
	subtract( &PI, &z, &z);	// PI - x
	corecos( &z, y);
	negate( y);
}

/*  compute sine(x) for any x.
	same as cosine, jus move arguments around.
	works in place, 
	returns y = sin(x)
*/

void sine( FLOAT *x, FLOAT *y)
{
	FLOAT	z, PI, dummy;
	int		cmpr, signflag;
	
/*  create 2*PI and reduce x modulo 2PI signed  */

	copy( &P2, &PI);
	PI.expnt += 2;
	cmpr = compare( x, &PI);
	if( cmpr > 0)
	{
		divide( x, &PI, &z);
		split( &z, &dummy, &z);
		multiply( &PI, &z, &z);
	}
	else copy( x, &z);
	if( z.mntsa.e[MS_MNTSA] & SIGN_BIT)
	{
		negate( &z);
		signflag = 1;
	}
	else signflag = 0;

/*  z is no in range 0... 2*PI.
	if bigger than PI, flip sign of result and fold back to 0..PI,
	then subtract PI/2 to put into corecos range.
*/
	PI.expnt--;
	cmpr = compare( &z, &PI);
	if( cmpr > 0)
	{
		signflag ^= 1;
		subtract( &z, &PI, &z);
	}
	subtract( &z, &P2, &z);
	corecos( &z, y);
	if( signflag) negate(y);
}