Re: [hatari-devel] FPU update

[ Thread Index | Date Index | More lists.tuxfamily.org/hatari-devel Archives ]


Le 14/04/2017 à 16:38, Andreas Grabher a écrit :
Hello all, hello Douglas,

I have translated the FPSP algorithms using SoftFloat functions. Now all FPU functions are emulated without using floats and are thus completely independent of the host floating-point format. Before releasing, they should be tested.

Douglas, can you send the version of your test utility that supported testing transcendental functions (sin, cos, tan, etc) again? We might have lost that version. The latest code is already in WinUAE source repository and in Previous source repository (note that WinUAE version is license safe, while Previous is not).


Hi

thanks for your work (and to Toni for merging this into WinUAE).

I just updated Hatari latest cpu from WinUAE 3.4.1 beta 6, so people can test your changes in the devel version.

By the way, while porting changes from WinUAE to Hatari, I had several errors/warnings when compiling. I made some patch files for each type of problem, could you (and Toni) consider merging them in Previous/WinUAE to ease porting later ?

There're 6 patches ; they should be applied in the order fpu.1_xxx to fpu.6_xxx

Only patch fpu.1 was applied to Hatari source for now, other patches are waiting for your opinion.

--------------------------------------

fpu.1_error_static_inline.patch : required to compile under Hatari, as some functions are declared both static and non-static and gcc will fail with Hatari flags.

eg :
/home/npomarede/src/hatari-fpu/src/cpu/softfloat/softfloat-specialize.h:363:24: error: static declaration of 'propagateFloatx80NaNOneArg' follows non-static declaration
 static inline floatx80 propagateFloatx80NaNOneArg(floatx80 a, float_status *status)
                        ^
In file included from /home/npomarede/src/hatari-fpu/src/cpu/newcpu.h:44:0,
                 from /home/npomarede/src/hatari-fpu/src/cpu/fpp_softfloat.c:26:
/home/npomarede/src/hatari-fpu/src/cpu/softfloat/softfloat.h:454:10: note: previous declaration of 'propagateFloatx80NaNOneArg' was here
 floatx80 propagateFloatx80NaNOneArg( floatx80 a, float_status *status );
          ^
[...]

--------------------------------------

fpu.2_warning_unused.patch : defined but not used variables, some variables should be moved inside the "USE_LONG_DOUBLE" ifdef

eg :
/home/npomarede/src/hatari-fpu/src/cpu/fpp_native.c:52:16: warning: 'xhex_pi' defined but not used [-Wunused-variable]
 static uae_u32 xhex_pi[]    ={0x2168c235, 0xc90fdaa2, 0x4000};

--------------------------------------

fpu.3_warning_unused.patch : more unused variables, following on from the patch above. Except for "fp_nan", all the other variables are in fact unused and were commented out. Maybe they could be removed entirely, if they were only used by older code ?

eg :
/home/npomarede/src/hatari-fpu/src/cpu/fpp_native.c:92:16: warning: 'fp_pi' defined but not used [-Wunused-variable]
 static double *fp_pi     = (double *)dhex_pi;

--------------------------------------

fpu.4_warning_unused_function.patch : some unused functions, commented with "#if 0"

eg :
/home/npomarede/src/hatari-fpu/src/cpu/fpp_softfloat.c:291:17: warning: 'fp_to_sgl' defined but not used [-Wunused-function]
 static floatx80 fp_to_sgl(floatx80 a)

--------------------------------------

fpu.5_warning_or_bug_uninitialized_var.patch : pack_int can be used uninitialized if kfactor is "0" when entering the function (which seems possible ?). Adding "pack_int = 0" fixes this.

/home/npomarede/src/hatari-fpu/src/cpu/fpp_softfloat.c: In function 'fp_from_pack':
/home/npomarede/src/hatari-fpu/src/cpu/fpp_softfloat.c:959:10: warning: 'pack_int' may be used uninitialized in this function [-Wmaybe-uninitialized]
   wrd[0] |= pack_int;

--------------------------------------

fpu.6_warning_prototype.patch : missing prototypes for some functions. I also added "static", as they're only used in softfloat_decimal.c

eg:
/home/npomarede/src/hatari-fpu/src/cpu/softfloat/softfloat_decimal.c:29:6: warning: no previous prototype for 'round128to64' [-Wmissing-prototypes]
 void round128to64(flag aSign, int32_t *aExp, uint64_t *aSig0, uint64_t *aSig1, float_status *status)

--------------------------------------

Those are just "cosmetic" changes with no code impact ; there are also some warnings about signed/unsigned comparisons and other unused functions that I can post later.


Nicolas



--- softfloat.h.orig	2017-04-19 11:21:11.575096338 +0200
+++ softfloat.h	2017-04-19 11:33:14.841606239 +0200
@@ -403,13 +403,6 @@
 flag floatx80_lt( floatx80, floatx80, float_status *status);
 
 #ifdef SOFTFLOAT_68K
-flag floatx80_is_zero( floatx80 );
-flag floatx80_is_infinity( floatx80 );
-flag floatx80_is_negative( floatx80 );
-flag floatx80_is_denormal( floatx80 );
-flag floatx80_is_unnormal( floatx80 );
-flag floatx80_is_normal( floatx80 );
-
 // functions are in softfloat.c
 floatx80 floatx80_move( floatx80 a, float_status *status );
 floatx80 floatx80_abs( floatx80 a, float_status *status );
@@ -450,9 +443,6 @@
 floatx80 packFloatx80( flag zSign, int32_t zExp, uint64_t zSig );
 floatx80 roundAndPackFloatx80(int8_t roundingPrecision, flag zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1, float_status *status);
 
-// functions are in softfloat-specialize.h
-floatx80 propagateFloatx80NaNOneArg( floatx80 a, float_status *status );
-floatx80 propagateFloatx80NaN( floatx80 a, floatx80 b, float_status *status );
 /*----------------------------------------------------------------------------
 | Software IEC/IEEE extended double-precision operations.
 *----------------------------------------------------------------------------*/
@@ -462,7 +452,6 @@
 floatx80 floatx80_mul(floatx80, floatx80, float_status *status);
 floatx80 floatx80_div(floatx80, floatx80, float_status *status);
 floatx80 floatx80_sqrt(floatx80, float_status *status);
-flag floatx80_is_signaling_nan(floatx80);
 floatx80 floatx80_normalize(floatx80);
 floatx80 floatx80_denormalize(floatx80, flag);
 
@@ -498,9 +487,4 @@
 #define floatx80_half make_floatx80(0x3ffe, 0x8000000000000000LL)
 #define floatx80_infinity make_floatx80(0x7fff, 0x8000000000000000LL)
 
-/*----------------------------------------------------------------------------
-| The pattern for a default generated extended double-precision NaN.
-*----------------------------------------------------------------------------*/
-floatx80 floatx80_default_nan(float_status *status);
-
 #endif /* SOFTFLOAT_H */
--- softfloat-specialize.h.orig	2017-04-19 11:32:09.599752601 +0200
+++ softfloat-specialize.h	2017-04-19 11:37:23.286047292 +0200
@@ -285,6 +285,24 @@
     }
 
 }
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the extended double-precision floating-point value `a' is a
+| signaling NaN; otherwise returns 0.
+*----------------------------------------------------------------------------*/
+
+static inline flag floatx80_is_signaling_nan( floatx80 a )
+{
+    uint64_t aLow;
+
+    aLow = a.low & ~ LIT64( 0x4000000000000000 );
+    return
+           ( ( a.high & 0x7FFF ) == 0x7FFF )
+        && (uint64_t) ( aLow<<1 )
+        && ( a.low == aLow );
+
+}
+
 /*----------------------------------------------------------------------------
 | Returns the result of converting the extended double-precision floating-
 | point NaN `a' to the canonical NaN format.  If `a' is a signaling NaN, the
@@ -370,23 +388,6 @@
 }
 #endif
 
-/*----------------------------------------------------------------------------
-| Returns 1 if the extended double-precision floating-point value `a' is a
-| signaling NaN; otherwise returns 0.
-*----------------------------------------------------------------------------*/
-
-static flag floatx80_is_signaling_nan( floatx80 a )
-{
-    uint64_t aLow;
-
-    aLow = a.low & ~ LIT64( 0x4000000000000000 );
-    return
-           ( ( a.high & 0x7FFF ) == 0x7FFF )
-        && (uint64_t) ( aLow<<1 )
-        && ( a.low == aLow );
-
-}
-
 // 28-12-2016: Added for Previous:
 
 /*----------------------------------------------------------------------------
--- fpp_native.c.orig	2017-04-19 12:38:30.823202122 +0200
+++ fpp_native.c	2017-04-19 12:38:38.773184890 +0200
@@ -49,13 +49,13 @@
 float  fp_1e0 = 1, fp_1e1 = 10, fp_1e2 = 100, fp_1e4 = 10000;
 #endif
 
+#ifdef USE_LONG_DOUBLE
 static uae_u32 xhex_pi[]    ={0x2168c235, 0xc90fdaa2, 0x4000};
 static uae_u32 xhex_l2_e[]  ={0x5c17f0bc, 0xb8aa3b29, 0x3fff};
 static uae_u32 xhex_ln_2[]  ={0xd1cf79ac, 0xb17217f7, 0x3ffe};
 static uae_u32 xhex_inf[]   ={0x00000000, 0x00000000, 0x7fff};
 static uae_u32 xhex_nan[]   ={0xffffffff, 0xffffffff, 0x7fff};
 static uae_u32 xhex_snan[]  ={0xffffffff, 0xbfffffff, 0x7fff};
-#ifdef USE_LONG_DOUBLE
 static long double *fp_pi     = (long double *)xhex_pi;
 static long double *fp_exp_1  = (long double *)xhex_exp_1;
 static long double *fp_l2_e   = (long double *)xhex_l2_e;
--- fpp_native.c.orig	2017-04-19 12:39:32.234068960 +0200
+++ fpp_native.c	2017-04-19 12:42:23.392697163 +0200
@@ -50,62 +50,62 @@
 #endif
 
 #ifdef USE_LONG_DOUBLE
-static uae_u32 xhex_pi[]    ={0x2168c235, 0xc90fdaa2, 0x4000};
-static uae_u32 xhex_l2_e[]  ={0x5c17f0bc, 0xb8aa3b29, 0x3fff};
-static uae_u32 xhex_ln_2[]  ={0xd1cf79ac, 0xb17217f7, 0x3ffe};
-static uae_u32 xhex_inf[]   ={0x00000000, 0x00000000, 0x7fff};
+//static uae_u32 xhex_pi[]    ={0x2168c235, 0xc90fdaa2, 0x4000};
+//static uae_u32 xhex_l2_e[]  ={0x5c17f0bc, 0xb8aa3b29, 0x3fff};
+//static uae_u32 xhex_ln_2[]  ={0xd1cf79ac, 0xb17217f7, 0x3ffe};
+//static uae_u32 xhex_inf[]   ={0x00000000, 0x00000000, 0x7fff};
 static uae_u32 xhex_nan[]   ={0xffffffff, 0xffffffff, 0x7fff};
-static uae_u32 xhex_snan[]  ={0xffffffff, 0xbfffffff, 0x7fff};
-static long double *fp_pi     = (long double *)xhex_pi;
-static long double *fp_exp_1  = (long double *)xhex_exp_1;
-static long double *fp_l2_e   = (long double *)xhex_l2_e;
-static long double *fp_ln_2   = (long double *)xhex_ln_2;
-static long double *fp_ln_10  = (long double *)xhex_ln_10;
-static long double *fp_l10_2  = (long double *)xhex_l10_2;
-static long double *fp_l10_e  = (long double *)xhex_l10_e;
-static long double *fp_1e16   = (long double *)xhex_1e16;
-static long double *fp_1e32   = (long double *)xhex_1e32;
-static long double *fp_1e64   = (long double *)xhex_1e64;
-static long double *fp_1e128  = (long double *)xhex_1e128;
-static long double *fp_1e256  = (long double *)xhex_1e256;
-static long double *fp_1e512  = (long double *)xhex_1e512;
-static long double *fp_1e1024 = (long double *)xhex_1e1024;
-static long double *fp_1e2048 = (long double *)xhex_1e2048;
-static long double *fp_1e4096 = (long double *)xhex_1e4096;
-static long double *fp_inf    = (long double *)xhex_inf;
+//static uae_u32 xhex_snan[]  ={0xffffffff, 0xbfffffff, 0x7fff};
+//static long double *fp_pi     = (long double *)xhex_pi;
+//static long double *fp_exp_1  = (long double *)xhex_exp_1;
+//static long double *fp_l2_e   = (long double *)xhex_l2_e;
+//static long double *fp_ln_2   = (long double *)xhex_ln_2;
+//static long double *fp_ln_10  = (long double *)xhex_ln_10;
+//static long double *fp_l10_2  = (long double *)xhex_l10_2;
+//static long double *fp_l10_e  = (long double *)xhex_l10_e;
+//static long double *fp_1e16   = (long double *)xhex_1e16;
+//static long double *fp_1e32   = (long double *)xhex_1e32;
+//static long double *fp_1e64   = (long double *)xhex_1e64;
+//static long double *fp_1e128  = (long double *)xhex_1e128;
+//static long double *fp_1e256  = (long double *)xhex_1e256;
+//static long double *fp_1e512  = (long double *)xhex_1e512;
+//static long double *fp_1e1024 = (long double *)xhex_1e1024;
+//static long double *fp_1e2048 = (long double *)xhex_1e2048;
+//static long double *fp_1e4096 = (long double *)xhex_1e4096;
+//static long double *fp_inf    = (long double *)xhex_inf;
 static long double *fp_nan    = (long double *)xhex_nan;
 #else
-static uae_u32 dhex_pi[]    ={0x54442D18, 0x400921FB};
-static uae_u32 dhex_exp_1[] ={0x8B145769, 0x4005BF0A};
-static uae_u32 dhex_l2_e[]  ={0x652B82FE, 0x3FF71547};
-static uae_u32 dhex_ln_2[]  ={0xFEFA39EF, 0x3FE62E42};
-static uae_u32 dhex_ln_10[] ={0xBBB55516, 0x40026BB1};
-static uae_u32 dhex_l10_2[] ={0x509F79FF, 0x3FD34413};
-static uae_u32 dhex_l10_e[] ={0x1526E50E, 0x3FDBCB7B};
-static uae_u32 dhex_1e16[]  ={0x37E08000, 0x4341C379};
-static uae_u32 dhex_1e32[]  ={0xB5056E17, 0x4693B8B5};
-static uae_u32 dhex_1e64[]  ={0xE93FF9F5, 0x4D384F03};
-static uae_u32 dhex_1e128[] ={0xF9301D32, 0x5A827748};
-static uae_u32 dhex_1e256[] ={0x7F73BF3C, 0x75154FDD};
-static uae_u32 dhex_inf[]   ={0x00000000, 0x7ff00000};
+//static uae_u32 dhex_pi[]    ={0x54442D18, 0x400921FB};
+//static uae_u32 dhex_exp_1[] ={0x8B145769, 0x4005BF0A};
+//static uae_u32 dhex_l2_e[]  ={0x652B82FE, 0x3FF71547};
+//static uae_u32 dhex_ln_2[]  ={0xFEFA39EF, 0x3FE62E42};
+//static uae_u32 dhex_ln_10[] ={0xBBB55516, 0x40026BB1};
+//static uae_u32 dhex_l10_2[] ={0x509F79FF, 0x3FD34413};
+//static uae_u32 dhex_l10_e[] ={0x1526E50E, 0x3FDBCB7B};
+//static uae_u32 dhex_1e16[]  ={0x37E08000, 0x4341C379};
+//static uae_u32 dhex_1e32[]  ={0xB5056E17, 0x4693B8B5};
+//static uae_u32 dhex_1e64[]  ={0xE93FF9F5, 0x4D384F03};
+//static uae_u32 dhex_1e128[] ={0xF9301D32, 0x5A827748};
+//static uae_u32 dhex_1e256[] ={0x7F73BF3C, 0x75154FDD};
+//static uae_u32 dhex_inf[]   ={0x00000000, 0x7ff00000};
 static uae_u32 dhex_nan[]   ={0xffffffff, 0x7fffffff};
-static double *fp_pi     = (double *)dhex_pi;
-static double *fp_exp_1  = (double *)dhex_exp_1;
-static double *fp_l2_e   = (double *)dhex_l2_e;
-static double *fp_ln_2   = (double *)dhex_ln_2;
-static double *fp_ln_10  = (double *)dhex_ln_10;
-static double *fp_l10_2  = (double *)dhex_l10_2;
-static double *fp_l10_e  = (double *)dhex_l10_e;
-static double *fp_1e16   = (double *)dhex_1e16;
-static double *fp_1e32   = (double *)dhex_1e32;
-static double *fp_1e64   = (double *)dhex_1e64;
-static double *fp_1e128  = (double *)dhex_1e128;
-static double *fp_1e256  = (double *)dhex_1e256;
-static double *fp_1e512  = (double *)dhex_inf;
-static double *fp_1e1024 = (double *)dhex_inf;
-static double *fp_1e2048 = (double *)dhex_inf;
-static double *fp_1e4096 = (double *)dhex_inf;
-static double *fp_inf    = (double *)dhex_inf;
+//static double *fp_pi     = (double *)dhex_pi;
+//static double *fp_exp_1  = (double *)dhex_exp_1;
+//static double *fp_l2_e   = (double *)dhex_l2_e;
+//static double *fp_ln_2   = (double *)dhex_ln_2;
+//static double *fp_ln_10  = (double *)dhex_ln_10;
+//static double *fp_l10_2  = (double *)dhex_l10_2;
+//static double *fp_l10_e  = (double *)dhex_l10_e;
+//static double *fp_1e16   = (double *)dhex_1e16;
+//static double *fp_1e32   = (double *)dhex_1e32;
+//static double *fp_1e64   = (double *)dhex_1e64;
+//static double *fp_1e128  = (double *)dhex_1e128;
+//static double *fp_1e256  = (double *)dhex_1e256;
+//static double *fp_1e512  = (double *)dhex_inf;
+//static double *fp_1e1024 = (double *)dhex_inf;
+//static double *fp_1e2048 = (double *)dhex_inf;
+//static double *fp_1e4096 = (double *)dhex_inf;
+//static double *fp_inf    = (double *)dhex_inf;
 static double *fp_nan    = (double *)dhex_nan;
 #endif
 static const double twoto32 = 4294967296.0;
--- fpp_softfloat.c.orig	2017-04-19 12:52:58.345309972 +0200
+++ fpp_softfloat.c	2017-04-19 12:54:12.859146419 +0200
@@ -288,6 +288,7 @@
 
 /* Functions for rounding */
 
+#if 0
 static floatx80 fp_to_sgl(floatx80 a)
 {
 	floatx80 v = floatx80_round32(a, &fs);
@@ -295,6 +296,7 @@
 	v.high |= a.high & 0x7fff;
 	return v;
 }
+#endif
 
 // round to float with extended precision exponent
 static void fp_round32(fpdata *fpd)
@@ -320,6 +322,7 @@
 	fpd->fpx = floatx80_round_to_float64(fpd->fpx, &fs);
 }
 
+#if 0
 // round to selected precision
 static void fp_round(fpdata *a)
 {
@@ -334,6 +337,7 @@
 		break;
 	}
 }
+#endif
 
 /* Arithmetic functions */
 
--- fpp_softfloat.c.orig	2017-04-19 12:56:27.270849313 +0200
+++ fpp_softfloat.c	2017-04-19 12:57:27.928714888 +0200
@@ -928,6 +928,7 @@
 		exponent = f.high & 0x3FFF;
 		significand = f.low;
 		
+		pack_int = 0;
 		pack_frac = 0;
 		len = kfactor; // SoftFloat saved len to kfactor variable
 		while (len > 0) {
--- softfloat/softfloat_decimal.c.orig	2017-04-19 13:08:22.481115392 +0200
+++ softfloat/softfloat_decimal.c	2017-04-19 14:12:17.265135286 +0200
@@ -22,11 +22,22 @@
 #include "softfloat-macros.h"
 #include "softfloat/softfloat-specialize.h"
 
+
+static void round128to64(flag aSign, int32_t *aExp, uint64_t *aSig0, uint64_t *aSig1, float_status *status);
+static void mul128by128round(int32_t *aExp, uint64_t *aSig0, uint64_t *aSig1, int32_t bExp, uint64_t bSig0, uint64_t bSig1, float_status *status);
+static void mul128by128(int32_t *aExp, uint64_t *aSig0, uint64_t *aSig1, int32_t bExp, uint64_t bSig0, uint64_t bSig1);
+static void div128by128(int32_t *paExp, uint64_t *paSig0, uint64_t *paSig1, int32_t bExp, uint64_t bSig0, uint64_t bSig1);
+static void tentoint128(flag mSign, flag eSign, int32_t *aExp, uint64_t *aSig0, uint64_t *aSig1, int32_t scale, float_status *status);
+static int64_t tentointdec(int32_t scale);
+static int64_t float128toint64(flag zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1, float_status *status);
+static int32_t getDecimalExponent(int32_t aExp, uint64_t aSig);
+
+
 /*----------------------------------------------------------------------------
 | Methods for converting decimal floats to binary extended precision floats.
 *----------------------------------------------------------------------------*/
 
-void round128to64(flag aSign, int32_t *aExp, uint64_t *aSig0, uint64_t *aSig1, float_status *status)
+static void round128to64(flag aSign, int32_t *aExp, uint64_t *aSig0, uint64_t *aSig1, float_status *status)
 {
 	flag increment;
 	int32_t zExp;
@@ -66,7 +77,7 @@
 	*aSig1 = 0;
 }
 
-void mul128by128round(int32_t *aExp, uint64_t *aSig0, uint64_t *aSig1, int32_t bExp, uint64_t bSig0, uint64_t bSig1, float_status *status)
+static void mul128by128round(int32_t *aExp, uint64_t *aSig0, uint64_t *aSig1, int32_t bExp, uint64_t bSig0, uint64_t bSig1, float_status *status)
 {
 	int32_t zExp;
 	uint64_t zSig0, zSig1, zSig2, zSig3;
@@ -91,7 +102,7 @@
 	round128to64(0, aExp, aSig0, aSig1, status);
 }
 
-void mul128by128(int32_t *aExp, uint64_t *aSig0, uint64_t *aSig1, int32_t bExp, uint64_t bSig0, uint64_t bSig1)
+static void mul128by128(int32_t *aExp, uint64_t *aSig0, uint64_t *aSig1, int32_t bExp, uint64_t bSig0, uint64_t bSig1)
 {
 	int32_t zExp;
 	uint64_t zSig0, zSig1, zSig2, zSig3;
@@ -112,7 +123,7 @@
 	*aSig1 = zSig1;
 }
 
-void div128by128(int32_t *paExp, uint64_t *paSig0, uint64_t *paSig1, int32_t bExp, uint64_t bSig0, uint64_t bSig1)
+static void div128by128(int32_t *paExp, uint64_t *paSig0, uint64_t *paSig1, int32_t bExp, uint64_t bSig0, uint64_t bSig1)
 {
 	int32_t zExp, aExp;
 	uint64_t zSig0, zSig1, aSig0, aSig1;
@@ -178,7 +189,7 @@
 
 #else
 
-void tentoint128(flag mSign, flag eSign, int32_t *aExp, uint64_t *aSig0, uint64_t *aSig1, int32_t scale, float_status *status)
+static void tentoint128(flag mSign, flag eSign, int32_t *aExp, uint64_t *aSig0, uint64_t *aSig1, int32_t scale, float_status *status)
  {
     int8_t save_rounding_mode;
     int32_t mExp;
@@ -230,7 +241,7 @@
 
 #endif
 
-int64_t tentointdec(int32_t scale)
+static int64_t tentointdec(int32_t scale)
 {
 	uint64_t decM, decX;
 	 
@@ -249,7 +260,7 @@
 }
 
 
-int64_t float128toint64(flag zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1, float_status *status)
+static int64_t float128toint64(flag zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1, float_status *status)
 {
 	int8_t roundingMode;
 	flag roundNearestEven, increment;
@@ -280,7 +291,7 @@
 	return z;
 }
 
-int32_t getDecimalExponent(int32_t aExp, uint64_t aSig)
+static int32_t getDecimalExponent(int32_t aExp, uint64_t aSig)
 {
 	flag zSign;
 	int32_t zExp, shiftCount;


Mail converted by MHonArc 2.6.19+ http://listengine.tuxfamily.org/