Library Flocq.Appli.Fappli_IEEE
This file is part of the Flocq formalization of floating-point
arithmetic in Coq: http://flocq.gforge.inria.fr/
Copyright (C) 2010-2013 Sylvie Boldo
Copyright (C) 2010-2013 Guillaume Melquiond
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 3 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
COPYING file for more details.
Copyright (C) 2010-2013 Guillaume Melquiond
IEEE-754 arithmetic
Require Import Fcore.
Require Import Fcore_digits.
Require Import Fcalc_digits.
Require Import Fcalc_round.
Require Import Fcalc_bracket.
Require Import Fcalc_ops.
Require Import Fcalc_div.
Require Import Fcalc_sqrt.
Require Import Fprop_relative.
Section AnyRadix.
Inductive full_float :=
| F754_zero : bool → full_float
| F754_infinity : bool → full_float
| F754_nan : bool → positive → full_float
| F754_finite : bool → positive → Z → full_float.
Definition FF2R beta x :=
match x with
| F754_finite s m e ⇒ F2R (Float beta (cond_Zopp s (Zpos m)) e)
| _ ⇒ R0
end.
End AnyRadix.
Section Binary.
Implicit Arguments exist [[A] [P]].
Require Import Fcore_digits.
Require Import Fcalc_digits.
Require Import Fcalc_round.
Require Import Fcalc_bracket.
Require Import Fcalc_ops.
Require Import Fcalc_div.
Require Import Fcalc_sqrt.
Require Import Fprop_relative.
Section AnyRadix.
Inductive full_float :=
| F754_zero : bool → full_float
| F754_infinity : bool → full_float
| F754_nan : bool → positive → full_float
| F754_finite : bool → positive → Z → full_float.
Definition FF2R beta x :=
match x with
| F754_finite s m e ⇒ F2R (Float beta (cond_Zopp s (Zpos m)) e)
| _ ⇒ R0
end.
End AnyRadix.
Section Binary.
Implicit Arguments exist [[A] [P]].
prec is the number of bits of the mantissa including the implicit one
emax is the exponent of the infinities
Typically p=24 and emax = 128 in single precision
Variable prec emax : Z.
Context (prec_gt_0_ : Prec_gt_0 prec).
Hypothesis Hmax : (prec < emax)%Z.
Let emin := (3 - emax - prec)%Z.
Let fexp := FLT_exp emin prec.
Instance fexp_correct : Valid_exp fexp := FLT_exp_valid emin prec.
Instance fexp_monotone : Monotone_exp fexp := FLT_exp_monotone emin prec.
Definition canonic_mantissa m e :=
Zeq_bool (fexp (Z_of_nat (S (digits2_Pnat m)) + e)) e.
Definition bounded m e :=
andb (canonic_mantissa m e) (Zle_bool e (emax - prec)).
Definition valid_binary x :=
match x with
| F754_finite _ m e ⇒ bounded m e
| F754_nan _ pl ⇒ (Z_of_nat' (S (digits2_Pnat pl)) <? prec)%Z
| _ ⇒ true
end.
Context (prec_gt_0_ : Prec_gt_0 prec).
Hypothesis Hmax : (prec < emax)%Z.
Let emin := (3 - emax - prec)%Z.
Let fexp := FLT_exp emin prec.
Instance fexp_correct : Valid_exp fexp := FLT_exp_valid emin prec.
Instance fexp_monotone : Monotone_exp fexp := FLT_exp_monotone emin prec.
Definition canonic_mantissa m e :=
Zeq_bool (fexp (Z_of_nat (S (digits2_Pnat m)) + e)) e.
Definition bounded m e :=
andb (canonic_mantissa m e) (Zle_bool e (emax - prec)).
Definition valid_binary x :=
match x with
| F754_finite _ m e ⇒ bounded m e
| F754_nan _ pl ⇒ (Z_of_nat' (S (digits2_Pnat pl)) <? prec)%Z
| _ ⇒ true
end.
Basic type used for representing binary FP numbers.
Note that there is exactly one such object per FP datum.
NaNs do not have any payload. They cannot be distinguished.
Definition nan_pl := {pl | (Z_of_nat' (S (digits2_Pnat pl)) <? prec)%Z = true}.
Inductive binary_float :=
| B754_zero : bool → binary_float
| B754_infinity : bool → binary_float
| B754_nan : bool → nan_pl → binary_float
| B754_finite : bool →
∀ (m : positive) (e : Z), bounded m e = true → binary_float.
Definition FF2B x :=
match x as x return valid_binary x = true → binary_float with
| F754_finite s m e ⇒ B754_finite s m e
| F754_infinity s ⇒ fun _ ⇒ B754_infinity s
| F754_zero s ⇒ fun _ ⇒ B754_zero s
| F754_nan b pl ⇒ fun H ⇒ B754_nan b (exist pl H)
end.
Definition B2FF x :=
match x with
| B754_finite s m e _ ⇒ F754_finite s m e
| B754_infinity s ⇒ F754_infinity s
| B754_zero s ⇒ F754_zero s
| B754_nan b (exist pl _) ⇒ F754_nan b pl
end.
Definition radix2 := Build_radix 2 (refl_equal true).
Definition B2R f :=
match f with
| B754_finite s m e _ ⇒ F2R (Float radix2 (cond_Zopp s (Zpos m)) e)
| _ ⇒ R0
end.
Theorem FF2R_B2FF :
∀ x,
FF2R radix2 (B2FF x) = B2R x.
Theorem B2FF_FF2B :
∀ x Hx,
B2FF (FF2B x Hx) = x.
Theorem valid_binary_B2FF :
∀ x,
valid_binary (B2FF x) = true.
Theorem FF2B_B2FF :
∀ x H,
FF2B (B2FF x) H = x.
Theorem FF2B_B2FF_valid :
∀ x,
FF2B (B2FF x) (valid_binary_B2FF x) = x.
Theorem B2R_FF2B :
∀ x Hx,
B2R (FF2B x Hx) = FF2R radix2 x.
Theorem match_FF2B :
∀ {T} fz fi fn ff x Hx,
match FF2B x Hx return T with
| B754_zero sx ⇒ fz sx
| B754_infinity sx ⇒ fi sx
| B754_nan b (exist p _) ⇒ fn b p
| B754_finite sx mx ex _ ⇒ ff sx mx ex
end =
match x with
| F754_zero sx ⇒ fz sx
| F754_infinity sx ⇒ fi sx
| F754_nan b p ⇒ fn b p
| F754_finite sx mx ex ⇒ ff sx mx ex
end.
Theorem canonic_canonic_mantissa :
∀ (sx : bool) mx ex,
canonic_mantissa mx ex = true →
canonic radix2 fexp (Float radix2 (cond_Zopp sx (Zpos mx)) ex).
Theorem generic_format_B2R :
∀ x,
generic_format radix2 fexp (B2R x).
Theorem FLT_format_B2R :
∀ x,
FLT_format radix2 emin prec (B2R x).
Theorem B2FF_inj :
∀ x y : binary_float,
B2FF x = B2FF y →
x = y.
Definition is_finite_strict f :=
match f with
| B754_finite _ _ _ _ ⇒ true
| _ ⇒ false
end.
Theorem B2R_inj:
∀ x y : binary_float,
is_finite_strict x = true →
is_finite_strict y = true →
B2R x = B2R y →
x = y.
Definition Bsign x :=
match x with
| B754_nan s _ ⇒ s
| B754_zero s ⇒ s
| B754_infinity s ⇒ s
| B754_finite s _ _ _ ⇒ s
end.
Definition sign_FF x :=
match x with
| F754_nan s _ ⇒ s
| F754_zero s ⇒ s
| F754_infinity s ⇒ s
| F754_finite s _ _ ⇒ s
end.
Theorem Bsign_FF2B :
∀ x H,
Bsign (FF2B x H) = sign_FF x.
Definition is_finite f :=
match f with
| B754_finite _ _ _ _ ⇒ true
| B754_zero _ ⇒ true
| _ ⇒ false
end.
Definition is_finite_FF f :=
match f with
| F754_finite _ _ _ ⇒ true
| F754_zero _ ⇒ true
| _ ⇒ false
end.
Theorem is_finite_FF2B :
∀ x Hx,
is_finite (FF2B x Hx) = is_finite_FF x.
Theorem is_finite_FF_B2FF :
∀ x,
is_finite_FF (B2FF x) = is_finite x.
Theorem B2R_Bsign_inj:
∀ x y : binary_float,
is_finite x = true →
is_finite y = true →
B2R x = B2R y →
Bsign x = Bsign y →
x = y.
Definition is_nan f :=
match f with
| B754_nan _ _ ⇒ true
| _ ⇒ false
end.
Definition is_nan_FF f :=
match f with
| F754_nan _ _ ⇒ true
| _ ⇒ false
end.
Theorem is_nan_FF2B :
∀ x Hx,
is_nan (FF2B x Hx) = is_nan_FF x.
Theorem is_nan_FF_B2FF :
∀ x,
is_nan_FF (B2FF x) = is_nan x.
Definition Bopp opp_nan x :=
match x with
| B754_nan sx plx ⇒
let '(sres, plres) := opp_nan sx plx in B754_nan sres plres
| B754_infinity sx ⇒ B754_infinity (negb sx)
| B754_finite sx mx ex Hx ⇒ B754_finite (negb sx) mx ex Hx
| B754_zero sx ⇒ B754_zero (negb sx)
end.
Theorem Bopp_involutive :
∀ opp_nan x,
is_nan x = false →
Bopp opp_nan (Bopp opp_nan x) = x.
Theorem B2R_Bopp :
∀ opp_nan x,
B2R (Bopp opp_nan x) = (- B2R x)%R.
Theorem is_finite_Bopp :
∀ opp_nan x,
is_finite (Bopp opp_nan x) = is_finite x.
Theorem bounded_lt_emax :
∀ mx ex,
bounded mx ex = true →
(F2R (Float radix2 (Zpos mx) ex) < bpow radix2 emax)%R.
Theorem abs_B2R_lt_emax :
∀ x,
(Rabs (B2R x) < bpow radix2 emax)%R.
Theorem bounded_canonic_lt_emax :
∀ mx ex,
canonic radix2 fexp (Float radix2 (Zpos mx) ex) →
(F2R (Float radix2 (Zpos mx) ex) < bpow radix2 emax)%R →
bounded mx ex = true.
mantissa, round and sticky bits
Record shr_record := { shr_m : Z ; shr_r : bool ; shr_s : bool }.
Definition shr_1 mrs :=
let '(Build_shr_record m r s) := mrs in
let s := orb r s in
match m with
| Z0 ⇒ Build_shr_record Z0 false s
| Zpos xH ⇒ Build_shr_record Z0 true s
| Zpos (xO p) ⇒ Build_shr_record (Zpos p) false s
| Zpos (xI p) ⇒ Build_shr_record (Zpos p) true s
| Zneg xH ⇒ Build_shr_record Z0 true s
| Zneg (xO p) ⇒ Build_shr_record (Zneg p) false s
| Zneg (xI p) ⇒ Build_shr_record (Zneg p) true s
end.
Definition loc_of_shr_record mrs :=
match mrs with
| Build_shr_record _ false false ⇒ loc_Exact
| Build_shr_record _ false true ⇒ loc_Inexact Lt
| Build_shr_record _ true false ⇒ loc_Inexact Eq
| Build_shr_record _ true true ⇒ loc_Inexact Gt
end.
Definition shr_record_of_loc m l :=
match l with
| loc_Exact ⇒ Build_shr_record m false false
| loc_Inexact Lt ⇒ Build_shr_record m false true
| loc_Inexact Eq ⇒ Build_shr_record m true false
| loc_Inexact Gt ⇒ Build_shr_record m true true
end.
Theorem shr_m_shr_record_of_loc :
∀ m l,
shr_m (shr_record_of_loc m l) = m.
Theorem loc_of_shr_record_of_loc :
∀ m l,
loc_of_shr_record (shr_record_of_loc m l) = l.
Definition shr mrs e n :=
match n with
| Zpos p ⇒ (iter_pos p _ shr_1 mrs, (e + n)%Z)
| _ ⇒ (mrs, e)
end.
Lemma inbetween_shr_1 :
∀ x mrs e,
(0 ≤ shr_m mrs)%Z →
inbetween_float radix2 (shr_m mrs) e x (loc_of_shr_record mrs) →
inbetween_float radix2 (shr_m (shr_1 mrs)) (e + 1) x (loc_of_shr_record (shr_1 mrs)).
Theorem inbetween_shr :
∀ x m e l n,
(0 ≤ m)%Z →
inbetween_float radix2 m e x l →
let '(mrs, e') := shr (shr_record_of_loc m l) e n in
inbetween_float radix2 (shr_m mrs) e' x (loc_of_shr_record mrs).
Definition Zdigits2 m :=
match m with Z0 ⇒ m | Zpos p ⇒ Z_of_nat (S (digits2_Pnat p)) | Zneg p ⇒ Z_of_nat (S (digits2_Pnat p)) end.
Theorem Zdigits2_Zdigits :
∀ m,
Zdigits2 m = Zdigits radix2 m.
Definition shr_fexp m e l :=
shr (shr_record_of_loc m l) e (fexp (Zdigits2 m + e) - e).
Theorem shr_truncate :
∀ m e l,
(0 ≤ m)%Z →
shr_fexp m e l =
let '(m', e', l') := truncate radix2 fexp (m, e, l) in (shr_record_of_loc m' l', e').
Inductive mode := mode_NE | mode_ZR | mode_DN | mode_UP | mode_NA.
Definition round_mode m :=
match m with
| mode_NE ⇒ ZnearestE
| mode_ZR ⇒ Ztrunc
| mode_DN ⇒ Zfloor
| mode_UP ⇒ Zceil
| mode_NA ⇒ ZnearestA
end.
Definition choice_mode m sx mx lx :=
match m with
| mode_NE ⇒ cond_incr (round_N (negb (Zeven mx)) lx) mx
| mode_ZR ⇒ mx
| mode_DN ⇒ cond_incr (round_sign_DN sx lx) mx
| mode_UP ⇒ cond_incr (round_sign_UP sx lx) mx
| mode_NA ⇒ cond_incr (round_N true lx) mx
end.
Global Instance valid_rnd_round_mode : ∀ m, Valid_rnd (round_mode m).
Definition overflow_to_inf m s :=
match m with
| mode_NE ⇒ true
| mode_NA ⇒ true
| mode_ZR ⇒ false
| mode_UP ⇒ negb s
| mode_DN ⇒ s
end.
Definition binary_overflow m s :=
if overflow_to_inf m s then F754_infinity s
else F754_finite s (match (Zpower 2 prec - 1)%Z with Zpos p ⇒ p | _ ⇒ xH end) (emax - prec).
Definition binary_round_aux mode sx mx ex lx :=
let '(mrs', e') := shr_fexp (Zpos mx) ex lx in
let '(mrs'', e'') := shr_fexp (choice_mode mode sx (shr_m mrs') (loc_of_shr_record mrs')) e' loc_Exact in
match shr_m mrs'' with
| Z0 ⇒ F754_zero sx
| Zpos m ⇒ if Zle_bool e'' (emax - prec) then F754_finite sx m e'' else binary_overflow mode sx
| _ ⇒ F754_nan false xH
end.
Theorem binary_round_aux_correct :
∀ mode x mx ex lx,
inbetween_float radix2 (Zpos mx) ex (Rabs x) lx →
(ex ≤ fexp (Zdigits radix2 (Zpos mx) + ex))%Z →
let z := binary_round_aux mode (Rlt_bool x 0) mx ex lx in
valid_binary z = true ∧
if Rlt_bool (Rabs (round radix2 fexp (round_mode mode) x)) (bpow radix2 emax) then
FF2R radix2 z = round radix2 fexp (round_mode mode) x ∧
is_finite_FF z = true ∧ sign_FF z = Rlt_bool x 0
else
z = binary_overflow mode (Rlt_bool x 0).
Definition shr_1 mrs :=
let '(Build_shr_record m r s) := mrs in
let s := orb r s in
match m with
| Z0 ⇒ Build_shr_record Z0 false s
| Zpos xH ⇒ Build_shr_record Z0 true s
| Zpos (xO p) ⇒ Build_shr_record (Zpos p) false s
| Zpos (xI p) ⇒ Build_shr_record (Zpos p) true s
| Zneg xH ⇒ Build_shr_record Z0 true s
| Zneg (xO p) ⇒ Build_shr_record (Zneg p) false s
| Zneg (xI p) ⇒ Build_shr_record (Zneg p) true s
end.
Definition loc_of_shr_record mrs :=
match mrs with
| Build_shr_record _ false false ⇒ loc_Exact
| Build_shr_record _ false true ⇒ loc_Inexact Lt
| Build_shr_record _ true false ⇒ loc_Inexact Eq
| Build_shr_record _ true true ⇒ loc_Inexact Gt
end.
Definition shr_record_of_loc m l :=
match l with
| loc_Exact ⇒ Build_shr_record m false false
| loc_Inexact Lt ⇒ Build_shr_record m false true
| loc_Inexact Eq ⇒ Build_shr_record m true false
| loc_Inexact Gt ⇒ Build_shr_record m true true
end.
Theorem shr_m_shr_record_of_loc :
∀ m l,
shr_m (shr_record_of_loc m l) = m.
Theorem loc_of_shr_record_of_loc :
∀ m l,
loc_of_shr_record (shr_record_of_loc m l) = l.
Definition shr mrs e n :=
match n with
| Zpos p ⇒ (iter_pos p _ shr_1 mrs, (e + n)%Z)
| _ ⇒ (mrs, e)
end.
Lemma inbetween_shr_1 :
∀ x mrs e,
(0 ≤ shr_m mrs)%Z →
inbetween_float radix2 (shr_m mrs) e x (loc_of_shr_record mrs) →
inbetween_float radix2 (shr_m (shr_1 mrs)) (e + 1) x (loc_of_shr_record (shr_1 mrs)).
Theorem inbetween_shr :
∀ x m e l n,
(0 ≤ m)%Z →
inbetween_float radix2 m e x l →
let '(mrs, e') := shr (shr_record_of_loc m l) e n in
inbetween_float radix2 (shr_m mrs) e' x (loc_of_shr_record mrs).
Definition Zdigits2 m :=
match m with Z0 ⇒ m | Zpos p ⇒ Z_of_nat (S (digits2_Pnat p)) | Zneg p ⇒ Z_of_nat (S (digits2_Pnat p)) end.
Theorem Zdigits2_Zdigits :
∀ m,
Zdigits2 m = Zdigits radix2 m.
Definition shr_fexp m e l :=
shr (shr_record_of_loc m l) e (fexp (Zdigits2 m + e) - e).
Theorem shr_truncate :
∀ m e l,
(0 ≤ m)%Z →
shr_fexp m e l =
let '(m', e', l') := truncate radix2 fexp (m, e, l) in (shr_record_of_loc m' l', e').
Inductive mode := mode_NE | mode_ZR | mode_DN | mode_UP | mode_NA.
Definition round_mode m :=
match m with
| mode_NE ⇒ ZnearestE
| mode_ZR ⇒ Ztrunc
| mode_DN ⇒ Zfloor
| mode_UP ⇒ Zceil
| mode_NA ⇒ ZnearestA
end.
Definition choice_mode m sx mx lx :=
match m with
| mode_NE ⇒ cond_incr (round_N (negb (Zeven mx)) lx) mx
| mode_ZR ⇒ mx
| mode_DN ⇒ cond_incr (round_sign_DN sx lx) mx
| mode_UP ⇒ cond_incr (round_sign_UP sx lx) mx
| mode_NA ⇒ cond_incr (round_N true lx) mx
end.
Global Instance valid_rnd_round_mode : ∀ m, Valid_rnd (round_mode m).
Definition overflow_to_inf m s :=
match m with
| mode_NE ⇒ true
| mode_NA ⇒ true
| mode_ZR ⇒ false
| mode_UP ⇒ negb s
| mode_DN ⇒ s
end.
Definition binary_overflow m s :=
if overflow_to_inf m s then F754_infinity s
else F754_finite s (match (Zpower 2 prec - 1)%Z with Zpos p ⇒ p | _ ⇒ xH end) (emax - prec).
Definition binary_round_aux mode sx mx ex lx :=
let '(mrs', e') := shr_fexp (Zpos mx) ex lx in
let '(mrs'', e'') := shr_fexp (choice_mode mode sx (shr_m mrs') (loc_of_shr_record mrs')) e' loc_Exact in
match shr_m mrs'' with
| Z0 ⇒ F754_zero sx
| Zpos m ⇒ if Zle_bool e'' (emax - prec) then F754_finite sx m e'' else binary_overflow mode sx
| _ ⇒ F754_nan false xH
end.
Theorem binary_round_aux_correct :
∀ mode x mx ex lx,
inbetween_float radix2 (Zpos mx) ex (Rabs x) lx →
(ex ≤ fexp (Zdigits radix2 (Zpos mx) + ex))%Z →
let z := binary_round_aux mode (Rlt_bool x 0) mx ex lx in
valid_binary z = true ∧
if Rlt_bool (Rabs (round radix2 fexp (round_mode mode) x)) (bpow radix2 emax) then
FF2R radix2 z = round radix2 fexp (round_mode mode) x ∧
is_finite_FF z = true ∧ sign_FF z = Rlt_bool x 0
else
z = binary_overflow mode (Rlt_bool x 0).
Multiplication
Lemma Bmult_correct_aux :
∀ m sx mx ex (Hx : bounded mx ex = true) sy my ey (Hy : bounded my ey = true),
let x := F2R (Float radix2 (cond_Zopp sx (Zpos mx)) ex) in
let y := F2R (Float radix2 (cond_Zopp sy (Zpos my)) ey) in
let z := binary_round_aux m (xorb sx sy) (mx × my) (ex + ey) loc_Exact in
valid_binary z = true ∧
if Rlt_bool (Rabs (round radix2 fexp (round_mode m) (x × y))) (bpow radix2 emax) then
FF2R radix2 z = round radix2 fexp (round_mode m) (x × y) ∧
is_finite_FF z = true ∧ sign_FF z = xorb sx sy
else
z = binary_overflow m (xorb sx sy).
Definition Bmult mult_nan m x y :=
let f pl := B754_nan (fst pl) (snd pl) in
match x, y with
| B754_nan _ _, _ | _, B754_nan _ _ ⇒ f (mult_nan x y)
| B754_infinity sx, B754_infinity sy ⇒ B754_infinity (xorb sx sy)
| B754_infinity sx, B754_finite sy _ _ _ ⇒ B754_infinity (xorb sx sy)
| B754_finite sx _ _ _, B754_infinity sy ⇒ B754_infinity (xorb sx sy)
| B754_infinity _, B754_zero _ ⇒ f (mult_nan x y)
| B754_zero _, B754_infinity _ ⇒ f (mult_nan x y)
| B754_finite sx _ _ _, B754_zero sy ⇒ B754_zero (xorb sx sy)
| B754_zero sx, B754_finite sy _ _ _ ⇒ B754_zero (xorb sx sy)
| B754_zero sx, B754_zero sy ⇒ B754_zero (xorb sx sy)
| B754_finite sx mx ex Hx, B754_finite sy my ey Hy ⇒
FF2B _ (proj1 (Bmult_correct_aux m sx mx ex Hx sy my ey Hy))
end.
Theorem Bmult_correct :
∀ mult_nan m x y,
if Rlt_bool (Rabs (round radix2 fexp (round_mode m) (B2R x × B2R y))) (bpow radix2 emax) then
B2R (Bmult mult_nan m x y) = round radix2 fexp (round_mode m) (B2R x × B2R y) ∧
is_finite (Bmult mult_nan m x y) = andb (is_finite x) (is_finite y) ∧
(is_nan (Bmult mult_nan m x y) = false →
Bsign (Bmult mult_nan m x y) = xorb (Bsign x) (Bsign y))
else
B2FF (Bmult mult_nan m x y) = binary_overflow m (xorb (Bsign x) (Bsign y)).
Definition Bmult_FF mult_nan m x y :=
let f pl := F754_nan (fst pl) (snd pl) in
match x, y with
| F754_nan _ _, _ | _, F754_nan _ _ ⇒ f (mult_nan x y)
| F754_infinity sx, F754_infinity sy ⇒ F754_infinity (xorb sx sy)
| F754_infinity sx, F754_finite sy _ _ ⇒ F754_infinity (xorb sx sy)
| F754_finite sx _ _, F754_infinity sy ⇒ F754_infinity (xorb sx sy)
| F754_infinity _, F754_zero _ ⇒ f (mult_nan x y)
| F754_zero _, F754_infinity _ ⇒ f (mult_nan x y)
| F754_finite sx _ _, F754_zero sy ⇒ F754_zero (xorb sx sy)
| F754_zero sx, F754_finite sy _ _ ⇒ F754_zero (xorb sx sy)
| F754_zero sx, F754_zero sy ⇒ F754_zero (xorb sx sy)
| F754_finite sx mx ex, F754_finite sy my ey ⇒
binary_round_aux m (xorb sx sy) (mx × my) (ex + ey) loc_Exact
end.
Theorem B2FF_Bmult :
∀ mult_nan mult_nan_ff,
∀ m x y,
mult_nan_ff (B2FF x) (B2FF y) = (let '(sr, exist plr _) := mult_nan x y in (sr, plr)) →
B2FF (Bmult mult_nan m x y) = Bmult_FF mult_nan_ff m (B2FF x) (B2FF y).
Definition shl_align mx ex ex' :=
match (ex' - ex)%Z with
| Zneg d ⇒ (shift_pos d mx, ex')
| _ ⇒ (mx, ex)
end.
Theorem shl_align_correct :
∀ mx ex ex',
let (mx', ex'') := shl_align mx ex ex' in
F2R (Float radix2 (Zpos mx) ex) = F2R (Float radix2 (Zpos mx') ex'') ∧
(ex'' ≤ ex')%Z.
Theorem snd_shl_align :
∀ mx ex ex',
(ex' ≤ ex)%Z →
snd (shl_align mx ex ex') = ex'.
Definition shl_align_fexp mx ex :=
shl_align mx ex (fexp (Z_of_nat (S (digits2_Pnat mx)) + ex)).
Theorem shl_align_fexp_correct :
∀ mx ex,
let (mx', ex') := shl_align_fexp mx ex in
F2R (Float radix2 (Zpos mx) ex) = F2R (Float radix2 (Zpos mx') ex') ∧
(ex' ≤ fexp (Zdigits radix2 (Zpos mx') + ex'))%Z.
Definition binary_round m sx mx ex :=
let '(mz, ez) := shl_align_fexp mx ex in binary_round_aux m sx mz ez loc_Exact.
Theorem binary_round_correct :
∀ m sx mx ex,
let z := binary_round m sx mx ex in
valid_binary z = true ∧
let x := F2R (Float radix2 (cond_Zopp sx (Zpos mx)) ex) in
if Rlt_bool (Rabs (round radix2 fexp (round_mode m) x)) (bpow radix2 emax) then
FF2R radix2 z = round radix2 fexp (round_mode m) x ∧
is_finite_FF z = true ∧
sign_FF z = sx
else
z = binary_overflow m sx.
Definition binary_normalize mode m e szero :=
match m with
| Z0 ⇒ B754_zero szero
| Zpos m ⇒ FF2B _ (proj1 (binary_round_correct mode false m e))
| Zneg m ⇒ FF2B _ (proj1 (binary_round_correct mode true m e))
end.
Theorem binary_normalize_correct :
∀ m mx ex szero,
if Rlt_bool (Rabs (round radix2 fexp (round_mode m) (F2R (Float radix2 mx ex)))) (bpow radix2 emax) then
B2R (binary_normalize m mx ex szero) = round radix2 fexp (round_mode m) (F2R (Float radix2 mx ex)) ∧
is_finite (binary_normalize m mx ex szero) = true ∧
Bsign (binary_normalize m mx ex szero) =
match Rcompare (F2R (Float radix2 mx ex)) 0 with
| Eq ⇒ szero
| Lt ⇒ true
| Gt ⇒ false
end
else
B2FF (binary_normalize m mx ex szero) = binary_overflow m (Rlt_bool (F2R (Float radix2 mx ex)) 0).
Addition
Definition Bplus plus_nan m x y :=
let f pl := B754_nan (fst pl) (snd pl) in
match x, y with
| B754_nan _ _, _ | _, B754_nan _ _ ⇒ f (plus_nan x y)
| B754_infinity sx, B754_infinity sy ⇒
if Bool.eqb sx sy then x else f (plus_nan x y)
| B754_infinity _, _ ⇒ x
| _, B754_infinity _ ⇒ y
| B754_zero sx, B754_zero sy ⇒
if Bool.eqb sx sy then x else
match m with mode_DN ⇒ B754_zero true | _ ⇒ B754_zero false end
| B754_zero _, _ ⇒ y
| _, B754_zero _ ⇒ x
| B754_finite sx mx ex Hx, B754_finite sy my ey Hy ⇒
let ez := Zmin ex ey in
binary_normalize m (Zplus (cond_Zopp sx (Zpos (fst (shl_align mx ex ez)))) (cond_Zopp sy (Zpos (fst (shl_align my ey ez)))))
ez (match m with mode_DN ⇒ true | _ ⇒ false end)
end.
Theorem Bplus_correct :
∀ plus_nan m x y,
is_finite x = true →
is_finite y = true →
if Rlt_bool (Rabs (round radix2 fexp (round_mode m) (B2R x + B2R y))) (bpow radix2 emax) then
B2R (Bplus plus_nan m x y) = round radix2 fexp (round_mode m) (B2R x + B2R y) ∧
is_finite (Bplus plus_nan m x y) = true ∧
Bsign (Bplus plus_nan m x y) =
match Rcompare (B2R x + B2R y) 0 with
| Eq ⇒ match m with mode_DN ⇒ orb (Bsign x) (Bsign y)
| _ ⇒ andb (Bsign x) (Bsign y) end
| Lt ⇒ true
| Gt ⇒ false
end
else
(B2FF (Bplus plus_nan m x y) = binary_overflow m (Bsign x) ∧ Bsign x = Bsign y).
Definition Bminus minus_nan m x y := Bplus minus_nan m x (Bopp pair y).
Theorem Bminus_correct :
∀ minus_nan m x y,
is_finite x = true →
is_finite y = true →
if Rlt_bool (Rabs (round radix2 fexp (round_mode m) (B2R x - B2R y))) (bpow radix2 emax) then
B2R (Bminus minus_nan m x y) = round radix2 fexp (round_mode m) (B2R x - B2R y) ∧
is_finite (Bminus minus_nan m x y) = true ∧
Bsign (Bminus minus_nan m x y) =
match Rcompare (B2R x - B2R y) 0 with
| Eq ⇒ match m with mode_DN ⇒ orb (Bsign x) (negb (Bsign y))
| _ ⇒ andb (Bsign x) (negb (Bsign y)) end
| Lt ⇒ true
| Gt ⇒ false
end
else
(B2FF (Bminus minus_nan m x y) = binary_overflow m (Bsign x) ∧ Bsign x = negb (Bsign y)).
let f pl := B754_nan (fst pl) (snd pl) in
match x, y with
| B754_nan _ _, _ | _, B754_nan _ _ ⇒ f (plus_nan x y)
| B754_infinity sx, B754_infinity sy ⇒
if Bool.eqb sx sy then x else f (plus_nan x y)
| B754_infinity _, _ ⇒ x
| _, B754_infinity _ ⇒ y
| B754_zero sx, B754_zero sy ⇒
if Bool.eqb sx sy then x else
match m with mode_DN ⇒ B754_zero true | _ ⇒ B754_zero false end
| B754_zero _, _ ⇒ y
| _, B754_zero _ ⇒ x
| B754_finite sx mx ex Hx, B754_finite sy my ey Hy ⇒
let ez := Zmin ex ey in
binary_normalize m (Zplus (cond_Zopp sx (Zpos (fst (shl_align mx ex ez)))) (cond_Zopp sy (Zpos (fst (shl_align my ey ez)))))
ez (match m with mode_DN ⇒ true | _ ⇒ false end)
end.
Theorem Bplus_correct :
∀ plus_nan m x y,
is_finite x = true →
is_finite y = true →
if Rlt_bool (Rabs (round radix2 fexp (round_mode m) (B2R x + B2R y))) (bpow radix2 emax) then
B2R (Bplus plus_nan m x y) = round radix2 fexp (round_mode m) (B2R x + B2R y) ∧
is_finite (Bplus plus_nan m x y) = true ∧
Bsign (Bplus plus_nan m x y) =
match Rcompare (B2R x + B2R y) 0 with
| Eq ⇒ match m with mode_DN ⇒ orb (Bsign x) (Bsign y)
| _ ⇒ andb (Bsign x) (Bsign y) end
| Lt ⇒ true
| Gt ⇒ false
end
else
(B2FF (Bplus plus_nan m x y) = binary_overflow m (Bsign x) ∧ Bsign x = Bsign y).
Definition Bminus minus_nan m x y := Bplus minus_nan m x (Bopp pair y).
Theorem Bminus_correct :
∀ minus_nan m x y,
is_finite x = true →
is_finite y = true →
if Rlt_bool (Rabs (round radix2 fexp (round_mode m) (B2R x - B2R y))) (bpow radix2 emax) then
B2R (Bminus minus_nan m x y) = round radix2 fexp (round_mode m) (B2R x - B2R y) ∧
is_finite (Bminus minus_nan m x y) = true ∧
Bsign (Bminus minus_nan m x y) =
match Rcompare (B2R x - B2R y) 0 with
| Eq ⇒ match m with mode_DN ⇒ orb (Bsign x) (negb (Bsign y))
| _ ⇒ andb (Bsign x) (negb (Bsign y)) end
| Lt ⇒ true
| Gt ⇒ false
end
else
(B2FF (Bminus minus_nan m x y) = binary_overflow m (Bsign x) ∧ Bsign x = negb (Bsign y)).
Division
Definition Fdiv_core_binary m1 e1 m2 e2 :=
let d1 := Zdigits2 m1 in
let d2 := Zdigits2 m2 in
let e := (e1 - e2)%Z in
let (m, e') :=
match (d2 + prec - d1)%Z with
| Zpos p ⇒ (m1 × Zpower_pos 2 p, e + Zneg p)%Z
| _ ⇒ (m1, e)
end in
let '(q, r) := Zdiv_eucl m m2 in
(q, e', new_location m2 r loc_Exact).
Lemma Bdiv_correct_aux :
∀ m sx mx ex sy my ey,
let x := F2R (Float radix2 (cond_Zopp sx (Zpos mx)) ex) in
let y := F2R (Float radix2 (cond_Zopp sy (Zpos my)) ey) in
let z :=
let '(mz, ez, lz) := Fdiv_core_binary (Zpos mx) ex (Zpos my) ey in
match mz with
| Zpos mz ⇒ binary_round_aux m (xorb sx sy) mz ez lz
| _ ⇒ F754_nan false xH
end in
valid_binary z = true ∧
if Rlt_bool (Rabs (round radix2 fexp (round_mode m) (x / y))) (bpow radix2 emax) then
FF2R radix2 z = round radix2 fexp (round_mode m) (x / y) ∧
is_finite_FF z = true ∧ sign_FF z = xorb sx sy
else
z = binary_overflow m (xorb sx sy).
Definition Bdiv div_nan m x y :=
let f pl := B754_nan (fst pl) (snd pl) in
match x, y with
| B754_nan _ _, _ | _, B754_nan _ _ ⇒ f (div_nan x y)
| B754_infinity sx, B754_infinity sy ⇒ f (div_nan x y)
| B754_infinity sx, B754_finite sy _ _ _ ⇒ B754_infinity (xorb sx sy)
| B754_finite sx _ _ _, B754_infinity sy ⇒ B754_zero (xorb sx sy)
| B754_infinity sx, B754_zero sy ⇒ B754_infinity (xorb sx sy)
| B754_zero sx, B754_infinity sy ⇒ B754_zero (xorb sx sy)
| B754_finite sx _ _ _, B754_zero sy ⇒ B754_infinity (xorb sx sy)
| B754_zero sx, B754_finite sy _ _ _ ⇒ B754_zero (xorb sx sy)
| B754_zero sx, B754_zero sy ⇒ f (div_nan x y)
| B754_finite sx mx ex _, B754_finite sy my ey _ ⇒
FF2B _ (proj1 (Bdiv_correct_aux m sx mx ex sy my ey))
end.
Theorem Bdiv_correct :
∀ div_nan m x y,
B2R y ≠ R0 →
if Rlt_bool (Rabs (round radix2 fexp (round_mode m) (B2R x / B2R y))) (bpow radix2 emax) then
B2R (Bdiv div_nan m x y) = round radix2 fexp (round_mode m) (B2R x / B2R y) ∧
is_finite (Bdiv div_nan m x y) = is_finite x ∧
(is_nan (Bdiv div_nan m x y) = false →
Bsign (Bdiv div_nan m x y) = xorb (Bsign x) (Bsign y))
else
B2FF (Bdiv div_nan m x y) = binary_overflow m (xorb (Bsign x) (Bsign y)).
let d1 := Zdigits2 m1 in
let d2 := Zdigits2 m2 in
let e := (e1 - e2)%Z in
let (m, e') :=
match (d2 + prec - d1)%Z with
| Zpos p ⇒ (m1 × Zpower_pos 2 p, e + Zneg p)%Z
| _ ⇒ (m1, e)
end in
let '(q, r) := Zdiv_eucl m m2 in
(q, e', new_location m2 r loc_Exact).
Lemma Bdiv_correct_aux :
∀ m sx mx ex sy my ey,
let x := F2R (Float radix2 (cond_Zopp sx (Zpos mx)) ex) in
let y := F2R (Float radix2 (cond_Zopp sy (Zpos my)) ey) in
let z :=
let '(mz, ez, lz) := Fdiv_core_binary (Zpos mx) ex (Zpos my) ey in
match mz with
| Zpos mz ⇒ binary_round_aux m (xorb sx sy) mz ez lz
| _ ⇒ F754_nan false xH
end in
valid_binary z = true ∧
if Rlt_bool (Rabs (round radix2 fexp (round_mode m) (x / y))) (bpow radix2 emax) then
FF2R radix2 z = round radix2 fexp (round_mode m) (x / y) ∧
is_finite_FF z = true ∧ sign_FF z = xorb sx sy
else
z = binary_overflow m (xorb sx sy).
Definition Bdiv div_nan m x y :=
let f pl := B754_nan (fst pl) (snd pl) in
match x, y with
| B754_nan _ _, _ | _, B754_nan _ _ ⇒ f (div_nan x y)
| B754_infinity sx, B754_infinity sy ⇒ f (div_nan x y)
| B754_infinity sx, B754_finite sy _ _ _ ⇒ B754_infinity (xorb sx sy)
| B754_finite sx _ _ _, B754_infinity sy ⇒ B754_zero (xorb sx sy)
| B754_infinity sx, B754_zero sy ⇒ B754_infinity (xorb sx sy)
| B754_zero sx, B754_infinity sy ⇒ B754_zero (xorb sx sy)
| B754_finite sx _ _ _, B754_zero sy ⇒ B754_infinity (xorb sx sy)
| B754_zero sx, B754_finite sy _ _ _ ⇒ B754_zero (xorb sx sy)
| B754_zero sx, B754_zero sy ⇒ f (div_nan x y)
| B754_finite sx mx ex _, B754_finite sy my ey _ ⇒
FF2B _ (proj1 (Bdiv_correct_aux m sx mx ex sy my ey))
end.
Theorem Bdiv_correct :
∀ div_nan m x y,
B2R y ≠ R0 →
if Rlt_bool (Rabs (round radix2 fexp (round_mode m) (B2R x / B2R y))) (bpow radix2 emax) then
B2R (Bdiv div_nan m x y) = round radix2 fexp (round_mode m) (B2R x / B2R y) ∧
is_finite (Bdiv div_nan m x y) = is_finite x ∧
(is_nan (Bdiv div_nan m x y) = false →
Bsign (Bdiv div_nan m x y) = xorb (Bsign x) (Bsign y))
else
B2FF (Bdiv div_nan m x y) = binary_overflow m (xorb (Bsign x) (Bsign y)).
Square root
Definition Fsqrt_core_binary m e :=
let d := Zdigits2 m in
let s := Zmax (2 × prec - d) 0 in
let e' := (e - s)%Z in
let (s', e'') := if Zeven e' then (s, e') else (s + 1, e' - 1)%Z in
let m' :=
match s' with
| Zpos p ⇒ (m × Zpower_pos 2 p)%Z
| _ ⇒ m
end in
let (q, r) := Z.sqrtrem m' in
let l :=
if Zeq_bool r 0 then loc_Exact
else loc_Inexact (if Zle_bool r q then Lt else Gt) in
(q, Zdiv2 e'', l).
Lemma Bsqrt_correct_aux :
∀ m mx ex (Hx : bounded mx ex = true),
let x := F2R (Float radix2 (Zpos mx) ex) in
let z :=
let '(mz, ez, lz) := Fsqrt_core_binary (Zpos mx) ex in
match mz with
| Zpos mz ⇒ binary_round_aux m false mz ez lz
| _ ⇒ F754_nan false xH
end in
valid_binary z = true ∧
FF2R radix2 z = round radix2 fexp (round_mode m) (sqrt x) ∧
is_finite_FF z = true ∧ sign_FF z = false.
Definition Bsqrt sqrt_nan m x :=
let f pl := B754_nan (fst pl) (snd pl) in
match x with
| B754_nan sx plx ⇒ f (sqrt_nan x)
| B754_infinity false ⇒ x
| B754_infinity true ⇒ f (sqrt_nan x)
| B754_finite true _ _ _ ⇒ f (sqrt_nan x)
| B754_zero _ ⇒ x
| B754_finite sx mx ex Hx ⇒
FF2B _ (proj1 (Bsqrt_correct_aux m mx ex Hx))
end.
Theorem Bsqrt_correct :
∀ sqrt_nan m x,
B2R (Bsqrt sqrt_nan m x) = round radix2 fexp (round_mode m) (sqrt (B2R x)) ∧
is_finite (Bsqrt sqrt_nan m x) = match x with B754_zero _ ⇒ true | B754_finite false _ _ _ ⇒ true | _ ⇒ false end ∧
(is_nan (Bsqrt sqrt_nan m x) = false → Bsign (Bsqrt sqrt_nan m x) = Bsign x).
End Binary.
let d := Zdigits2 m in
let s := Zmax (2 × prec - d) 0 in
let e' := (e - s)%Z in
let (s', e'') := if Zeven e' then (s, e') else (s + 1, e' - 1)%Z in
let m' :=
match s' with
| Zpos p ⇒ (m × Zpower_pos 2 p)%Z
| _ ⇒ m
end in
let (q, r) := Z.sqrtrem m' in
let l :=
if Zeq_bool r 0 then loc_Exact
else loc_Inexact (if Zle_bool r q then Lt else Gt) in
(q, Zdiv2 e'', l).
Lemma Bsqrt_correct_aux :
∀ m mx ex (Hx : bounded mx ex = true),
let x := F2R (Float radix2 (Zpos mx) ex) in
let z :=
let '(mz, ez, lz) := Fsqrt_core_binary (Zpos mx) ex in
match mz with
| Zpos mz ⇒ binary_round_aux m false mz ez lz
| _ ⇒ F754_nan false xH
end in
valid_binary z = true ∧
FF2R radix2 z = round radix2 fexp (round_mode m) (sqrt x) ∧
is_finite_FF z = true ∧ sign_FF z = false.
Definition Bsqrt sqrt_nan m x :=
let f pl := B754_nan (fst pl) (snd pl) in
match x with
| B754_nan sx plx ⇒ f (sqrt_nan x)
| B754_infinity false ⇒ x
| B754_infinity true ⇒ f (sqrt_nan x)
| B754_finite true _ _ _ ⇒ f (sqrt_nan x)
| B754_zero _ ⇒ x
| B754_finite sx mx ex Hx ⇒
FF2B _ (proj1 (Bsqrt_correct_aux m mx ex Hx))
end.
Theorem Bsqrt_correct :
∀ sqrt_nan m x,
B2R (Bsqrt sqrt_nan m x) = round radix2 fexp (round_mode m) (sqrt (B2R x)) ∧
is_finite (Bsqrt sqrt_nan m x) = match x with B754_zero _ ⇒ true | B754_finite false _ _ _ ⇒ true | _ ⇒ false end ∧
(is_nan (Bsqrt sqrt_nan m x) = false → Bsign (Bsqrt sqrt_nan m x) = Bsign x).
End Binary.