Library Flocq.Appli.Fappli_IEEE_bits

This file is part of the Flocq formalization of floating-point arithmetic in Coq: http://flocq.gforge.inria.fr/
Copyright (C) 2011-2013 Sylvie Boldo
Copyright (C) 2011-2013 Guillaume Melquiond
This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the COPYING file for more details.

IEEE-754 encoding of binary floating-point data

Require Import Fcore.
Require Import Fcore_digits.
Require Import Fcalc_digits.
Require Import Fappli_IEEE.

Section Binary_Bits.

Implicit Arguments exist [[A] [P]].
Implicit Arguments B754_zero [[prec] [emax]].
Implicit Arguments B754_infinity [[prec] [emax]].
Implicit Arguments B754_nan [[prec] [emax]].
Implicit Arguments B754_finite [[prec] [emax]].

Number of bits for the fraction and exponent
Variable mw ew : Z.
Hypothesis Hmw : (0 < mw)%Z.
Hypothesis Hew : (0 < ew)%Z.

Let emax := Zpower 2 (ew - 1).
Let prec := (mw + 1)%Z.
Let emin := (3 - emax - prec)%Z.
Let binary_float := binary_float prec emax.

Let Hprec : (0 < prec)%Z.
Qed.

Let Hm_gt_0 : (0 < 2^mw)%Z.
Qed.

Let He_gt_0 : (0 < 2^ew)%Z.
Qed.

Hypothesis Hmax : (prec < emax)%Z.

Definition join_bits (s : bool) m e :=
  (Z.shiftl ((if s then Zpower 2 ew else 0) + e) mw + m)%Z.

Lemma join_bits_range :
   s m e,
  (0 m < 2^mw)%Z
  (0 e < 2^ew)%Z
  (0 join_bits s m e < 2 ^ (mw + ew + 1))%Z.

Definition split_bits x :=
  let mm := Zpower 2 mw in
  let em := Zpower 2 ew in
  (Zle_bool (mm × em) x, Zmod x mm, Zmod (Zdiv x mm) em)%Z.

Theorem split_join_bits :
   s m e,
  (0 m < Zpower 2 mw)%Z
  (0 e < Zpower 2 ew)%Z
  split_bits (join_bits s m e) = (s, m, e).

Theorem join_split_bits :
   x,
  (0 x < Zpower 2 (mw + ew + 1))%Z
  let ´(s, m, e) := split_bits x in
  join_bits s m e = x.

Theorem split_bits_inj :
   x y,
  (0 x < Zpower 2 (mw + ew + 1))%Z
  (0 y < Zpower 2 (mw + ew + 1))%Z
  split_bits x = split_bits y
  x = y.

Definition bits_of_binary_float (x : binary_float) :=
  match x with
  | B754_zero sxjoin_bits sx 0 0
  | B754_infinity sxjoin_bits sx 0 (Zpower 2 ew - 1)
  | B754_nan sx (exist plx _) ⇒ join_bits sx (Zpos plx) (Zpower 2 ew - 1)
  | B754_finite sx mx ex _
    let m := (Zpos mx - Zpower 2 mw)%Z in
    if Zle_bool 0 m then
      join_bits sx m (ex - emin + 1)
    else
      join_bits sx (Zpos mx) 0
  end.

Definition split_bits_of_binary_float (x : binary_float) :=
  match x with
  | B754_zero sx(sx, 0, 0)%Z
  | B754_infinity sx(sx, 0, Zpower 2 ew - 1)%Z
  | B754_nan sx (exist plx _) ⇒ (sx, Zpos plx, Zpower 2 ew - 1)%Z
  | B754_finite sx mx ex _
    let m := (Zpos mx - Zpower 2 mw)%Z in
    if Zle_bool 0 m then
      (sx, m, ex - emin + 1)%Z
    else
      (sx, Zpos mx, 0)%Z
  end.

Theorem split_bits_of_binary_float_correct :
   x,
  split_bits (bits_of_binary_float x) = split_bits_of_binary_float x.

Theorem bits_of_binary_float_range:
   x, (0 bits_of_binary_float x < 2^(mw+ew+1))%Z.

Definition binary_float_of_bits_aux x :=
  let ´(sx, mx, ex) := split_bits x in
  if Zeq_bool ex 0 then
    match mx with
    | Z0F754_zero sx
    | Zpos pxF754_finite sx px emin
    | Zneg _F754_nan false xH
    end
  else if Zeq_bool ex (Zpower 2 ew - 1) then
    match mx with
      | Z0F754_infinity sx
      | Zpos plxF754_nan sx plx
      | Zneg _F754_nan false xH
    end
  else
    match (mx + Zpower 2 mw)%Z with
    | Zpos pxF754_finite sx px (ex + emin - 1)
    | _F754_nan false xH
    end.

Lemma binary_float_of_bits_aux_correct :
   x,
  valid_binary prec emax (binary_float_of_bits_aux x) = true.

Definition binary_float_of_bits x :=
  FF2B prec emax _ (binary_float_of_bits_aux_correct x).

Theorem binary_float_of_bits_of_binary_float :
   x,
  binary_float_of_bits (bits_of_binary_float x) = x.

Theorem bits_of_binary_float_of_bits :
   x,
  (0 x < 2^(mw+ew+1))%Z
  bits_of_binary_float (binary_float_of_bits x) = x.

End Binary_Bits.

Specialization for IEEE single precision operations
Section B32_Bits.

Implicit Arguments B754_nan [[prec] [emax]].

Definition binary32 := binary_float 24 128.

Let Hprec : (0 < 24)%Z.
Qed.

Let Hprec_emax : (24 < 128)%Z.
Qed.

Definition default_nan_pl32 : bool × nan_pl 24 :=
  (false, exist _ (iter_nat xO 22 xH) (refl_equal true)).

Definition unop_nan_pl32 (f : binary32) : bool × nan_pl 24 :=
  match f with
  | B754_nan s pl(s, pl)
  | _default_nan_pl32
  end.

Definition binop_nan_pl32 (f1 f2 : binary32) : bool × nan_pl 24 :=
  match f1, f2 with
  | B754_nan s1 pl1, _(s1, pl1)
  | _, B754_nan s2 pl2(s2, pl2)
  | _, _default_nan_pl32
  end.

Definition b32_opp := Bopp 24 128 pair.
Definition b32_plus := Bplus _ _ Hprec Hprec_emax binop_nan_pl32.
Definition b32_minus := Bminus _ _ Hprec Hprec_emax binop_nan_pl32.
Definition b32_mult := Bmult _ _ Hprec Hprec_emax binop_nan_pl32.
Definition b32_div := Bdiv _ _ Hprec Hprec_emax binop_nan_pl32.
Definition b32_sqrt := Bsqrt _ _ Hprec Hprec_emax unop_nan_pl32.

Definition b32_of_bits : Zbinary32 := binary_float_of_bits 23 8 (refl_equal _) (refl_equal _) (refl_equal _).
Definition bits_of_b32 : binary32Z := bits_of_binary_float 23 8.

End B32_Bits.

Specialization for IEEE double precision operations
Section B64_Bits.

Implicit Arguments B754_nan [[prec] [emax]].

Definition binary64 := binary_float 53 1024.

Let Hprec : (0 < 53)%Z.
Qed.

Let Hprec_emax : (53 < 1024)%Z.
Qed.

Definition default_nan_pl64 : bool × nan_pl 53 :=
  (false, exist _ (iter_nat xO 51 xH) (refl_equal true)).

Definition unop_nan_pl64 (f : binary64) : bool × nan_pl 53 :=
  match f with
  | B754_nan s pl(s, pl)
  | _default_nan_pl64
  end.

Definition binop_nan_pl64 (pl1 pl2 : binary64) : bool × nan_pl 53 :=
  match pl1, pl2 with
  | B754_nan s1 pl1, _(s1, pl1)
  | _, B754_nan s2 pl2(s2, pl2)
  | _, _default_nan_pl64
  end.

Definition b64_opp := Bopp 53 1024 pair.
Definition b64_plus := Bplus _ _ Hprec Hprec_emax binop_nan_pl64.
Definition b64_minus := Bminus _ _ Hprec Hprec_emax binop_nan_pl64.
Definition b64_mult := Bmult _ _ Hprec Hprec_emax binop_nan_pl64.
Definition b64_div := Bdiv _ _ Hprec Hprec_emax binop_nan_pl64.
Definition b64_sqrt := Bsqrt _ _ Hprec Hprec_emax unop_nan_pl64.

Definition b64_of_bits : Zbinary64 := binary_float_of_bits 52 11 (refl_equal _) (refl_equal _) (refl_equal _).
Definition bits_of_b64 : binary64Z := bits_of_binary_float 52 11.

End B64_Bits.