Issue #11734: Add support for IEEE 754 half-precision floats to the struct module. Original patch by Eli Stevens.

mdickinson · mdickinson · commit 7c4e409d075f · 2016-09-03T17:21:29.000+01:00
diff --git a/Doc/library/struct.rst b/Doc/library/struct.rst
@@ -216,6 +216,8 @@ platform-dependent.
 +--------+--------------------------+--------------------+----------------+------------+
 | ``N``  | :c:type:`size_t`         | integer            |                | \(4)       |
 +--------+--------------------------+--------------------+----------------+------------+
+| ``e``  | \(7)                     | float              | 2              | \(5)       |
++--------+--------------------------+--------------------+----------------+------------+
 | ``f``  | :c:type:`float`          | float              | 4              | \(5)       |
 +--------+--------------------------+--------------------+----------------+------------+
 | ``d``  | :c:type:`double`         | float              | 8              | \(5)       |
@@ -257,9 +259,10 @@ Notes:
    fits your application.
 
 (5)
-   For the ``'f'`` and ``'d'`` conversion codes, the packed representation uses
-   the IEEE 754 binary32 (for ``'f'``) or binary64 (for ``'d'``) format,
-   regardless of the floating-point format used by the platform.
+   For the ``'f'``, ``'d'`` and ``'e'`` conversion codes, the packed
+   representation uses the IEEE 754 binary32, binary64 or binary16 format (for
+   ``'f'``, ``'d'`` or ``'e'`` respectively), regardless of the floating-point
+   format used by the platform.
 
 (6)
    The ``'P'`` format character is only available for the native byte ordering
@@ -268,6 +271,16 @@ Notes:
    on the host system. The struct module does not interpret this as native
    ordering, so the ``'P'`` format is not available.
 
+(7)
+   The IEEE 754 binary16 "half precision" type was introduced in the 2008
+   revision of the `IEEE 754 standard <ieee 754 standard_>`_. It has a sign
+   bit, a 5-bit exponent and 11-bit precision (with 10 bits explicitly stored),
+   and can represent numbers between approximately ``6.1e-05`` and ``6.5e+04``
+   at full precision. This type is not widely supported by C compilers: on a
+   typical machine, an unsigned short can be used for storage, but not for math
+   operations. See the Wikipedia page on the `half-precision floating-point
+   format <half precision format_>`_ for more information.
+
 
 A format character may be preceded by an integral repeat count.  For example,
 the format string ``'4h'`` means exactly the same as ``'hhhh'``.
@@ -430,3 +443,7 @@ The :mod:`struct` module also defines the following type:
       The calculated size of the struct (and hence of the bytes object produced
       by the :meth:`pack` method) corresponding to :attr:`format`.
 
+
+.. _half precision format: https://en.wikipedia.org/wiki/Half-precision_floating-point_format
+
+.. _ieee 754 standard: https://en.wikipedia.org/wiki/IEEE_floating_point#IEEE_754-2008
diff --git a/Include/floatobject.h b/Include/floatobject.h
@@ -74,16 +74,17 @@ PyAPI_FUNC(double) PyFloat_AsDouble(PyObject *);
  * happens in such cases is partly accidental (alas).
  */
 
-/* The pack routines write 4 or 8 bytes, starting at p.  le is a bool
+/* The pack routines write 2, 4 or 8 bytes, starting at p.  le is a bool
  * argument, true if you want the string in little-endian format (exponent
- * last, at p+3 or p+7), false if you want big-endian format (exponent
+ * last, at p+1, p+3 or p+7), false if you want big-endian format (exponent
  * first, at p).
  * Return value:  0 if all is OK, -1 if error (and an exception is
  * set, most likely OverflowError).
  * There are two problems on non-IEEE platforms:
  * 1):  What this does is undefined if x is a NaN or infinity.
  * 2):  -0.0 and +0.0 produce the same string.
  */
+PyAPI_FUNC(int) _PyFloat_Pack2(double x, unsigned char *p, int le);
 PyAPI_FUNC(int) _PyFloat_Pack4(double x, unsigned char *p, int le);
 PyAPI_FUNC(int) _PyFloat_Pack8(double x, unsigned char *p, int le);
 
@@ -96,14 +97,15 @@ PyAPI_FUNC(int) _PyFloat_Repr(double x, char *p, size_t len);
 PyAPI_FUNC(int) _PyFloat_Digits(char *buf, double v, int *signum);
 PyAPI_FUNC(void) _PyFloat_DigitsInit(void);
 
-/* The unpack routines read 4 or 8 bytes, starting at p.  le is a bool
+/* The unpack routines read 2, 4 or 8 bytes, starting at p.  le is a bool
  * argument, true if the string is in little-endian format (exponent
- * last, at p+3 or p+7), false if big-endian (exponent first, at p).
+ * last, at p+1, p+3 or p+7), false if big-endian (exponent first, at p).
  * Return value:  The unpacked double.  On error, this is -1.0 and
  * PyErr_Occurred() is true (and an exception is set, most likely
  * OverflowError).  Note that on a non-IEEE platform this will refuse
  * to unpack a string that represents a NaN or infinity.
  */
+PyAPI_FUNC(double) _PyFloat_Unpack2(const unsigned char *p, int le);
 PyAPI_FUNC(double) _PyFloat_Unpack4(const unsigned char *p, int le);
 PyAPI_FUNC(double) _PyFloat_Unpack8(const unsigned char *p, int le);
 
diff --git a/Lib/test/test_struct.py b/Lib/test/test_struct.py
@@ -1,5 +1,6 @@
 from collections import abc
 import array
+import math
 import operator
 import unittest
 import struct
@@ -366,8 +367,6 @@ def test_705836(self):
         # SF bug 705836.  "<f" and ">f" had a severe rounding bug, where a carry
         # from the low-order discarded bits could propagate into the exponent
         # field, causing the result to be wrong by a factor of 2.
-        import math
-
         for base in range(1, 33):
             # smaller <- largest representable float less than base.
             delta = 0.5
@@ -659,6 +658,110 @@ def test_module_func(self):
         self.assertRaises(StopIteration, next, it)
         self.assertRaises(StopIteration, next, it)
 
+    def test_half_float(self):
+        # Little-endian examples from:
+        # http://en.wikipedia.org/wiki/Half_precision_floating-point_format
+        format_bits_float__cleanRoundtrip_list = [
+            (b'\x00\x3c', 1.0),
+            (b'\x00\xc0', -2.0),
+            (b'\xff\x7b', 65504.0), #  (max half precision)
+            (b'\x00\x04', 2**-14), # ~= 6.10352 * 10**-5 (min pos normal)
+            (b'\x01\x00', 2**-24), # ~= 5.96046 * 10**-8 (min pos subnormal)
+            (b'\x00\x00', 0.0),
+            (b'\x00\x80', -0.0),
+            (b'\x00\x7c', float('+inf')),
+            (b'\x00\xfc', float('-inf')),
+            (b'\x55\x35', 0.333251953125), # ~= 1/3
+        ]
+
+        for le_bits, f in format_bits_float__cleanRoundtrip_list:
+            be_bits = le_bits[::-1]
+            self.assertEqual(f, struct.unpack('<e', le_bits)[0])
+            self.assertEqual(le_bits, struct.pack('<e', f))
+            self.assertEqual(f, struct.unpack('>e', be_bits)[0])
+            self.assertEqual(be_bits, struct.pack('>e', f))
+            if sys.byteorder == 'little':
+                self.assertEqual(f, struct.unpack('e', le_bits)[0])
+                self.assertEqual(le_bits, struct.pack('e', f))
+            else:
+                self.assertEqual(f, struct.unpack('e', be_bits)[0])
+                self.assertEqual(be_bits, struct.pack('e', f))
+
+        # Check for NaN handling:
+        format_bits__nan_list = [
+            ('<e', b'\x01\xfc'),
+            ('<e', b'\x00\xfe'),
+            ('<e', b'\xff\xff'),
+            ('<e', b'\x01\x7c'),
+            ('<e', b'\x00\x7e'),
+            ('<e', b'\xff\x7f'),
+        ]
+
+        for formatcode, bits in format_bits__nan_list:
+            self.assertTrue(math.isnan(struct.unpack('<e', bits)[0]))
+            self.assertTrue(math.isnan(struct.unpack('>e', bits[::-1])[0]))
+
+        # Check that packing produces a bit pattern representing a quiet NaN:
+        # all exponent bits and the msb of the fraction should all be 1.
+        packed = struct.pack('<e', math.nan)
+        self.assertEqual(packed[1] & 0x7e, 0x7e)
+        packed = struct.pack('<e', -math.nan)
+        self.assertEqual(packed[1] & 0x7e, 0x7e)
+
+        # Checks for round-to-even behavior
+        format_bits_float__rounding_list = [
+            ('>e', b'\x00\x01', 2.0**-25 + 2.0**-35), # Rounds to minimum subnormal
+            ('>e', b'\x00\x00', 2.0**-25), # Underflows to zero (nearest even mode)
+            ('>e', b'\x00\x00', 2.0**-26), # Underflows to zero
+            ('>e', b'\x03\xff', 2.0**-14 - 2.0**-24), # Largest subnormal.
+            ('>e', b'\x03\xff', 2.0**-14 - 2.0**-25 - 2.0**-65),
+            ('>e', b'\x04\x00', 2.0**-14 - 2.0**-25),
+            ('>e', b'\x04\x00', 2.0**-14), # Smallest normal.
+            ('>e', b'\x3c\x01', 1.0+2.0**-11 + 2.0**-16), # rounds to 1.0+2**(-10)
+            ('>e', b'\x3c\x00', 1.0+2.0**-11), # rounds to 1.0 (nearest even mode)
+            ('>e', b'\x3c\x00', 1.0+2.0**-12), # rounds to 1.0
+            ('>e', b'\x7b\xff', 65504), # largest normal
+            ('>e', b'\x7b\xff', 65519), # rounds to 65504
+            ('>e', b'\x80\x01', -2.0**-25 - 2.0**-35), # Rounds to minimum subnormal
+            ('>e', b'\x80\x00', -2.0**-25), # Underflows to zero (nearest even mode)
+            ('>e', b'\x80\x00', -2.0**-26), # Underflows to zero
+            ('>e', b'\xbc\x01', -1.0-2.0**-11 - 2.0**-16), # rounds to 1.0+2**(-10)
+            ('>e', b'\xbc\x00', -1.0-2.0**-11), # rounds to 1.0 (nearest even mode)
+            ('>e', b'\xbc\x00', -1.0-2.0**-12), # rounds to 1.0
+            ('>e', b'\xfb\xff', -65519), # rounds to 65504
+        ]
+
+        for formatcode, bits, f in format_bits_float__rounding_list:
+            self.assertEqual(bits, struct.pack(formatcode, f))
+
+        # This overflows, and so raises an error
+        format_bits_float__roundingError_list = [
+            # Values that round to infinity.
+            ('>e', 65520.0),
+            ('>e', 65536.0),
+            ('>e', 1e300),
+            ('>e', -65520.0),
+            ('>e', -65536.0),
+            ('>e', -1e300),
+            ('<e', 65520.0),
+            ('<e', 65536.0),
+            ('<e', 1e300),
+            ('<e', -65520.0),
+            ('<e', -65536.0),
+            ('<e', -1e300),
+        ]
+
+        for formatcode, f in format_bits_float__roundingError_list:
+            self.assertRaises(OverflowError, struct.pack, formatcode, f)
+
+        # Double rounding
+        format_bits_float__doubleRoundingError_list = [
+            ('>e', b'\x67\xff', 0x1ffdffffff * 2**-26), # should be 2047, if double-rounded 64>32>16, becomes 2048
+        ]
+
+        for formatcode, bits, f in format_bits_float__doubleRoundingError_list:
+            self.assertEqual(bits, struct.pack(formatcode, f))
+
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/Misc/ACKS b/Misc/ACKS
@@ -1435,6 +1435,7 @@ Greg Stein
 Marek Stepniowski
 Baruch Sterin
 Chris Stern
+Eli Stevens
 Alex Stewart
 Victor Stinner
 Richard Stoakley
diff --git a/Misc/NEWS b/Misc/NEWS
@@ -69,6 +69,9 @@ Core and Builtins
 Library
 -------
 
+- Issue #11734: Add support for IEEE 754 half-precision floats to the
+  struct module. Based on a patch by Eli Stevens.
+
 - Issue #27919: Deprecated ``extra_path`` distribution option in distutils
   packaging.
 
diff --git a/Modules/_struct.c b/Modules/_struct.c
@@ -266,6 +266,33 @@ get_size_t(PyObject *v, size_t *p)
 
 /* Floating point helpers */
 
+static PyObject *
+unpack_halffloat(const char *p,  /* start of 2-byte string */
+                 int le)         /* true for little-endian, false for big-endian */
+{
+    double x;
+
+    x = _PyFloat_Unpack2((unsigned char *)p, le);
+    if (x == -1.0 && PyErr_Occurred()) {
+        return NULL;
+    }
+    return PyFloat_FromDouble(x);
+}
+
+static int
+pack_halffloat(char *p,      /* start of 2-byte string */
+               PyObject *v,  /* value to pack */
+               int le)       /* true for little-endian, false for big-endian */
+{
+    double x = PyFloat_AsDouble(v);
+    if (x == -1.0 && PyErr_Occurred()) {
+        PyErr_SetString(StructError,
+                        "required argument is not a float");
+        return -1;
+    }
+    return _PyFloat_Pack2(x, (unsigned char *)p, le);
+}
+
 static PyObject *
 unpack_float(const char *p,  /* start of 4-byte string */
          int le)             /* true for little-endian, false for big-endian */
@@ -469,6 +496,16 @@ nu_bool(const char *p, const formatdef *f)
 }
 
 
+static PyObject *
+nu_halffloat(const char *p, const formatdef *f)
+{
+#if PY_LITTLE_ENDIAN
+    return unpack_halffloat(p, 1);
+#else
+    return unpack_halffloat(p, 0);
+#endif    
+}
+
 static PyObject *
 nu_float(const char *p, const formatdef *f)
 {
@@ -680,6 +717,16 @@ np_bool(char *p, PyObject *v, const formatdef *f)
     return 0;
 }
 
+static int
+np_halffloat(char *p, PyObject *v, const formatdef *f)
+{
+#if PY_LITTLE_ENDIAN
+    return pack_halffloat(p, v, 1);
+#else
+    return pack_halffloat(p, v, 0);
+#endif
+}
+
 static int
 np_float(char *p, PyObject *v, const formatdef *f)
 {
@@ -743,6 +790,7 @@ static const formatdef native_table[] = {
     {'Q',       sizeof(PY_LONG_LONG), LONG_LONG_ALIGN, nu_ulonglong,np_ulonglong},
 #endif
     {'?',       sizeof(BOOL_TYPE),      BOOL_ALIGN,     nu_bool,        np_bool},
+    {'e',       sizeof(short),  SHORT_ALIGN,    nu_halffloat,   np_halffloat},
     {'f',       sizeof(float),  FLOAT_ALIGN,    nu_float,       np_float},
     {'d',       sizeof(double), DOUBLE_ALIGN,   nu_double,      np_double},
     {'P',       sizeof(void *), VOID_P_ALIGN,   nu_void_p,      np_void_p},
@@ -825,6 +873,12 @@ bu_ulonglong(const char *p, const formatdef *f)
 #endif
 }
 
+static PyObject *
+bu_halffloat(const char *p, const formatdef *f)
+{
+    return unpack_halffloat(p, 0);
+}
+
 static PyObject *
 bu_float(const char *p, const formatdef *f)
 {
@@ -921,6 +975,12 @@ bp_ulonglong(char *p, PyObject *v, const formatdef *f)
     return res;
 }
 
+static int
+bp_halffloat(char *p, PyObject *v, const formatdef *f)
+{
+    return pack_halffloat(p, v, 0);
+}
+
 static int
 bp_float(char *p, PyObject *v, const formatdef *f)
 {
@@ -972,6 +1032,7 @@ static formatdef bigendian_table[] = {
     {'q',       8,              0,              bu_longlong,    bp_longlong},
     {'Q',       8,              0,              bu_ulonglong,   bp_ulonglong},
     {'?',       1,              0,              bu_bool,        bp_bool},
+    {'e',       2,              0,              bu_halffloat,   bp_halffloat},
     {'f',       4,              0,              bu_float,       bp_float},
     {'d',       8,              0,              bu_double,      bp_double},
     {0}
@@ -1053,6 +1114,12 @@ lu_ulonglong(const char *p, const formatdef *f)
 #endif
 }
 
+static PyObject *
+lu_halffloat(const char *p, const formatdef *f)
+{
+    return unpack_halffloat(p, 1);
+}
+
 static PyObject *
 lu_float(const char *p, const formatdef *f)
 {
@@ -1141,6 +1208,12 @@ lp_ulonglong(char *p, PyObject *v, const formatdef *f)
     return res;
 }
 
+static int
+lp_halffloat(char *p, PyObject *v, const formatdef *f)
+{
+    return pack_halffloat(p, v, 1);
+}
+
 static int
 lp_float(char *p, PyObject *v, const formatdef *f)
 {
@@ -1182,6 +1255,7 @@ static formatdef lilendian_table[] = {
     {'Q',       8,              0,              lu_ulonglong,   lp_ulonglong},
     {'?',       1,              0,              bu_bool,        bp_bool}, /* Std rep not endian dep,
         but potentially different from native rep -- reuse bx_bool funcs. */
+    {'e',       2,              0,              lu_halffloat,   lp_halffloat},
     {'f',       4,              0,              lu_float,       lp_float},
     {'d',       8,              0,              lu_double,      lp_double},
     {0}
@@ -2239,7 +2313,7 @@ these can be preceded by a decimal repeat count:\n\
   x: pad byte (no data); c:char; b:signed byte; B:unsigned byte;\n\
   ?: _Bool (requires C99; if not available, char is used instead)\n\
   h:short; H:unsigned short; i:int; I:unsigned int;\n\
-  l:long; L:unsigned long; f:float; d:double.\n\
+  l:long; L:unsigned long; f:float; d:double; e:half-float.\n\
 Special cases (preceding decimal count indicates length):\n\
   s:string (array of char); p: pascal string (with count byte).\n\
 Special cases (only available in native format):\n\
diff --git a/Objects/floatobject.c b/Objects/floatobject.c