blob: 886d55fb8ba1d404316d78c38921778dba080176 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
|
/*
* Copyright © 2025 Intel Corporation
* SPDX-License-Identifier: MIT
*/
#pragma once
#include <assert.h>
#include <math.h>
#include <stdint.h>
#include "u_math.h"
/* A Float NaN is only a NaN because some significand bit is set.  Keeping
 * just the upper 16 bits could drop every such bit, so bit 6 of the result
 * is always forced on to guarantee at least one significand bit survives.
 */
static inline uint16_t
_mesa_float_nan_to_bfloat_bits(union fi x)
{
   assert(isnan(x.f));

   const uint32_t upper_half = x.ui >> 16;
   const uint32_t forced_bit = 1u << 6;

   return (uint16_t)(upper_half | forced_bit);
}
/* Round-towards-zero. */
static inline uint16_t
_mesa_float_to_bfloat16_bits_rtz(float f)
{
union fi x;
x.f = f;
if (isnan(f))
return _mesa_float_nan_to_bfloat_bits(x);
return x.ui >> 16;
}
/* Round-to-nearest-even. */
static inline uint16_t
_mesa_float_to_bfloat16_bits_rte(float f)
{
union fi x;
x.f = f;
if (isnan(f))
return _mesa_float_nan_to_bfloat_bits(x);
/* Use the tail part that is discarded to decide rounding,
* break the tie with the nearest even.
*
* Overflow of the significand value will turn to zero and
* increment the exponent. If exponent reaches 0xff, the
* value will correctly end up as +/- Inf.
*/
uint32_t result = x.ui >> 16;
const uint32_t tail = x.ui & 0xffff;
if (tail > 0x8000 || (tail == 0x8000 && (result & 1) == 1))
result++;
return result;
}
/* Expand BFloat16 bits to a float by placing them in the upper 16 bits of
 * the float's bit pattern; the lower 16 bits are left as zero.
 */
static inline float
_mesa_bfloat16_bits_to_float(uint16_t bf)
{
   union fi x;
   /* Widen to uint32_t before shifting.  A bare `bf << 16` promotes bf to
    * signed int, and for bf >= 0x8000 (sign bit set) the shifted value is
    * not representable in a 32-bit int, which is undefined behavior.
    */
   x.ui = (uint32_t)bf << 16;
   return x.f;
}
|