decimal-to-binary.cpp
13.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
//===-- lib/Decimal/decimal-to-binary.cpp ---------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "big-radix-floating-point.h"
#include "flang/Common/bit-population-count.h"
#include "flang/Common/leading-zero-bit-count.h"
#include "flang/Decimal/binary-floating-point.h"
#include "flang/Decimal/decimal.h"
#include <cinttypes>
#include <cstring>
#include <ctype.h>
namespace Fortran::decimal {
template <int PREC, int LOG10RADIX>
bool BigRadixFloatingPointNumber<PREC, LOG10RADIX>::ParseNumber(
const char *&p, bool &inexact) {
SetToZero();
while (*p == ' ') {
++p;
}
const char *q{p};
isNegative_ = *q == '-';
if (*q == '-' || *q == '+') {
++q;
}
const char *start{q};
while (*q == '0') {
++q;
}
const char *first{q};
for (; *q >= '0' && *q <= '9'; ++q) {
}
const char *point{nullptr};
if (*q == '.') {
point = q;
for (++q; *q >= '0' && *q <= '9'; ++q) {
}
}
if (q == start || (q == start + 1 && *start == '.')) {
return false; // require at least one digit
}
// There's a valid number here; set the reference argument to point to
// the first character afterward.
p = q;
// Strip off trailing zeroes
if (point) {
while (q[-1] == '0') {
--q;
}
if (q[-1] == '.') {
point = nullptr;
--q;
}
}
if (!point) {
while (q > first && q[-1] == '0') {
--q;
++exponent_;
}
}
// Trim any excess digits
const char *limit{first + maxDigits * log10Radix + (point != nullptr)};
if (q > limit) {
inexact = true;
if (point >= limit) {
q = point;
point = nullptr;
}
if (!point) {
exponent_ += q - limit;
}
q = limit;
}
if (point) {
exponent_ -= static_cast<int>(q - point - 1);
}
if (q == first) {
exponent_ = 0; // all zeros
}
// Rack the decimal digits up into big Digits.
for (auto times{radix}; q-- > first;) {
if (*q != '.') {
if (times == radix) {
digit_[digits_++] = *q - '0';
times = 10;
} else {
digit_[digits_ - 1] += times * (*q - '0');
times *= 10;
}
}
}
// Look for an optional exponent field.
q = p;
switch (*q) {
case 'e':
case 'E':
case 'd':
case 'D':
case 'q':
case 'Q': {
bool negExpo{*++q == '-'};
if (*q == '-' || *q == '+') {
++q;
}
if (*q >= '0' && *q <= '9') {
int expo{0};
while (*q == '0') {
++q;
}
const char *expDig{q};
while (*q >= '0' && *q <= '9') {
expo = 10 * expo + *q++ - '0';
}
if (q >= expDig + 8) {
// There's a ridiculous number of nonzero exponent digits.
// The decimal->binary conversion routine will cope with
// returning 0 or Inf, but we must ensure that "expo" didn't
// overflow back around to something legal.
expo = 10 * Real::decimalRange;
exponent_ = 0;
}
p = q; // exponent was valid
if (negExpo) {
exponent_ -= expo;
} else {
exponent_ += expo;
}
}
} break;
default:
break;
}
return true;
}
template <int PREC, int LOG10RADIX>
void BigRadixFloatingPointNumber<PREC,
LOG10RADIX>::LoseLeastSignificantDigit() {
Digit LSD{digit_[0]};
for (int j{0}; j < digits_ - 1; ++j) {
digit_[j] = digit_[j + 1];
}
digit_[digits_ - 1] = 0;
bool incr{false};
switch (rounding_) {
case RoundNearest:
incr = LSD > radix / 2 || (LSD == radix / 2 && digit_[0] % 2 != 0);
break;
case RoundUp:
incr = LSD > 0 && !isNegative_;
break;
case RoundDown:
incr = LSD > 0 && isNegative_;
break;
case RoundToZero:
break;
case RoundCompatible:
incr = LSD >= radix / 2;
break;
}
for (int j{0}; (digit_[j] += incr) == radix; ++j) {
digit_[j] = 0;
}
}
// This local utility class represents an unrounded nonnegative
// binary floating-point value with an unbiased (i.e., signed)
// binary exponent, an integer value (not a fraction) with an implied
// binary point to its *right*, and some guard bits for rounding.
template <int PREC> class IntermediateFloat {
public:
static constexpr int precision{PREC};
using IntType = common::HostUnsignedIntType<precision>;
static constexpr IntType topBit{IntType{1} << (precision - 1)};
static constexpr IntType mask{topBit + (topBit - 1)};
IntermediateFloat() {}
IntermediateFloat(const IntermediateFloat &) = default;
// Assumes that exponent_ is valid on entry, and may increment it.
// Returns the number of guard_ bits that have been determined.
template <typename UINT> bool SetTo(UINT n) {
static constexpr int nBits{CHAR_BIT * sizeof n};
if constexpr (precision >= nBits) {
value_ = n;
guard_ = 0;
return 0;
} else {
int shift{common::BitsNeededFor(n) - precision};
if (shift <= 0) {
value_ = n;
guard_ = 0;
return 0;
} else {
value_ = n >> shift;
exponent_ += shift;
n <<= nBits - shift;
guard_ = (n >> (nBits - guardBits)) | ((n << guardBits) != 0);
return shift;
}
}
}
void ShiftIn(int bit = 0) { value_ = value_ + value_ + bit; }
bool IsFull() const { return value_ >= topBit; }
void AdjustExponent(int by) { exponent_ += by; }
void SetGuard(int g) {
guard_ |= (static_cast<GuardType>(g & 6) << (guardBits - 3)) | (g & 1);
}
ConversionToBinaryResult<PREC> ToBinary(
bool isNegative, FortranRounding) const;
private:
static constexpr int guardBits{3}; // guard, round, sticky
using GuardType = int;
static constexpr GuardType oneHalf{GuardType{1} << (guardBits - 1)};
IntType value_{0};
GuardType guard_{0};
int exponent_{0};
};
template <int PREC>
ConversionToBinaryResult<PREC> IntermediateFloat<PREC>::ToBinary(
bool isNegative, FortranRounding rounding) const {
using Binary = BinaryFloatingPointNumber<PREC>;
// Create a fraction with a binary point to the left of the integer
// value_, and bias the exponent.
IntType fraction{value_};
GuardType guard{guard_};
int expo{exponent_ + Binary::exponentBias + (precision - 1)};
while (expo < 1 && (fraction > 0 || guard > oneHalf)) {
guard = (guard & 1) | (guard >> 1) |
((static_cast<GuardType>(fraction) & 1) << (guardBits - 1));
fraction >>= 1;
++expo;
}
int flags{Exact};
if (guard != 0) {
flags |= Inexact;
}
if (fraction == 0 && guard <= oneHalf) {
return {Binary{}, static_cast<enum ConversionResultFlags>(flags)};
}
// The value is nonzero; normalize it.
while (fraction < topBit && expo > 1) {
--expo;
fraction = fraction * 2 + (guard >> (guardBits - 2));
guard = (((guard >> (guardBits - 2)) & 1) << (guardBits - 1)) | (guard & 1);
}
// Apply rounding
bool incr{false};
switch (rounding) {
case RoundNearest:
incr = guard > oneHalf || (guard == oneHalf && (fraction & 1));
break;
case RoundUp:
incr = guard != 0 && !isNegative;
break;
case RoundDown:
incr = guard != 0 && isNegative;
break;
case RoundToZero:
break;
case RoundCompatible:
incr = guard >= oneHalf;
break;
}
if (incr) {
if (fraction == mask) {
// rounding causes a carry
++expo;
fraction = topBit;
} else {
++fraction;
}
}
if (expo == 1 && fraction < topBit) {
expo = 0; // subnormal
}
if (expo >= Binary::maxExponent) {
expo = Binary::maxExponent; // Inf
flags |= Overflow;
fraction = 0;
}
using Raw = typename Binary::RawType;
Raw raw = static_cast<Raw>(isNegative) << (Binary::bits - 1);
raw |= static_cast<Raw>(expo) << Binary::significandBits;
if constexpr (Binary::isImplicitMSB) {
fraction &= ~topBit;
}
raw |= fraction;
return {Binary(raw), static_cast<enum ConversionResultFlags>(flags)};
}
template <int PREC, int LOG10RADIX>
ConversionToBinaryResult<PREC>
BigRadixFloatingPointNumber<PREC, LOG10RADIX>::ConvertToBinary() {
// On entry, *this holds a multi-precision integer value in a radix of a
// large power of ten. Its radix point is defined to be to the right of its
// digits, and "exponent_" is the power of ten by which it is to be scaled.
Normalize();
if (digits_ == 0) { // zero value
return {Real{SignBit()}};
}
// The value is not zero: x = D. * 10.**E
// Shift our perspective on the radix (& decimal) point so that
// it sits to the *left* of the digits: i.e., x = .D * 10.**E
exponent_ += digits_ * log10Radix;
// Sanity checks for ridiculous exponents
static constexpr int crazy{2 * Real::decimalRange + log10Radix};
if (exponent_ < -crazy) { // underflow to +/-0.
return {Real{SignBit()}, Inexact};
} else if (exponent_ > crazy) { // overflow to +/-Inf.
return {Real{Infinity()}, Overflow};
}
// Apply any negative decimal exponent by multiplication
// by a power of two, adjusting the binary exponent to compensate.
IntermediateFloat<PREC> f;
while (exponent_ < log10Radix) {
// x = 0.D * 10.**E * 2.**(f.ex) -> 512 * 0.D * 10.**E * 2.**(f.ex-9)
f.AdjustExponent(-9);
digitLimit_ = digits_;
if (int carry{MultiplyWithoutNormalization<512>()}) {
// x = c.D * 10.**E * 2.**(f.ex) -> .cD * 10.**(E+16) * 2.**(f.ex)
PushCarry(carry);
exponent_ += log10Radix;
}
}
// Apply any positive decimal exponent greater than
// is needed to treat the topmost digit as an integer
// part by multiplying by 10 or 10000 repeatedly.
while (exponent_ > log10Radix) {
digitLimit_ = digits_;
int carry;
if (exponent_ >= log10Radix + 4) {
// x = 0.D * 10.**E * 2.**(f.ex) -> 625 * .D * 10.**(E-4) * 2.**(f.ex+4)
exponent_ -= 4;
carry = MultiplyWithoutNormalization<(5 * 5 * 5 * 5)>();
f.AdjustExponent(4);
} else {
// x = 0.D * 10.**E * 2.**(f.ex) -> 5 * .D * 10.**(E-1) * 2.**(f.ex+1)
--exponent_;
carry = MultiplyWithoutNormalization<5>();
f.AdjustExponent(1);
}
if (carry != 0) {
// x = c.D * 10.**E * 2.**(f.ex) -> .cD * 10.**(E+16) * 2.**(f.ex)
PushCarry(carry);
exponent_ += log10Radix;
}
}
// So exponent_ is now log10Radix, meaning that the
// MSD can be taken as an integer part and transferred
// to the binary result.
// x = .jD * 10.**16 * 2.**(f.ex) -> .D * j * 2.**(f.ex)
int guardShift{f.SetTo(digit_[--digits_])};
// Transfer additional bits until the result is normal.
digitLimit_ = digits_;
while (!f.IsFull()) {
// x = ((b.D)/2) * j * 2.**(f.ex) -> .D * (2j + b) * 2.**(f.ex-1)
f.AdjustExponent(-1);
std::uint32_t carry = MultiplyWithoutNormalization<2>();
f.ShiftIn(carry);
}
// Get the next few bits for rounding. Allow for some guard bits
// that may have already been set in f.SetTo() above.
int guard{0};
if (guardShift == 0) {
guard = MultiplyWithoutNormalization<4>();
} else if (guardShift == 1) {
guard = MultiplyWithoutNormalization<2>();
}
guard = guard + guard + !IsZero();
f.SetGuard(guard);
return f.ToBinary(isNegative_, rounding_);
}
template <int PREC, int LOG10RADIX>
ConversionToBinaryResult<PREC>
BigRadixFloatingPointNumber<PREC, LOG10RADIX>::ConvertToBinary(const char *&p) {
bool inexact{false};
if (ParseNumber(p, inexact)) {
auto result{ConvertToBinary()};
if (inexact) {
result.flags =
static_cast<enum ConversionResultFlags>(result.flags | Inexact);
}
return result;
} else {
// Could not parse a decimal floating-point number. p has been
// advanced over any leading spaces.
if (toupper(p[0]) == 'N' && toupper(p[1]) == 'A' && toupper(p[2]) == 'N') {
// NaN
p += 3;
return {Real{NaN()}};
} else {
// Try to parse Inf, maybe with a sign
const char *q{p};
isNegative_ = *q == '-';
if (*q == '-' || *q == '+') {
++q;
}
if (toupper(q[0]) == 'I' && toupper(q[1]) == 'N' &&
toupper(q[2]) == 'F') {
p = q + 3;
return {Real{Infinity()}};
} else {
// Invalid input
return {Real{NaN()}, Invalid};
}
}
}
}
template <int PREC>
ConversionToBinaryResult<PREC> ConvertToBinary(
const char *&p, enum FortranRounding rounding) {
return BigRadixFloatingPointNumber<PREC>{rounding}.ConvertToBinary(p);
}
template ConversionToBinaryResult<8> ConvertToBinary<8>(
const char *&, enum FortranRounding);
template ConversionToBinaryResult<11> ConvertToBinary<11>(
const char *&, enum FortranRounding);
template ConversionToBinaryResult<24> ConvertToBinary<24>(
const char *&, enum FortranRounding);
template ConversionToBinaryResult<53> ConvertToBinary<53>(
const char *&, enum FortranRounding);
template ConversionToBinaryResult<64> ConvertToBinary<64>(
const char *&, enum FortranRounding);
template ConversionToBinaryResult<113> ConvertToBinary<113>(
const char *&, enum FortranRounding);
extern "C" {
enum ConversionResultFlags ConvertDecimalToFloat(
const char **p, float *f, enum FortranRounding rounding) {
auto result{Fortran::decimal::ConvertToBinary<24>(*p, rounding)};
std::memcpy(reinterpret_cast<void *>(f),
reinterpret_cast<const void *>(&result.binary), sizeof *f);
return result.flags;
}
enum ConversionResultFlags ConvertDecimalToDouble(
const char **p, double *d, enum FortranRounding rounding) {
auto result{Fortran::decimal::ConvertToBinary<53>(*p, rounding)};
std::memcpy(reinterpret_cast<void *>(d),
reinterpret_cast<const void *>(&result.binary), sizeof *d);
return result.flags;
}
#if __x86_64__ && !defined(_MSC_VER)
enum ConversionResultFlags ConvertDecimalToLongDouble(
const char **p, long double *ld, enum FortranRounding rounding) {
auto result{Fortran::decimal::ConvertToBinary<64>(*p, rounding)};
std::memcpy(reinterpret_cast<void *>(ld),
reinterpret_cast<const void *>(&result.binary), sizeof *ld);
return result.flags;
}
#endif
}
} // namespace Fortran::decimal