forked from Perl/perl5
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathnumeric.c
1987 lines (1710 loc) · 61.3 KB
/
numeric.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/* numeric.c
*
* Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
* 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
*
* You may distribute under the terms of either the GNU General Public
* License or the Artistic License, as specified in the README file.
*
*/
/*
* "That only makes eleven (plus one mislaid) and not fourteen,
* unless wizards count differently to other people." --Beorn
*
* [p.115 of _The Hobbit_: "Queer Lodgings"]
*/
/*
=for apidoc_section Numeric Functions
=cut
This file contains all the stuff needed by perl for manipulating numeric
values, including such things as replacements for the OS's atof() function
*/
#include "EXTERN.h"
#define PERL_IN_NUMERIC_C
#include "perl.h"
#ifdef Perl_strtod
PERL_STATIC_INLINE NV
S_strtod(pTHX_ const char * const s, char ** e)
{
    /* Convert the string 's' to an NV using whichever libc-style converter
     * this build selects, passing 'e' straight through as that converter's
     * end-pointer argument.  The call is bracketed by the LC_NUMERIC
     * store/restore macros. */
    DECLARATION_FOR_LC_NUMERIC_MANIPULATION;
    NV parsed;

    STORE_LC_NUMERIC_SET_TO_NEEDED();

#  if defined(USE_QUADMATH)

    parsed = strtoflt128(s, e);

#  elif defined(HAS_STRTOLD) && defined(HAS_LONG_DOUBLE)     \
     && defined(USE_LONG_DOUBLE) && defined(__MINGW64_VERSION_MAJOR)

    /***********************************************
     We are unable to use strtold because of
      https://sourceforge.net/p/mingw-w64/bugs/711/
                            &
      https://sourceforge.net/p/mingw-w64/bugs/725/

     but __mingw_strtold is fine.
    ***********************************************/
    parsed = __mingw_strtold(s, e);

#  elif defined(HAS_STRTOLD) && defined(HAS_LONG_DOUBLE)     \
     && defined(USE_LONG_DOUBLE)

    parsed = strtold(s, e);

#  elif defined(HAS_STRTOD)

    parsed = strtod(s, e);

#  else
#    error No strtod() equivalent found
#  endif

    RESTORE_LC_NUMERIC();

    return parsed;
}
#endif /* #ifdef Perl_strtod */
/*
=for apidoc my_strtod
This function is equivalent to the libc strtod() function, and is available
even on platforms that lack plain strtod(). Its return value is the best
available precision depending on platform capabilities and F<Configure>
options.
It properly handles the locale radix character, meaning it expects a dot except
when called from within the scope of S<C<use locale>>, in which case the radix
character should be that specified by the current locale.
The synonym Strtod() may be used instead.
=cut
*/
NV
Perl_my_strtod(const char * const s, char **e)
{
    /* strtod() work-alike.  When Perl_strtod is defined the work is handed
     * to S_strtod(); otherwise my_atof2() does the parsing.  In the fallback,
     * if 'e' is non-NULL, '*e' receives the pointer my_atof2() returned, and
     * the result is forced to 0.0 when that pointer is NULL. */
    dTHX;

    PERL_ARGS_ASSERT_MY_STRTOD;

#ifdef Perl_strtod

    return S_strtod(aTHX_ s, e);

#else

    {
        NV result;

        /* Hold my_atof2()'s return value directly.  (This used to be
         * stored through a 'char **' that had been initialized to NULL,
         * which dereferenced a NULL pointer.) */
        char * const end_ptr = my_atof2(s, &result);

        if (e) {
            *e = end_ptr;
        }

        if (! end_ptr) {
            result = 0.0;
        }

        return result;
    }

#endif

}
U32
Perl_cast_ulong(NV f)
{
    /* Convert an NV to a U32 without relying on an out-of-range cast.
     * Negative input goes by way of I32 (clamped at I32_MIN); input at or
     * above U32_MAX_P1 gives U32_MAX; NaN gives 0. */

    if (f < 0.0) {
        if (f < I32_MIN)
            return (U32) I32_MIN;
        return (U32)(I32) f;
    }

    if (! (f < U32_MAX_P1)) {
        /* Either too large, or a NaN (which fails every comparison) */
        if (f > 0)
            return U32_MAX;
        return 0;   /* NaN */
    }

#if CASTFLAGS & 2
    /* With this CASTFLAGS bit set, a value in the upper half of the range
     * is reduced by U32_MAX_P1_HALF before the cast, and the top bit is
     * OR-ed back in afterwards */
    if (f >= U32_MAX_P1_HALF) {
        f -= U32_MAX_P1_HALF;
        return ((U32) f) | (1 + (U32_MAX >> 1));
    }
#endif

    return (U32) f;
}
I32
Perl_cast_i32(NV f)
{
    /* Convert an NV to an I32.  Input below I32_MAX_P1 is cast directly
     * (clamped at I32_MIN); input from there up to U32_MAX_P1 is cast via
     * U32; anything larger gives (I32)U32_MAX; NaN gives 0. */

    if (f < I32_MAX_P1) {
        if (f < I32_MIN)
            return I32_MIN;
        return (I32) f;
    }

    if (! (f < U32_MAX_P1)) {
        /* Either too large, or a NaN (which fails every comparison) */
        if (f > 0)
            return (I32)U32_MAX;
        return 0;   /* NaN */
    }

#if CASTFLAGS & 2
    /* With this CASTFLAGS bit set, a value in the upper half of the U32
     * range is reduced by U32_MAX_P1_HALF before the cast, and the top bit
     * is OR-ed back in afterwards */
    if (f >= U32_MAX_P1_HALF) {
        f -= U32_MAX_P1_HALF;
        return (I32)(((U32) f) | (1 + (U32_MAX >> 1)));
    }
#endif

    return (I32)(U32) f;
}
IV
Perl_cast_iv(NV f)
{
    /* Convert an NV to an IV.  Input below IV_MAX_P1 is cast directly
     * (clamped at IV_MIN); input from there up to UV_MAX_P1 is cast via UV;
     * anything larger gives (IV)UV_MAX; NaN gives 0. */

    if (f < IV_MAX_P1) {
        if (f < IV_MIN)
            return IV_MIN;
        return (IV) f;
    }

    if (! (f < UV_MAX_P1)) {
        /* Either too large, or a NaN (which fails every comparison) */
        if (f > 0)
            return (IV)UV_MAX;
        return 0;   /* NaN */
    }

#if CASTFLAGS & 2
    /* For future flexibility allowing for sizeof(UV) >= sizeof(IV) */
    if (f >= UV_MAX_P1_HALF) {
        f -= UV_MAX_P1_HALF;
        return (IV)(((UV) f) | (1 + (UV_MAX >> 1)));
    }
#endif

    return (IV)(UV) f;
}
UV
Perl_cast_uv(NV f)
{
    /* Convert an NV to a UV.  Negative input goes by way of IV (clamped at
     * IV_MIN); input at or above UV_MAX_P1 gives UV_MAX; NaN gives 0. */

    if (f < 0.0) {
        if (f < IV_MIN)
            return (UV) IV_MIN;
        return (UV)(IV) f;
    }

    if (! (f < UV_MAX_P1)) {
        /* Either too large, or a NaN (which fails every comparison) */
        if (f > 0)
            return UV_MAX;
        return 0;   /* NaN */
    }

#if CASTFLAGS & 2
    /* With this CASTFLAGS bit set, a value in the upper half of the range
     * is reduced by UV_MAX_P1_HALF before the cast, and the top bit is
     * OR-ed back in afterwards */
    if (f >= UV_MAX_P1_HALF) {
        f -= UV_MAX_P1_HALF;
        return ((UV) f) | (1 + (UV_MAX >> 1));
    }
#endif

    return (UV) f;
}
/*
=for apidoc grok_bin
converts a string representing a binary number to numeric form.
On entry C<start> and C<*len_p> give the string to scan, C<*flags> gives
conversion flags, and C<result> should be C<NULL> or a pointer to an NV. The
scan stops at the end of the string, or at just before the first invalid
character. Unless C<PERL_SCAN_SILENT_ILLDIGIT> is set in C<*flags>,
encountering an invalid character (except NUL) will also trigger a warning. On
return C<*len_p> is set to the length of the scanned string, and C<*flags>
gives output flags.
If the value is <= C<UV_MAX> it is returned as a UV, the output flags are clear,
and nothing is written to C<*result>. If the value is > C<UV_MAX>, C<grok_bin>
returns C<UV_MAX>, sets C<PERL_SCAN_GREATER_THAN_UV_MAX> in the output flags,
and writes an approximation of the correct value into C<*result> (which is an
NV; or the approximation is discarded if C<result> is NULL).
The binary number may optionally be prefixed with C<"0b"> or C<"b"> unless
C<PERL_SCAN_DISALLOW_PREFIX> is set in C<*flags> on entry.
If C<PERL_SCAN_ALLOW_UNDERSCORES> is set in C<*flags> then any or all pairs of
digits may be separated from each other by a single underscore; also a single
leading underscore is accepted.
=for apidoc Amnh||PERL_SCAN_ALLOW_UNDERSCORES
=for apidoc Amnh||PERL_SCAN_DISALLOW_PREFIX
=for apidoc Amnh||PERL_SCAN_GREATER_THAN_UV_MAX
=for apidoc Amnh||PERL_SCAN_SILENT_ILLDIGIT
=cut
Not documented yet because experimental is C<PERL_SCAN_SILENT_NON_PORTABLE>
which suppresses any message for non-portable numbers that are still valid
on this platform.
*/
UV
Perl_grok_bin(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result)
{
    /* Run the argument assertions, then pass all four arguments, unchanged,
     * to grok_bin() and return whatever it returns */
    UV scanned;

    PERL_ARGS_ASSERT_GROK_BIN;

    scanned = grok_bin(start, len_p, flags, result);
    return scanned;
}
/*
=for apidoc grok_hex
converts a string representing a hex number to numeric form.
On entry C<start> and C<*len_p> give the string to scan, C<*flags> gives
conversion flags, and C<result> should be C<NULL> or a pointer to an NV. The
scan stops at the end of the string, or at just before the first invalid
character. Unless C<PERL_SCAN_SILENT_ILLDIGIT> is set in C<*flags>,
encountering an invalid character (except NUL) will also trigger a warning. On
return C<*len_p> is set to the length of the scanned string, and C<*flags>
gives output flags.
If the value is <= C<UV_MAX> it is returned as a UV, the output flags are clear,
and nothing is written to C<*result>. If the value is > C<UV_MAX>, C<grok_hex>
returns C<UV_MAX>, sets C<PERL_SCAN_GREATER_THAN_UV_MAX> in the output flags,
and writes an approximation of the correct value into C<*result> (which is an
NV; or the approximation is discarded if C<result> is NULL).
The hex number may optionally be prefixed with C<"0x"> or C<"x"> unless
C<PERL_SCAN_DISALLOW_PREFIX> is set in C<*flags> on entry.
If C<PERL_SCAN_ALLOW_UNDERSCORES> is set in C<*flags> then any or all pairs of
digits may be separated from each other by a single underscore; also a single
leading underscore is accepted.
=cut
Not documented yet because experimental is C<PERL_SCAN_SILENT_NON_PORTABLE>
which suppresses any message for non-portable numbers, but which are valid
on this platform. But, C<*flags> will have the corresponding flag bit set.
*/
UV
Perl_grok_hex(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result)
{
    /* Run the argument assertions, then pass all four arguments, unchanged,
     * to grok_hex() and return whatever it returns */
    UV scanned;

    PERL_ARGS_ASSERT_GROK_HEX;

    scanned = grok_hex(start, len_p, flags, result);
    return scanned;
}
/*
=for apidoc grok_oct
converts a string representing an octal number to numeric form.
On entry C<start> and C<*len_p> give the string to scan, C<*flags> gives
conversion flags, and C<result> should be C<NULL> or a pointer to an NV. The
scan stops at the end of the string, or at just before the first invalid
character. Unless C<PERL_SCAN_SILENT_ILLDIGIT> is set in C<*flags>,
encountering an invalid character (except NUL) will also trigger a warning. On
return C<*len_p> is set to the length of the scanned string, and C<*flags>
gives output flags.
If the value is <= C<UV_MAX> it is returned as a UV, the output flags are clear,
and nothing is written to C<*result>. If the value is > C<UV_MAX>, C<grok_oct>
returns C<UV_MAX>, sets C<PERL_SCAN_GREATER_THAN_UV_MAX> in the output flags,
and writes an approximation of the correct value into C<*result> (which is an
NV; or the approximation is discarded if C<result> is NULL).
If C<PERL_SCAN_ALLOW_UNDERSCORES> is set in C<*flags> then any or all pairs of
digits may be separated from each other by a single underscore; also a single
leading underscore is accepted.
The C<PERL_SCAN_DISALLOW_PREFIX> flag is always treated as being set for
this function.
=cut
Not documented yet because experimental is C<PERL_SCAN_SILENT_NON_PORTABLE>
which suppresses any message for non-portable numbers, but which are valid
on this platform.
*/
UV
Perl_grok_oct(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result)
{
    /* Run the argument assertions, then pass all four arguments, unchanged,
     * to grok_oct() and return whatever it returns */
    UV scanned;

    PERL_ARGS_ASSERT_GROK_OCT;

    scanned = grok_oct(start, len_p, flags, result);
    return scanned;
}
STATIC void
S_output_non_portable(pTHX_ const U8 base)
{
    /* Issue the 'portable'-category warning saying that a number written in
     * the given input base does not fit in 32 bits.  A base of 2 or 8
     * selects the binary or octal wording; any other value selects the
     * hexadecimal wording. */

    const char * description;

    PERL_ARGS_ASSERT_OUTPUT_NON_PORTABLE;

    switch (base) {
      case 2:
        description = "Binary number > 0b11111111111111111111111111111111";
        break;
      case 8:
        description = "Octal number > 037777777777";
        break;
      default:
        description = "Hexadecimal number > 0xffffffff";
        break;
    }

    /* Also there are listings for the other two.  That's because, since they
     * are the first word, it would be hard for a user to find them there
     * starting with a %s */
    /* diag_listed_as: Hexadecimal number > 0xffffffff non-portable */
    Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE), "%s non-portable", description);
}
UV
Perl_grok_bin_oct_hex(pTHX_ const char *start,
STRLEN *len_p,
I32 *flags,
NV *result,
const unsigned shift, /* 1 for binary; 3 for octal;
4 for hex */
const U8 class_bit,
const char prefix
)
{
/* Scan the '*len_p' bytes at 'start' as a number whose base is 1 << shift
 * and return its value.
 *
 * start      the text to scan
 * len_p      in: number of bytes available; out: number of bytes consumed,
 *            counted from 'start' (so including any skipped prefix).  Two
 *            early returns below leave '*len_p' as it was on entry: the one
 *            for nothing left after the prefix, and the one for at most 8
 *            remaining bytes that are all digits.
 * flags      in: PERL_SCAN_* request bits; out: cleared, then set to report
 *            overflow, non-portability, or (if asked for with
 *            PERL_SCAN_NOTIFY_ILLDIGIT) an illegal digit
 * result     if non-NULL, receives the NV approximation when the value
 *            overflows a UV; not written otherwise
 * shift      bits per digit
 * class_bit  passed to _generic_isCC() to decide whether a byte is a digit
 *            of this base
 * prefix     letter that, alone or after a '0', may precede the digits
 *            (tested with isALPHA_FOLD_EQ) unless PERL_SCAN_DISALLOW_PREFIX
 *            is set
 *
 * Returns the value, or UV_MAX if it overflowed (in which case
 * PERL_SCAN_GREATER_THAN_UV_MAX is set in '*flags'). */
const char *s0 = start;
const char *s;
STRLEN len = *len_p;
STRLEN bytes_so_far; /* How many real digits have been processed */
UV value = 0;
NV value_nv = 0;
const PERL_UINT_FAST8_T base = 1 << shift; /* 2, 8, or 16 */
const UV max_div= UV_MAX / base; /* Value above which, the next digit
processed would overflow */
const I32 input_flags = *flags;
const bool allow_underscores =
cBOOL(input_flags & PERL_SCAN_ALLOW_UNDERSCORES);
bool overflowed = FALSE;
/* In overflows, this keeps track of how much to multiply the overflowed NV
* by as we continue to parse the remaining digits */
NV factor = 0;
/* This function unifies the core of grok_bin, grok_oct, and grok_hex. It
* is optimized for hex conversion. For example, it uses XDIGIT_VALUE to
* find the numeric value of a digit. That requires more instructions than
* OCTAL_VALUE would, but gives the same result for the narrowed range of
* octal digits; same for binary. If it were ever critical to squeeze more
* performance from this, the function could become grok_hex, and a regen
* perl script could scan it and write out two edited copies for the other
* two functions. That would improve the performance of all three
* somewhat. Besides eliminating XDIGIT_VALUE for the other two, extra
* parameters are now passed to this to avoid conditionals. Those could
* become declared consts, like:
* const U8 base = 16;
* const U8 base = 8;
* ...
*/
PERL_ARGS_ASSERT_GROK_BIN_OCT_HEX;
ASSUME(inRANGE(shift, 1, 4) && shift != 2);
/* Clear output flags; unlikely to find a problem that sets them */
*flags = 0;
if (!(input_flags & PERL_SCAN_DISALLOW_PREFIX)) {
/* strip off leading b or 0b; x or 0x.
for compatibility silently suffer "b" and "0b" as valid binary; "x"
and "0x" as valid hex numbers. */
if (len >= 1) {
if (isALPHA_FOLD_EQ(s0[0], prefix)) {
s0++;
len--;
}
else if (len >= 2 && s0[0] == '0' && (isALPHA_FOLD_EQ(s0[1], prefix))) {
s0+=2;
len-=2;
}
}
}
s = s0; /* s0 potentially advanced from 'start' */
/* Unroll the loop so that the first 8 digits are branchless except for the
* switch. A ninth hex one overflows a 32 bit word. */
/* 'default' is listed first so that any length of 8 or more enters at the
 * top and falls through all eight digit steps; a shorter length enters at
 * the label that leaves exactly that many steps to run.  A non-digit
 * breaks out, leaving the byte for the general loop below to examine. */
switch (len) {
case 0:
return 0;
default:
if (UNLIKELY(! _generic_isCC(*s, class_bit))) break;
value = (value << shift) | XDIGIT_VALUE(*s);
s++;
/* FALLTHROUGH */
case 7:
if (UNLIKELY(! _generic_isCC(*s, class_bit))) break;
value = (value << shift) | XDIGIT_VALUE(*s);
s++;
/* FALLTHROUGH */
case 6:
if (UNLIKELY(! _generic_isCC(*s, class_bit))) break;
value = (value << shift) | XDIGIT_VALUE(*s);
s++;
/* FALLTHROUGH */
case 5:
if (UNLIKELY(! _generic_isCC(*s, class_bit))) break;
value = (value << shift) | XDIGIT_VALUE(*s);
s++;
/* FALLTHROUGH */
case 4:
if (UNLIKELY(! _generic_isCC(*s, class_bit))) break;
value = (value << shift) | XDIGIT_VALUE(*s);
s++;
/* FALLTHROUGH */
case 3:
if (UNLIKELY(! _generic_isCC(*s, class_bit))) break;
value = (value << shift) | XDIGIT_VALUE(*s);
s++;
/* FALLTHROUGH */
case 2:
if (UNLIKELY(! _generic_isCC(*s, class_bit))) break;
value = (value << shift) | XDIGIT_VALUE(*s);
s++;
/* FALLTHROUGH */
case 1:
if (UNLIKELY(! _generic_isCC(*s, class_bit))) break;
value = (value << shift) | XDIGIT_VALUE(*s);
/* Reaching here with no more than 8 bytes to scan means every one of
 * them was a digit, so the whole input has been consumed and '*len_p'
 * is already correct; at 4 bits or fewer per digit the value also
 * fits in 32 bits */
if (LIKELY(len <= 8)) {
return value;
}
s++;
break;
}
/* The bytes consumed by the switch above were all digits */
bytes_so_far = s - s0;
/* NOTE(review): 'value_nv' is still 0 the first time it is multiplied by
 * 'factor' below, and 'factor' is then reset to 1 << shift, so this
 * starting value appears not to influence the final result */
factor = shift << bytes_so_far;
len -= bytes_so_far;
/* General loop over whatever remains.  Because 'len' is decremented in the
 * loop condition, inside the body it is the count of bytes after '*s'. */
for (; len--; s++) {
if (_generic_isCC(*s, class_bit)) {
/* Write it in this wonky order with a goto to attempt to get the
compiler to make the common case integer-only loop pretty tight.
With gcc seems to be much straighter code than old scan_hex.
(khw suspects that adding a LIKELY() just above would do the
same thing) */
redo:
if (LIKELY(value <= max_div)) {
value = (value << shift) | XDIGIT_VALUE(*s);
/* Note XDIGIT_VALUE() is branchless, works on binary
* and octal as well, so can be used here, without
* slowing those down */
factor *= 1 << shift;
continue;
}
/* Bah. We are about to overflow. Instead, add the unoverflowed
* value to an NV that contains an approximation to the correct
* value. Each time through the loop we have increased 'factor' so
* that it gives how much the current approximation needs to
* effectively be shifted to make room for this new value */
value_nv *= factor;
value_nv += (NV) value;
/* Then we keep accumulating digits, until all are parsed. We
* start over using the current input value. This will be added to
* 'value_nv' eventually, either when all digits are gone, or we
* have overflowed this fresh start. */
value = XDIGIT_VALUE(*s);
factor = 1 << shift;
/* The overflow warning is raised only on the first overflow */
if (! overflowed) {
overflowed = TRUE;
if ( ! (input_flags & PERL_SCAN_SILENT_OVERFLOW)
&& ckWARN_d(WARN_OVERFLOW))
{
Perl_warner(aTHX_ packWARN(WARN_OVERFLOW),
"Integer overflow in %s number",
(base == 16) ? "hexadecimal"
: (base == 2)
? "binary"
: "octal");
}
}
continue;
}
/* An underscore is skipped only if allowed, and only if a digit follows
 * it; the 'len' test makes sure there is a following byte to look at */
if ( *s == '_'
&& len
&& allow_underscores
&& _generic_isCC(s[1], class_bit)
/* Don't allow a leading underscore if the only-medial bit is
* set */
&& ( LIKELY(s > s0)
|| UNLIKELY((input_flags & PERL_SCAN_ALLOW_MEDIAL_UNDERSCORES)
!= PERL_SCAN_ALLOW_MEDIAL_UNDERSCORES)))
{
/* Step onto the digit after the underscore and jump into the digit
 * handling above */
--len;
++s;
goto redo;
}
/* Anything else ends the scan.  A NUL ends it silently; any other byte
 * may produce a warning and/or set the notify flag first. */
if (*s) {
if ( ! (input_flags & PERL_SCAN_SILENT_ILLDIGIT)
&& ckWARN(WARN_DIGIT))
{
if (base != 8) {
Perl_warner(aTHX_ packWARN(WARN_DIGIT),
"Illegal %s digit '%c' ignored",
((base == 2)
? "binary"
: "hexadecimal"),
*s);
}
else if (isDIGIT(*s)) { /* octal base */
/* Allow \octal to work the DWIM way (that is, stop
* scanning as soon as non-octal characters are seen,
* complain only if someone seems to want to use the digits
* eight and nine. Since we know it is not octal, then if
* isDIGIT, must be an 8 or 9). */
Perl_warner(aTHX_ packWARN(WARN_DIGIT),
"Illegal octal digit '%c' ignored", *s);
}
}
if (input_flags & PERL_SCAN_NOTIFY_ILLDIGIT) {
*flags |= PERL_SCAN_NOTIFY_ILLDIGIT;
}
}
break;
}
/* Report how far the scan got, measured from the original 'start' */
*len_p = s - start;
if (LIKELY(! overflowed)) {
#if UVSIZE > 4
if ( UNLIKELY(value > 0xffffffff)
&& ! (input_flags & PERL_SCAN_SILENT_NON_PORTABLE))
{
output_non_portable(base);
*flags |= PERL_SCAN_SILENT_NON_PORTABLE;
}
#endif
return value;
}
/* Overflowed: Calculate the final overflow approximation */
value_nv *= factor;
value_nv += (NV) value;
output_non_portable(base);
*flags |= PERL_SCAN_GREATER_THAN_UV_MAX
| PERL_SCAN_SILENT_NON_PORTABLE;
if (result)
*result = value_nv;
return UV_MAX;
}
/*
=for apidoc scan_bin
For backwards compatibility. Use C<grok_bin> instead.
=for apidoc scan_hex
For backwards compatibility. Use C<grok_hex> instead.
=for apidoc scan_oct
For backwards compatibility. Use C<grok_oct> instead.
=cut
*/
NV
Perl_scan_bin(pTHX_ const char *start, STRLEN len, STRLEN *retlen)
{
    /* Backwards-compatibility wrapper around grok_bin().
     *
     * start   the text to scan
     * len     number of bytes available at 'start'
     * retlen  in: a non-zero value requests PERL_SCAN_ALLOW_UNDERSCORES;
     *         out: the length that grok_bin() reports back
     *
     * Returns the value as an NV: grok_bin()'s NV result if it set
     * PERL_SCAN_GREATER_THAN_UV_MAX, otherwise its UV return value. */
    NV rnv;
    I32 flags;
    UV ruv;

    /* Run the argument checks before '*retlen' is read, so that they can
     * actually catch a bad pointer */
    PERL_ARGS_ASSERT_SCAN_BIN;

    flags = *retlen ? PERL_SCAN_ALLOW_UNDERSCORES : 0;
    ruv = grok_bin (start, &len, &flags, &rnv);

    *retlen = len;
    return (flags & PERL_SCAN_GREATER_THAN_UV_MAX) ? rnv : (NV)ruv;
}
NV
Perl_scan_oct(pTHX_ const char *start, STRLEN len, STRLEN *retlen)
{
    /* Backwards-compatibility wrapper around grok_oct().
     *
     * start   the text to scan
     * len     number of bytes available at 'start'
     * retlen  in: a non-zero value requests PERL_SCAN_ALLOW_UNDERSCORES;
     *         out: the length that grok_oct() reports back
     *
     * Returns the value as an NV: grok_oct()'s NV result if it set
     * PERL_SCAN_GREATER_THAN_UV_MAX, otherwise its UV return value. */
    NV rnv;
    I32 flags;
    UV ruv;

    /* Run the argument checks before '*retlen' is read, so that they can
     * actually catch a bad pointer */
    PERL_ARGS_ASSERT_SCAN_OCT;

    flags = *retlen ? PERL_SCAN_ALLOW_UNDERSCORES : 0;
    ruv = grok_oct (start, &len, &flags, &rnv);

    *retlen = len;
    return (flags & PERL_SCAN_GREATER_THAN_UV_MAX) ? rnv : (NV)ruv;
}
NV
Perl_scan_hex(pTHX_ const char *start, STRLEN len, STRLEN *retlen)
{
    /* Backwards-compatibility wrapper around grok_hex().
     *
     * start   the text to scan
     * len     number of bytes available at 'start'
     * retlen  in: a non-zero value requests PERL_SCAN_ALLOW_UNDERSCORES;
     *         out: the length that grok_hex() reports back
     *
     * Returns the value as an NV: grok_hex()'s NV result if it set
     * PERL_SCAN_GREATER_THAN_UV_MAX, otherwise its UV return value. */
    NV rnv;
    I32 flags;
    UV ruv;

    /* Run the argument checks before '*retlen' is read, so that they can
     * actually catch a bad pointer */
    PERL_ARGS_ASSERT_SCAN_HEX;

    flags = *retlen ? PERL_SCAN_ALLOW_UNDERSCORES : 0;
    ruv = grok_hex (start, &len, &flags, &rnv);

    *retlen = len;
    return (flags & PERL_SCAN_GREATER_THAN_UV_MAX) ? rnv : (NV)ruv;
}
/*
=for apidoc grok_numeric_radix
Scan and skip for a numeric decimal separator (radix).
=cut
*/
bool
Perl_grok_numeric_radix(pTHX_ const char **sp, const char *send)
{
    /* If the text at '*sp' (which ends just before 'send') begins with a
     * radix string, advance '*sp' past it and return TRUE; otherwise leave
     * '*sp' alone and return FALSE.  When built with USE_LOCALE_NUMERIC and
     * IN_LC(LC_NUMERIC) holds, the contents of PL_numeric_radix_sv are
     * tried first; a plain "." is always tried as a fallback. */

    PERL_ARGS_ASSERT_GROK_NUMERIC_RADIX;

#ifdef USE_LOCALE_NUMERIC

    if (IN_LC(LC_NUMERIC)) {
        STRLEN len;
        char * radix;
        bool matches_radix = FALSE;
        DECLARATION_FOR_LC_NUMERIC_MANIPULATION;

        STORE_LC_NUMERIC_FORCE_TO_UNDERLYING();

        /* Take a private copy of the radix string, so that it can still be
         * used after RESTORE_LC_NUMERIC() */
        radix = SvPV(PL_numeric_radix_sv, len);
        radix = savepvn(radix, len);

        RESTORE_LC_NUMERIC();

        /* Compare the radix length against the number of bytes remaining,
         * rather than computing '*sp + len', which could form a pointer
         * beyond the end of the buffer (undefined behaviour in C) */
        if (*sp <= send && len <= (STRLEN) (send - *sp)) {
            matches_radix = memEQ(*sp, radix, len);
        }

        Safefree(radix);

        if (matches_radix) {
            *sp += len;
            return TRUE;
        }
    }

#endif

    /* always try "." if numeric radix didn't match because
     * we may have data from different locales mixed */
    if (*sp < send && **sp == '.') {
        ++*sp;
        return TRUE;
    }

    return FALSE;
}
/*
=for apidoc grok_infnan
Helper for C<grok_number()>, accepts various ways of spelling "infinity"
or "not a number", and returns one of the following flag combinations:
IS_NUMBER_INFINITY
IS_NUMBER_NAN
IS_NUMBER_INFINITY | IS_NUMBER_NEG
IS_NUMBER_NAN | IS_NUMBER_NEG
0
possibly |-ed with C<IS_NUMBER_TRAILING>.
If an infinity or a not-a-number is recognized, C<*sp> will point to
one byte past the end of the recognized string. If the recognition fails,
zero is returned, and C<*sp> will not move.
=for apidoc Amnh|bool|IS_NUMBER_GREATER_THAN_UV_MAX
=for apidoc Amnh|bool|IS_NUMBER_INFINITY
=for apidoc Amnh|bool|IS_NUMBER_IN_UV
=for apidoc Amnh|bool|IS_NUMBER_NAN
=for apidoc Amnh|bool|IS_NUMBER_NEG
=for apidoc Amnh|bool|IS_NUMBER_NOT_INT
=cut
*/
int
Perl_grok_infnan(pTHX_ const char** sp, const char* send)
{
    /* Try to recognize a spelling of infinity or NaN in the text that starts
     * at '*sp' and ends just before 'send'.
     *
     * Returns 0 if nothing is recognized.  Otherwise returns
     * IS_NUMBER_INFINITY or IS_NUMBER_NAN, together with IS_NUMBER_NOT_INT,
     * plus IS_NUMBER_NEG if there was a leading '-', plus IS_NUMBER_TRAILING
     * if unexpected text followed.
     *
     * '*sp' is written only by the final statement of the function.  Every
     * early 'return' below, including those that return non-zero flags,
     * leaves '*sp' as it was on entry.
     *
     * When neither NV_INF nor NV_NAN is defined, nothing is examined and 0
     * is returned. */
    const char* s = *sp;
    int flags = 0;
#if defined(NV_INF) || defined(NV_NAN)
    bool odh = FALSE; /* one-dot-hash: 1.#INF */

    PERL_ARGS_ASSERT_GROK_INFNAN;

    if (*s == '+') {
        s++; if (s == send) return 0;
    }
    else if (*s == '-') {
        flags |= IS_NUMBER_NEG; /* Yes, -NaN happens. Incorrect but happens. */
        s++; if (s == send) return 0;
    }

    if (*s == '1') {
        /* Visual C: 1.#SNAN, -1.#QNAN, 1#INF, 1.#IND (maybe also 1.#NAN)
         * Let's keep the dot optional. */
        s++; if (s == send) return 0;
        if (*s == '.') {
            s++; if (s == send) return 0;
        }
        if (*s == '#') {
            s++; if (s == send) return 0;
        } else
            return 0;
        odh = TRUE;
    }

    if (isALPHA_FOLD_EQ(*s, 'I')) {
        /* INF or IND (1.#IND is "indeterminate", a certain type of NAN) */

        s++; if (s == send || isALPHA_FOLD_NE(*s, 'N')) return 0;
        s++; if (s == send) return 0;
        if (isALPHA_FOLD_EQ(*s, 'F')) {
            s++;
            if (s < send && (isALPHA_FOLD_EQ(*s, 'I'))) {
                /* "INFI...": anything short of the full "INFINITY" is
                 * reported as infinity followed by trailing text */
                int fail =
                    flags | IS_NUMBER_INFINITY | IS_NUMBER_NOT_INT | IS_NUMBER_TRAILING;
                s++; if (s == send || isALPHA_FOLD_NE(*s, 'N')) return fail;
                s++; if (s == send || isALPHA_FOLD_NE(*s, 'I')) return fail;
                s++; if (s == send || isALPHA_FOLD_NE(*s, 'T')) return fail;
                s++; if (s == send || isALPHA_FOLD_NE(*s, 'Y')) return fail;
                s++;
            } else if (odh) {
                while (s < send && *s == '0') { /* 1.#INF00 */
                    s++;
                }
            }
            while (s < send && isSPACE(*s))
                s++;
            if (s < send && *s) {
                flags |= IS_NUMBER_TRAILING;
            }
            flags |= IS_NUMBER_INFINITY | IS_NUMBER_NOT_INT;
        }
        else if (isALPHA_FOLD_EQ(*s, 'D') && odh) { /* 1.#IND */
            s++;
            flags |= IS_NUMBER_NAN | IS_NUMBER_NOT_INT;
            while (s < send && *s == '0') { /* 1.#IND00 */
                s++;
            }
            if (s < send && *s) {
                flags |= IS_NUMBER_TRAILING;
            }
        } else
            return 0;
    }
    else {
        /* Maybe NAN of some sort */

        if (isALPHA_FOLD_EQ(*s, 'S') || isALPHA_FOLD_EQ(*s, 'Q')) {
            /* snan, qNaN */
            /* XXX do something with the snan/qnan difference */
            s++; if (s == send) return 0;
        }

        if (isALPHA_FOLD_EQ(*s, 'N')) {
            s++; if (s == send || isALPHA_FOLD_NE(*s, 'A')) return 0;
            s++; if (s == send || isALPHA_FOLD_NE(*s, 'N')) return 0;
            s++;

            flags |= IS_NUMBER_NAN | IS_NUMBER_NOT_INT;
            if (s == send) {
                return flags;
            }

            /* NaN can be followed by various stuff (NaNQ, NaNS), but
             * there are also multiple different NaN values, and some
             * implementations output the "payload" values,
             * e.g. NaN123, NAN(abc), while some legacy implementations
             * have weird stuff like NaN%. */
            if (isALPHA_FOLD_EQ(*s, 'q') ||
                isALPHA_FOLD_EQ(*s, 's')) {
                /* "nanq" or "nans" are ok, though generating
                 * these portably is tricky. */
                s++;
                if (s == send) {
                    return flags;
                }
            }
            if (*s == '(') {
                /* C99 style "nan(123)" or Perlish equivalent "nan($uv)". */
                const char *t;
                s++;
                if (s == send) {
                    return flags | IS_NUMBER_TRAILING;
                }
                t = s + 1;
                while (t < send && *t && *t != ')') {
                    t++;
                }
                if (t == send) {
                    return flags | IS_NUMBER_TRAILING;
                }
                if (*t == ')') {
                    int nantype;
                    UV nanval;
                    if (s[0] == '0' && s + 2 < t &&
                        isALPHA_FOLD_EQ(s[1], 'x') &&
                        isXDIGIT(s[2])) {
                        STRLEN len = t - s;
                        I32 scan_flags = PERL_SCAN_ALLOW_UNDERSCORES;
                        nanval = grok_hex(s, &len, &scan_flags, NULL);
                        if ((scan_flags & PERL_SCAN_GREATER_THAN_UV_MAX)) {
                            nantype = 0;
                        } else {
                            nantype = IS_NUMBER_IN_UV;
                        }
                        s += len;
                    } else if (s[0] == '0' && s + 2 < t &&
                               isALPHA_FOLD_EQ(s[1], 'b') &&
                               (s[2] == '0' || s[2] == '1')) {
                        STRLEN len = t - s;
                        I32 scan_flags = PERL_SCAN_ALLOW_UNDERSCORES;
                        nanval = grok_bin(s, &len, &scan_flags, NULL);
                        if ((scan_flags & PERL_SCAN_GREATER_THAN_UV_MAX)) {
                            nantype = 0;
                        } else {
                            nantype = IS_NUMBER_IN_UV;
                        }
                        s += len;
                    } else {
                        const char *u;
                        nantype =
                            grok_number_flags(s, t - s, &nanval,
                                              PERL_SCAN_TRAILING |
                                              PERL_SCAN_ALLOW_UNDERSCORES);
                        /* Unfortunately grok_number_flags() doesn't
                         * tell how far we got and the ')' will always
                         * be "trailing", so we need to double-check
                         * whether we had something dubious. */
                        for (u = s; u < t; u++) {
                            if (!isDIGIT(*u)) {
                                flags |= IS_NUMBER_TRAILING;
                                break;
                            }
                        }
                        s = u;
                    }

                    /* XXX Doesn't do octal: nan("0123").
                     * Probably not a big loss. */

                    /* The payload is acceptable only if it is an integer
                     * that fits in a UV.  This must be a bit test: with a
                     * logical AND the check degenerates to '!nantype', and
                     * a decimal payload too big for a UV slips through,
                     * even though the hex and binary branches above reject
                     * that same overflow by setting nantype to 0. */
                    if ((nantype & IS_NUMBER_NOT_INT) ||
                        !(nantype & IS_NUMBER_IN_UV)) {
                        /* XXX the nanval is currently unused, that is,
                         * not inserted as the NaN payload of the NV.
                         * But the above code already parses the C99
                         * nan(...)  format.  See below, and see also
                         * the nan() in POSIX.xs.
                         *
                         * Certain configuration combinations where
                         * NVSIZE is greater than UVSIZE mean that
                         * a single UV cannot contain all the possible
                         * NaN payload bits.  There would need to be
                         * some more generic syntax than "nan($uv)".
                         *
                         * Issues to keep in mind:
                         *
                         * (1) In most common cases there would
                         * not be an integral number of bytes that
                         * could be set, only a certain number of bits.
                         * For example for the common case of
                         * NVSIZE == UVSIZE == 8 there is room for 52
                         * bits in the payload, but the most significant
                         * bit is commonly reserved for the
                         * signaling/quiet bit, leaving 51 bits.
                         * Furthermore, the C99 nan() is supposed
                         * to generate quiet NaNs, so it is doubtful
                         * whether it should be able to generate
                         * signaling NaNs.  For the x86 80-bit doubles
                         * (if building a long double Perl) there would
                         * be 62 bits (s/q bit being the 63rd).
                         *
                         * (2) Endianness of the payload bits.  If the
                         * payload is specified as an UV, the low-order
                         * bits of the UV are naturally little-endianed
                         * (rightmost) bits of the payload.  The endianness
                         * of UVs and NVs can be different. */
                        return 0;
                    }
                    if (s < t) {
                        flags |= IS_NUMBER_TRAILING;
                    }
                } else {
                    /* Looked like nan(...), but no close paren. */
                    flags |= IS_NUMBER_TRAILING;
                }
            } else {
                while (s < send && isSPACE(*s))
                    s++;
                if (s < send && *s) {
                    /* Note that we here implicitly accept (parse as
                     * "nan", but with warnings) also any other weird
                     * trailing stuff for "nan".  In the above we just
                     * check that if we got the C99-style "nan(...)",
                     * the "..." looks sane.
                     * If in future we accept more ways of specifying
                     * the nan payload, the accepting would happen around
                     * here. */
                    flags |= IS_NUMBER_TRAILING;
                }
            }
            s = send;
        }
        else
            return 0;
    }

    while (s < send && isSPACE(*s))
        s++;

#else
    PERL_UNUSED_ARG(send);
#endif /* #if defined(NV_INF) || defined(NV_NAN) */
    *sp = s;
    return flags;
}
/*
=for apidoc grok_number_flags
Recognise (or not) a number. The type of the number is returned
(0 if unrecognised), otherwise it is a bit-ORed combination of
C<IS_NUMBER_IN_UV>, C<IS_NUMBER_GREATER_THAN_UV_MAX>, C<IS_NUMBER_NOT_INT>,
C<IS_NUMBER_NEG>, C<IS_NUMBER_INFINITY>, C<IS_NUMBER_NAN> (defined in perl.h).
If the value of the number can fit in a UV, it is returned in C<*valuep>.
C<IS_NUMBER_IN_UV> will be set to indicate that C<*valuep> is valid, C<IS_NUMBER_IN_UV>
will never be set unless C<*valuep> is valid, but C<*valuep> may have been assigned
to during processing even though C<IS_NUMBER_IN_UV> is not set on return.
If C<valuep> is C<NULL>, C<IS_NUMBER_IN_UV> will be set for the same cases as when
C<valuep> is non-C<NULL>, but no actual assignment (or SEGV) will occur.
C<IS_NUMBER_NOT_INT> will be set with C<IS_NUMBER_IN_UV> if trailing decimals were
seen (in which case C<*valuep> gives the true value truncated to an integer), and
C<IS_NUMBER_NEG> if the number is negative (in which case C<*valuep> holds the
absolute value). C<IS_NUMBER_IN_UV> is not set if e notation was used or the