forked from Perl/perl5
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlocale.c
5729 lines (4650 loc) · 207 KB
/
locale.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/* locale.c
*
* Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
* 2002, 2003, 2005, 2006, 2007, 2008 by Larry Wall and others
*
* You may distribute under the terms of either the GNU General Public
* License or the Artistic License, as specified in the README file.
*
*/
/*
* A Elbereth Gilthoniel,
* silivren penna míriel
* o menel aglar elenath!
* Na-chaered palan-díriel
* o galadhremmin ennorath,
* Fanuilos, le linnathon
* nef aear, si nef aearon!
*
* [p.238 of _The Lord of the Rings_, II/i: "Many Meetings"]
*/
/* utility functions for handling locale-specific stuff like what
* character represents the decimal point.
*
* All C programs have an underlying locale. Perl code generally doesn't pay
* any attention to it except within the scope of a 'use locale'. For most
* categories, it accomplishes this by just using different operations if it is
* in such scope than if not. However, various libc functions called by Perl
* are affected by the LC_NUMERIC category, so there are macros in perl.h that
* are used to toggle between the current locale and the C locale depending on
* the desired behavior of those functions at the moment. And, LC_MESSAGES is
* switched to the C locale for outputting the message unless within the scope
* of 'use locale'.
*
* This code now has multi-thread-safe locale handling on systems that support
* that. This is completely transparent to most XS code. On earlier systems,
* it would be possible to emulate thread-safe locales, but this likely would
* involve a lot of locale switching, and would require XS code changes.
* Macros could be written so that the code wouldn't have to know which type of
* system is being used. It's unlikely that we would ever do that, since most
* modern systems support thread-safe locales, but there was code written to
* this end, and is retained, #ifdef'd out.
*/
#include "EXTERN.h"
#define PERL_IN_LOCALE_C
#include "perl_langinfo.h"
#include "perl.h"
#include "reentr.h"
#ifdef I_WCHAR
# include <wchar.h>
#endif
#ifdef I_WCTYPE
# include <wctype.h>
#endif
/* If the environment says to, we can output debugging information during
* initialization. This is done before option parsing, and before any thread
* creation, so can be a file-level static */
#if ! defined(DEBUGGING)
# define debug_initialization 0
# define DEBUG_INITIALIZATION_set(v)
#else
static bool debug_initialization = FALSE;
# define DEBUG_INITIALIZATION_set(v) (debug_initialization = v)
#endif
/* Returns the Unix errno portion; ignoring any others. This is a macro here
* instead of putting it into perl.h, because unclear to khw what should be
* done generally. */
#define GET_ERRNO saved_errno
/* strlen() of a literal string constant. We might want this more general,
* but using it in just this file for now. A problem with more generality is
* the compiler warnings about comparing unlike signs */
#define STRLENs(s) (sizeof("" s "") - 1)
/* Is the C string input 'name' "C" or "POSIX"? If so, and 'name' is the
* return of setlocale(), then this is extremely likely to be the C or POSIX
* locale. However, the output of setlocale() is documented to be opaque, but
* the odds are extremely small that it would return these two strings for some
* other locale. Note that VMS in these two locales includes many non-ASCII
* characters as controls and punctuation (below are hex bytes):
* cntrl: 84-97 9B-9F
* punct: A1-A3 A5 A7-AB B0-B3 B5-B7 B9-BD BF-CF D1-DD DF-EF F1-FD
* Oddly, none there are listed as alphas, though some represent alphabetics
* http://www.nntp.perl.org/group/perl.perl5.porters/2013/02/msg198753.html */
#define isNAME_C_OR_POSIX(name) \
( (name) != NULL \
&& (( *(name) == 'C' && (*(name + 1)) == '\0') \
|| strEQ((name), "POSIX")))
#ifdef USE_LOCALE
/* This code keeps a LRU cache of the UTF-8ness of the locales it has so-far
* looked up. This is in the form of a C string: */
#define UTF8NESS_SEP "\v"
#define UTF8NESS_PREFIX "\f"
/* So, the string looks like:
*
* \vC\a0\vPOSIX\a0\vam_ET\a0\vaf_ZA.utf8\a1\ven_US.UTF-8\a1\0
*
* where the digit 0 after the \a indicates that the locale starting just
* after the preceding \v is not UTF-8, and the digit 1 mean it is. */
STATIC_ASSERT_DECL(STRLENs(UTF8NESS_SEP) == 1);
STATIC_ASSERT_DECL(STRLENs(UTF8NESS_PREFIX) == 1);
#define C_and_POSIX_utf8ness UTF8NESS_SEP "C" UTF8NESS_PREFIX "0" \
UTF8NESS_SEP "POSIX" UTF8NESS_PREFIX "0"
/* The cache is initialized to C_and_POSIX_utf8ness at start up. These are
* kept there always. The remining portion of the cache is LRU, with the
* oldest looked-up locale at the tail end */
STATIC char *
S_stdize_locale(pTHX_ char *locs)
{
/* Standardize the locale name from a string returned by 'setlocale',
* possibly modifying that string.
*
* The typical return value of setlocale() is either
* (1) "xx_YY" if the first argument of setlocale() is not LC_ALL
* (2) "xa_YY xb_YY ..." if the first argument of setlocale() is LC_ALL
* (the space-separated values represent the various sublocales,
* in some unspecified order). This is not handled by this function.
*
* In some platforms it has a form like "LC_SOMETHING=Lang_Country.866\n",
* which is harmful for further use of the string in setlocale(). This
* function removes the trailing new line and everything up through the '='
* */
const char * const s = strchr(locs, '=');
bool okay = TRUE;
PERL_ARGS_ASSERT_STDIZE_LOCALE;
if (s) {
const char * const t = strchr(s, '.');
okay = FALSE;
if (t) {
const char * const u = strchr(t, '\n');
if (u && (u[1] == 0)) {
const STRLEN len = u - s;
Move(s + 1, locs, len, char);
locs[len] = 0;
okay = TRUE;
}
}
}
if (!okay)
Perl_croak(aTHX_ "Can't fix broken locale name \"%s\"", locs);
return locs;
}
/* Two parallel arrays; first the locale categories Perl uses on this system;
* the second array is their names. These arrays are in mostly arbitrary
* order. */
const int categories[] = {
# ifdef USE_LOCALE_NUMERIC
LC_NUMERIC,
# endif
# ifdef USE_LOCALE_CTYPE
LC_CTYPE,
# endif
# ifdef USE_LOCALE_COLLATE
LC_COLLATE,
# endif
# ifdef USE_LOCALE_TIME
LC_TIME,
# endif
# ifdef USE_LOCALE_MESSAGES
LC_MESSAGES,
# endif
# ifdef USE_LOCALE_MONETARY
LC_MONETARY,
# endif
# ifdef USE_LOCALE_ADDRESS
LC_ADDRESS,
# endif
# ifdef USE_LOCALE_IDENTIFICATION
LC_IDENTIFICATION,
# endif
# ifdef USE_LOCALE_MEASUREMENT
LC_MEASUREMENT,
# endif
# ifdef USE_LOCALE_PAPER
LC_PAPER,
# endif
# ifdef USE_LOCALE_TELEPHONE
LC_TELEPHONE,
# endif
# ifdef USE_LOCALE_SYNTAX
LC_SYNTAX,
# endif
# ifdef USE_LOCALE_TOD
LC_TOD,
# endif
# ifdef LC_ALL
LC_ALL,
# endif
-1 /* Placeholder because C doesn't allow a
trailing comma, and it would get complicated
with all the #ifdef's */
};
/* The top-most real element is LC_ALL */
const char * const category_names[] = {
# ifdef USE_LOCALE_NUMERIC
"LC_NUMERIC",
# endif
# ifdef USE_LOCALE_CTYPE
"LC_CTYPE",
# endif
# ifdef USE_LOCALE_COLLATE
"LC_COLLATE",
# endif
# ifdef USE_LOCALE_TIME
"LC_TIME",
# endif
# ifdef USE_LOCALE_MESSAGES
"LC_MESSAGES",
# endif
# ifdef USE_LOCALE_MONETARY
"LC_MONETARY",
# endif
# ifdef USE_LOCALE_ADDRESS
"LC_ADDRESS",
# endif
# ifdef USE_LOCALE_IDENTIFICATION
"LC_IDENTIFICATION",
# endif
# ifdef USE_LOCALE_MEASUREMENT
"LC_MEASUREMENT",
# endif
# ifdef USE_LOCALE_PAPER
"LC_PAPER",
# endif
# ifdef USE_LOCALE_TELEPHONE
"LC_TELEPHONE",
# endif
# ifdef USE_LOCALE_SYNTAX
"LC_SYNTAX",
# endif
# ifdef USE_LOCALE_TOD
"LC_TOD",
# endif
# ifdef LC_ALL
"LC_ALL",
# endif
NULL /* Placeholder */
};
# ifdef LC_ALL
/* On systems with LC_ALL, it is kept in the highest index position. (-2
* to account for the final unused placeholder element.) */
# define NOMINAL_LC_ALL_INDEX (C_ARRAY_LENGTH(categories) - 2)
# else
/* On systems without LC_ALL, we pretend it is there, one beyond the real
* top element, hence in the unused placeholder element. */
# define NOMINAL_LC_ALL_INDEX (C_ARRAY_LENGTH(categories) - 1)
# endif
/* Pretending there is an LC_ALL element just above allows us to avoid most
* special cases. Most loops through these arrays in the code below are
* written like 'for (i = 0; i < NOMINAL_LC_ALL_INDEX; i++)'. They will work
* on either type of system. But the code must be written to not access the
* element at 'LC_ALL_INDEX' except on platforms that have it. This can be
* checked for at compile time by using the #define LC_ALL_INDEX which is only
* defined if we do have LC_ALL. */
STATIC const char *
S_category_name(const int category)
{
unsigned int i;
#ifdef LC_ALL
if (category == LC_ALL) {
return "LC_ALL";
}
#endif
for (i = 0; i < NOMINAL_LC_ALL_INDEX; i++) {
if (category == categories[i]) {
return category_names[i];
}
}
{
const char suffix[] = " (unknown)";
int temp = category;
Size_t length = sizeof(suffix) + 1;
char * unknown;
dTHX;
if (temp < 0) {
length++;
temp = - temp;
}
/* Calculate the number of digits */
while (temp >= 10) {
temp /= 10;
length++;
}
Newx(unknown, length, char);
my_snprintf(unknown, length, "%d%s", category, suffix);
SAVEFREEPV(unknown);
return unknown;
}
}
/* Now create LC_foo_INDEX #defines for just those categories on this system */
# ifdef USE_LOCALE_NUMERIC
# define LC_NUMERIC_INDEX 0
# define _DUMMY_NUMERIC LC_NUMERIC_INDEX
# else
# define _DUMMY_NUMERIC -1
# endif
# ifdef USE_LOCALE_CTYPE
# define LC_CTYPE_INDEX _DUMMY_NUMERIC + 1
# define _DUMMY_CTYPE LC_CTYPE_INDEX
# else
# define _DUMMY_CTYPE _DUMMY_NUMERIC
# endif
# ifdef USE_LOCALE_COLLATE
# define LC_COLLATE_INDEX _DUMMY_CTYPE + 1
# define _DUMMY_COLLATE LC_COLLATE_INDEX
# else
# define _DUMMY_COLLATE _DUMMY_CTYPE
# endif
# ifdef USE_LOCALE_TIME
# define LC_TIME_INDEX _DUMMY_COLLATE + 1
# define _DUMMY_TIME LC_TIME_INDEX
# else
# define _DUMMY_TIME _DUMMY_COLLATE
# endif
# ifdef USE_LOCALE_MESSAGES
# define LC_MESSAGES_INDEX _DUMMY_TIME + 1
# define _DUMMY_MESSAGES LC_MESSAGES_INDEX
# else
# define _DUMMY_MESSAGES _DUMMY_TIME
# endif
# ifdef USE_LOCALE_MONETARY
# define LC_MONETARY_INDEX _DUMMY_MESSAGES + 1
# define _DUMMY_MONETARY LC_MONETARY_INDEX
# else
# define _DUMMY_MONETARY _DUMMY_MESSAGES
# endif
# ifdef USE_LOCALE_ADDRESS
# define LC_ADDRESS_INDEX _DUMMY_MONETARY + 1
# define _DUMMY_ADDRESS LC_ADDRESS_INDEX
# else
# define _DUMMY_ADDRESS _DUMMY_MONETARY
# endif
# ifdef USE_LOCALE_IDENTIFICATION
# define LC_IDENTIFICATION_INDEX _DUMMY_ADDRESS + 1
# define _DUMMY_IDENTIFICATION LC_IDENTIFICATION_INDEX
# else
# define _DUMMY_IDENTIFICATION _DUMMY_ADDRESS
# endif
# ifdef USE_LOCALE_MEASUREMENT
# define LC_MEASUREMENT_INDEX _DUMMY_IDENTIFICATION + 1
# define _DUMMY_MEASUREMENT LC_MEASUREMENT_INDEX
# else
# define _DUMMY_MEASUREMENT _DUMMY_IDENTIFICATION
# endif
# ifdef USE_LOCALE_PAPER
# define LC_PAPER_INDEX _DUMMY_MEASUREMENT + 1
# define _DUMMY_PAPER LC_PAPER_INDEX
# else
# define _DUMMY_PAPER _DUMMY_MEASUREMENT
# endif
# ifdef USE_LOCALE_TELEPHONE
# define LC_TELEPHONE_INDEX _DUMMY_PAPER + 1
# define _DUMMY_TELEPHONE LC_TELEPHONE_INDEX
# else
# define _DUMMY_TELEPHONE _DUMMY_PAPER
# endif
# ifdef USE_LOCALE_SYNTAX
# define LC_SYNTAX_INDEX _DUMMY_TELEPHONE + 1
# define _DUMMY_SYNTAX LC_SYNTAX_INDEX
# else
# define _DUMMY_SYNTAX _DUMMY_TELEPHONE
# endif
# ifdef USE_LOCALE_TOD
# define LC_TOD_INDEX _DUMMY_SYNTAX + 1
# define _DUMMY_TOD LC_TOD_INDEX
# else
# define _DUMMY_TOD _DUMMY_SYNTAX
# endif
# ifdef LC_ALL
# define LC_ALL_INDEX _DUMMY_TOD + 1
# endif
#endif /* ifdef USE_LOCALE */
/* Windows requres a customized base-level setlocale() */
#ifdef WIN32
# define my_setlocale(cat, locale) win32_setlocale(cat, locale)
#else
# define my_setlocale(cat, locale) setlocale(cat, locale)
#endif
#ifndef USE_POSIX_2008_LOCALE
/* "do_setlocale_c" is intended to be called when the category is a constant
* known at compile time; "do_setlocale_r", not known until run time */
# define do_setlocale_c(cat, locale) my_setlocale(cat, locale)
# define do_setlocale_r(cat, locale) my_setlocale(cat, locale)
# define FIX_GLIBC_LC_MESSAGES_BUG(i)
#else /* Below uses POSIX 2008 */
/* We emulate setlocale with our own function. LC_foo is not valid for the
* POSIX 2008 functions. Instead LC_foo_MASK is used, which we use an array
* lookup to convert to. At compile time we have defined LC_foo_INDEX as the
* proper offset into the array 'category_masks[]'. At runtime, we have to
* search through the array (as the actual numbers may not be small contiguous
* positive integers which would lend themselves to array lookup). */
# define do_setlocale_c(cat, locale) \
emulate_setlocale(cat, locale, cat ## _INDEX, TRUE)
# define do_setlocale_r(cat, locale) emulate_setlocale(cat, locale, 0, FALSE)
# if ! defined(__GLIBC__) || ! defined(USE_LOCALE_MESSAGES)
# define FIX_GLIBC_LC_MESSAGES_BUG(i)
# else /* Invalidate glibc cache of loaded translations, see [perl #134264] */
# include <libintl.h>
# define FIX_GLIBC_LC_MESSAGES_BUG(i) \
STMT_START { \
if ((i) == LC_MESSAGES_INDEX) { \
textdomain(textdomain(NULL)); \
} \
} STMT_END
# endif
/* A third array, parallel to the ones above to map from category to its
* equivalent mask */
const int category_masks[] = {
# ifdef USE_LOCALE_NUMERIC
LC_NUMERIC_MASK,
# endif
# ifdef USE_LOCALE_CTYPE
LC_CTYPE_MASK,
# endif
# ifdef USE_LOCALE_COLLATE
LC_COLLATE_MASK,
# endif
# ifdef USE_LOCALE_TIME
LC_TIME_MASK,
# endif
# ifdef USE_LOCALE_MESSAGES
LC_MESSAGES_MASK,
# endif
# ifdef USE_LOCALE_MONETARY
LC_MONETARY_MASK,
# endif
# ifdef USE_LOCALE_ADDRESS
LC_ADDRESS_MASK,
# endif
# ifdef USE_LOCALE_IDENTIFICATION
LC_IDENTIFICATION_MASK,
# endif
# ifdef USE_LOCALE_MEASUREMENT
LC_MEASUREMENT_MASK,
# endif
# ifdef USE_LOCALE_PAPER
LC_PAPER_MASK,
# endif
# ifdef USE_LOCALE_TELEPHONE
LC_TELEPHONE_MASK,
# endif
# ifdef USE_LOCALE_SYNTAX
LC_SYNTAX_MASK,
# endif
# ifdef USE_LOCALE_TOD
LC_TOD_MASK,
# endif
/* LC_ALL can't be turned off by a Configure
* option, and in Posix 2008, should always be
* here, so compile it in unconditionally.
* This could catch some glitches at compile
* time */
LC_ALL_MASK
};
STATIC const char *
S_emulate_setlocale(const int category,
const char * locale,
unsigned int index,
const bool is_index_valid
)
{
/* This function effectively performs a setlocale() on just the current
* thread; thus it is thread-safe. It does this by using the POSIX 2008
* locale functions to emulate the behavior of setlocale(). Similar to
* regular setlocale(), the return from this function points to memory that
* can be overwritten by other system calls, so needs to be copied
* immediately if you need to retain it. The difference here is that
* system calls besides another setlocale() can overwrite it.
*
* By doing this, most locale-sensitive functions become thread-safe. The
* exceptions are mostly those that return a pointer to static memory.
*
* This function takes the same parameters, 'category' and 'locale', that
* the regular setlocale() function does, but it also takes two additional
* ones. This is because the 2008 functions don't use a category; instead
* they use a corresponding mask. Because this function operates in both
* worlds, it may need one or the other or both. This function can
* calculate the mask from the input category, but to avoid this
* calculation, if the caller knows at compile time what the mask is, it
* can pass it, setting 'is_index_valid' to TRUE; otherwise the mask
* parameter is ignored.
*
* POSIX 2008, for some sick reason, chose not to provide a method to find
* the category name of a locale. Some vendors have created a
* querylocale() function to do just that. This function is a lot simpler
* to implement on systems that have this. Otherwise, we have to keep
* track of what the locale has been set to, so that we can return its
* name to emulate setlocale(). It's also possible for C code in some
* library to change the locale without us knowing it, though as of
* September 2017, there are no occurrences in CPAN of uselocale(). Some
* libraries do use setlocale(), but that changes the global locale, and
* threads using per-thread locales will just ignore those changes.
* Another problem is that without querylocale(), we have to guess at what
* was meant by setting a locale of "". We handle this by not actually
* ever setting to "" (unless querylocale exists), but to emulate what we
* think should happen for "".
*/
int mask;
locale_t old_obj;
locale_t new_obj;
dTHX;
# ifdef DEBUGGING
if (DEBUG_Lv_TEST || debug_initialization) {
PerlIO_printf(Perl_debug_log, "%s:%d: emulate_setlocale input=%d (%s), \"%s\", %d, %d\n", __FILE__, __LINE__, category, category_name(category), locale, index, is_index_valid);
}
# endif
/* If the input mask might be incorrect, calculate the correct one */
if (! is_index_valid) {
unsigned int i;
# ifdef DEBUGGING
if (DEBUG_Lv_TEST || debug_initialization) {
PerlIO_printf(Perl_debug_log, "%s:%d: finding index of category %d (%s)\n", __FILE__, __LINE__, category, category_name(category));
}
# endif
for (i = 0; i <= LC_ALL_INDEX; i++) {
if (category == categories[i]) {
index = i;
goto found_index;
}
}
/* Here, we don't know about this category, so can't handle it.
* Fallback to the early POSIX usages */
Perl_warner(aTHX_ packWARN(WARN_LOCALE),
"Unknown locale category %d; can't set it to %s\n",
category, locale);
return NULL;
found_index: ;
# ifdef DEBUGGING
if (DEBUG_Lv_TEST || debug_initialization) {
PerlIO_printf(Perl_debug_log, "%s:%d: index is %d for %s\n", __FILE__, __LINE__, index, category_name(category));
}
# endif
}
mask = category_masks[index];
# ifdef DEBUGGING
if (DEBUG_Lv_TEST || debug_initialization) {
PerlIO_printf(Perl_debug_log, "%s:%d: category name is %s; mask is 0x%x\n", __FILE__, __LINE__, category_names[index], mask);
}
# endif
/* If just querying what the existing locale is ... */
if (locale == NULL) {
locale_t cur_obj = uselocale((locale_t) 0);
# ifdef DEBUGGING
if (DEBUG_Lv_TEST || debug_initialization) {
PerlIO_printf(Perl_debug_log, "%s:%d: emulate_setlocale querying %p\n", __FILE__, __LINE__, cur_obj);
}
# endif
if (cur_obj == LC_GLOBAL_LOCALE) {
return my_setlocale(category, NULL);
}
# ifdef HAS_QUERYLOCALE
return (char *) querylocale(mask, cur_obj);
# else
/* If this assert fails, adjust the size of curlocales in intrpvar.h */
STATIC_ASSERT_STMT(C_ARRAY_LENGTH(PL_curlocales) > LC_ALL_INDEX);
# if defined(_NL_LOCALE_NAME) \
&& defined(DEBUGGING) \
&& ! defined(SETLOCALE_ACCEPTS_ANY_LOCALE_NAME)
/* On systems that accept any locale name, the real underlying locale
* is often returned by this internal function, so we can't use it */
{
/* Internal glibc for querylocale(), but doesn't handle
* empty-string ("") locale properly; who knows what other
* glitches. Check for it now, under debug. */
char * temp_name = nl_langinfo_l(_NL_LOCALE_NAME(category),
uselocale((locale_t) 0));
/*
PerlIO_printf(Perl_debug_log, "%s:%d: temp_name=%s\n", __FILE__, __LINE__, temp_name ? temp_name : "NULL");
PerlIO_printf(Perl_debug_log, "%s:%d: index=%d\n", __FILE__, __LINE__, index);
PerlIO_printf(Perl_debug_log, "%s:%d: PL_curlocales[index]=%s\n", __FILE__, __LINE__, PL_curlocales[index]);
*/
if (temp_name && PL_curlocales[index] && strNE(temp_name, "")) {
if ( strNE(PL_curlocales[index], temp_name)
&& ! ( isNAME_C_OR_POSIX(temp_name)
&& isNAME_C_OR_POSIX(PL_curlocales[index]))) {
# ifdef USE_C_BACKTRACE
dump_c_backtrace(Perl_debug_log, 20, 1);
# endif
Perl_croak(aTHX_ "panic: Mismatch between what Perl thinks %s is"
" (%s) and what internal glibc thinks"
" (%s)\n", category_names[index],
PL_curlocales[index], temp_name);
}
return temp_name;
}
}
# endif
/* Without querylocale(), we have to use our record-keeping we've
* done. */
if (category != LC_ALL) {
# ifdef DEBUGGING
if (DEBUG_Lv_TEST || debug_initialization) {
PerlIO_printf(Perl_debug_log, "%s:%d: emulate_setlocale returning %s\n", __FILE__, __LINE__, PL_curlocales[index]);
}
# endif
return PL_curlocales[index];
}
else { /* For LC_ALL */
unsigned int i;
Size_t names_len = 0;
char * all_string;
bool are_all_categories_the_same_locale = TRUE;
/* If we have a valid LC_ALL value, just return it */
if (PL_curlocales[LC_ALL_INDEX]) {
# ifdef DEBUGGING
if (DEBUG_Lv_TEST || debug_initialization) {
PerlIO_printf(Perl_debug_log, "%s:%d: emulate_setlocale returning %s\n", __FILE__, __LINE__, PL_curlocales[LC_ALL_INDEX]);
}
# endif
return PL_curlocales[LC_ALL_INDEX];
}
/* Otherwise, we need to construct a string of name=value pairs.
* We use the glibc syntax, like
* LC_NUMERIC=C;LC_TIME=en_US.UTF-8;...
* First calculate the needed size. Along the way, check if all
* the locale names are the same */
for (i = 0; i < LC_ALL_INDEX; i++) {
# ifdef DEBUGGING
if (DEBUG_Lv_TEST || debug_initialization) {
PerlIO_printf(Perl_debug_log, "%s:%d: emulate_setlocale i=%d, name=%s, locale=%s\n", __FILE__, __LINE__, i, category_names[i], PL_curlocales[i]);
}
# endif
names_len += strlen(category_names[i])
+ 1 /* '=' */
+ strlen(PL_curlocales[i])
+ 1; /* ';' */
if (i > 0 && strNE(PL_curlocales[i], PL_curlocales[i-1])) {
are_all_categories_the_same_locale = FALSE;
}
}
/* If they are the same, we don't actually have to construct the
* string; we just make the entry in LC_ALL_INDEX valid, and be
* that single name */
if (are_all_categories_the_same_locale) {
PL_curlocales[LC_ALL_INDEX] = savepv(PL_curlocales[0]);
return PL_curlocales[LC_ALL_INDEX];
}
names_len++; /* Trailing '\0' */
SAVEFREEPV(Newx(all_string, names_len, char));
*all_string = '\0';
/* Then fill in the string */
for (i = 0; i < LC_ALL_INDEX; i++) {
# ifdef DEBUGGING
if (DEBUG_Lv_TEST || debug_initialization) {
PerlIO_printf(Perl_debug_log, "%s:%d: emulate_setlocale i=%d, name=%s, locale=%s\n", __FILE__, __LINE__, i, category_names[i], PL_curlocales[i]);
}
# endif
my_strlcat(all_string, category_names[i], names_len);
my_strlcat(all_string, "=", names_len);
my_strlcat(all_string, PL_curlocales[i], names_len);
my_strlcat(all_string, ";", names_len);
}
# ifdef DEBUGGING
if (DEBUG_L_TEST || debug_initialization) {
PerlIO_printf(Perl_debug_log, "%s:%d: emulate_setlocale returning %s\n", __FILE__, __LINE__, all_string);
}
#endif
return all_string;
}
# ifdef EINVAL
SETERRNO(EINVAL, LIB_INVARG);
# endif
return NULL;
# endif
} /* End of this being setlocale(LC_foo, NULL) */
/* Here, we are switching locales. */
# ifndef HAS_QUERYLOCALE
if (strEQ(locale, "")) {
/* For non-querylocale() systems, we do the setting of "" ourselves to
* be sure that we really know what's going on. We follow the Linux
* documented behavior (but if that differs from the actual behavior,
* this won't work exactly as the OS implements). We go out and
* examine the environment based on our understanding of how the system
* works, and use that to figure things out */
const char * const lc_all = PerlEnv_getenv("LC_ALL");
/* Use any "LC_ALL" environment variable, as it overrides everything
* else. */
if (lc_all && strNE(lc_all, "")) {
locale = lc_all;
}
else {
/* Otherwise, we need to dig deeper. Unless overridden, the
* default is the LANG environment variable; if it doesn't exist,
* then "C" */
const char * default_name;
default_name = PerlEnv_getenv("LANG");
if (! default_name || strEQ(default_name, "")) {
default_name = "C";
}
if (category != LC_ALL) {
const char * const name = PerlEnv_getenv(category_names[index]);
/* Here we are setting a single category. Assume will have the
* default name */
locale = default_name;
/* But then look for an overriding environment variable */
if (name && strNE(name, "")) {
locale = name;
}
}
else {
bool did_override = FALSE;
unsigned int i;
/* Here, we are getting LC_ALL. Any categories that don't have
* a corresponding environment variable set should be set to
* LANG, or to "C" if there is no LANG. If no individual
* categories differ from this, we can just set LC_ALL. This
* is buggy on systems that have extra categories that we don't
* know about. If there is an environment variable that sets
* that category, we won't know to look for it, and so our use
* of LANG or "C" improperly overrides it. On the other hand,
* if we don't do what is done here, and there is no
* environment variable, the category's locale should be set to
* LANG or "C". So there is no good solution. khw thinks the
* best is to look at systems to see what categories they have,
* and include them, and then to assume that we know the
* complete set */
for (i = 0; i < LC_ALL_INDEX; i++) {
const char * const env_override
= PerlEnv_getenv(category_names[i]);
const char * this_locale = ( env_override
&& strNE(env_override, ""))
? env_override
: default_name;
if (! emulate_setlocale(categories[i], this_locale, i, TRUE))
{
return NULL;
}
if (strNE(this_locale, default_name)) {
did_override = TRUE;
}
}
/* If all the categories are the same, we can set LC_ALL to
* that */
if (! did_override) {
locale = default_name;
}
else {
/* Here, LC_ALL is no longer valid, as some individual
* categories don't match it. We call ourselves
* recursively, as that will execute the code that
* generates the proper locale string for this situation.
* We don't do the remainder of this function, as that is
* to update our records, and we've just done that for the
* individual categories in the loop above, and doing so
* would cause LC_ALL to be done as well */
return emulate_setlocale(LC_ALL, NULL, LC_ALL_INDEX, TRUE);
}
}
}
} /* End of this being setlocale(LC_foo, "") */
else if (strchr(locale, ';')) {
/* LC_ALL may actually incude a conglomeration of various categories.
* Without querylocale, this code uses the glibc (as of this writing)
* syntax for representing that, but that is not a stable API, and
* other platforms do it differently, so we have to handle all cases
* ourselves */
unsigned int i;
const char * s = locale;
const char * e = locale + strlen(locale);
const char * p = s;
const char * category_end;
const char * name_start;
const char * name_end;
/* If the string that gives what to set doesn't include all categories,
* the omitted ones get set to "C". To get this behavior, first set
* all the individual categories to "C", and override the furnished
* ones below */
for (i = 0; i < LC_ALL_INDEX; i++) {
if (! emulate_setlocale(categories[i], "C", i, TRUE)) {
return NULL;
}
}
while (s < e) {
/* Parse through the category */
while (isWORDCHAR(*p)) {
p++;
}
category_end = p;
if (*p++ != '=') {
Perl_croak(aTHX_
"panic: %s: %d: Unexpected character in locale name '%02X",
__FILE__, __LINE__, *(p-1));
}
/* Parse through the locale name */
name_start = p;
while (p < e && *p != ';') {
if (! isGRAPH(*p)) {
Perl_croak(aTHX_
"panic: %s: %d: Unexpected character in locale name '%02X",
__FILE__, __LINE__, *(p-1));
}
p++;
}
name_end = p;
/* Space past the semi-colon */
if (p < e) {
p++;
}
/* Find the index of the category name in our lists */
for (i = 0; i < LC_ALL_INDEX; i++) {
char * individ_locale;
/* Keep going if this isn't the index. The strnNE() avoids a
* Perl_form(), but would fail if ever a category name could be
* a substring of another one, like if there were a
* "LC_TIME_DATE" */
if strnNE(s, category_names[i], category_end - s) {
continue;
}
/* If this index is for the single category we're changing, we
* have found the locale to set it to. */
if (category == categories[i]) {
locale = Perl_form(aTHX_ "%.*s",
(int) (name_end - name_start),
name_start);
goto ready_to_set;
}
assert(category == LC_ALL);
individ_locale = Perl_form(aTHX_ "%.*s",
(int) (name_end - name_start), name_start);
if (! emulate_setlocale(categories[i], individ_locale, i, TRUE))
{
return NULL;
}
}
s = p;
}
/* Here we have set all the individual categories by recursive calls.
* These collectively should have fixed up LC_ALL, so can just query
* what that now is */
assert(category == LC_ALL);
return do_setlocale_c(LC_ALL, NULL);
} /* End of this being setlocale(LC_ALL,
"LC_CTYPE=foo;LC_NUMERIC=bar;...") */
ready_to_set: ;
/* Here at the end of having to deal with the absence of querylocale().
* Some cases have already been fully handled by recursive calls to this
* function. But at this point, we haven't dealt with those, but are now
* prepared to, knowing what the locale name to set this category to is.
* This would have come for free if this system had had querylocale() */
# endif /* end of ! querylocale */