Skip to content

Commit af51989

Browse files
committed
ICU 74.1 Patches (cont'd)
1 parent 83b326a commit af51989

20 files changed

+98
-86
lines changed

.devel/sphinx/conf.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -15,14 +15,14 @@ def get_package_version():
1515
copyright_year = "2013–2023"
1616
html_baseurl = "https://stringi.gagolewski.com/"
1717
html_logo = "https://www.gagolewski.com/_static/img/stringi.png"
18+
html_favicon = "https://www.gagolewski.com/_static/img/stringi.png"
1819
github_url = "https://github.com/gagolews/stringi"
1920
github_star_repo = "gagolews/stringi"
2021
analytics_id = None # don't use it! this site does not track its users
2122
author = "Marek Gagolewski"
2223
copyright = f"{copyright_year}"
2324
html_title = f"R Package {pkg_title}"
2425
html_short_title = f"{pkg_title}"
25-
html_favicon = "_static/favicon.png"
2626

2727
html_version_text = f'\
2828
R Package<br />\

.devel/sphinx/news.md

+10-5
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,14 @@
55

66
* [GENERAL] ICU bundle updated to version 74.1 (Unicode 15.1, CLDR 44).
77

8-
* [BUILD TIME] As per the suggestion of Prof. Brian Ripley, `icudt74l` (ICU data)
9-
is now included in the source tarball (compressed with xz to save space).
10-
This allows for building *stringi* on systems with no internet access.
8+
* [BUILD TIME] As per the suggestion of Prof. Brian Ripley, `icudt74l`
9+
(ICU data -- little endian) is now included in the source tarball (compressed
10+
with xz to save space). This allows for building *stringi* on systems with
11+
no internet access.
1112

1213
* [BUILD TIME] Support for Solaris has now been dropped. The package is no
13-
longer shipped with the very outdated ICU55 bundle. A compiler supporting
14-
at least C++11 is now required. ICU >= 61 is now required.
14+
longer shipped with the very outdated ICU55 bundle. A compiler supporting
15+
at least C++11 is now required. ICU >= 61 is now required.
1516

1617

1718

@@ -24,6 +25,10 @@
2425

2526
* [NEW FEATURE] TODO... #476 U_USING_DEFAULT_ERROR on unknown locales
2627

28+
* TODO.... ...#490?
29+
30+
* TODO.... ...#491?
31+
2732

2833

2934
## 1.7.12 (2023-01-09)

.devel/sphinx/rapi/stri_datetime_add.md

+2-2
Original file line numberDiff line numberDiff line change
@@ -68,15 +68,15 @@ print(x)
6868
```
6969

7070
```
71-
## [1] "2024-01-05 11:12:24 AEDT"
71+
## [1] "2024-01-05 14:22:31 AEDT"
7272
```
7373

7474
```r
7575
stri_datetime_add(x, -2, units='months')
7676
```
7777

7878
```
79-
## [1] "2023-11-05 11:12:24 AEDT"
79+
## [1] "2023-11-05 14:22:31 AEDT"
8080
```
8181

8282
```r

.devel/sphinx/rapi/stri_datetime_fields.md

+4-4
Original file line numberDiff line numberDiff line change
@@ -77,9 +77,9 @@ stri_datetime_fields(stri_datetime_now())
7777

7878
```
7979
## Year Month Day Hour Minute Second Millisecond WeekOfYear WeekOfMonth
80-
## 1 2023 11 5 11 12 24 801 45 1
80+
## 1 2023 11 5 14 22 31 945 45 1
8181
## DayOfYear DayOfWeek Hour12 AmPm Era
82-
## 1 309 1 11 1 2
82+
## 1 309 1 2 2 2
8383
```
8484

8585
```r
@@ -88,9 +88,9 @@ stri_datetime_fields(stri_datetime_now(), locale='@calendar=hebrew')
8888

8989
```
9090
## Year Month Day Hour Minute Second Millisecond WeekOfYear WeekOfMonth
91-
## 1 5784 2 21 11 12 24 805 8 3
91+
## 1 5784 2 21 14 22 31 950 8 3
9292
## DayOfYear DayOfWeek Hour12 AmPm Era
93-
## 1 51 1 11 1 1
93+
## 1 51 1 2 2 1
9494
```
9595

9696
```r

.devel/sphinx/rapi/stri_datetime_format.md

+4-4
Original file line numberDiff line numberDiff line change
@@ -189,15 +189,15 @@ stri_datetime_parse(x, 'yyyy-MM-dd')
189189
```
190190

191191
```
192-
## [1] "2015-02-28 11:12:24 AEDT" NA
192+
## [1] "2015-02-28 14:22:32 AEDT" NA
193193
```
194194

195195
```r
196196
stri_datetime_parse(x, 'yyyy-MM-dd', lenient=TRUE)
197197
```
198198

199199
```
200-
## [1] "2015-02-28 11:12:24 AEDT" "2015-03-01 11:12:24 AEDT"
200+
## [1] "2015-02-28 14:22:32 AEDT" "2015-03-01 14:22:32 AEDT"
201201
```
202202

203203
```r
@@ -213,13 +213,13 @@ stri_datetime_parse('19 lipca 2015', 'date_long', locale='pl_PL')
213213
```
214214

215215
```
216-
## [1] "2015-07-19 11:12:24 AEST"
216+
## [1] "2015-07-19 14:22:32 AEST"
217217
```
218218

219219
```r
220220
stri_datetime_format(stri_datetime_now(), 'datetime_relative_medium')
221221
```
222222

223223
```
224-
## [1] "today, 11:12:24 am"
224+
## [1] "today, 2:22:32 pm"
225225
```

.devel/sphinx/rapi/stri_locale_set.md

+2-2
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ oldloc <- stri_locale_set('pt_BR')
5353
```
5454

5555
```
56-
## You are now working with stringi_1.7.9002 (pt_BR.UTF-8; ICU4C 74.1 [bundle]; Unicode 15.1)
56+
## You are now working with stringi_1.7.9003 (pt_BR.UTF-8; ICU4C 74.1 [bundle]; Unicode 15.1)
5757
```
5858

5959
```r
@@ -65,7 +65,7 @@ stri_locale_set(oldloc) # restore the previous default locale
6565
```
6666

6767
```
68-
## You are now working with stringi_1.7.9002 (en_AU.UTF-8; ICU4C 74.1 [bundle]; Unicode 15.1)
68+
## You are now working with stringi_1.7.9003 (en_AU.UTF-8; ICU4C 74.1 [bundle]; Unicode 15.1)
6969
```
7070

7171
```r

.devel/sphinx/rapi/stri_sprintf.md

+2-2
Original file line numberDiff line numberDiff line change
@@ -188,7 +188,7 @@ stri_sprintf("UNIX time %1$f is %1$s.", Sys.time())
188188
```
189189

190190
```
191-
## [1] "UNIX time 1699143154.428132 is 2023-11-05 11:12:34.428132."
191+
## [1] "UNIX time 1699154561.661119 is 2023-11-05 14:22:41.661119."
192192
```
193193

194194
```r
@@ -213,7 +213,7 @@ stri_sprintf("%1$s is %1$f UNIX time.", Sys.time()) # re-coercion needed
213213
```
214214

215215
```
216-
## [1] "2023-11-05 11:12:34.429878 is 1699143154.429878 UNIX time."
216+
## [1] "2023-11-05 14:22:41.662826 is 1699154561.662826 UNIX time."
217217
```
218218

219219
```r

.devel/tinytest/test-sprintf.R

+11-3
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,15 @@ expect_identical(stri_sprintf("%0000000000000000001$#0+ *0000002$.*003$e", 1.234
2222

2323
f <- c("%10.3f", "%010.3f", "%+10.3f", "%- 10.3f")
2424
x <- c(-Inf, -0, 0, Inf, NaN, NA_real_)
25-
expect_identical(outer(f, x, stri_sprintf, na_string="NA"), outer(f, x, sprintf))
25+
expect_identical(
26+
as.character(outer(f, x, stri_sprintf, na_string="NA")),
27+
c(" -Inf", " -Inf", " -Inf", "-Inf ",
28+
" -0.000", "-00000.000", " -0.000", "-0.000 ",
29+
" 0.000", "000000.000", " +0.000", " 0.000 ",
30+
" Inf", " Inf", " +Inf", " Inf ",
31+
" NaN", " NaN", " NaN", " NaN ",
32+
" NA", " NA", " NA", " NA ")
33+
)
2634

2735
expect_identical(stri_sprintf("% .0f", x, na_string=NA, inf_string=NA, nan_string=NA),
2836
c(NA, "-0", " 0", NA, NA, NA))
@@ -200,7 +208,7 @@ expect_identical(stri_sprintf("%- 5d", c(-123, 123, 0)), sprintf("%- 5d", c(-123
200208
expect_identical(stri_sprintf("%-+5d", c(-123, 123, 0)), sprintf("%-+5d", c(-123, 123, 0)))
201209
expect_identical(stri_sprintf("%-0+5d", c(-123, 123, 0)), sprintf("%-0+5d", c(-123, 123, 0)))
202210
expect_identical(stri_sprintf("%-0 5d", c(-123, 123, 0)), sprintf("%-0 5d", c(-123, 123, 0)))
203-
expect_identical(stri_sprintf("%08s", "abc"), sprintf("%08s", "abc"))
211+
#expect_identical(stri_sprintf("%08s", "abc"), sprintf("%08s", "abc")) # undefined behaviour for strings according to man 3 sprintf
204212
expect_identical(stri_sprintf("%-8s", "abc"), sprintf("%-8s", "abc"))
205213
expect_identical(stri_sprintf("%+8s", "abc"), sprintf("%+8s", "abc"))
206214
expect_identical(stri_sprintf("%1$s %s %2$s %s", 1, 2), sprintf("%1$s %s %2$s %s", 1, 2))
@@ -234,7 +242,7 @@ expect_identical(stri_sprintf("%G", 1e-6 * pi), sprintf("%G", 1e-6 * pi))
234242
expect_identical(stri_sprintf("%1.f", 101), sprintf("%1.f", 101))
235243
expect_identical(stri_sprintf("%1$d %1$x %1$X", 0:15), sprintf("%1$d %1$x %1$X", 0:15))
236244
expect_identical(stri_sprintf("min 10-char string '%10s'", c("a", "ABC", "and an even longer one")), sprintf("min 10-char string '%10s'", c("a", "ABC", "and an even longer one")))
237-
expect_identical(stri_sprintf("%09s", month.name), sprintf("%09s", month.name))
245+
expect_identical(stri_sprintf("% 9s", month.name), sprintf("% 9s", month.name))
238246
expect_identical(stri_sprintf(paste0("e with %2d digits = %.", 1:18, "g"), 1:18, exp(1)), sprintf(paste0("e with %2d digits = %.", 1:18, "g"), 1:18, exp(1)))
239247
expect_identical(stri_sprintf("second %2$1.0f, first %1$5.2f, third %3$1.0f", pi, 2, 3), sprintf("second %2$1.0f, first %1$5.2f, third %3$1.0f", pi, 2, 3))
240248
expect_identical(stri_sprintf("precision %.*f, width '%*.3f'", 3, pi, 8, pi), sprintf("precision %.*f, width '%*.3f'", 3, pi, 8, pi))

.devel/tinytest/test-uloc.R

+1-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ library("stringi")
44

55
expect_true(length(stri_locale_list()) > 0)
66

7-
expect_warning(stri_locale_set("XX_YY"))
7+
stri_locale_set("XX_YY")
88
suppressMessages(expect_true(substr(stri_locale_set("pl_PL"), 1, 5) == "xx_YY"))
99
suppressMessages(expect_true(substr(stri_locale_set("pl_PL"), 1, 5) == "pl_PL"))
1010

.github/workflows/r-icu-bundle.yml

+3-1
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@ jobs:
1515
run: |
1616
sudo apt-get update -qq
1717
# https://cran.r-project.org/bin/linux/ubuntu/
18+
sudo echo "pl_PL.UTF-8" >> /etc/locale.gen
19+
sudo locale-gen
1820
sudo apt install --no-install-recommends software-properties-common dirmngr
1921
sudo apt-key adv --keyserver keyserver.ubuntu.com --recv-keys E298A3A825C0D65DFD57CBB651716619E084DAB9
2022
sudo add-apt-repository "deb https://cloud.r-project.org/bin/linux/ubuntu $(lsb_release -cs)-cran40/"
@@ -26,6 +28,6 @@ jobs:
2628
sudo R CMD INSTALL . --configure-args='--disable-pkg-config'
2729
- name: Test stringi
2830
run: |
29-
sudo Rscript -e 'source(".devel/tinytest.R")'
31+
Rscript -e 'source(".devel/tinytest.R")'
3032
LC_ALL="pl_PL.UTF-8" Rscript -e 'source(".devel/tinytest.R")'
3133
LC_ALL="C" Rscript -e 'source(".devel/tinytest.R")'

.github/workflows/r-icu-system.yml

+3-1
Original file line numberDiff line numberDiff line change
@@ -25,13 +25,15 @@ jobs:
2525
run: |
2626
sudo apt-get update
2727
sudo apt-get -y upgrade
28+
sudo echo "pl_PL.UTF-8" >> /etc/locale.gen
29+
sudo locale-gen
2830
sudo apt-get -y install libcurl4-openssl-dev r-base-dev devscripts libicu-dev language-pack-pl
2931
sudo Rscript -e "install.packages(c('tinytest', 'Rcpp'))"
3032
- name: Install stringi
3133
run: |
3234
sudo R CMD INSTALL .
3335
- name: Test stringi
3436
run: |
35-
sudo Rscript -e 'source(".devel/tinytest.R")'
37+
Rscript -e 'source(".devel/tinytest.R")'
3638
LC_ALL="pl_PL.UTF-8" Rscript -e 'source(".devel/tinytest.R")'
3739
LC_ALL="C" Rscript -e 'source(".devel/tinytest.R")'

DESCRIPTION

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
Package: stringi
2-
Version: 1.7.9002
2+
Version: 1.7.9003
33
Date: 2023-11-05
44
Title: Fast and Portable Character String Processing Facilities
55
Description: A collection of character string/text/natural language

NEWS

+16-11
Original file line numberDiff line numberDiff line change
@@ -5,30 +5,35 @@
55

66
* [GENERAL] ICU bundle updated to version 74.1 (Unicode 15.1, CLDR 44).
77

8-
* [BUILD TIME] As per the suggestion of Prof. Brian Ripley, `icudt74l` (ICU data)
9-
is now included in the source tarball (compressed with xz to save space).
10-
This allows for building *stringi* on systems with no internet access.
8+
* [BUILD TIME] As per the suggestion of Prof. Brian Ripley, `icudt74l`
9+
(ICU data -- little endian) is now included in the source tarball (compressed
10+
with xz to save space). This allows for building *stringi* on systems with
11+
no internet access.
1112

1213
* [BUILD TIME] Support for Solaris has now been dropped. The package is no
13-
longer shipped with the very outdated ICU55 bundle. A compiler supporting
14-
at least C++11 is now required. ICU >= 61 is now required.
14+
longer shipped with the very outdated ICU55 bundle. A compiler supporting
15+
at least C++11 is now required. ICU >= 61 is now required.
1516

1617

1718

1819
* [NEW FEATURE] TODO.... #469: `stri_datetime_parse` .. new argument -
19-
`default_time`
20-
a Calendar set on input to the date and time to be used for missing values in the date/time string being parsed
20+
`default_time` a Calendar set on input to the date and time to be used
21+
for missing values in the date/time string being parsed
2122

22-
* [BUGFIX] TODO.... #469: `stri_datetime_parse` did not reset the `Calendar` object
23-
when parsing multiple dates.
23+
* [BUGFIX] TODO.... #469: `stri_datetime_parse` did not reset the `Calendar`
24+
object when parsing multiple dates.
2425

2526
* [NEW FEATURE] TODO... #476 U_USING_DEFAULT_ERROR on unknown locales
2627

28+
* TODO.... ...#490?
29+
30+
* TODO.... ...#491?
31+
2732

2833

2934
## 1.7.12 (2023-01-09)
3035

31-
* [BUGFIX] Fixed some potential problems reported by `rchk`.
36+
* [BUGFIX] Fixed a few issues reported by `rchk`.
3237

3338
* [NOTE] [BACKWARD INCOMPATIBLE CHANGE IF ICU >= 72]
3439
If building against ICU >= 72, note a backward incompatible change:
@@ -42,7 +47,7 @@
4247
the *Journal of Statistical Software*;
4348
see <https://doi.org/10.18637/jss.v103.i02>.
4449

45-
* [BUGFIX] #473, #397: Fixed buffer overflow in `stri_dup`.
50+
* [BUGFIX] #473, #397: Fixed buffer overflow in `stri_dup`; also,
4651
`stri_dup`, `stri_paste`, ... fail more gracefully on attempts to
4752
generate strings of length >= 2^31 each.
4853

R/ICU_settings.R

-5
Original file line numberDiff line numberDiff line change
@@ -82,11 +82,6 @@ stri_info <- function(short = FALSE)
8282
"Consider switching to UTF-8."))
8383
}
8484

85-
if (!(locale %in% loclist))
86-
warning(stri_paste("Your current locale is not on the list of ",
87-
"available locales; see stri_locale_list(). ",
88-
"Some functions may not work properly. "))
89-
9085
if (!short)
9186
return(info) else {
9287
return(sprintf("stringi_%s (%s.%s; ICU4C %s [%s%s]; Unicode %s)", as.character(packageVersion("stringi")),

TODO

+3-4
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,9 @@ Contributions are welcome!
1010

1111
* [NEW FEATURE] TODO... #445 stri_dup: add sep argument
1212

13-
* [BUILD TIME] TODO..
14-
Tomas Kalibera suggests to use the Rtools version of ICU for Windows:
15-
"Rtools43 has icu 72.1, Rtools42 has 71.1."
16-
see also some notes in configure.ac
13+
* [BUILD TIME] TODO.. Tomas Kalibera suggests using the Rtools version of ICU
14+
for Windows: "Rtools43 has icu 72.1, Rtools42 has 71.1."; see also some notes
15+
in configure.ac
1716

1817
* [LATER] compress, move, remove src/icu55*, src/icu61*, src/icu69*;
1918
the *.dat files may still be downloaded by old R/install.R files

src/icu74/common/unistr.cpp

+1-6
Original file line numberDiff line numberDiff line change
@@ -2019,12 +2019,7 @@ The vector deleting destructor is already a part of UObject,
20192019
but defining it here makes sure that it is included with this object file.
20202020
This makes sure that static library dependencies are kept to a minimum.
20212021
*/
2022-
#if defined(__clang__) || U_GCC_MAJOR_MINOR >= 1100
2023-
#pragma GCC diagnostic push
2024-
#pragma GCC diagnostic ignored "-Wunused-function"
2025-
static void uprv_UnicodeStringDummy() {
2022+
void uprv_UnicodeStringDummy(void) {
20262023
delete [] (new UnicodeString[2]);
20272024
}
2028-
#pragma GCC diagnostic pop
2029-
#endif
20302025
#endif

src/icu74/i18n/formattedvalue.cpp

+20-20
Original file line numberDiff line numberDiff line change
@@ -193,26 +193,26 @@ ucfpos_close(UConstrainedFieldPosition* ptr) {
193193
}
194194

195195

196-
U_CAPI const char16_t* U_EXPORT2
197-
ufmtval_getString(
198-
const UFormattedValue* ufmtval,
199-
int32_t* pLength,
200-
UErrorCode* ec) {
201-
const auto* impl = UFormattedValueApiHelper::validate(ufmtval, *ec);
202-
if (U_FAILURE(*ec)) {
203-
return nullptr;
204-
}
205-
UnicodeString readOnlyAlias = impl->fFormattedValue->toTempString(*ec);
206-
if (U_FAILURE(*ec)) {
207-
return nullptr;
208-
}
209-
if (pLength != nullptr) {
210-
*pLength = readOnlyAlias.length();
211-
}
212-
// Note: this line triggers -Wreturn-local-addr, but it is safe because toTempString is
213-
// defined to return memory owned by the ufmtval argument.
214-
return readOnlyAlias.getBuffer();
215-
}
196+
// U_CAPI const char16_t* U_EXPORT2
197+
// ufmtval_getString(
198+
// const UFormattedValue* ufmtval,
199+
// int32_t* pLength,
200+
// UErrorCode* ec) {
201+
// const auto* impl = UFormattedValueApiHelper::validate(ufmtval, *ec);
202+
// if (U_FAILURE(*ec)) {
203+
// return nullptr;
204+
// }
205+
// UnicodeString readOnlyAlias = impl->fFormattedValue->toTempString(*ec);
206+
// if (U_FAILURE(*ec)) {
207+
// return nullptr;
208+
// }
209+
// if (pLength != nullptr) {
210+
// *pLength = readOnlyAlias.length();
211+
// }
212+
// // Note: this line triggers -Wreturn-local-addr, but it is safe because toTempString is
213+
// // defined to return memory owned by the ufmtval argument.
214+
// return readOnlyAlias.getBuffer();
215+
// }
216216

217217

218218
U_CAPI UBool U_EXPORT2

0 commit comments

Comments
 (0)