mirror of
https://github.com/boostorg/locale.git
synced 2025-05-09 15:14:03 +00:00
Implement support for uint64_t values in ICU backend
ICU doesn't support uint64_t directly but provides access to formatting and parsing of decimal number strings. Use Boost.Charconv to interface with that. Fixes #235
This commit is contained in:
parent
211734c2c8
commit
42e65d0d3d
@ -1,6 +1,6 @@
|
||||
# Copyright 2003 John Maddock
|
||||
# Copyright 2010 Artyom Beilis
|
||||
# Copyright 2021 - 2022 Alexander Grund
|
||||
# Copyright 2021 - 2024 Alexander Grund
|
||||
#
|
||||
# Distributed under the Boost Software License, Version 1.0.
|
||||
# https://www.boost.org/LICENSE_1_0.txt.
|
||||
|
@ -1,6 +1,6 @@
|
||||
//
|
||||
// Copyright (c) 2009-2011 Artyom Beilis (Tonkikh)
|
||||
// Copyright (c) 2021-2023 Alexander Grund
|
||||
// Copyright (c) 2021-2024 Alexander Grund
|
||||
//
|
||||
// Distributed under the Boost Software License, Version 1.0.
|
||||
// https://www.boost.org/LICENSE_1_0.txt
|
||||
@ -13,8 +13,12 @@
|
||||
#include "icu_util.hpp"
|
||||
#include "time_zone.hpp"
|
||||
#include "uconv.hpp"
|
||||
#include <boost/assert.hpp>
|
||||
#include <boost/charconv/from_chars.hpp>
|
||||
#include <boost/charconv/to_chars.hpp>
|
||||
#include <limits>
|
||||
#include <memory>
|
||||
#include <sstream>
|
||||
#ifdef BOOST_MSVC
|
||||
# pragma warning(push)
|
||||
# pragma warning(disable : 4251) // "identifier" : class "type" needs to have dll-interface...
|
||||
@ -62,35 +66,69 @@ namespace boost { namespace locale { namespace impl_icu {
|
||||
string_type format(int64_t value, size_t& code_points) const override { return do_format(value, code_points); }
|
||||
string_type format(int32_t value, size_t& code_points) const override { return do_format(value, code_points); }
|
||||
size_t parse(const string_type& str, double& value) const override { return do_parse(str, value); }
|
||||
size_t parse(const string_type& str, uint64_t& value) const override { return do_parse(str, value); }
|
||||
size_t parse(const string_type& str, int64_t& value) const override { return do_parse(str, value); }
|
||||
size_t parse(const string_type& str, int32_t& value) const override { return do_parse(str, value); }
|
||||
|
||||
string_type format(const uint64_t value, size_t& code_points) const override
|
||||
{
|
||||
// ICU only supports int64_t as the largest integer type
|
||||
if(value <= static_cast<uint64_t>(std::numeric_limits<int64_t>::max()))
|
||||
return format(static_cast<int64_t>(value), code_points);
|
||||
|
||||
// Fallback to using a StringPiece (decimal number) as input
|
||||
char buffer[std::numeric_limits<uint64_t>::digits10 + 2];
|
||||
auto res = boost::charconv::to_chars(buffer, std::end(buffer), value);
|
||||
BOOST_ASSERT(res);
|
||||
*res.ptr = '\0'; // ICU expects a NULL-terminated string even for the StringPiece
|
||||
icu::UnicodeString tmp;
|
||||
UErrorCode err = U_ZERO_ERROR;
|
||||
icu_fmt_.format(icu::StringPiece(buffer, res.ptr - buffer), tmp, nullptr, err);
|
||||
check_and_throw_icu_error(err);
|
||||
code_points = tmp.countChar32();
|
||||
return cvt_.std(tmp);
|
||||
}
|
||||
|
||||
private:
|
||||
bool get_value(double& v, icu::Formattable& fmt) const
|
||||
{
|
||||
UErrorCode err = U_ZERO_ERROR;
|
||||
v = fmt.getDouble(err);
|
||||
if(U_FAILURE(err))
|
||||
return false;
|
||||
return true;
|
||||
return U_SUCCESS(err);
|
||||
}
|
||||
|
||||
bool get_value(int64_t& v, icu::Formattable& fmt) const
|
||||
{
|
||||
UErrorCode err = U_ZERO_ERROR;
|
||||
v = fmt.getInt64(err);
|
||||
return U_SUCCESS(err);
|
||||
}
|
||||
|
||||
bool get_value(uint64_t& v, icu::Formattable& fmt) const
|
||||
{
|
||||
UErrorCode err = U_ZERO_ERROR;
|
||||
// ICU only supports int64_t as the largest integer type
|
||||
const int64_t tmp = fmt.getInt64(err);
|
||||
if(U_SUCCESS(err)) {
|
||||
if(tmp < 0)
|
||||
return false;
|
||||
v = static_cast<uint64_t>(tmp);
|
||||
return true;
|
||||
}
|
||||
// Get value as a decimal number and parse that
|
||||
err = U_ZERO_ERROR;
|
||||
const auto decimals = fmt.getDecimalNumber(err);
|
||||
if(U_FAILURE(err))
|
||||
return false;
|
||||
return true;
|
||||
return false; // Not a number
|
||||
const auto res = boost::charconv::from_chars({decimals.data(), static_cast<size_t>(decimals.length())}, v);
|
||||
return static_cast<bool>(res);
|
||||
}
|
||||
|
||||
bool get_value(int32_t& v, icu::Formattable& fmt) const
|
||||
{
|
||||
UErrorCode err = U_ZERO_ERROR;
|
||||
v = fmt.getLong(err);
|
||||
if(U_FAILURE(err))
|
||||
return false;
|
||||
return true;
|
||||
return U_SUCCESS(err);
|
||||
}
|
||||
|
||||
template<typename ValueType>
|
||||
@ -114,14 +152,11 @@ namespace boost { namespace locale { namespace impl_icu {
|
||||
icu_fmt_.setParseIntegerOnly(std::is_integral<ValueType>::value && isNumberOnly_);
|
||||
icu_fmt_.parse(tmp, val, pp);
|
||||
|
||||
ValueType tmp_v;
|
||||
|
||||
if(pp.getIndex() == 0 || !get_value(tmp_v, val))
|
||||
if(pp.getIndex() == 0 || !get_value(v, val))
|
||||
return 0;
|
||||
size_t cut = cvt_.cut(tmp, str.data(), str.data() + str.size(), pp.getIndex());
|
||||
if(cut == 0)
|
||||
return 0;
|
||||
v = tmp_v;
|
||||
return cut;
|
||||
}
|
||||
|
||||
@ -136,11 +171,11 @@ namespace boost { namespace locale { namespace impl_icu {
|
||||
typedef std::basic_string<CharType> string_type;
|
||||
|
||||
string_type format(double value, size_t& code_points) const override { return do_format(value, code_points); }
|
||||
string_type format(uint64_t value, size_t& code_points) const override { return do_format(value, code_points); }
|
||||
string_type format(int64_t value, size_t& code_points) const override { return do_format(value, code_points); }
|
||||
|
||||
string_type format(int32_t value, size_t& code_points) const override { return do_format(value, code_points); }
|
||||
|
||||
size_t parse(const string_type& str, double& value) const override { return do_parse(str, value); }
|
||||
size_t parse(const string_type& str, uint64_t& value) const override { return do_parse(str, value); }
|
||||
size_t parse(const string_type& str, int64_t& value) const override { return do_parse(str, value); }
|
||||
size_t parse(const string_type& str, int32_t& value) const override { return do_parse(str, value); }
|
||||
|
||||
|
@ -1,5 +1,6 @@
|
||||
//
|
||||
// Copyright (c) 2009-2011 Artyom Beilis (Tonkikh)
|
||||
// Copyright (c) 2024 Alexander Grund
|
||||
//
|
||||
// Distributed under the Boost Software License, Version 1.0.
|
||||
// https://www.boost.org/LICENSE_1_0.txt
|
||||
@ -31,6 +32,8 @@ namespace boost { namespace locale { namespace impl_icu {
|
||||
/// Format the value and return the resulting string; the number of Unicode code points in it is stored in code_points
|
||||
virtual string_type format(double value, size_t& code_points) const = 0;
|
||||
/// Format the value and return the resulting string; the number of Unicode code points in it is stored in code_points
|
||||
virtual string_type format(uint64_t value, size_t& code_points) const = 0;
|
||||
/// Format the value and return the resulting string; the number of Unicode code points in it is stored in code_points
|
||||
virtual string_type format(int64_t value, size_t& code_points) const = 0;
|
||||
/// Format the value and return the resulting string; the number of Unicode code points in it is stored in code_points
|
||||
virtual string_type format(int32_t value, size_t& code_points) const = 0;
|
||||
@ -40,6 +43,9 @@ namespace boost { namespace locale { namespace impl_icu {
|
||||
virtual size_t parse(const string_type& str, double& value) const = 0;
|
||||
/// Parse the string and return the number of used characters. If it returns 0
|
||||
/// then parsing failed.
|
||||
virtual size_t parse(const string_type& str, uint64_t& value) const = 0;
|
||||
/// Parse the string and return the number of used characters. If it returns 0
|
||||
/// then parsing failed.
|
||||
virtual size_t parse(const string_type& str, int64_t& value) const = 0;
|
||||
/// Parse the string and return the number of used characters. If it returns 0
|
||||
/// then parsing failed.
|
||||
|
@ -1,5 +1,6 @@
|
||||
//
|
||||
// Copyright (c) 2009-2011 Artyom Beilis (Tonkikh)
|
||||
// Copyright (c) 2024 Alexander Grund
|
||||
//
|
||||
// Distributed under the Boost Software License, Version 1.0.
|
||||
// https://www.boost.org/LICENSE_1_0.txt
|
||||
@ -19,41 +20,31 @@
|
||||
namespace boost { namespace locale { namespace impl_icu {
|
||||
|
||||
namespace detail {
|
||||
template<typename T, bool integer = std::numeric_limits<T>::is_integer>
|
||||
struct icu_format_type;
|
||||
template<typename T, typename PreferredType, typename AlternativeType>
|
||||
struct choose_type_by_digits
|
||||
: std::conditional<std::numeric_limits<T>::digits <= std::numeric_limits<PreferredType>::digits,
|
||||
PreferredType,
|
||||
AlternativeType> {};
|
||||
|
||||
template<typename T>
|
||||
struct icu_format_type<T, true> {
|
||||
// ICU supports 32 and 64 bit ints, use the former as long as it fits, else the latter
|
||||
typedef typename std::conditional<std::numeric_limits<T>::digits <= 31, int32_t, int64_t>::type type;
|
||||
template<typename T, bool integer = std::numeric_limits<T>::is_integer>
|
||||
struct icu_format_type {
|
||||
static_assert(sizeof(T) <= sizeof(int64_t), "Only up to 64 bit integer types are supported by ICU");
|
||||
// ICU supports (only) int32_t and int64_t, use the former as long as it fits, else the latter
|
||||
using large_type = typename choose_type_by_digits<T, int64_t, uint64_t>::type;
|
||||
using type = typename choose_type_by_digits<T, int32_t, large_type>::type;
|
||||
};
|
||||
template<typename T>
|
||||
struct icu_format_type<T, false> {
|
||||
// The only floating-point type ICU supports is double
|
||||
typedef double type;
|
||||
};
|
||||
|
||||
// ICU does not support uint64_t values so fall back to the parent/std formatting
|
||||
// if the number is too large to fit into an int64_t
|
||||
template<typename T,
|
||||
bool BigUInt = !std::numeric_limits<T>::is_signed && std::numeric_limits<T>::is_integer
|
||||
&& (sizeof(T) >= sizeof(uint64_t))>
|
||||
struct use_parent_traits {
|
||||
static bool use(T /*v*/) { return false; }
|
||||
};
|
||||
template<typename T>
|
||||
struct use_parent_traits<T, true> {
|
||||
static bool use(T v) { return v > static_cast<T>(std::numeric_limits<int64_t>::max()); }
|
||||
using type = double;
|
||||
};
|
||||
|
||||
template<typename ValueType>
|
||||
static bool use_parent(std::ios_base& ios, ValueType v)
|
||||
static bool use_parent(std::ios_base& ios)
|
||||
{
|
||||
const uint64_t flg = ios_info::get(ios).display_flags();
|
||||
if(flg == flags::posix)
|
||||
return true;
|
||||
if(use_parent_traits<ValueType>::use(v))
|
||||
return true;
|
||||
|
||||
if(!std::numeric_limits<ValueType>::is_integer)
|
||||
return false;
|
||||
@ -105,7 +96,7 @@ namespace boost { namespace locale { namespace impl_icu {
|
||||
template<typename ValueType>
|
||||
iter_type do_real_put(iter_type out, std::ios_base& ios, CharType fill, ValueType val) const
|
||||
{
|
||||
if(detail::use_parent(ios, val))
|
||||
if(detail::use_parent<ValueType>(ios))
|
||||
return std::num_put<CharType>::do_put(out, ios, fill, val);
|
||||
|
||||
const std::unique_ptr<formatter_type> formatter = formatter_type::create(ios, loc_, enc_);
|
||||
@ -240,7 +231,7 @@ namespace boost { namespace locale { namespace impl_icu {
|
||||
do_real_get(iter_type in, iter_type end, std::ios_base& ios, std::ios_base::iostate& err, ValueType& val) const
|
||||
{
|
||||
stream_type* stream_ptr = dynamic_cast<stream_type*>(&ios);
|
||||
if(!stream_ptr || detail::use_parent(ios, ValueType(0)))
|
||||
if(!stream_ptr || detail::use_parent<ValueType>(ios))
|
||||
return std::num_get<CharType>::do_get(in, end, ios, err, val);
|
||||
|
||||
const std::unique_ptr<formatter_type> formatter = formatter_type::create(ios, loc_, enc_);
|
||||
|
@ -380,18 +380,22 @@ void test_manip(std::string e_charset = "UTF-8")
|
||||
TEST_MIN_MAX(int16_t, "-32,768", "32,767");
|
||||
TEST_MIN_MAX(uint16_t, "0", "65,535");
|
||||
TEST_PARSE_FAILS(as::number, "-1", uint16_t);
|
||||
TEST_PARSE_FAILS(as::number, "-32,767", uint16_t);
|
||||
if(stdlib_correctly_errors_on_out_of_range_int16())
|
||||
TEST_PARSE_FAILS(as::number, "65,535", int16_t);
|
||||
|
||||
TEST_MIN_MAX(int32_t, "-2,147,483,648", "2,147,483,647");
|
||||
TEST_MIN_MAX(uint32_t, "0", "4,294,967,295");
|
||||
TEST_PARSE_FAILS(as::number, "-1", uint32_t);
|
||||
TEST_PARSE_FAILS(as::number, "-2,147,483,647", uint32_t);
|
||||
TEST_PARSE_FAILS(as::number, "4,294,967,295", int32_t);
|
||||
|
||||
TEST_MIN_MAX(int64_t, "-9,223,372,036,854,775,808", "9,223,372,036,854,775,807");
|
||||
// ICU does not support uint64, but we have a fallback to format it at least
|
||||
TEST_MIN_MAX_FMT(as::number, uint64_t, "0", "18446744073709551615");
|
||||
TEST_MIN_MAX(uint64_t, "0", "18,446,744,073,709,551,615");
|
||||
TEST_PARSE_FAILS(as::number, "-1", uint64_t);
|
||||
TEST_PARSE_FAILS(as::number, "-9,223,372,036,854,775,807", uint64_t);
|
||||
TEST_PARSE_FAILS(as::number, "18,446,744,073,709,551,615", int64_t);
|
||||
TEST_PARSE_FAILS(as::number, "18,446,744,073,709,551,616", uint64_t);
|
||||
|
||||
TEST_FMT_PARSE_3(as::number, std::left, std::setw(3), 15, "15 ");
|
||||
TEST_FMT_PARSE_3(as::number, std::right, std::setw(3), 15, " 15");
|
||||
@ -857,6 +861,55 @@ void test_format_class(std::string charset = "UTF-8")
|
||||
TEST_FORMAT_CLS("{1,gmt,ftime='%D'}", a_datetime, "12/31/13");
|
||||
}
|
||||
|
||||
/// Test formatting and parsing of uint64_t values that are not natively supported by ICU.
|
||||
/// They use a custom code path which gets exercised by this.
|
||||
void test_uint64_format()
|
||||
{
|
||||
#ifdef BOOST_LOCALE_WITH_ICU
|
||||
std::set<std::string> tested_langs;
|
||||
int32_t count;
|
||||
auto* cur_locale = icu::Locale::getAvailableLocales(count);
|
||||
constexpr uint64_t value = std::numeric_limits<int64_t>::max() + uint64_t(3);
|
||||
const std::string posix_value = as_posix_string(value);
|
||||
constexpr int32_t short_value = std::numeric_limits<int32_t>::max();
|
||||
const std::string posix_short_value = as_posix_string(short_value);
|
||||
boost::locale::generator g;
|
||||
const std::string utf8 = ".UTF-8";
|
||||
// Test with each language supported by ICU to ensure the implementation really
|
||||
// is independent of the language and doesn't fail e.g. for different separators.
|
||||
for(int i = 0; i < count; i++, cur_locale++) {
|
||||
if(!tested_langs.insert(cur_locale->getLanguage()).second)
|
||||
continue;
|
||||
TEST_CONTEXT(cur_locale->getName());
|
||||
UErrorCode err{};
|
||||
std::unique_ptr<icu::NumberFormat> fmt{icu::NumberFormat::createInstance(*cur_locale, err)};
|
||||
icu::UnicodeString s;
|
||||
fmt->format(short_value, s, nullptr, err);
|
||||
if(U_FAILURE(err))
|
||||
continue;
|
||||
const std::string icu_value = boost::locale::conv::utf_to_utf<char>(s.getBuffer(), s.getBuffer() + s.length());
|
||||
std::stringstream ss;
|
||||
ss.imbue(g(cur_locale->getName() + utf8));
|
||||
ss << boost::locale::as::number;
|
||||
// Sanity check
|
||||
ss << short_value;
|
||||
TEST_EQ(ss.str(), icu_value);
|
||||
|
||||
// Assumption: Either both the int32 and uint64 values are in POSIX format, or neither are
|
||||
// The latter (neither is in POSIX format) is the case if separators are used and/or digits are not ASCII
|
||||
empty_stream(ss) << value;
|
||||
if(icu_value == posix_short_value)
|
||||
TEST_EQ(ss.str(), posix_value);
|
||||
else
|
||||
TEST_NE(ss.str(), posix_value);
|
||||
|
||||
uint64_t parsed_value{};
|
||||
TEST(ss >> parsed_value);
|
||||
TEST_EQ(parsed_value, value);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
BOOST_LOCALE_DISABLE_UNREACHABLE_CODE_WARNING
|
||||
void test_main(int argc, char** argv)
|
||||
{
|
||||
@ -867,6 +920,8 @@ void test_main(int argc, char** argv)
|
||||
std::cout << "ICU is not build... Skipping\n";
|
||||
return;
|
||||
#endif
|
||||
test_uint64_format();
|
||||
|
||||
boost::locale::time_zone::global("GMT+4:00");
|
||||
std::cout << "Testing char, UTF-8" << std::endl;
|
||||
test_manip<char>();
|
||||
|
Loading…
x
Reference in New Issue
Block a user