1
0
mirror of https://github.com/CLIUtils/CLI11.git synced 2025-05-05 22:53:52 +00:00

regular and literal strings (#964)

Add escaping to quoted strings, differentiate between literal and
regular strings.

The goal is to make string processing in config files as close as
possible to TOML standards. This means handling escape sequences,
including Unicode, and differentiating between literal strings and
regular strings in files and when splitting the command line. It also
means allowing variable names in the files to be quoted.

This PR gets partway there. It removes some hacks from the previous PR that
dealt with unusual option names and replaces them with quoted names.

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
This commit is contained in:
Philip Top 2023-12-30 06:54:41 -08:00 committed by GitHub
parent ba833f02ed
commit 91220babfc
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
20 changed files with 466 additions and 153 deletions

View File

@ -26,7 +26,7 @@ namespace detail {
std::string convert_arg_for_ini(const std::string &arg,
char stringQuote = '"',
char characterQuote = '\'',
char literalQuote = '\'',
bool disable_multi_line = false);
/// Comma separated join, adds quotes if needed
@ -35,7 +35,7 @@ std::string ini_join(const std::vector<std::string> &args,
char arrayStart = '[',
char arrayEnd = ']',
char stringQuote = '"',
char characterQuote = '\'');
char literalQuote = '\'');
std::vector<std::string> generate_parents(const std::string &section, std::string &name, char parentSeparator);

View File

@ -92,8 +92,8 @@ class ConfigBase : public Config {
char valueDelimiter = '=';
/// the character to use around strings
char stringQuote = '"';
/// the character to use around single characters
char characterQuote = '\'';
/// the character to use around single characters and literal strings
char literalQuote = '\'';
/// the maximum number of layers to allow
uint8_t maximumLayers{255};
/// the separator used to separate parent layers
@ -132,7 +132,7 @@ class ConfigBase : public Config {
/// Specify the quote characters used around strings and characters
ConfigBase *quoteCharacter(char qString, char qChar) {
stringQuote = qString;
characterQuote = qChar;
literalQuote = qChar;
return this;
}
/// Specify the maximum number of parents

View File

@ -120,6 +120,9 @@ inline std::string trim_copy(const std::string &str) {
/// remove matching quotes at the front and back of a string, either '"', '\'' or '`'
CLI11_INLINE std::string &remove_quotes(std::string &str);
/// remove quotes from all elements of a string vector and process escaped components
CLI11_INLINE void remove_quotes(std::vector<std::string> &args);
/// Add a leader to the beginning of all new lines (nothing is added
/// at the start of the first line). `"; "` would be for ini files
///
@ -212,9 +215,13 @@ template <typename Callable> inline std::string find_and_modify(std::string str,
return str;
}
/// close a sequence of characters indicated by a closure character. Brackets allows sub sequences
/// recognized bracket sequences include "'`[(<{ other closure characters are assumed to be literal strings
CLI11_INLINE std::size_t close_sequence(const std::string &str, std::size_t start, char closure_char);
/// Split a string '"one two" "three"' into 'one two', 'three'
/// Quote characters can be ` ' or " or bracket characters [{(< which are matched to their closing bracket
CLI11_INLINE std::vector<std::string> split_up(std::string str, char delimiter = '\0', bool removeQuotes = true);
CLI11_INLINE std::vector<std::string> split_up(std::string str, char delimiter = '\0');
/// get the value of an environmental variable or empty string if empty
CLI11_INLINE std::string get_environment_value(const std::string &env_name);
@ -246,6 +253,9 @@ CLI11_INLINE bool is_binary_escaped_string(const std::string &escaped_string);
/// extract an escaped binary_string
CLI11_INLINE std::string extract_binary_string(const std::string &escaped_string);
/// process a quoted string, remove the quotes and if appropriate handle escaped characters
CLI11_INLINE bool process_quoted_string(std::string &str, char string_char = '\"', char literal_char = '\'');
} // namespace detail
// [CLI11:string_tools_hpp:end]

View File

@ -579,6 +579,11 @@ CLI11_INLINE void App::parse(std::string commandline, bool program_name_included
auto args = detail::split_up(std::move(commandline));
// remove all empty strings
args.erase(std::remove(args.begin(), args.end(), std::string{}), args.end());
try {
detail::remove_quotes(args);
} catch(const std::invalid_argument &arg) {
throw CLI::ParseError(arg.what(), CLI::ExitCodes::InvalidError);
}
std::reverse(args.begin(), args.end());
parse(std::move(args));
}
@ -1569,7 +1574,7 @@ CLI11_INLINE bool App::_parse_single(std::vector<std::string> &args, bool &posit
case detail::Classifier::SHORT:
case detail::Classifier::WINDOWS_STYLE:
// If already parsed a subcommand, don't accept options_
_parse_arg(args, classifier, false);
retval = _parse_arg(args, classifier, false);
break;
case detail::Classifier::NONE:
// Probably a positional or something for a parent (sub)command

View File

@ -19,18 +19,19 @@
namespace CLI {
// [CLI11:config_inl_hpp:verbatim]
static constexpr auto triple_quote = R"(""")";
static constexpr auto multiline_literal_quote = R"(''')";
static constexpr auto multiline_string_quote = R"(""")";
namespace detail {
CLI11_INLINE bool is_printable(const std::string &test_string) {
return std::all_of(test_string.begin(), test_string.end(), [](char x) {
return (isprint(static_cast<unsigned char>(x)) != 0 || x == '\n');
return (isprint(static_cast<unsigned char>(x)) != 0 || x == '\n' || x == '\t');
});
}
CLI11_INLINE std::string
convert_arg_for_ini(const std::string &arg, char stringQuote, char characterQuote, bool disable_multi_line) {
convert_arg_for_ini(const std::string &arg, char stringQuote, char literalQuote, bool disable_multi_line) {
if(arg.empty()) {
return std::string(2, stringQuote);
}
@ -53,13 +54,10 @@ convert_arg_for_ini(const std::string &arg, char stringQuote, char characterQuot
if(isprint(static_cast<unsigned char>(arg.front())) == 0) {
return binary_escape_string(arg);
}
if(arg == "\\") {
return std::string(1, stringQuote) + "\\\\" + stringQuote;
}
if(arg == "'") {
return std::string(1, stringQuote) + "'" + stringQuote;
}
return std::string(1, characterQuote) + arg + characterQuote;
return std::string(1, literalQuote) + arg + literalQuote;
}
// handle hex, binary or octal arguments
if(arg.front() == '0') {
@ -82,13 +80,10 @@ convert_arg_for_ini(const std::string &arg, char stringQuote, char characterQuot
if(!is_printable(arg)) {
return binary_escape_string(arg);
}
if(arg.find_first_of('\n') != std::string::npos) {
if(disable_multi_line) {
return binary_escape_string(arg);
}
return std::string(triple_quote) + arg + triple_quote;
}
if(detail::has_escapable_character(arg)) {
if(arg.size() > 100 && !disable_multi_line) {
return std::string(multiline_literal_quote) + arg + multiline_literal_quote;
}
return std::string(1, stringQuote) + detail::add_escaped_characters(arg) + stringQuote;
}
return std::string(1, stringQuote) + arg + stringQuote;
@ -99,7 +94,7 @@ CLI11_INLINE std::string ini_join(const std::vector<std::string> &args,
char arrayStart,
char arrayEnd,
char stringQuote,
char characterQuote) {
char literalQuote) {
bool disable_multi_line{false};
std::string joined;
if(args.size() > 1 && arrayStart != '\0') {
@ -114,7 +109,7 @@ CLI11_INLINE std::string ini_join(const std::vector<std::string> &args,
joined.push_back(' ');
}
}
joined.append(convert_arg_for_ini(arg, stringQuote, characterQuote, disable_multi_line));
joined.append(convert_arg_for_ini(arg, stringQuote, literalQuote, disable_multi_line));
}
if(args.size() > 1 && arrayEnd != '\0') {
joined.push_back(arrayEnd);
@ -233,7 +228,7 @@ inline std::vector<ConfigItem> ConfigBase::from_config(std::istream &input) cons
if(len < 3) {
continue;
}
if(line.compare(0, 3, triple_quote) == 0 || line.compare(0, 3, "'''") == 0) {
if(line.compare(0, 3, multiline_string_quote) == 0 || line.compare(0, 3, multiline_literal_quote) == 0) {
inMLineComment = true;
auto cchar = line.front();
while(inMLineComment) {
@ -277,19 +272,15 @@ inline std::vector<ConfigItem> ConfigBase::from_config(std::istream &input) cons
// comment lines
if(line.front() == ';' || line.front() == '#' || line.front() == commentChar) {
if(line.compare(2, 13, "cli11:literal") == 0) {
literalName = true;
getline(input, buffer);
line = detail::trim_copy(buffer);
} else {
continue;
}
continue;
}
std::size_t search_start = 0;
if(line.front() == stringQuote || line.front() == literalQuote || line.front() == '`') {
search_start = detail::close_sequence(line, 0, line.front());
}
// Find = in string, split and recombine
auto delimiter_pos = line.find_first_of(valueDelimiter, 1);
auto comment_pos = (literalName) ? std::string::npos : line.find_first_of(commentChar);
auto delimiter_pos = line.find_first_of(valueDelimiter, search_start + 1);
auto comment_pos = line.find_first_of(commentChar, search_start);
if(comment_pos < delimiter_pos) {
delimiter_pos = std::string::npos;
}
@ -297,9 +288,10 @@ inline std::vector<ConfigItem> ConfigBase::from_config(std::istream &input) cons
name = detail::trim_copy(line.substr(0, delimiter_pos));
std::string item = detail::trim_copy(line.substr(delimiter_pos + 1, std::string::npos));
bool mlquote = (item.compare(0, 3, "'''") == 0 || item.compare(0, 3, triple_quote) == 0);
bool mlquote =
(item.compare(0, 3, multiline_literal_quote) == 0 || item.compare(0, 3, multiline_string_quote) == 0);
if(!mlquote && comment_pos != std::string::npos && !literalName) {
auto citems = detail::split_up(item, commentChar, false);
auto citems = detail::split_up(item, commentChar);
item = detail::trim_copy(citems.front());
}
if(mlquote) {
@ -337,6 +329,9 @@ inline std::vector<ConfigItem> ConfigBase::from_config(std::istream &input) cons
if(!item.empty() && item.back() == '\n') {
item.pop_back();
}
if(keyChar == '\"') {
item = detail::remove_escaped_characters(item);
}
} else {
if(lineExtension) {
detail::trim(l2);
@ -358,11 +353,11 @@ inline std::vector<ConfigItem> ConfigBase::from_config(std::istream &input) cons
detail::trim(multiline);
item += multiline;
}
items_buffer = detail::split_up(item.substr(1, item.length() - 2), aSep, false);
items_buffer = detail::split_up(item.substr(1, item.length() - 2), aSep);
} else if((isDefaultArray || isINIArray) && item.find_first_of(aSep) != std::string::npos) {
items_buffer = detail::split_up(item, aSep, false);
items_buffer = detail::split_up(item, aSep);
} else if((isDefaultArray || isINIArray) && item.find_first_of(' ') != std::string::npos) {
items_buffer = detail::split_up(item, '\0', false);
items_buffer = detail::split_up(item, '\0');
} else {
items_buffer = {item};
}
@ -370,17 +365,15 @@ inline std::vector<ConfigItem> ConfigBase::from_config(std::istream &input) cons
name = detail::trim_copy(line.substr(0, comment_pos));
items_buffer = {"true"};
}
if(name.find(parentSeparatorChar) == std::string::npos) {
if(!literalName) {
detail::remove_quotes(name);
}
}
// clean up quotes on the items and check for escaped strings
for(auto &it : items_buffer) {
detail::remove_quotes(it);
if(detail::is_binary_escaped_string(it)) {
it = detail::extract_binary_string(it);
try {
literalName = detail::process_quoted_string(name, stringQuote, literalQuote);
// clean up quotes on the items and check for escaped strings
for(auto &it : items_buffer) {
detail::process_quoted_string(it, stringQuote, literalQuote);
}
} catch(const std::invalid_argument &ia) {
throw CLI::ParseError(ia.what(), CLI::ExitCodes::InvalidError);
}
std::vector<std::string> parents;
if(literalName) {
@ -461,16 +454,17 @@ ConfigBase::to_config(const App *app, bool default_also, bool write_description,
continue;
}
}
std::string name = prefix + opt->get_single_name();
if(name == prefix) {
std::string single_name = opt->get_single_name();
if(single_name.empty()) {
continue;
}
std::string value = detail::ini_join(
opt->reduced_results(), arraySeparator, arrayStart, arrayEnd, stringQuote, characterQuote);
opt->reduced_results(), arraySeparator, arrayStart, arrayEnd, stringQuote, literalQuote);
if(value.empty() && default_also) {
if(!opt->get_default_str().empty()) {
value = detail::convert_arg_for_ini(opt->get_default_str(), stringQuote, characterQuote, false);
value = detail::convert_arg_for_ini(opt->get_default_str(), stringQuote, literalQuote, false);
} else if(opt->get_expected_min() == 0) {
value = "false";
} else if(opt->get_run_callback_for_default()) {
@ -479,15 +473,16 @@ ConfigBase::to_config(const App *app, bool default_also, bool write_description,
}
if(!value.empty()) {
if(!opt->get_fnames().empty()) {
try {
value = opt->get_flag_value(name, value);
value = opt->get_flag_value(single_name, value);
} catch(const CLI::ArgumentMismatch &) {
bool valid{false};
for(const auto &test_name : opt->get_fnames()) {
try {
value = opt->get_flag_value(test_name, value);
name = test_name;
single_name = test_name;
valid = true;
} catch(const CLI::ArgumentMismatch &) {
continue;
@ -495,7 +490,7 @@ ConfigBase::to_config(const App *app, bool default_also, bool write_description,
}
if(!valid) {
value = detail::ini_join(
opt->results(), arraySeparator, arrayStart, arrayEnd, stringQuote, characterQuote);
opt->results(), arraySeparator, arrayStart, arrayEnd, stringQuote, literalQuote);
}
}
}
@ -503,13 +498,27 @@ ConfigBase::to_config(const App *app, bool default_also, bool write_description,
out << '\n';
out << commentLead << detail::fix_newlines(commentLead, opt->get_description()) << '\n';
}
if(name.find_first_of(commentTest) != std::string::npos || name.compare(0, 3, triple_quote) == 0 ||
name.compare(0, 3, "'''") == 0 || (name.front() == '[' && name.back() == ']') ||
(name.front() == stringQuote && name.back() == stringQuote) ||
(name.front() == characterQuote && name.back() == characterQuote) ||
(name.front() == '`' && name.back() == '`')) {
out << commentChar << " cli11:literal\n";
if(single_name.find_first_of(commentTest) != std::string::npos ||
single_name.compare(0, 3, multiline_string_quote) == 0 ||
single_name.compare(0, 3, multiline_literal_quote) == 0 ||
(single_name.front() == '[' && single_name.back() == ']') ||
(single_name.find_first_of(stringQuote) != std::string::npos) ||
(single_name.find_first_of(literalQuote) != std::string::npos) ||
(single_name.find_first_of('`') != std::string::npos)) {
if(single_name.find_first_of(literalQuote) == std::string::npos) {
single_name.insert(0, 1, literalQuote);
single_name.push_back(literalQuote);
} else {
if(detail::has_escapable_character(single_name)) {
single_name = detail::add_escaped_characters(single_name);
}
single_name.insert(0, 1, stringQuote);
single_name.push_back(stringQuote);
}
}
std::string name = prefix + single_name;
out << name << valueDelimiter << value << '\n';
}
}

View File

@ -609,7 +609,12 @@ CLI11_INLINE void Option::_reduce_results(results_t &out, const results_t &origi
throw ArgumentMismatch::AtLeast(get_name(), static_cast<int>(num_min), original.size());
}
if(original.size() > num_max) {
throw ArgumentMismatch::AtMost(get_name(), static_cast<int>(num_max), original.size());
if(original.size() == 2 && num_max == 1 && original[1] == "%%" && original[0] == "{}") {
// this condition is a trap for the following empty indicator check on config files
out = original;
} else {
throw ArgumentMismatch::AtMost(get_name(), static_cast<int>(num_max), original.size());
}
}
break;
}

View File

@ -61,7 +61,17 @@ CLI11_INLINE std::string &rtrim(std::string &str, const std::string &filter) {
}
CLI11_INLINE std::string &remove_quotes(std::string &str) {
if(str.length() > 1 && (str.front() == '"' || str.front() == '\'')) {
if(str.length() > 1 && (str.front() == '"' || str.front() == '\'' || str.front() == '`')) {
if(str.front() == str.back()) {
str.pop_back();
str.erase(str.begin(), str.begin() + 1);
}
}
return str;
}
CLI11_INLINE std::string &remove_outer(std::string &str, char key) {
if(str.length() > 1 && (str.front() == key)) {
if(str.front() == str.back()) {
str.pop_back();
str.erase(str.begin(), str.begin() + 1);
@ -181,9 +191,10 @@ find_member(std::string name, const std::vector<std::string> names, bool ignore_
return (it != std::end(names)) ? (it - std::begin(names)) : (-1);
}
static const std::string escapedChars("'\"`])>}\\");
static const std::string bracketChars{"'\"`[(<{"};
static const std::string matchBracketChars("'\"`])>}");
static const std::string escapedChars("\b\t\n\f\r\"\\");
static const std::string escapedCharsCode("btnfr\"\\");
static const std::string bracketChars{"\"'`[(<{"};
static const std::string matchBracketChars("\"'`])>}");
CLI11_INLINE bool has_escapable_character(const std::string &str) {
return (str.find_first_of(escapedChars) != std::string::npos);
@ -193,25 +204,109 @@ CLI11_INLINE std::string add_escaped_characters(const std::string &str) {
std::string out;
out.reserve(str.size() + 4);
for(char s : str) {
if(escapedChars.find_first_of(s) != std::string::npos) {
auto sloc = escapedChars.find_first_of(s);
if(sloc != std::string::npos) {
out.push_back('\\');
out.push_back(escapedCharsCode[sloc]);
} else {
out.push_back(s);
}
out.push_back(s);
}
return out;
}
/// Convert one hexadecimal digit character into its numeric value.
/// Accepts '0'-'9', 'A'-'F' and 'a'-'f' and yields 0 through 15.
/// Any other character yields -1 converted to std::uint32_t, so callers
/// detect an invalid digit by checking for a result greater than 0x0F.
CLI11_INLINE std::uint32_t hexConvert(char hc) {
    if(hc >= '0' && hc <= '9') {
        return static_cast<std::uint32_t>(hc - '0');
    }
    if(hc >= 'A' && hc <= 'F') {
        return static_cast<std::uint32_t>(hc - 'A' + 10);
    }
    if(hc >= 'a' && hc <= 'f') {
        return static_cast<std::uint32_t>(hc - 'a' + 10);
    }
    // not a hex digit: produce the out-of-range marker value
    return static_cast<std::uint32_t>(-1);
}
/// Narrow a code value to a single char, keeping only its low 8 bits.
CLI11_INLINE char make_char(std::uint32_t code) { return static_cast<char>(static_cast<unsigned char>(code)); }

/// Append the UTF-8 encoding of a Unicode code point to a string.
/// @param str  the string that receives the encoded bytes
/// @param code the code point to encode
/// @throws std::invalid_argument if `code` lies in the surrogate range [0xD800, 0xDFFF]
///         or is above 0x10FFFF; neither can be encoded, and silently dropping the
///         value would make a malformed escape sequence disappear from the output
CLI11_INLINE void append_codepoint(std::string &str, std::uint32_t code) {
    if(code < 0x80) {  // ascii code equivalent
        str.push_back(static_cast<char>(code));
    } else if(code < 0x800) {  // \u0080 to \u07FF
        // 110yyyyx 10xxxxxx; 0x3f == 0b0011'1111
        str.push_back(make_char(0xC0 | (code >> 6)));
        str.push_back(make_char(0x80 | (code & 0x3F)));
    } else if(code < 0x10000) {  // U+0800...U+FFFF
        if(0xD800 <= code && code <= 0xDFFF) {
            throw std::invalid_argument("[0xD800, 0xDFFF] are not valid UTF-8.");
        }
        // 1110yyyy 10yxxxxx 10xxxxxx
        str.push_back(make_char(0xE0 | (code >> 12)));
        str.push_back(make_char(0x80 | ((code >> 6) & 0x3F)));
        str.push_back(make_char(0x80 | (code & 0x3F)));
    } else if(code < 0x110000) {  // U+010000 ... U+10FFFF
        // 11110yyy 10yyxxxx 10xxxxxx 10xxxxxx
        str.push_back(make_char(0xF0 | (code >> 18)));
        str.push_back(make_char(0x80 | ((code >> 12) & 0x3F)));
        str.push_back(make_char(0x80 | ((code >> 6) & 0x3F)));
        str.push_back(make_char(0x80 | (code & 0x3F)));
    } else {
        // beyond the last Unicode code point: report it instead of appending nothing
        throw std::invalid_argument("code points above 0x10FFFF are not valid UTF-8.");
    }
}
CLI11_INLINE std::string remove_escaped_characters(const std::string &str) {
std::string out;
out.reserve(str.size());
for(auto loc = str.begin(); loc < str.end(); ++loc) {
if(*loc == '\\') {
if(escapedChars.find_first_of(*(loc + 1)) != std::string::npos) {
out.push_back(*(loc + 1));
if(str.end() - loc < 2) {
throw std::invalid_argument("invalid escape sequence " + str);
}
auto ecloc = escapedCharsCode.find_first_of(*(loc + 1));
if(ecloc != std::string::npos) {
out.push_back(escapedChars[ecloc]);
++loc;
} else if(*(loc + 1) == 'u') {
// must have 4 hex characters
if(str.end() - loc < 6) {
throw std::invalid_argument("unicode sequence must have 4 hex codes " + str);
}
std::uint32_t code{0};
std::uint32_t mplier{16 * 16 * 16};
for(int ii = 2; ii < 6; ++ii) {
std::uint32_t res = hexConvert(*(loc + ii));
if(res > 0x0F) {
throw std::invalid_argument("unicode sequence must have 4 hex codes " + str);
}
code += res * mplier;
mplier = mplier / 16;
}
append_codepoint(out, code);
loc += 5;
} else if(*(loc + 1) == 'U') {
// must have 8 hex characters
if(str.end() - loc < 10) {
throw std::invalid_argument("unicode sequence must have 8 hex codes " + str);
}
std::uint32_t code{0};
std::uint32_t mplier{16 * 16 * 16 * 16 * 16 * 16 * 16};
for(int ii = 2; ii < 10; ++ii) {
std::uint32_t res = hexConvert(*(loc + ii));
if(res > 0x0F) {
throw std::invalid_argument("unicode sequence must have 8 hex codes " + str);
}
code += res * mplier;
mplier = mplier / 16;
}
append_codepoint(out, code);
loc += 9;
} else if(*(loc + 1) == '0') {
out.push_back('\0');
++loc;
} else {
out.push_back(*loc);
throw std::invalid_argument(std::string("unrecognized escape sequence \\") + *(loc + 1) + " in " + str);
}
} else {
out.push_back(*loc);
@ -220,39 +315,73 @@ CLI11_INLINE std::string remove_escaped_characters(const std::string &str) {
return out;
}
CLI11_INLINE std::pair<std::size_t, bool> close_sequence(const std::string &str, std::size_t start, char closure_char) {
std::string closures;
closures.push_back(closure_char);
/// Find the end of a regular (escape-aware) quoted string.
/// Scanning begins at the character after `start`. A backslash causes the
/// character that follows it to be skipped, so an escaped quote does not close the string.
/// @param str          the text being scanned
/// @param start        index of the opening quote character
/// @param closure_char the character that closes the string
/// @return index of the closing character, or str.size() if the string is never closed
///         (including when the text ends with a dangling backslash)
CLI11_INLINE std::size_t close_string_quote(const std::string &str, std::size_t start, char closure_char) {
    const std::size_t len = str.size();
    std::size_t loc = start + 1;
    while(loc < len && str[loc] != closure_char) {
        // step over the escaped character as well when a backslash is found
        loc += (str[loc] == '\\') ? 2 : 1;
    }
    // a trailing backslash can push the index past the end; never report more than size()
    return (loc < len) ? loc : len;
}
/// Find the end of a literal quoted string, in which no escape processing takes place.
/// @param str          the text being scanned
/// @param start        index of the opening quote character
/// @param closure_char the character that closes the string
/// @return index of the first `closure_char` after `start`, or str.size() if there is none
CLI11_INLINE std::size_t close_literal_quote(const std::string &str, std::size_t start, char closure_char) {
    const auto closing = str.find(closure_char, start + 1);
    if(closing == std::string::npos) {
        return str.size();
    }
    return closing;
}
CLI11_INLINE std::size_t close_sequence(const std::string &str, std::size_t start, char closure_char) {
auto bracket_loc = matchBracketChars.find(closure_char);
switch(bracket_loc) {
case 0:
return close_string_quote(str, start, closure_char);
case 1:
case 2:
case std::string::npos:
return close_literal_quote(str, start, closure_char);
default:
break;
}
std::string closures(1, closure_char);
auto loc = start + 1;
bool inQuote = closure_char == '"' || closure_char == '\'' || closure_char == '`';
bool hasControlSequence{false};
while(loc < str.size()) {
if(str[loc] == closures.back()) {
closures.pop_back();
if(closures.empty()) {
return {loc, hasControlSequence};
return loc;
}
inQuote = false;
}
if(str[loc] == '\\') {
if(inQuote) {
hasControlSequence = true;
}
++loc;
}
if(!inQuote) {
auto bracket_loc = bracketChars.find(str[loc]);
if(bracket_loc != std::string::npos) {
bracket_loc = bracketChars.find(str[loc]);
if(bracket_loc != std::string::npos) {
switch(bracket_loc) {
case 0:
loc = close_string_quote(str, loc, str[loc]);
break;
case 1:
case 2:
loc = close_literal_quote(str, loc, str[loc]);
break;
default:
closures.push_back(matchBracketChars[bracket_loc]);
inQuote = (bracket_loc <= 2);
break;
}
}
++loc;
}
return {loc, hasControlSequence};
if(loc > str.size()) {
loc = str.size();
}
return loc;
}
CLI11_INLINE std::vector<std::string> split_up(std::string str, char delimiter, bool removeQuotes) {
CLI11_INLINE std::vector<std::string> split_up(std::string str, char delimiter) {
auto find_ws = [delimiter](char ch) {
return (delimiter == '\0') ? std::isspace<char>(ch, std::locale()) : (ch == delimiter);
@ -260,20 +389,22 @@ CLI11_INLINE std::vector<std::string> split_up(std::string str, char delimiter,
trim(str);
std::vector<std::string> output;
bool embeddedQuote = false;
std::size_t adjust = removeQuotes ? 1 : 0;
while(!str.empty()) {
if(bracketChars.find_first_of(str[0]) != std::string::npos) {
auto bracketLoc = bracketChars.find_first_of(str[0]);
auto closure = close_sequence(str, 0, matchBracketChars[bracketLoc]);
auto end = closure.first;
output.push_back(str.substr(adjust, end + 1 - 2 * adjust));
if(end + 2 < str.size()) {
str = str.substr(end + 2);
} else {
auto end = close_sequence(str, 0, matchBracketChars[bracketLoc]);
if(end >= str.size()) {
output.push_back(std::move(str));
str.clear();
} else {
output.push_back(str.substr(0, end + 1));
if(end + 2 < str.size()) {
str = str.substr(end + 2);
} else {
str.clear();
}
}
embeddedQuote = embeddedQuote || closure.second;
} else {
auto it = std::find_if(std::begin(str), std::end(str), find_ws);
if(it != std::end(str)) {
@ -285,11 +416,6 @@ CLI11_INLINE std::vector<std::string> split_up(std::string str, char delimiter,
str.clear();
}
}
// transform any embedded quotes into the regular character if the quotes are removed
if(embeddedQuote && removeQuotes) {
output.back() = remove_escaped_characters(output.back());
embeddedQuote = false;
}
trim(str);
}
return output;
@ -373,30 +499,12 @@ CLI11_INLINE std::string extract_binary_string(const std::string &escaped_string
if(escaped_string[loc] == '\\' && (escaped_string[loc + 1] == 'x' || escaped_string[loc + 1] == 'X')) {
auto c1 = escaped_string[loc + 2];
auto c2 = escaped_string[loc + 3];
int res{0};
bool invalid{false};
if(c1 >= '0' && c1 <= '9') {
res = (c1 - '0') * 16;
} else if(c1 >= 'A' && c1 <= 'F') {
res = (c1 - 'A' + 10) * 16;
} else if(c1 >= 'a' && c1 <= 'f') {
res = (c1 - 'a' + 10) * 16;
} else {
invalid = true;
}
if(c2 >= '0' && c2 <= '9') {
res += (c2 - '0');
} else if(c2 >= 'A' && c2 <= 'F') {
res += (c2 - 'A' + 10);
} else if(c2 >= 'a' && c2 <= 'f') {
res += (c2 - 'a' + 10);
} else {
invalid = true;
}
if(!invalid) {
std::uint32_t res1 = hexConvert(c1);
std::uint32_t res2 = hexConvert(c2);
if(res1 <= 0x0F && res2 <= 0x0F) {
loc += 4;
outstring.push_back(static_cast<char>(res));
outstring.push_back(static_cast<char>(res1 * 16 + res2));
continue;
}
}
@ -406,6 +514,40 @@ CLI11_INLINE std::string extract_binary_string(const std::string &escaped_string
return outstring;
}
/// Strip the outer quotes from every element of a vector of arguments.
/// Elements wrapped in double quotes are regular strings: after the quotes are
/// removed their escape sequences are processed. All other elements only have
/// their outer quotes removed and are otherwise left as written.
/// Exceptions raised by remove_escaped_characters propagate to the caller.
CLI11_INLINE void remove_quotes(std::vector<std::string> &args) {
    for(auto &arg : args) {
        if(arg.empty()) {
            // front()/back() are undefined on an empty string and there is nothing to strip
            continue;
        }
        if(arg.front() == '\"' && arg.back() == '\"') {
            remove_quotes(arg);
            // only remove escaped for string arguments not literal strings
            arg = remove_escaped_characters(arg);
        } else {
            remove_quotes(arg);
        }
    }
}
/// Remove the quoting from a string and, where appropriate, decode its contents.
/// Strings of length 0 or 1 are left alone. A binary escaped string is replaced by
/// its extracted bytes. A string wrapped in `string_char` has the quotes removed and,
/// if it contains a backslash, its escape sequences processed. A string wrapped in
/// `literal_char` or in backticks only has the quotes removed.
/// @return true if the string was recognized as quoted/escaped and modified, false otherwise
CLI11_INLINE bool process_quoted_string(std::string &str, char string_char, char literal_char) {
    if(str.size() <= 1) {
        return false;
    }
    if(detail::is_binary_escaped_string(str)) {
        str = detail::extract_binary_string(str);
        return true;
    }
    const char lead = str.front();
    if(lead != str.back()) {
        // every quoted form needs matching first and last characters
        return false;
    }
    if(lead == string_char) {
        detail::remove_outer(str, string_char);
        const bool hasBackslash = (str.find_first_of('\\') != std::string::npos);
        if(hasBackslash) {
            str = detail::remove_escaped_characters(str);
        }
        return true;
    }
    if(lead == literal_char || lead == '`') {
        detail::remove_outer(str, lead);
        return true;
    }
    return false;
}
std::string get_environment_value(const std::string &env_name) {
char *buffer = nullptr;
std::string ename_string;

View File

@ -414,10 +414,10 @@ TEST_CASE_METHOD(TApp, "OneStringEqualVersionSingleStringQuotedEscapedCharacters
app.add_option("-s,--string", str);
app.add_option("-t,--tstr", str2);
app.add_option("-m,--mstr", str3);
app.parse(R"raw(--string="this is my \"quoted\" string" -t 'qst\'ring 2' -m=`"quoted\` string"`")raw");
CHECK("this is my \"quoted\" string" == str);
CHECK("qst\'ring 2" == str2);
CHECK("\"quoted` string\"" == str3);
app.parse(R"raw(--string="this is my \n\"quoted\" string" -t 'qst\ring 2' -m=`"quoted\n string"`")raw");
CHECK("this is my \n\"quoted\" string" == str); // escaped
CHECK("qst\\ring 2" == str2); // literal
CHECK("\"quoted\\n string\"" == str3); // double quoted literal
}
TEST_CASE_METHOD(TApp, "OneStringEqualVersionSingleStringQuotedMultipleWithEqual", "[app]") {

View File

@ -27,6 +27,15 @@ TEST_CASE("StringBased: convert_arg_for_ini", "[config]") {
CHECK("-22E14" == CLI::detail::convert_arg_for_ini("-22E14"));
CHECK("'a'" == CLI::detail::convert_arg_for_ini("a"));
CHECK("'\\'" == CLI::detail::convert_arg_for_ini("\\"));
CHECK("\"'\"" == CLI::detail::convert_arg_for_ini("'"));
std::string tstring1;
tstring1.push_back('\0');
// binary string conversion single character
CHECK("'B\"(\\x00)\"'" == CLI::detail::convert_arg_for_ini(tstring1));
// hex
CHECK("0x5461FAED" == CLI::detail::convert_arg_for_ini("0x5461FAED"));
// hex fail
@ -2713,7 +2722,8 @@ TEST_CASE_METHOD(TApp, "TomlOutputMultilineString", "[config]") {
std::string desc = "flag";
app.add_option("--opt", desc);
std::string argString = "this is a very long string \n that covers multiple lines \n and should be long";
std::string argString = "this is a very long string \n that covers multiple lines \nand should be longer than 100 "
"characters \nto trigger the multiline string";
args = {"--opt", argString};
run();

View File

@ -50,7 +50,7 @@ TEST_CASE("file_fail") {
CLI::FuzzApp fuzzdata;
auto app = fuzzdata.generateApp();
int index = GENERATE(range(1, 3));
int index = GENERATE(range(1, 5));
auto parseData = loadFailureFile("fuzz_file_fail", index);
std::stringstream out(parseData);
try {
@ -63,7 +63,7 @@ TEST_CASE("app_file_gen_fail") {
CLI::FuzzApp fuzzdata;
auto app = fuzzdata.generateApp();
int index = GENERATE(range(1, 33));
int index = GENERATE(range(1, 40));
std::string optionString, flagString;
auto parseData = loadFailureFile("fuzz_app_file_fail", index);
if(parseData.size() > 25) {

View File

@ -165,6 +165,7 @@ TEST_CASE("String: InvalidName", "[helpers]") {
CHECK(CLI::detail::valid_name_string("b@d2?"));
CHECK(CLI::detail::valid_name_string("2vali?d"));
CHECK_FALSE(CLI::detail::valid_name_string("!valid"));
CHECK_FALSE(CLI::detail::valid_name_string("!va\nlid"));
}
TEST_CASE("StringTools: Modify", "[helpers]") {
@ -250,6 +251,11 @@ TEST_CASE("StringTools: binaryEscapseConversion", "[helpers]") {
std::string rstring = CLI::detail::extract_binary_string(estring);
CHECK(rstring == testString2);
CLI::detail::remove_quotes(estring);
CHECK(CLI::detail::is_binary_escaped_string(estring));
std::string rstringrq = CLI::detail::extract_binary_string(estring);
CHECK(rstringrq == testString2);
testString2.push_back(0);
testString2.push_back(static_cast<char>(197));
testString2.push_back(78);
@ -272,11 +278,13 @@ TEST_CASE("StringTools: binaryStrings", "[helpers]") {
CHECK(CLI::detail::extract_binary_string(rstring).empty());
rstring = "B\"(\\x35\\xa7)\"";
CHECK(CLI::detail::is_binary_escaped_string(rstring));
auto result = CLI::detail::extract_binary_string(rstring);
CHECK(result[0] == static_cast<char>(0x35));
CHECK(result[1] == static_cast<char>(0xa7));
rstring = "B\"(\\x3e\\xf7)\"";
rstring = "'B\"(\\x3e\\xf7)\"'";
CHECK(CLI::detail::is_binary_escaped_string(rstring));
result = CLI::detail::extract_binary_string(rstring);
CHECK(result[0] == static_cast<char>(0x3e));
CHECK(result[1] == static_cast<char>(0xf7));
@ -300,12 +308,126 @@ TEST_CASE("StringTools: binaryStrings", "[helpers]") {
CHECK(result == "\\XEM\\X7K");
}
/// Conversion helpers returning std::string from whatever type a u8"" literal produces;
/// provided for compatibility with the C++20 char8_t type, which otherwise breaks the comparisons
std::string from_u8string(const std::string &s) {
    return s;
}
std::string from_u8string(std::string &&s) {
    return std::move(s);
}
#if defined(__cpp_lib_char8_t)
// library support for char8_t: copy the u8string contents element by element
std::string from_u8string(const std::u8string &s) {
    return std::string(s.begin(), s.end());
}
#elif defined(__cpp_char8_t)
// language support only: reinterpret the char8_t literal as a plain char sequence
std::string from_u8string(const char8_t *s) {
    return std::string(reinterpret_cast<const char *>(s));
}
#endif
TEST_CASE("StringTools: escapeConversion", "[helpers]") {
CHECK(CLI::detail::remove_escaped_characters("test\\\"") == "test\"");
CHECK(CLI::detail::remove_escaped_characters("test\\}") == "test}");
CHECK(CLI::detail::remove_escaped_characters("test\\\\") == "test\\");
CHECK(CLI::detail::remove_escaped_characters("test\\\\") == "test\\");
CHECK(CLI::detail::remove_escaped_characters("test\\k") == "test\\k");
CHECK(CLI::detail::remove_escaped_characters("test\\b") == "test\b");
CHECK(CLI::detail::remove_escaped_characters("test\\t") == "test\t");
CHECK(CLI::detail::remove_escaped_characters("test\\n\\r\\t\\f") == "test\n\r\t\f");
CHECK(CLI::detail::remove_escaped_characters("test\\r") == "test\r");
CHECK(CLI::detail::remove_escaped_characters("test\\f") == "test\f");
std::string zstring = "test";
zstring.push_back('\0');
zstring.append("test\n");
CHECK(CLI::detail::remove_escaped_characters("test\\0test\\n") == zstring);
CHECK_THROWS_AS(CLI::detail::remove_escaped_characters("test\\m_bad"), std::invalid_argument);
CHECK_THROWS_AS(CLI::detail::remove_escaped_characters("test\\"), std::invalid_argument);
}
// Exercises process_quoted_string: binary escaped strings, regular strings with escape
// processing, literal strings, backtick strings, and custom quote characters.
TEST_CASE("StringTools: quotedString", "[helpers]") {
// a binary escaped string wrapped in literal quotes
std::string rstring = "'B\"(\\x35\\xa7)\"'";
auto s2 = rstring;
// the binary form is extracted with the outer quotes still present
CLI::detail::process_quoted_string(s2);
CHECK(s2[0] == static_cast<char>(0x35));
CHECK(s2[1] == static_cast<char>(0xa7));
s2 = rstring;
// and also after the outer quotes have been removed first
CLI::detail::remove_quotes(s2);
CLI::detail::process_quoted_string(s2);
CHECK(s2[0] == static_cast<char>(0x35));
CHECK(s2[1] == static_cast<char>(0xa7));
// qbase is a double quoted string with escape sequences; qresult is its decoded form
std::string qbase = R"("this\nis\na\nfour\tline test")";
std::string qresult = "this\nis\na\nfour\tline test";
std::string q1 = qbase;
// test remove quotes and escape processing
CLI::detail::process_quoted_string(q1);
CHECK(q1 == qresult);
// same content wrapped in single quotes
std::string q2 = qbase;
q2.front() = '\'';
q2.pop_back();
q2.push_back('\'');
// expected literal result: the content with the quotes stripped and escapes left untouched
std::string qliteral = qbase.substr(1);
qliteral.pop_back();
// test remove quotes for literal string
CHECK(CLI::detail::process_quoted_string(q2));
CHECK(q2 == qliteral);
// same content wrapped in backticks
std::string q3 = qbase;
q3.front() = '`';
q3.pop_back();
q3.push_back('`');
// test remove quotes for literal string
CHECK(CLI::detail::process_quoted_string(q3));
CHECK(q3 == qliteral);
// '|' is not a quote character unless it is passed in explicitly
std::string q4 = qbase;
q4.front() = '|';
q4.pop_back();
q4.push_back('|');
// check that it doesn't process
CHECK_FALSE(CLI::detail::process_quoted_string(q4));
// test custom string quote character
CHECK(CLI::detail::process_quoted_string(q4, '|'));
CHECK(q4 == qresult);
std::string q5 = qbase;
q5.front() = '?';
q5.pop_back();
q5.push_back('?');
// test custom literal quote character
CHECK(CLI::detail::process_quoted_string(q5, '|', '?'));
CHECK(q5 == qliteral);
q3 = qbase;
q3.front() = '`';
q3.pop_back();
q3.push_back('`');
// test that '`' still works regardless of the other specified characters
CHECK(CLI::detail::process_quoted_string(q3));
CHECK(q3 == qliteral);
}
/// Verifies that \uXXXX and \UXXXXXXXX escapes expand to the same bytes as the matching
/// u8 literal, and that malformed unicode escapes raise std::invalid_argument.
TEST_CASE("StringTools: unicode_literals", "[helpers]") {
    const auto unescape = [](const char *escaped) { return CLI::detail::remove_escaped_characters(escaped); };

    // well-formed 4 digit and 8 digit escapes
    CHECK(unescape("test\\u03C0\\u00e9") == from_u8string(u8"test\u03C0\u00E9"));
    CHECK(unescape("test\\u73C0\\u0057") == from_u8string(u8"test\u73C0\u0057"));
    CHECK(unescape("test\\U0001F600\\u00E9") == from_u8string(u8"test\U0001F600\u00E9"));

    // 'M' is not a hexadecimal digit, in either the long or the short form
    CHECK_THROWS_AS(unescape("test\\U0001M600\\u00E9"), std::invalid_argument);
    CHECK_THROWS_AS(unescape("test\\U0001E600\\u00M9"), std::invalid_argument);
    // 0xD8E9 falls inside the UTF-16 surrogate range
    CHECK_THROWS_AS(unescape("test\\U0001E600\\uD8E9"), std::invalid_argument);
    // escapes cut short: two of four digits, then seven of eight digits
    CHECK_THROWS_AS(unescape("test\\U0001E600\\uD8"), std::invalid_argument);
    CHECK_THROWS_AS(unescape("test\\U0001E60"), std::invalid_argument);
}
/// Verifies the index close_sequence reports for the closing character that matches the
/// opener at the given start position.
TEST_CASE("StringTools: close_sequence", "[helpers]") {
    using CLI::detail::close_sequence;

    CHECK(close_sequence(R"seq([test])seq", 0, ']') == 5U);
    // a ']' inside a quoted section is not taken as the closer
    CHECK(close_sequence(R"seq(["test]"])seq", 0, ']') == 8U);
    CHECK(close_sequence(R"seq(["test]"],[t2])seq", 0, ']') == 8U);
    // starting at the second bracketed group
    CHECK(close_sequence(R"seq(["test]"],[t2])seq", 10, ']') == 13U);
    // no '}' is present; the expected value equals the length of the string
    CHECK(close_sequence(R"seq({"test]"],[t2])seq", 0, '}') == 14U);
    // nested and mixed bracket types
    CHECK(close_sequence(R"seq([(),(),{},"]]52{}",[],[54],[[],[],()]])seq", 0, ']') == 37U);
}
TEST_CASE("Trim: Various", "[helpers]") {
@ -967,35 +1089,35 @@ TEST_CASE("Join: Backward", "[helpers]") {
}
/// A double-quoted section is kept as one token, with its quotes retained.
TEST_CASE("SplitUp: Simple", "[helpers]") {
    std::vector<std::string> oput = {"one", "\"two three\""};
    std::string orig{R"(one "two three")"};
    std::vector<std::string> result = CLI::detail::split_up(orig);
    CHECK(result == oput);
}
/// A backtick-quoted section is kept as one token, with its backticks retained.
TEST_CASE("SplitUp: SimpleDifferentQuotes", "[helpers]") {
    std::vector<std::string> oput = {"one", "`two three`"};
    std::string orig{R"(one `two three`)"};
    std::vector<std::string> result = CLI::detail::split_up(orig);
    CHECK(result == oput);
}
/// An unterminated backtick quote runs to the end of the input; the opening backtick is retained.
TEST_CASE("SplitUp: SimpleMissingQuotes", "[helpers]") {
    std::vector<std::string> oput = {"one", "`two three"};
    std::string orig{R"(one `two three)"};
    std::vector<std::string> result = CLI::detail::split_up(orig);
    CHECK(result == oput);
}
/// A backslash-escaped quote inside a double-quoted section does not end the token;
/// the token is returned with its quotes and the escape sequence intact.
TEST_CASE("SplitUp: SimpleMissingQuotesEscaped", "[helpers]") {
    std::vector<std::string> oput = {"one", R"("two three\"")"};
    std::string orig{R"(one "two three\"")"};
    std::vector<std::string> result = CLI::detail::split_up(orig);
    CHECK(result == oput);
}
TEST_CASE("SplitUp: SimpleDifferentQuotes2", "[helpers]") {
std::vector<std::string> oput = {"one", "two three"};
std::vector<std::string> oput = {"one", "'two three'"};
std::string orig{R"(one 'two three')"};
std::vector<std::string> result = CLI::detail::split_up(orig);
CHECK(result == oput);
@ -1004,59 +1126,59 @@ TEST_CASE("SplitUp: SimpleDifferentQuotes2", "[helpers]") {
/// With ',' as the delimiter, a comma inside square brackets does not split the token.
TEST_CASE("SplitUp: Bracket1", "[helpers]") {
    std::vector<std::string> oput = {"one", "[two, three]"};
    std::string orig{"one, [two, three]"};
    std::vector<std::string> result = CLI::detail::split_up(orig, ',');
    CHECK(result == oput);
}
/// With ',' as the delimiter, a comma inside angle brackets does not split the token.
TEST_CASE("SplitUp: Bracket2", "[helpers]") {
    std::vector<std::string> oput = {"one", "<two, three>"};
    std::string orig{"one, <two, three>"};
    std::vector<std::string> result = CLI::detail::split_up(orig, ',');
    CHECK(result == oput);
}
/// With ',' as the delimiter, a comma inside parentheses does not split the token.
TEST_CASE("SplitUp: Bracket3", "[helpers]") {
    std::vector<std::string> oput = {"one", "(two, three)"};
    std::string orig{"one, (two, three)"};
    std::vector<std::string> result = CLI::detail::split_up(orig, ',');
    CHECK(result == oput);
}
/// With ',' as the delimiter, a comma inside curly braces does not split the token.
TEST_CASE("SplitUp: Bracket4", "[helpers]") {
    std::vector<std::string> oput = {"one", "{two, three}"};
    std::string orig{"one, {two, three}"};
    std::vector<std::string> result = CLI::detail::split_up(orig, ',');
    CHECK(result == oput);
}
/// With '#' as the delimiter, a '#' inside a quoted array element does not split the input.
TEST_CASE("SplitUp: Comment", "[helpers]") {
    std::vector<std::string> oput = {R"(["quote1", "#"])"};
    std::string orig{R"(["quote1", "#"])"};
    std::vector<std::string> result = CLI::detail::split_up(orig, '#');
    CHECK(result == oput);
}
/// Single quotes nested inside a double-quoted section stay part of one token, and the
/// outer double quotes are retained.
TEST_CASE("SplitUp: Layered", "[helpers]") {
    std::vector<std::string> output = {R"("one 'two three'")"};
    std::string orig{R"("one 'two three'")"};
    std::vector<std::string> result = CLI::detail::split_up(orig);
    CHECK(result == output);
}
/// Whitespace outside of quotes is dropped, while whitespace inside the quoted section is
/// preserved together with the quotes.
TEST_CASE("SplitUp: Spaces", "[helpers]") {
    std::vector<std::string> oput = {"one", "\" two three\""};
    std::string orig{R"( one " two three" )"};
    std::vector<std::string> result = CLI::detail::split_up(orig);
    CHECK(result == oput);
}
TEST_CASE("SplitUp: BadStrings", "[helpers]") {
std::vector<std::string> oput = {"one", " two three"};
std::vector<std::string> oput = {"one", "\" two three"};
std::string orig{R"( one " two three )"};
std::vector<std::string> result = CLI::detail::split_up(orig);
CHECK(result == oput);
oput = {"one", " two three"};
oput = {"one", "' two three"};
orig = R"( one ' two three )";
result = CLI::detail::split_up(orig);
CHECK(result == oput);

View File

@ -0,0 +1,2 @@
'''-$<24>
$

View File

@ -0,0 +1 @@
" (\\\,"<22>窿

View File

@ -0,0 +1 @@
'^^^^^^^\^^^^^^''''''@''i¦

View File

@ -0,0 +1 @@
"\ "

View File

@ -0,0 +1 @@
"Ü-t2ÿÿÿÿp'--vopt1'â''e#ÿÿ'â''e

View File

@ -0,0 +1 @@
ParseErrorEF'' --vo-d{}

View File

@ -0,0 +1 @@
[--'

View File

@ -0,0 +1 @@
"\<5C>"

View File

@ -0,0 +1 @@
""\"