mirror of
https://github.com/CLIUtils/CLI11.git
synced 2025-04-29 20:23:55 +00:00
This adds a round trip test for config file generation to the fuzzer. (The next step after this PR will be a fuzzer that verifies that the round trip actually matches the results.) This change ended up requiring quite a few minor changes to fix the ambiguities between the config file generation and the config file reader. 1). There were a number of potential conflicts between positional names and regular option names that could be triggered in config files; this required a number of additional checks on the positional naming to ensure no conflicts. 2). Flag options with disable flag override can produce output results that are not valid by themselves; resolving this required flag input to be able to handle an array and output the original value set of results. 3). Strings with non-printable characters could cause all sorts of chaos in the config files. This was resolved by generating a binary string conversion format and handling multiline comments and characters, and handling escaped characters. Note: I think a better solution is to move to fully supporting string formatting and escaping along with the binary strings from TOML now that TOML 1.0 is finalized. That will not be this PR though, maybe the next one. 4). Lots of ambiguities and edge cases in the string splitter; this was reworked. 5). Handling of comments was not done well, especially comment characters in the name of the option, which is allowed. 6). Non-printable characters in the option naming. This would be weird in practice but it also caused some big holes in the config file generation, so the restricted character set for option naming was expanded (don't allow spaces or control characters). --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
433 lines
14 KiB
C++
433 lines
14 KiB
C++
// Copyright (c) 2017-2023, University of Cincinnati, developed by Henry Schreiner
|
|
// under NSF AWARD 1414736 and by the respective contributors.
|
|
// All rights reserved.
|
|
//
|
|
// SPDX-License-Identifier: BSD-3-Clause
|
|
|
|
#pragma once
|
|
|
|
// This include is only needed for IDEs to discover symbols
|
|
#include <CLI/StringTools.hpp>
|
|
|
|
// [CLI11:public_includes:set]
|
|
#include <string>
|
|
#include <utility>
|
|
#include <vector>
|
|
// [CLI11:public_includes:end]
|
|
|
|
namespace CLI {
|
|
// [CLI11:string_tools_inl_hpp:verbatim]
|
|
|
|
namespace detail {
|
|
/// Split a string on a single-character delimiter.
///
/// An empty input yields a vector holding exactly one empty string, so callers always receive at least one
/// element. Splitting follows std::getline semantics: a trailing delimiter does not add a trailing empty element.
/// @param s the string to split
/// @param delim the delimiter character
/// @return the pieces of `s` found between delimiters
CLI11_INLINE std::vector<std::string> split(const std::string &s, char delim) {
    std::vector<std::string> elems;
    // Check to see if empty string, give consistent result
    if(s.empty()) {
        elems.emplace_back();
        return elems;
    }
    std::stringstream ss(s);
    std::string item;
    while(std::getline(ss, item, delim)) {
        elems.push_back(item);
    }
    return elems;
}
|
|
|
|
/// Remove leading whitespace from a string, in place.
/// @param str the string to modify
/// @return a reference to `str` so calls can be chained
CLI11_INLINE std::string &ltrim(std::string &str) {
    // first character that is not whitespace according to the current global locale
    auto it = std::find_if(str.begin(), str.end(), [](char ch) { return !std::isspace<char>(ch, std::locale()); });
    str.erase(str.begin(), it);
    return str;
}
|
|
|
|
/// Remove leading characters that appear in `filter` from a string, in place.
/// @param str the string to modify
/// @param filter the set of characters to strip from the front
/// @return a reference to `str` so calls can be chained
CLI11_INLINE std::string &ltrim(std::string &str, const std::string &filter) {
    // first character that is not part of the filter set
    auto it = std::find_if(str.begin(), str.end(), [&filter](char ch) { return filter.find(ch) == std::string::npos; });
    str.erase(str.begin(), it);
    return str;
}
|
|
|
|
/// Remove trailing whitespace from a string, in place.
/// @param str the string to modify
/// @return a reference to `str` so calls can be chained
CLI11_INLINE std::string &rtrim(std::string &str) {
    // search backwards for the last character that is not whitespace in the current global locale
    auto it = std::find_if(str.rbegin(), str.rend(), [](char ch) { return !std::isspace<char>(ch, std::locale()); });
    // base() points one past that character, i.e. at the first trailing whitespace
    str.erase(it.base(), str.end());
    return str;
}
|
|
|
|
/// Remove trailing characters that appear in `filter` from a string, in place.
/// @param str the string to modify
/// @param filter the set of characters to strip from the back
/// @return a reference to `str` so calls can be chained
CLI11_INLINE std::string &rtrim(std::string &str, const std::string &filter) {
    // search backwards for the last character that is not part of the filter set
    auto it =
        std::find_if(str.rbegin(), str.rend(), [&filter](char ch) { return filter.find(ch) == std::string::npos; });
    // base() points one past that character, i.e. at the first trailing filtered character
    str.erase(it.base(), str.end());
    return str;
}
|
|
|
|
/// Strip one pair of matching outer quotes (`"` or `'`) from a string, in place.
///
/// Nothing is changed unless the string has at least two characters and its first and last characters are the
/// same quote character.
/// @param str the string to modify
/// @return a reference to `str` so calls can be chained
CLI11_INLINE std::string &remove_quotes(std::string &str) {
    const bool startsWithQuote = str.length() > 1 && (str.front() == '"' || str.front() == '\'');
    if(startsWithQuote && str.front() == str.back()) {
        str.pop_back();
        str.erase(str.begin());
    }
    return str;
}
|
|
|
|
/// Insert `leader` after every newline in `input`.
///
/// Newlines that are part of an inserted `leader` are not processed again. The search always resumes after the
/// inserted text, so an empty leader or a leader ending in '\n' cannot cause an endless loop.
/// @param leader text to place at the start of every continuation line
/// @param input the text to process (taken by value and returned modified)
/// @return `input` with `leader` inserted after each of its original newlines
CLI11_INLINE std::string fix_newlines(const std::string &leader, std::string input) {
    std::string::size_type n = input.find('\n');
    while(n != std::string::npos) {
        input.insert(n + 1, leader);
        // resume strictly after the newline and the leader that was just inserted
        n = input.find('\n', n + 1 + leader.size());
    }
    return input;
}
|
|
|
|
/// Write one help entry (name column followed by description) to a stream.
///
/// The name is written left-aligned in a column `wid` characters wide. If the name fills or overflows the column,
/// the description starts on a new line indented by `wid`. Every newline inside the description is followed by
/// `wid` characters of padding so continuation lines stay aligned. The entry always ends with a newline.
/// Note that std::left remains set on `out` afterwards.
/// @param out the stream to write to
/// @param name the option or subcommand name
/// @param description the help text, may be empty or span several lines
/// @param wid the width of the name column
/// @return `out`
CLI11_INLINE std::ostream &
format_help(std::ostream &out, std::string name, const std::string &description, std::size_t wid) {
    // NOTE(review): leading pad literal reproduced as it appears in this copy of the file; confirm its width
    // against upstream in case whitespace was collapsed
    name = " " + name;
    out << std::setw(static_cast<int>(wid)) << std::left << name;
    if(!description.empty()) {
        if(name.length() >= wid) {
            out << "\n" << std::setw(static_cast<int>(wid)) << "";
        }
        for(const char c : description) {
            out.put(c);
            if(c == '\n') {
                // indent continuation lines to the description column
                out << std::setw(static_cast<int>(wid)) << "";
            }
        }
    }
    out << "\n";
    return out;
}
|
|
|
|
CLI11_INLINE std::ostream &format_aliases(std::ostream &out, const std::vector<std::string> &aliases, std::size_t wid) {
|
|
if(!aliases.empty()) {
|
|
out << std::setw(static_cast<int>(wid)) << " aliases: ";
|
|
bool front = true;
|
|
for(const auto &alias : aliases) {
|
|
if(!front) {
|
|
out << ", ";
|
|
} else {
|
|
front = false;
|
|
}
|
|
out << detail::fix_newlines(" ", alias);
|
|
}
|
|
out << "\n";
|
|
}
|
|
return out;
|
|
}
|
|
|
|
CLI11_INLINE bool valid_name_string(const std::string &str) {
|
|
if(str.empty() || !valid_first_char(str[0])) {
|
|
return false;
|
|
}
|
|
auto e = str.end();
|
|
for(auto c = str.begin() + 1; c != e; ++c)
|
|
if(!valid_later_char(*c))
|
|
return false;
|
|
return true;
|
|
}
|
|
|
|
/// Replace every occurrence of `from` in `str` with `to`.
///
/// Replacements are not rescanned, so a `to` that contains `from` does not recurse. An empty `from` matches
/// nothing and the string is returned unchanged.
/// @param str the text to process (taken by value and returned modified)
/// @param from the substring to search for
/// @param to the replacement text
/// @return the processed string
CLI11_INLINE std::string find_and_replace(std::string str, std::string from, std::string to) {
    // an empty needle is found at every position and would loop forever
    if(from.empty()) {
        return str;
    }

    std::size_t start_pos = 0;

    while((start_pos = str.find(from, start_pos)) != std::string::npos) {
        str.replace(start_pos, from.length(), to);
        // skip over the inserted text so it is never matched again
        start_pos += to.length();
    }

    return str;
}
|
|
|
|
/// Strip `{...}` default-value annotations and `!` markers from a flag name string, in place.
///
/// A brace group is removed only when its closing `}` is reached before any `,`. Every `!` in the string is
/// removed afterwards. The search for `{` starts at index 2.
/// @param flags the flag specification to clean up
CLI11_INLINE void remove_default_flag_values(std::string &flags) {
    auto loc = flags.find_first_of('{', 2);
    while(loc != std::string::npos) {
        const auto finish = flags.find_first_of("},", loc + 1);
        if((finish != std::string::npos) && (flags[finish] == '}')) {
            flags.erase(flags.begin() + static_cast<std::ptrdiff_t>(loc),
                        flags.begin() + static_cast<std::ptrdiff_t>(finish) + 1);
        }
        loc = flags.find_first_of('{', loc + 1);
    }
    flags.erase(std::remove(flags.begin(), flags.end(), '!'), flags.end());
}
|
|
|
|
CLI11_INLINE std::ptrdiff_t
|
|
find_member(std::string name, const std::vector<std::string> names, bool ignore_case, bool ignore_underscore) {
|
|
auto it = std::end(names);
|
|
if(ignore_case) {
|
|
if(ignore_underscore) {
|
|
name = detail::to_lower(detail::remove_underscore(name));
|
|
it = std::find_if(std::begin(names), std::end(names), [&name](std::string local_name) {
|
|
return detail::to_lower(detail::remove_underscore(local_name)) == name;
|
|
});
|
|
} else {
|
|
name = detail::to_lower(name);
|
|
it = std::find_if(std::begin(names), std::end(names), [&name](std::string local_name) {
|
|
return detail::to_lower(local_name) == name;
|
|
});
|
|
}
|
|
|
|
} else if(ignore_underscore) {
|
|
name = detail::remove_underscore(name);
|
|
it = std::find_if(std::begin(names), std::end(names), [&name](std::string local_name) {
|
|
return detail::remove_underscore(local_name) == name;
|
|
});
|
|
} else {
|
|
it = std::find(std::begin(names), std::end(names), name);
|
|
}
|
|
|
|
return (it != std::end(names)) ? (it - std::begin(names)) : (-1);
|
|
}
|
|
|
|
/// Characters that add_escaped_characters prefixes with a backslash and remove_escaped_characters un-escapes.
static const std::string escapedChars("'\"`])>}\\");
/// Characters that open a quoted or bracketed sequence; the first three entries are the quote characters.
static const std::string bracketChars{"'\"`[(<{"};
/// Closing character for each opener, stored at the same index as in bracketChars.
static const std::string matchBracketChars("'\"`])>}");
|
|
|
|
CLI11_INLINE bool has_escapable_character(const std::string &str) {
|
|
return (str.find_first_of(escapedChars) != std::string::npos);
|
|
}
|
|
|
|
CLI11_INLINE std::string add_escaped_characters(const std::string &str) {
|
|
std::string out;
|
|
out.reserve(str.size() + 4);
|
|
for(char s : str) {
|
|
if(escapedChars.find_first_of(s) != std::string::npos) {
|
|
out.push_back('\\');
|
|
}
|
|
out.push_back(s);
|
|
}
|
|
return out;
|
|
}
|
|
|
|
CLI11_INLINE std::string remove_escaped_characters(const std::string &str) {
|
|
|
|
std::string out;
|
|
out.reserve(str.size());
|
|
for(auto loc = str.begin(); loc < str.end(); ++loc) {
|
|
if(*loc == '\\') {
|
|
if(escapedChars.find_first_of(*(loc + 1)) != std::string::npos) {
|
|
out.push_back(*(loc + 1));
|
|
++loc;
|
|
} else {
|
|
out.push_back(*loc);
|
|
}
|
|
} else {
|
|
out.push_back(*loc);
|
|
}
|
|
}
|
|
return out;
|
|
}
|
|
|
|
CLI11_INLINE std::pair<std::size_t, bool> close_sequence(const std::string &str, std::size_t start, char closure_char) {
|
|
std::string closures;
|
|
closures.push_back(closure_char);
|
|
auto loc = start + 1;
|
|
bool inQuote = closure_char == '"' || closure_char == '\'' || closure_char == '`';
|
|
bool hasControlSequence{false};
|
|
while(loc < str.size()) {
|
|
if(str[loc] == closures.back()) {
|
|
closures.pop_back();
|
|
if(closures.empty()) {
|
|
return {loc, hasControlSequence};
|
|
}
|
|
inQuote = false;
|
|
}
|
|
if(str[loc] == '\\') {
|
|
if(inQuote) {
|
|
hasControlSequence = true;
|
|
}
|
|
++loc;
|
|
}
|
|
if(!inQuote) {
|
|
auto bracket_loc = bracketChars.find(str[loc]);
|
|
if(bracket_loc != std::string::npos) {
|
|
closures.push_back(matchBracketChars[bracket_loc]);
|
|
inQuote = (bracket_loc <= 2);
|
|
}
|
|
}
|
|
++loc;
|
|
}
|
|
return {loc, hasControlSequence};
|
|
}
|
|
|
|
CLI11_INLINE std::vector<std::string> split_up(std::string str, char delimiter, bool removeQuotes) {
|
|
|
|
auto find_ws = [delimiter](char ch) {
|
|
return (delimiter == '\0') ? std::isspace<char>(ch, std::locale()) : (ch == delimiter);
|
|
};
|
|
trim(str);
|
|
|
|
std::vector<std::string> output;
|
|
bool embeddedQuote = false;
|
|
std::size_t adjust = removeQuotes ? 1 : 0;
|
|
while(!str.empty()) {
|
|
if(bracketChars.find_first_of(str[0]) != std::string::npos) {
|
|
auto bracketLoc = bracketChars.find_first_of(str[0]);
|
|
auto closure = close_sequence(str, 0, matchBracketChars[bracketLoc]);
|
|
auto end = closure.first;
|
|
output.push_back(str.substr(adjust, end + 1 - 2 * adjust));
|
|
if(end + 2 < str.size()) {
|
|
str = str.substr(end + 2);
|
|
} else {
|
|
str.clear();
|
|
}
|
|
embeddedQuote = embeddedQuote || closure.second;
|
|
} else {
|
|
auto it = std::find_if(std::begin(str), std::end(str), find_ws);
|
|
if(it != std::end(str)) {
|
|
std::string value = std::string(str.begin(), it);
|
|
output.push_back(value);
|
|
str = std::string(it + 1, str.end());
|
|
} else {
|
|
output.push_back(str);
|
|
str.clear();
|
|
}
|
|
}
|
|
// transform any embedded quotes into the regular character if the quotes are removed
|
|
if(embeddedQuote && removeQuotes) {
|
|
output.back() = remove_escaped_characters(output.back());
|
|
embeddedQuote = false;
|
|
}
|
|
trim(str);
|
|
}
|
|
return output;
|
|
}
|
|
|
|
/// Turn the separator between a flag and a quoted value into a space so the pair splits correctly.
///
/// When the character after `offset` is a quote, the nearest preceding character from the set `-/ "'` or a
/// backtick is located. The character at `offset` is replaced by a space if that preceding character is '-'
/// while `str[offset]` is '=', or is '/' while `str[offset]` is anything else. Nothing is changed when `offset`
/// is the first character (no flag marker can precede it) or when no character follows it.
/// @param str the command-line text, modified in place
/// @param offset index of the separator character to examine
/// @return `offset + 1`
CLI11_INLINE std::size_t escape_detect(std::string &str, std::size_t offset) {
    // need one character before offset for the flag marker and one after it for the quote
    if(offset > 0 && offset + 1 < str.size()) {
        const char next = str[offset + 1];
        if((next == '\"') || (next == '\'') || (next == '`')) {
            const auto astart = str.find_last_of("-/ \"\'`", offset - 1);
            if(astart != std::string::npos) {
                if(str[astart] == ((str[offset] == '=') ? '-' : '/')) {
                    str[offset] = ' ';  // interpret this as a space so the split_up works properly
                }
            }
        }
    }
    return offset + 1;
}
|
|
|
|
/// Produce a printable representation of a string that may contain non-printable characters.
///
/// Each character for which isprint reports false is replaced by `\xHH` with two lowercase hex digits. If
/// nothing needed replacing the input is returned unchanged. Otherwise every single quote is also rewritten as
/// `\x27` and the result is wrapped as `'B"(` ... `)"'`.
/// @param string_to_escape the raw string
/// @return the original string, or its wrapped and escaped form
CLI11_INLINE std::string binary_escape_string(const std::string &string_to_escape) {
    const char *hex_digits = "0123456789abcdef";
    // s is our escaped output string
    std::string escaped_string{};
    escaped_string.reserve(string_to_escape.size());
    // loop through all characters
    for(char c : string_to_escape) {
        // check if a given character is printable
        // the cast is necessary to avoid undefined behaviour
        const auto uc = static_cast<unsigned char>(c);
        if(isprint(uc) == 0) {
            // not printable: emit the byte value as exactly two hex digits
            escaped_string += "\\x";
            escaped_string.push_back(hex_digits[uc >> 4]);
            escaped_string.push_back(hex_digits[uc & 0x0F]);
        } else {
            escaped_string.push_back(c);
        }
    }
    if(escaped_string != string_to_escape) {
        // the wrapper uses single quotes, so none may remain inside the payload
        auto sqLoc = escaped_string.find('\'');
        while(sqLoc != std::string::npos) {
            // replace exactly one character (the quote) with its four-character escape
            escaped_string.replace(sqLoc, 1, "\\x27");
            sqLoc = escaped_string.find('\'', sqLoc + 4);
        }
        escaped_string.insert(0, "'B\"(");
        escaped_string.push_back(')');
        escaped_string.push_back('"');
        escaped_string.push_back('\'');
    }
    return escaped_string;
}
|
|
|
|
/// Check whether a string carries the binary-escape wrapper.
///
/// Two forms are accepted: `B"(` ... `)"` and the single-quoted `'B"(` ... `)"'`.
/// @param escaped_string the string to inspect
/// @return true if the string starts and ends with one of the two wrapper forms
CLI11_INLINE bool is_binary_escaped_string(const std::string &escaped_string) {
    const std::size_t ssize = escaped_string.size();
    // The prefix is tested first in each pair: a successful prefix match guarantees the string is long enough
    // for the suffix position (ssize - N) to be valid.
    if(escaped_string.compare(0, 3, "B\"(") == 0 && escaped_string.compare(ssize - 2, 2, ")\"") == 0) {
        return true;
    }
    return (escaped_string.compare(0, 4, "'B\"(") == 0 && escaped_string.compare(ssize - 3, 3, ")\"'") == 0);
}
|
|
|
|
/// Decode a string wrapped in the binary-escape format back to its raw bytes.
///
/// Accepts the wrappers `B"(` ... `)"` and `'B"(` ... `)"'`; any other input is returned unchanged. Inside the
/// wrapper each `\xHH` or `\XHH` with two valid hex digits becomes the corresponding byte; every other character,
/// including a malformed escape, is copied as is.
/// @param escaped_string the possibly wrapped string
/// @return the decoded payload, or the input itself if it is not wrapped
CLI11_INLINE std::string extract_binary_string(const std::string &escaped_string) {
    std::size_t start{0};
    std::size_t tail{0};
    const std::size_t ssize = escaped_string.size();
    if(escaped_string.compare(0, 3, "B\"(") == 0 && escaped_string.compare(ssize - 2, 2, ")\"") == 0) {
        start = 3;
        tail = 2;
    } else if(escaped_string.compare(0, 4, "'B\"(") == 0 && escaped_string.compare(ssize - 3, 3, ")\"'") == 0) {
        start = 4;
        tail = 3;
    }

    if(start == 0) {
        return escaped_string;
    }

    // value of a single hex digit, or -1 if the character is not a hex digit
    auto hex_value = [](char ch) -> int {
        if(ch >= '0' && ch <= '9') {
            return ch - '0';
        }
        if(ch >= 'A' && ch <= 'F') {
            return ch - 'A' + 10;
        }
        if(ch >= 'a' && ch <= 'f') {
            return ch - 'a' + 10;
        }
        return -1;
    };

    std::string outstring;
    outstring.reserve(ssize - start - tail);
    // stop before the closing wrapper characters
    const std::size_t stop = ssize - tail;
    std::size_t loc = start;
    while(loc < stop) {
        // look-ahead reads stay within [0, ssize]; a const string yields '\0' at index ssize
        if(escaped_string[loc] == '\\' && (escaped_string[loc + 1] == 'x' || escaped_string[loc + 1] == 'X')) {
            const int high = hex_value(escaped_string[loc + 2]);
            const int low = hex_value(escaped_string[loc + 3]);
            if(high >= 0 && low >= 0) {
                outstring.push_back(static_cast<char>(high * 16 + low));
                loc += 4;
                continue;
            }
        }
        outstring.push_back(escaped_string[loc]);
        ++loc;
    }
    return outstring;
}
|
|
|
|
/// Read an environment variable.
///
/// Uses _dupenv_s when compiled with MSVC and std::getenv otherwise.
/// @param env_name the name of the variable
/// @return the variable's value, or an empty string if it is not set
CLI11_INLINE std::string get_environment_value(const std::string &env_name) {
    char *buffer = nullptr;
    std::string ename_string;

#ifdef _MSC_VER
    // Windows version
    std::size_t sz = 0;
    if(_dupenv_s(&buffer, &sz, env_name.c_str()) == 0 && buffer != nullptr) {
        ename_string = std::string(buffer);
        // _dupenv_s hands over an allocated copy that the caller must release
        free(buffer);
    }
#else
    // This also works on Windows, but gives a warning
    buffer = std::getenv(env_name.c_str());
    if(buffer != nullptr) {
        ename_string = std::string(buffer);
    }
#endif
    return ename_string;
}
|
|
|
|
} // namespace detail
|
|
// [CLI11:string_tools_inl_hpp:end]
|
|
} // namespace CLI
|