1
0
mirror of https://github.com/CLIUtils/CLI11.git synced 2025-04-29 20:23:55 +00:00
CLI11/include/CLI/impl/StringTools_inl.hpp
Philip Top 0f5bf21e91
add some reduction methods to the options on the fuzz tests (#930)
This adds a round trip test for config file generation to the fuzzer. 

(The next step after this PR will be a fuzzer that verifies that the
round trip actually matches the results.)
This change ended up requiring quite a few minor changes to fix the
ambiguities between the config file generation and config file reader.

1). There were a number of potential conflicts between positional names
and regular option names that could be triggered in config files; this
required a number of additional checks on the positional naming to
ensure no conflicts.
2). flag options with disable flag override can produce output results
that are not valid by themselves, resolving this required flag input to
be able to handle an array and output the original value set of results.
3). strings with non-printable characters could cause all sorts of chaos
in the config files. This was resolved by generating a binary string
conversion format and handling multiline comments and characters, and
handling escaped characters. Note; I think a better solution is to move
to fully supporting string formatting and escaping along with the binary
strings from TOML now that TOML 1.0 is finalized. That will not be this
PR though, maybe the next one.
4). There were a lot of ambiguities and edge cases in the string splitter; this was
reworked.
5). handling of comments was not done well, especially comment characters in the
name of the option which is allowed.
6). Non-printable characters in the option naming. This would be weird
in practice, but it also caused some big holes in the config file
generation, so the restricted character set for option naming was
expanded (don't allow spaces or control characters).

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
2023-12-18 05:21:32 -08:00

433 lines
14 KiB
C++

// Copyright (c) 2017-2023, University of Cincinnati, developed by Henry Schreiner
// under NSF AWARD 1414736 and by the respective contributors.
// All rights reserved.
//
// SPDX-License-Identifier: BSD-3-Clause
#pragma once
// This include is only needed for IDEs to discover symbols
#include <CLI/StringTools.hpp>
// [CLI11:public_includes:set]
#include <string>
#include <utility>
#include <vector>
// [CLI11:public_includes:end]
namespace CLI {
// [CLI11:string_tools_inl_hpp:verbatim]
namespace detail {
/// Split a string on a single-character delimiter.
/// An empty input produces exactly one empty element; a trailing delimiter
/// does not produce a trailing empty element.
CLI11_INLINE std::vector<std::string> split(const std::string &s, char delim) {
    std::vector<std::string> pieces;
    if(s.empty()) {
        // keep the result shape consistent: always at least one element
        pieces.emplace_back();
        return pieces;
    }
    std::stringstream reader(s);
    for(std::string token; std::getline(reader, token, delim);) {
        pieces.push_back(token);
    }
    return pieces;
}
/// Strip leading whitespace (as classified by the global locale) from a string in place.
/// Returns a reference to the modified string.
CLI11_INLINE std::string &ltrim(std::string &str) {
    const std::locale loc;
    auto firstKept = str.begin();
    while(firstKept != str.end() && std::isspace<char>(*firstKept, loc)) {
        ++firstKept;
    }
    str.erase(str.begin(), firstKept);
    return str;
}
/// Strip every leading character that appears in `filter` from a string in place.
/// Returns a reference to the modified string.
CLI11_INLINE std::string &ltrim(std::string &str, const std::string &filter) {
    // find_first_not_of yields npos when every character is filtered,
    // in which case erase(0, npos) empties the string
    str.erase(0, str.find_first_not_of(filter));
    return str;
}
/// Strip trailing whitespace (as classified by the global locale) from a string in place.
/// Returns a reference to the modified string.
CLI11_INLINE std::string &rtrim(std::string &str) {
    const std::locale loc;
    auto keepEnd = str.end();
    while(keepEnd != str.begin() && std::isspace<char>(*(keepEnd - 1), loc)) {
        --keepEnd;
    }
    str.erase(keepEnd, str.end());
    return str;
}
/// Strip every trailing character that appears in `filter` from a string in place.
/// Returns a reference to the modified string.
CLI11_INLINE std::string &rtrim(std::string &str, const std::string &filter) {
    const auto lastKept = str.find_last_not_of(filter);
    if(lastKept == std::string::npos) {
        // nothing survives the filter
        str.clear();
    } else {
        str.erase(lastKept + 1);
    }
    return str;
}
/// Remove one pair of matching enclosing quotes (double or single) from a string in place.
/// The string is left untouched unless it has at least two characters and starts and
/// ends with the same quote character.
CLI11_INLINE std::string &remove_quotes(std::string &str) {
    if(str.size() < 2) {
        return str;
    }
    const char opener = str.front();
    const bool isQuote = (opener == '"') || (opener == '\'');
    if(isQuote && str.back() == opener) {
        str.pop_back();
        str.erase(0, 1);
    }
    return str;
}
/// Insert `leader` after every newline character of `input` and return the result.
/// This is used to keep multi-line text aligned (or commented) on each continuation line.
///
/// The search always resumes after the newline *and* the leader that was just inserted,
/// so an empty leader or a leader that itself contains a newline cannot cause the same
/// newline to be found again.
CLI11_INLINE std::string fix_newlines(const std::string &leader, std::string input) {
    std::string::size_type n = 0;
    while(n < input.size()) {
        n = input.find('\n', n);
        if(n == std::string::npos) {
            break;
        }
        input.insert(n + 1, leader);
        // skip the newline itself plus the freshly inserted leader
        n += leader.size() + 1;
    }
    return input;
}
/// Write one help entry to `out`: the name in a left-aligned column of width `wid`,
/// followed by the description. If the name does not fit in the column the description
/// starts on the next line, and every newline inside the description is followed by
/// `wid` fill characters so continuation lines stay aligned. Always ends with a newline.
CLI11_INLINE std::ostream &
format_help(std::ostream &out, std::string name, const std::string &description, std::size_t wid) {
    // emits an empty field of the column width, i.e. the indentation of the description column
    const auto indent = [&out, wid]() { out << std::setw(static_cast<int>(wid)) << ""; };

    name.insert(0, " ");
    out << std::setw(static_cast<int>(wid)) << std::left << name;

    if(!description.empty()) {
        if(name.length() >= wid) {
            out << "\n";
            indent();
        }
        for(const char c : description) {
            out.put(c);
            if(c == '\n') {
                indent();
            }
        }
    }
    out << "\n";
    return out;
}
/// Write a comma-separated list of aliases to `out`, prefixed by an " aliases: " label
/// placed in a field of width `wid`. Nothing is written when `aliases` is empty.
/// Newlines inside an alias are followed by a leader via fix_newlines.
CLI11_INLINE std::ostream &format_aliases(std::ostream &out, const std::vector<std::string> &aliases, std::size_t wid) {
    if(aliases.empty()) {
        return out;
    }
    out << std::setw(static_cast<int>(wid)) << " aliases: ";
    for(std::size_t index = 0; index < aliases.size(); ++index) {
        if(index != 0) {
            out << ", ";
        }
        out << detail::fix_newlines(" ", aliases[index]);
    }
    out << "\n";
    return out;
}
/// Check whether a string is usable as a name: it must be non-empty, its first character
/// must satisfy valid_first_char and every following character must satisfy valid_later_char.
CLI11_INLINE bool valid_name_string(const std::string &str) {
    if(str.empty()) {
        return false;
    }
    if(!valid_first_char(str[0])) {
        return false;
    }
    for(std::size_t index = 1; index < str.size(); ++index) {
        if(!valid_later_char(str[index])) {
            return false;
        }
    }
    return true;
}
/// Replace every non-overlapping occurrence of `from` in `str` with `to` and return the result.
/// Text inserted by a replacement is never rescanned, so `to` may contain `from`.
/// An empty `from` leaves the string unchanged (an empty pattern matches at every position,
/// which would otherwise never terminate).
CLI11_INLINE std::string find_and_replace(std::string str, std::string from, std::string to) {
    if(from.empty()) {
        return str;
    }
    std::size_t start_pos = 0;
    while((start_pos = str.find(from, start_pos)) != std::string::npos) {
        str.replace(start_pos, from.length(), to);
        // continue after the inserted text so replacements are not re-matched
        start_pos += to.length();
    }
    return str;
}
/// Strip default-value annotations from a flag definition string in place:
/// every `{...}` group (searched from index 2 onward) whose closing brace comes before
/// the next comma is erased, and afterwards all '!' characters are removed.
CLI11_INLINE void remove_default_flag_values(std::string &flags) {
    for(auto open = flags.find('{', 2); open != std::string::npos; open = flags.find('{', open + 1)) {
        const auto stop = flags.find_first_of("},", open + 1);
        // only erase when the group is properly closed before the next name starts
        if(stop != std::string::npos && flags[stop] == '}') {
            flags.erase(open, stop - open + 1);
        }
    }
    flags.erase(std::remove(flags.begin(), flags.end(), '!'), flags.end());
}
/// Locate `name` in `names` and return its index, or -1 when it is not present.
/// With `ignore_underscore` the comparison is done after remove_underscore, with
/// `ignore_case` after to_lower; when both are set underscores are removed first.
CLI11_INLINE std::ptrdiff_t
find_member(std::string name, const std::vector<std::string> names, bool ignore_case, bool ignore_underscore) {
    const auto first = std::begin(names);
    const auto last = std::end(names);

    if(!ignore_case && !ignore_underscore) {
        // exact comparison, no normalisation needed
        const auto exact = std::find(first, last, name);
        return (exact != last) ? (exact - first) : (-1);
    }

    // apply the requested normalisation to the needle once and to each candidate on the fly
    const auto normalize = [ignore_case, ignore_underscore](std::string text) {
        if(ignore_underscore) {
            text = detail::remove_underscore(text);
        }
        if(ignore_case) {
            text = detail::to_lower(text);
        }
        return text;
    };
    name = normalize(name);
    const auto match = std::find_if(
        first, last, [&name, &normalize](std::string local_name) { return normalize(local_name) == name; });
    return (match != last) ? (match - first) : (-1);
}
/// Characters that add_escaped_characters prefixes with a backslash
/// (and that remove_escaped_characters un-escapes again)
static const std::string escapedChars{"'\"`])>}\\"};
/// Characters that open a quoted or bracketed sequence; the first three are the quote characters
static const std::string bracketChars{"'\"`[(<{"};
/// Closing character for each entry of bracketChars, stored at the same index
static const std::string matchBracketChars{"'\"`])>}"};
/// Report whether the string contains at least one character listed in escapedChars.
CLI11_INLINE bool has_escapable_character(const std::string &str) {
    const auto firstHit = str.find_first_of(escapedChars);
    return firstHit != std::string::npos;
}
/// Return a copy of the string in which every character listed in escapedChars
/// is preceded by a backslash.
CLI11_INLINE std::string add_escaped_characters(const std::string &str) {
    std::string escaped;
    // leave a little headroom for the backslashes that get inserted
    escaped.reserve(str.size() + 4);
    for(const char ch : str) {
        const bool needsEscape = escapedChars.find(ch) != std::string::npos;
        if(needsEscape) {
            escaped += '\\';
        }
        escaped += ch;
    }
    return escaped;
}
/// Return a copy of the string with escape backslashes removed: a backslash followed by a
/// character listed in escapedChars is replaced by that character alone. Any other
/// backslash, including one at the very end of the string, is copied unchanged.
CLI11_INLINE std::string remove_escaped_characters(const std::string &str) {
    std::string out;
    out.reserve(str.size());
    for(auto loc = str.begin(); loc != str.end(); ++loc) {
        const auto next = loc + 1;
        // only look at the following character when there is one; a trailing backslash
        // must not dereference the end iterator
        if(*loc == '\\' && next != str.end() && escapedChars.find(*next) != std::string::npos) {
            out.push_back(*next);
            loc = next;
        } else {
            out.push_back(*loc);
        }
    }
    return out;
}
/// Find the character that closes the quoted or bracketed sequence opened at `start`.
///
/// Scanning begins at `start + 1`. Nested sequences are tracked on a stack of expected
/// closing characters (`closures`); inside a quote no nested openers are recognised.
/// A backslash causes the following character to be skipped as a potential closer.
///
/// @param str the string to scan
/// @param start index of the opening character
/// @param closure_char the character that closes the sequence opened at `start`
/// @return a pair of (index of the closing character, whether a backslash was seen while
///         inside a quote). When no closing character is found the index is at or past
///         `str.size()`.
CLI11_INLINE std::pair<std::size_t, bool> close_sequence(const std::string &str, std::size_t start, char closure_char) {
    std::string closures;
    closures.push_back(closure_char);
    auto loc = start + 1;
    bool inQuote = closure_char == '"' || closure_char == '\'' || closure_char == '`';
    bool hasControlSequence{false};
    while(loc < str.size()) {
        if(str[loc] == closures.back()) {
            closures.pop_back();
            if(closures.empty()) {
                return {loc, hasControlSequence};
            }
            // only brackets can enclose other sequences, so we are back in bracket context
            inQuote = false;
            // This character closed a nested sequence and is fully consumed. Quote characters
            // are both openers and closers, so letting it fall through to the opener check
            // below would immediately re-open a new quote and the outer closer would be missed.
            ++loc;
            continue;
        }
        if(str[loc] == '\\') {
            if(inQuote) {
                hasControlSequence = true;
            }
            // step over the backslash so the next character is not treated as a closer
            ++loc;
        }
        if(!inQuote) {
            auto bracket_loc = bracketChars.find(str[loc]);
            if(bracket_loc != std::string::npos) {
                closures.push_back(matchBracketChars[bracket_loc]);
                // the first three entries of bracketChars are the quote characters
                inQuote = (bracket_loc <= 2);
            }
        }
        ++loc;
    }
    return {loc, hasControlSequence};
}
CLI11_INLINE std::vector<std::string> split_up(std::string str, char delimiter, bool removeQuotes) {
auto find_ws = [delimiter](char ch) {
return (delimiter == '\0') ? std::isspace<char>(ch, std::locale()) : (ch == delimiter);
};
trim(str);
std::vector<std::string> output;
bool embeddedQuote = false;
std::size_t adjust = removeQuotes ? 1 : 0;
while(!str.empty()) {
if(bracketChars.find_first_of(str[0]) != std::string::npos) {
auto bracketLoc = bracketChars.find_first_of(str[0]);
auto closure = close_sequence(str, 0, matchBracketChars[bracketLoc]);
auto end = closure.first;
output.push_back(str.substr(adjust, end + 1 - 2 * adjust));
if(end + 2 < str.size()) {
str = str.substr(end + 2);
} else {
str.clear();
}
embeddedQuote = embeddedQuote || closure.second;
} else {
auto it = std::find_if(std::begin(str), std::end(str), find_ws);
if(it != std::end(str)) {
std::string value = std::string(str.begin(), it);
output.push_back(value);
str = std::string(it + 1, str.end());
} else {
output.push_back(str);
str.clear();
}
}
// transform any embedded quotes into the regular character if the quotes are removed
if(embeddedQuote && removeQuotes) {
output.back() = remove_escaped_characters(output.back());
embeddedQuote = false;
}
trim(str);
}
return output;
}
/// Inspect the separator character at `str[offset]` and, when it is directly followed by a
/// quote character, decide whether it separates an option name from a quoted value.
///
/// The nearest preceding character out of `-`, `/`, space, `"`, `'` and `` ` `` is located;
/// if it is `-` while the separator is `=`, or `/` for any other separator, the separator
/// is overwritten with a space so that split_up treats the quoted value as its own token.
///
/// A separator at offset 0 has no preceding text and is therefore never modified.
///
/// @return offset + 1, the position at which the caller continues scanning
CLI11_INLINE std::size_t escape_detect(std::string &str, std::size_t offset) {
    auto next = str[offset + 1];
    // offset == 0 must be excluded: `offset - 1` would wrap to npos and make find_last_of
    // search the whole string, including text after the separator
    if(offset > 0 && ((next == '\"') || (next == '\'') || (next == '`'))) {
        auto astart = str.find_last_of("-/ \"\'`", offset - 1);
        if(astart != std::string::npos) {
            if(str[astart] == ((str[offset] == '=') ? '-' : '/'))
                str[offset] = ' ';  // interpret this as a space so the split_up works properly
        }
    }
    return offset + 1;
}
/// Produce a printable representation of a string that may contain non-printable bytes.
///
/// If every character is printable the input is returned unchanged. Otherwise each
/// non-printable byte is written as `\xHH` (two lowercase hex digits), every single quote
/// is written as `\x27`, and the result is wrapped as `'B"(` ... `)"'`.
CLI11_INLINE std::string binary_escape_string(const std::string &string_to_escape) {
    const char *hexDigits = "0123456789abcdef";
    std::string escaped_string{};
    escaped_string.reserve(string_to_escape.size());
    for(char c : string_to_escape) {
        // go through unsigned char: passing a negative value to isprint is undefined behaviour
        const auto uc = static_cast<unsigned char>(c);
        if(isprint(uc) == 0) {
            escaped_string += "\\x";
            escaped_string.push_back(hexDigits[uc >> 4]);
            escaped_string.push_back(hexDigits[uc & 0x0F]);
        } else {
            escaped_string.push_back(c);
        }
    }
    if(escaped_string != string_to_escape) {
        // the wrapper uses single quotes, so any single quote in the payload is hex-escaped too
        auto sqLoc = escaped_string.find('\'');
        while(sqLoc != std::string::npos) {
            // replace exactly one character (the second argument is a count, not an end position)
            escaped_string.replace(sqLoc, 1, "\\x27");
            // resume after the four characters just inserted
            sqLoc = escaped_string.find('\'', sqLoc + 4);
        }
        escaped_string.insert(0, "'B\"(");
        escaped_string.push_back(')');
        escaped_string.push_back('"');
        escaped_string.push_back('\'');
    }
    return escaped_string;
}
/// Report whether a string carries the binary-escape wrapper, either bare
/// (`B"(` ... `)"`) or enclosed in single quotes (`'B"(` ... `)"'`).
CLI11_INLINE bool is_binary_escaped_string(const std::string &escaped_string) {
    // the suffix is only inspected once the prefix matched, which guarantees the string is
    // long enough for the suffix offset to be valid
    const auto enclosed_by = [&escaped_string](const std::string &head, const std::string &tail) {
        return escaped_string.compare(0, head.size(), head) == 0 &&
               escaped_string.compare(escaped_string.size() - tail.size(), tail.size(), tail) == 0;
    };
    return enclosed_by("B\"(", ")\"") || enclosed_by("'B\"(", ")\"'");
}
/// Decode a string produced in the binary-escape format.
///
/// If the input is not wrapped as `B"(` ... `)"` or `'B"(` ... `)"'` it is returned
/// unchanged. Otherwise the wrapper is removed and every `\xHH` / `\XHH` sequence with two
/// valid hex digits is converted to the corresponding byte; anything else is copied as is.
CLI11_INLINE std::string extract_binary_string(const std::string &escaped_string) {
    const std::size_t total = escaped_string.size();
    std::size_t start{0};
    std::size_t tail{0};
    if(escaped_string.compare(0, 3, "B\"(") == 0 && escaped_string.compare(total - 2, 2, ")\"") == 0) {
        start = 3;
        tail = 2;
    } else if(escaped_string.compare(0, 4, "'B\"(") == 0 && escaped_string.compare(total - 3, 3, ")\"'") == 0) {
        start = 4;
        tail = 3;
    }
    if(start == 0) {
        return escaped_string;
    }

    // value of a single hex digit, or -1 if the character is not a hex digit
    const auto hex_value = [](char digit) -> int {
        if(digit >= '0' && digit <= '9') {
            return digit - '0';
        }
        if(digit >= 'A' && digit <= 'F') {
            return digit - 'A' + 10;
        }
        if(digit >= 'a' && digit <= 'f') {
            return digit - 'a' + 10;
        }
        return -1;
    };

    std::string decoded;
    decoded.reserve(total - start - tail);
    // stop before the closing wrapper characters
    const std::size_t stop = total - tail;
    std::size_t loc = start;
    while(loc < stop) {
        const char current = escaped_string[loc];
        if(current == '\\' && (escaped_string[loc + 1] == 'x' || escaped_string[loc + 1] == 'X')) {
            const int high = hex_value(escaped_string[loc + 2]);
            const int low = hex_value(escaped_string[loc + 3]);
            if(high >= 0 && low >= 0) {
                decoded.push_back(static_cast<char>(high * 16 + low));
                loc += 4;
                continue;
            }
        }
        // not a valid escape sequence: keep the character literally
        decoded.push_back(current);
        ++loc;
    }
    return decoded;
}
/// Read an environment variable and return its value, or an empty string if it is not set.
///
/// Marked CLI11_INLINE like every other definition in this file: without it the function
/// is a non-inline definition in a header and is emitted by every translation unit that
/// includes it in header-only mode, which breaks linking.
CLI11_INLINE std::string get_environment_value(const std::string &env_name) {
    char *buffer = nullptr;
    std::string ename_string;
#ifdef _MSC_VER
    // Windows version
    std::size_t sz = 0;
    if(_dupenv_s(&buffer, &sz, env_name.c_str()) == 0 && buffer != nullptr) {
        ename_string = std::string(buffer);
        // _dupenv_s hands over an allocated copy that the caller must release
        free(buffer);
    }
#else
    // This also works on Windows, but gives a warning
    buffer = std::getenv(env_name.c_str());
    if(buffer != nullptr) {
        ename_string = std::string(buffer);
    }
#endif
    return ename_string;
}
} // namespace detail
// [CLI11:string_tools_inl_hpp:end]
} // namespace CLI