mirror of
https://github.com/boostorg/inspect.git
synced 2025-05-08 02:13:57 +00:00
Decode percent-encoded characters and '&amp;' when inspecting URLs.
[SVN r43752]
This commit is contained in:
parent
79843906d0
commit
e1b48e60f5
@ -9,6 +9,7 @@
|
|||||||
#include "link_check.hpp"
|
#include "link_check.hpp"
|
||||||
#include "boost/regex.hpp"
|
#include "boost/regex.hpp"
|
||||||
#include "boost/filesystem/operations.hpp"
|
#include "boost/filesystem/operations.hpp"
|
||||||
|
#include <cstdlib>
|
||||||
|
|
||||||
namespace fs = boost::filesystem;
|
namespace fs = boost::filesystem;
|
||||||
|
|
||||||
@ -19,6 +20,44 @@ namespace
|
|||||||
"\\s*=\\s*(['\"])(.*?)\\1",
|
"\\s*=\\s*(['\"])(.*?)\\1",
|
||||||
boost::regbase::normal | boost::regbase::icase);
|
boost::regbase::normal | boost::regbase::icase);
|
||||||
|
|
||||||
|
// Returns the numeric value (0-15) of the hexadecimal digit c,
// or -1 if c is not a hexadecimal digit.
int hex_digit_value(char c) {
  if(c >= '0' && c <= '9') return c - '0';
  if(c >= 'a' && c <= 'f') return c - 'a' + 10;
  if(c >= 'A' && c <= 'F') return c - 'A' + 10;
  return -1;
}

// Decode percent encoded characters and html escaped ampersands.
//
// Every "%XX" sequence (XX being exactly two hexadecimal digits) is replaced
// by the character with that code, and every "&amp;" is replaced by a single
// '&'. A '&' that does not start "&amp;" is copied through unchanged.
//
// Returns an empty string if there's an error, i.e. a '%' that is not
// followed by two hexadecimal digits. Note that an empty input also yields
// an empty result, so callers cannot tell the two cases apart.
//
// The urls should really be fully HTML decoded at the beginning.
std::string decode_url(std::string const& path) {
  std::string::size_type pos = 0, next;
  std::string result;
  result.reserve(path.length());

  while((next = path.find_first_of("&%", pos)) != std::string::npos) {
    // Copy the literal text preceding the special character.
    result.append(path, pos, next - pos);
    pos = next;

    if(path[pos] == '%') {
      // A percent escape needs two more characters after the '%'.
      if(path.length() - pos < 3) return "";

      // Validate each digit explicitly: strtol would also accept leading
      // whitespace and signs (e.g. "% 1", "%-1"), which are not valid
      // percent escapes.
      int const high = hex_digit_value(path[pos + 1]);
      int const low = hex_digit_value(path[pos + 2]);
      if(high < 0 || low < 0) return "";

      result += static_cast<char>(high * 16 + low);
      pos += 3;
    }
    else {
      // path[pos] == '&'. Either way a single '&' is emitted; only the
      // number of input characters consumed differs. The position is
      // advanced by 5 only when all five characters of "&amp;" are present,
      // so pos can never run past the end of the string.
      result += '&';
      pos += (path.compare(pos, 5, "&amp;") == 0) ? 5 : 1;
    }
  }

  // Copy whatever follows the last special character.
  result.append(path, pos, std::string::npos);

  return result;
}
|
||||||
|
|
||||||
} // unnamed namespace
|
} // unnamed namespace
|
||||||
|
|
||||||
namespace boost
|
namespace boost
|
||||||
@ -121,13 +160,22 @@ namespace boost
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
string decoded_url = decode_url(plain_url);
|
||||||
|
if(decoded_url.empty()) {
|
||||||
|
if(!no_link_errors) {
|
||||||
|
++m_invalid_errors;
|
||||||
|
error( library_name, source_path, string(name()) + " invalid URL: " + url );
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
// strip url of references to current dir
|
// strip url of references to current dir
|
||||||
if ( plain_url[0]=='.' && plain_url[1]=='/' ) plain_url.erase( 0, 2 );
|
if ( decoded_url[0]=='.' && decoded_url[1]=='/' ) decoded_url.erase( 0, 2 );
|
||||||
|
|
||||||
// url is relative source_path.branch()
|
// url is relative source_path.branch()
|
||||||
// convert to target_path, which is_complete()
|
// convert to target_path, which is_complete()
|
||||||
path target_path;
|
path target_path;
|
||||||
try { target_path = source_path.branch_path() /= path( plain_url, fs::no_check ); }
|
try { target_path = source_path.branch_path() /= path( decoded_url, fs::no_check ); }
|
||||||
catch ( const fs::filesystem_error & )
|
catch ( const fs::filesystem_error & )
|
||||||
{
|
{
|
||||||
if(!no_link_errors) {
|
if(!no_link_errors) {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user