Replace url parser with one less strict

This commit is contained in:
Nimetu 2015-04-19 22:12:05 +03:00
parent d7da85970d
commit f8813e4d2e
3 changed files with 290 additions and 12 deletions

View file

@ -0,0 +1,63 @@
// Ryzom - MMORPG Framework <http://dev.ryzom.com/projects/ryzom/>
// Copyright (C) 2010 Winch Gate Property Limited
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as
// published by the Free Software Foundation, either version 3 of the
// License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
#ifndef CL_URL_PARSER_H
#define CL_URL_PARSER_H
#include <string>
namespace NLGUI
{
/**
* Simple URL parser
*
* Splits a URL reference into scheme, domain, path, query and hash,
* can fill in missing components from a base URL and can serialize the
* components back into a string. All components are public and may be
* read or modified directly.
*
* \author Meelis Mägi
* \date 2015
*/
class CUrlParser
{
public:
// create parser with all components empty
CUrlParser(){}
// parse url to components (same as calling parse() on an empty object)
CUrlParser(const std::string &url);
// parse uri to components; the argument is taken by value because
// the implementation consumes it while splitting
void parse(std::string uri);
// serialize URL back to string by concatenating the non-empty components
std::string toString() const;
// inherit scheme, domain, path from given url
// does nothing if this url already has a scheme
void inherit(const std::string &url);
// if current parts can compose absolute url or not
// (true when both scheme and domain are set)
bool isAbsolute() const;
// resolve relative path in place, removing '.' and '..' segments,
// eg './a/../b' becomes absolute path '/b'
static void resolveRelativePath(std::string &path);
public:
// scheme without trailing ':', eg 'http'
std::string scheme;
// domain including leading '//', eg '//example.com'
std::string domain;
// path, normally with leading '/'
std::string path;
// query string without leading '?'
std::string query;
// fragment without leading '#'
std::string hash;
};
}// namespace
#endif // CL_URL_PARSER_H

View file

@ -43,7 +43,7 @@
#include "nel/misc/md5.h"
#include "nel/3d/texture_file.h"
#include "nel/misc/big_file.h"
#include <libxml/uri.h>
#include "nel/gui/url_parser.h"
using namespace std;
using namespace NLMISC;
@ -4777,20 +4777,13 @@ namespace NLGUI
// ***************************************************************************
// Returns url resolved against _URL.
// NOTE(review): this hunk is rendered without +/- diff markers, so the removed
// libxml2 (xmlBuildURI) lines and the added CUrlParser lines appear interleaved
// and the text below is not compilable as shown - confirm against the real file.
// The CUrlParser lines return url unchanged when it parses as absolute,
// otherwise inherit the missing components from _URL and serialize the result.
std::string CGroupHTML::getAbsoluteUrl(const std::string &url)
{
if (_URL.size() == 0 || url.find("http://") != std::string::npos || url.find("https://") != std::string::npos)
CUrlParser uri(url);
if (uri.isAbsolute())
return url;
xmlChar * uri;
uri = xmlBuildURI(reinterpret_cast<const xmlChar *>(url.c_str()), reinterpret_cast<const xmlChar *>(_URL.c_str()));
if (uri)
{
std::string ret(reinterpret_cast<char *>(uri));
xmlFree(uri);
uri.inherit(_URL);
return ret;
}
return url;
return uri.toString();
}
// ***************************************************************************

View file

@ -0,0 +1,222 @@
// Ryzom - MMORPG Framework <http://dev.ryzom.com/projects/ryzom/>
// Copyright (C) 2010 Winch Gate Property Limited
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as
// published by the Free Software Foundation, either version 3 of the
// License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
#include <string>
#include "nel/misc/types_nl.h"
#include "nel/gui/url_parser.h"
using namespace std;
namespace NLGUI
{
// ***************************************************************************
// Construct a parser and immediately split the given uri into components.
// All members start out default-constructed (empty); parse() fills them in.
CUrlParser::CUrlParser(const std::string &uri)
{
	this->parse(uri);
}
// ***************************************************************************
// Split uri into scheme, domain, path, query and hash.
//
// Components are stored without their delimiters, except domain which keeps
// its leading "//" (toString() relies on that):
//   "http://host/dir/file?a=b#top" ->
//   scheme "http", domain "//host", path "/dir/file", query "a=b", hash "top"
//
// uri is taken by value because it is consumed piece by piece while parsing.
// Any components left from a previous call are discarded first.
void CUrlParser::parse(std::string uri)
{
	const size_t npos = std::string::npos;
	size_t pos;

	// start from a clean state so re-parsing does not keep stale components
	scheme.clear();
	domain.clear();
	path.clear();
	query.clear();
	hash.clear();

	// strip fragment if present
	pos = uri.find('#');
	if (pos != npos)
	{
		hash = uri.substr(pos + 1);
		uri = uri.substr(0, pos);
	}

	// scan for scheme: non-empty, alphanumeric-only text before the first ':'
	pos = uri.find(':');
	if (pos != npos && pos >= 1)
	{
		for (size_t i = 0; i < pos; i++)
		{
			// cast is required, isalnum() on a negative char value is undefined
			if (!isalnum(static_cast<unsigned char>(uri[i])))
			{
				pos = npos;
				break;
			}
		}

		if (pos != npos)
		{
			scheme = uri.substr(0, pos);
			uri = uri.substr(pos + 1);
		}
	}

	// scan for domain
	if (uri.compare(0, 2, "//") == 0)
	{
		// domain starts right after "//" and ends at next '/' or '?',
		// so "///path" has empty host and "//host?q" has empty path
		pos = uri.find_first_of("/?", 2);
		domain = uri.substr(0, pos);
		if (pos != npos)
			uri = uri.substr(pos);
		else
			uri.clear();
	}

	// scan for query
	pos = uri.find('?');
	if (pos != npos)
	{
		query = uri.substr(pos + 1);
		uri = uri.substr(0, pos);
	}

	// all that is remaining is path
	path = uri;
}
// Fill in components missing from this url using url as the base.
//
// - scheme already set: nothing is inherited
// - domain already set: only scheme is inherited
// - path empty: base path is used, and base query too if own query is empty
// - path relative (no leading '/'): it replaces the last segment of base path
// - path absolute: kept as is
// The resulting path then has its '.' and '..' segments resolved.
void CUrlParser::inherit(const std::string &url)
{
	// we have scheme, so we are already absolute url
	if (!scheme.empty())
		return;

	CUrlParser base(url);

	scheme = base.scheme;

	// if we already have domain, then ignore base path
	if (!domain.empty())
		return;

	domain = base.domain;

	if (path.empty())
	{
		path = base.path;
		if (query.empty())
			query = base.query;
	}
	else if (path[0] != '/')
	{
		// find start of last path segment from base path
		const size_t pos = base.path.find_last_of('/');
		if (pos != std::string::npos)
			path = base.path.substr(0, pos) + "/" + path;
		else if (!domain.empty())
		{
			// base like "http://host" has no path at all; without the slash
			// the relative path would be glued directly to the host name
			path = "/" + path;
		}
		// otherwise there is no base path to inherit from, keep path relative
	}

	resolveRelativePath(path);
}
// Remove '.' and '..' segments from path, modifying it in place.
//
// "/a/./b"    -> "/a/b"
// "/a/b/../c" -> "/a/c"
// "./a/../b"  -> "/b"
// A leading "./" or "../" is replaced with "/". A ".." that has no parent
// segment to remove (eg "/../a" or "a/../b") collapses to the root.
// Dots that are part of a name ("file.ext", ".hidden", "a..b") are kept.
void CUrlParser::resolveRelativePath(std::string &path)
{
	const size_t npos = std::string::npos;

	// no relative components in path. filename.ext is also matched, but that's fine
	size_t pos = path.find('.');
	if (pos == npos)
		return;

	// normalize path
	size_t lhp = 0;
	while (pos < path.size())
	{
		if (path[pos] == '.')
		{
			// scan ahead to see what we have
			// (sub is a single "." when the dot is the last char in path)
			std::string sub = path.substr(pos, 2);
			if (sub == "./" || sub == ".")
			{
				// starts with
				if (pos == 0)
					path.replace(pos, sub.size(), "/");
				else
				{
					// full or last segment
					sub = path.substr(pos-1, 3);
					if (sub == "/./" || sub == "/.")
					{
						// keep the slash before the dot, drop "./" or "."
						path.replace(pos, sub.size()-1, "");
						// we just removed char that pos was pointing, so rewind
						pos--;
					}
				}
			}
			else if (sub == "..")
			{
				// starts with
				if (pos == 0 && path.substr(pos, 3) == "../")
					path.replace(pos, 3, "/");
				else if (pos > 0)
				{
					// full or last segment
					sub = path.substr(pos-1, 4);
					if (sub == "/../" || sub == "/..")
					{
						if (pos > 1)
							lhp = path.find_last_of('/', pos - 2);
						else
							lhp = 0;

						// path does not start with slash and there is no parent
						// segment separator (eg "a/../b"); collapse from start of
						// path instead of calling replace() with npos, which throws
						if (lhp == npos)
							lhp = 0;

						// pos points to first dot in ..
						// lhp points to start slash (/) of last segment
						pos += sub.size() - 1;
						path.replace(lhp, pos - lhp, "/");
						pos = lhp;
					}
				}
			}// sub == ".."
		} // path[pos] == '.'
		pos++;
	}// while
}
// An url counts as absolute only when both scheme and domain are present.
bool CUrlParser::isAbsolute() const
{
	if (scheme.empty())
		return false;

	return !domain.empty();
}
// Build the textual form of the url from its components.
// Layout is: [scheme ':'] [domain] [path] ['?' query] ['#' hash]
// where every empty component is left out.
std::string CUrlParser::toString() const
{
	std::string out;

	if (!scheme.empty())
	{
		out.append(scheme);
		out.push_back(':');
	}

	// domain is stored together with its "//" prefix and path with its
	// leading slash, so both are appended as they are (empty adds nothing)
	out.append(domain);
	out.append(path);

	if (!query.empty())
	{
		out.push_back('?');
		out.append(query);
	}

	if (!hash.empty())
	{
		out.push_back('#');
		out.append(hash);
	}

	return out;
}
}// namespace