Merged in nimetu/ryzomcore/libxml2-html-parser (pull request #127)
implement html parser using libxml2
This commit is contained in:
commit
8c006a46d4
4 changed files with 283 additions and 26 deletions
|
@ -102,6 +102,9 @@ namespace NLGUI
|
||||||
// Browse
|
// Browse
|
||||||
virtual void browse (const char *url);
|
virtual void browse (const char *url);
|
||||||
|
|
||||||
|
// parse html string using libxml2 parser
|
||||||
|
virtual bool parseHtml(std::string htmlString);
|
||||||
|
|
||||||
// Refresh
|
// Refresh
|
||||||
void refresh();
|
void refresh();
|
||||||
|
|
||||||
|
@ -199,6 +202,7 @@ namespace NLGUI
|
||||||
int luaBeginElement(CLuaState &ls);
|
int luaBeginElement(CLuaState &ls);
|
||||||
int luaEndElement(CLuaState &ls);
|
int luaEndElement(CLuaState &ls);
|
||||||
int luaShowDiv(CLuaState &ls);
|
int luaShowDiv(CLuaState &ls);
|
||||||
|
int luaParseHtml(CLuaState &ls);
|
||||||
|
|
||||||
REFLECT_EXPORT_START(CGroupHTML, CGroupScrollText)
|
REFLECT_EXPORT_START(CGroupHTML, CGroupScrollText)
|
||||||
REFLECT_LUA_METHOD("browse", luaBrowse)
|
REFLECT_LUA_METHOD("browse", luaBrowse)
|
||||||
|
@ -210,6 +214,7 @@ namespace NLGUI
|
||||||
REFLECT_LUA_METHOD("beginElement", luaBeginElement)
|
REFLECT_LUA_METHOD("beginElement", luaBeginElement)
|
||||||
REFLECT_LUA_METHOD("endElement", luaEndElement)
|
REFLECT_LUA_METHOD("endElement", luaEndElement)
|
||||||
REFLECT_LUA_METHOD("showDiv", luaShowDiv)
|
REFLECT_LUA_METHOD("showDiv", luaShowDiv)
|
||||||
|
REFLECT_LUA_METHOD("parseHtml", luaParseHtml)
|
||||||
REFLECT_STRING("url", getURL, setURL)
|
REFLECT_STRING("url", getURL, setURL)
|
||||||
REFLECT_FLOAT("timeout", getTimeout, setTimeout)
|
REFLECT_FLOAT("timeout", getTimeout, setTimeout)
|
||||||
REFLECT_EXPORT_END
|
REFLECT_EXPORT_END
|
||||||
|
@ -228,7 +233,7 @@ namespace NLGUI
|
||||||
virtual void addText (const char * buf, int len);
|
virtual void addText (const char * buf, int len);
|
||||||
|
|
||||||
// A link has been parsed
|
// A link has been parsed
|
||||||
virtual void addLink (uint element_number, uint attribute_number, HTChildAnchor *anchor, const BOOL *present, const char **value);
|
virtual void addLink (uint element_number, const BOOL *present, const char **value);
|
||||||
|
|
||||||
// A new begin HTML element has been parsed (<IMG> for example)
|
// A new begin HTML element has been parsed (<IMG> for example)
|
||||||
virtual void beginElement (uint element_number, const BOOL *present, const char **value);
|
virtual void beginElement (uint element_number, const BOOL *present, const char **value);
|
||||||
|
@ -251,6 +256,10 @@ namespace NLGUI
|
||||||
// the current request is terminated
|
// the current request is terminated
|
||||||
virtual void requestTerminated(HTRequest *request);
|
virtual void requestTerminated(HTRequest *request);
|
||||||
|
|
||||||
|
// libxml2 html parser functions
|
||||||
|
void htmlElement(xmlNode *node, int element_number);
|
||||||
|
void htmlWalkDOM(xmlNode *a_node);
|
||||||
|
|
||||||
// Get Home URL
|
// Get Home URL
|
||||||
virtual std::string home();
|
virtual std::string home();
|
||||||
|
|
||||||
|
@ -668,6 +677,12 @@ namespace NLGUI
|
||||||
// read style attribute
|
// read style attribute
|
||||||
void getStyleParams(const std::string &styleString, CStyleParams &style, bool inherit = true);
|
void getStyleParams(const std::string &styleString, CStyleParams &style, bool inherit = true);
|
||||||
|
|
||||||
|
// load and render local html file (from bnp for example)
|
||||||
|
void doBrowseLocalFile(const std::string &filename);
|
||||||
|
|
||||||
|
// render html string as new browser page
|
||||||
|
bool renderHtmlString(const std::string &html);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
// decode all HTML entities
|
// decode all HTML entities
|
||||||
static ucstring decodeHTMLEntities(const ucstring &str);
|
static ucstring decodeHTMLEntities(const ucstring &str);
|
||||||
|
@ -699,6 +714,7 @@ namespace NLGUI
|
||||||
void checkImageDownload();
|
void checkImageDownload();
|
||||||
void addImageDownload(const std::string &url, CViewBase *img);
|
void addImageDownload(const std::string &url, CViewBase *img);
|
||||||
std::string localImageName(const std::string &url);
|
std::string localImageName(const std::string &url);
|
||||||
|
std::string getAbsoluteUrl(const std::string &url);
|
||||||
|
|
||||||
bool isTrustedDomain(const std::string &domain);
|
bool isTrustedDomain(const std::string &domain);
|
||||||
void setImage(CViewBase *view, const std::string &file);
|
void setImage(CViewBase *view, const std::string &file);
|
||||||
|
|
|
@ -570,7 +570,7 @@ namespace NLGUI
|
||||||
|
|
||||||
// ***************************************************************************
|
// ***************************************************************************
|
||||||
|
|
||||||
void CGroupHTML::addLink (uint element_number, uint /* attribute_number */, HTChildAnchor *anchor, const BOOL *present, const char **value)
|
void CGroupHTML::addLink (uint element_number, const BOOL *present, const char **value)
|
||||||
{
|
{
|
||||||
if (_Browsing)
|
if (_Browsing)
|
||||||
{
|
{
|
||||||
|
@ -591,16 +591,8 @@ namespace NLGUI
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
HTAnchor * dest = HTAnchor_followMainLink((HTAnchor *) anchor);
|
// convert href from "?key=val" into "http://domain.com/?key=val"
|
||||||
if (dest)
|
_Link.push_back(getAbsoluteUrl(suri));
|
||||||
{
|
|
||||||
C3WSmartPtr uri = HTAnchor_address(dest);
|
|
||||||
_Link.push_back ((const char*)uri);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
_Link.push_back("");
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
for(uint8 i = MY_HTML_A_ACCESSKEY; i < MY_HTML_A_Z_ACTION_SHORTCUT; i++)
|
for(uint8 i = MY_HTML_A_ACCESSKEY; i < MY_HTML_A_Z_ACTION_SHORTCUT; i++)
|
||||||
|
@ -3832,6 +3824,10 @@ namespace NLGUI
|
||||||
stopBrowse ();
|
stopBrowse ();
|
||||||
updateRefreshButton();
|
updateRefreshButton();
|
||||||
|
|
||||||
|
// Browsing
|
||||||
|
_Browsing = true;
|
||||||
|
updateRefreshButton();
|
||||||
|
|
||||||
// Home ?
|
// Home ?
|
||||||
if (_URL == "home")
|
if (_URL == "home")
|
||||||
_URL = home();
|
_URL = home();
|
||||||
|
@ -3851,17 +3847,24 @@ namespace NLGUI
|
||||||
_Connecting = true;
|
_Connecting = true;
|
||||||
_ConnectingTimeout = ( times.thisFrameMs / 1000.0f ) + _TimeoutValue;
|
_ConnectingTimeout = ( times.thisFrameMs / 1000.0f ) + _TimeoutValue;
|
||||||
|
|
||||||
|
// Save new url
|
||||||
|
_URL = finalUrl;
|
||||||
|
|
||||||
|
// file is probably from bnp (ingame help)
|
||||||
|
if (isLocal)
|
||||||
|
{
|
||||||
|
if (strlwr(finalUrl).find("file:/") == 0)
|
||||||
|
{
|
||||||
|
finalUrl = finalUrl.substr(6, finalUrl.size() - 6);
|
||||||
|
}
|
||||||
|
doBrowseLocalFile(finalUrl);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
|
||||||
CButtonFreezer freezer;
|
CButtonFreezer freezer;
|
||||||
this->visit(&freezer);
|
this->visit(&freezer);
|
||||||
|
|
||||||
// Browsing
|
|
||||||
_Browsing = true;
|
|
||||||
updateRefreshButton();
|
|
||||||
|
|
||||||
// Save new url
|
|
||||||
_URL = finalUrl;
|
|
||||||
|
|
||||||
// display HTTP query
|
// display HTTP query
|
||||||
//nlinfo("WEB: GET '%s'", finalUrl.c_str());
|
//nlinfo("WEB: GET '%s'", finalUrl.c_str());
|
||||||
|
|
||||||
|
@ -3877,12 +3880,7 @@ namespace NLGUI
|
||||||
C3WSmartPtr uri = HTParse(finalUrl.c_str(), NULL, PARSE_ALL);
|
C3WSmartPtr uri = HTParse(finalUrl.c_str(), NULL, PARSE_ALL);
|
||||||
|
|
||||||
// Create an anchor
|
// Create an anchor
|
||||||
#ifdef NL_OS_WINDOWS
|
|
||||||
if ((_LibWWW->Anchor = HTAnchor_findAddress(uri)) == NULL)
|
if ((_LibWWW->Anchor = HTAnchor_findAddress(uri)) == NULL)
|
||||||
#else
|
|
||||||
// temporarily disable local URL's until LibWWW can be replaced.
|
|
||||||
if (isLocal || ((_LibWWW->Anchor = HTAnchor_findAddress(uri)) == NULL))
|
|
||||||
#endif
|
|
||||||
{
|
{
|
||||||
browseError((string("The page address is malformed : ")+(const char*)uri).c_str());
|
browseError((string("The page address is malformed : ")+(const char*)uri).c_str());
|
||||||
}
|
}
|
||||||
|
@ -3919,6 +3917,8 @@ namespace NLGUI
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
} // !isLocal
|
||||||
|
|
||||||
_BrowseNextTime = false;
|
_BrowseNextTime = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -4107,6 +4107,57 @@ namespace NLGUI
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ***************************************************************************
|
||||||
|
void CGroupHTML::doBrowseLocalFile(const std::string &filename)
|
||||||
|
{
|
||||||
|
CIFile in;
|
||||||
|
if (in.open(filename))
|
||||||
|
{
|
||||||
|
std::string html;
|
||||||
|
while(!in.eof())
|
||||||
|
{
|
||||||
|
char buf[1024];
|
||||||
|
in.getline(buf, 1024);
|
||||||
|
html += std::string(buf) + "\n";
|
||||||
|
}
|
||||||
|
in.close();
|
||||||
|
|
||||||
|
if (!renderHtmlString(html))
|
||||||
|
{
|
||||||
|
browseError((string("Failed to parse html from file : ")+filename).c_str());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
browseError((string("The page address is malformed : ")+filename).c_str());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ***************************************************************************
|
||||||
|
|
||||||
|
bool CGroupHTML::renderHtmlString(const std::string &html)
|
||||||
|
{
|
||||||
|
bool success;
|
||||||
|
|
||||||
|
// clear content
|
||||||
|
beginBuild();
|
||||||
|
|
||||||
|
success = parseHtml(html);
|
||||||
|
|
||||||
|
// invalidate coords
|
||||||
|
endBuild();
|
||||||
|
|
||||||
|
// libwww would call requestTerminated() here
|
||||||
|
_Browsing = false;
|
||||||
|
if (_TitleString.empty())
|
||||||
|
{
|
||||||
|
setTitle(_TitlePrefix);
|
||||||
|
}
|
||||||
|
updateRefreshButton();
|
||||||
|
|
||||||
|
return success;
|
||||||
|
}
|
||||||
|
|
||||||
// ***************************************************************************
|
// ***************************************************************************
|
||||||
|
|
||||||
void CGroupHTML::draw ()
|
void CGroupHTML::draw ()
|
||||||
|
@ -4516,7 +4567,7 @@ namespace NLGUI
|
||||||
|
|
||||||
beginElement(element_number, &present[0], &value[0]);
|
beginElement(element_number, &present[0], &value[0]);
|
||||||
if (element_number == HTML_A)
|
if (element_number == HTML_A)
|
||||||
addLink(element_number, 0, NULL, &present[0], &value[0]);
|
addLink(element_number, &present[0], &value[0]);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -4650,6 +4701,15 @@ namespace NLGUI
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ***************************************************************************
|
||||||
|
std::string CGroupHTML::getAbsoluteUrl(const std::string &url)
|
||||||
|
{
|
||||||
|
if (HTURL_isAbsolute(url.c_str()))
|
||||||
|
return url;
|
||||||
|
|
||||||
|
return std::string(HTParse(url.c_str(), _URL.c_str(), PARSE_ALL));
|
||||||
|
}
|
||||||
|
|
||||||
// ***************************************************************************
|
// ***************************************************************************
|
||||||
// CGroupHTML::CStyleParams style;
|
// CGroupHTML::CStyleParams style;
|
||||||
// style.FontSize; // font-size: 10px;
|
// style.FontSize; // font-size: 10px;
|
||||||
|
|
178
code/nel/src/gui/group_html_parser.cpp
Normal file
178
code/nel/src/gui/group_html_parser.cpp
Normal file
|
@ -0,0 +1,178 @@
|
||||||
|
// Ryzom - MMORPG Framework <http://dev.ryzom.com/projects/ryzom/>
|
||||||
|
// Copyright (C) 2010 Winch Gate Property Limited
|
||||||
|
//
|
||||||
|
// This program is free software: you can redistribute it and/or modify
|
||||||
|
// it under the terms of the GNU Affero General Public License as
|
||||||
|
// published by the Free Software Foundation, either version 3 of the
|
||||||
|
// License, or (at your option) any later version.
|
||||||
|
//
|
||||||
|
// This program is distributed in the hope that it will be useful,
|
||||||
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
// GNU Affero General Public License for more details.
|
||||||
|
//
|
||||||
|
// You should have received a copy of the GNU Affero General Public License
|
||||||
|
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
|
||||||
|
#include "stdpch.h"
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
#include <libxml/HTMLparser.h>
|
||||||
|
|
||||||
|
#include "nel/misc/types_nl.h"
|
||||||
|
#include "nel/gui/libwww.h"
|
||||||
|
#include "nel/gui/group_html.h"
|
||||||
|
#include "nel/gui/lua_ihm.h"
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
using namespace NLMISC;
|
||||||
|
|
||||||
|
namespace NLGUI
|
||||||
|
{
|
||||||
|
// ***************************************************************************
|
||||||
|
// Translate one libxml2 element node into the libwww-style callbacks.
// node           : the element being visited
// element_number : index of the tag in the libwww DTD table; any value that is
//                  not below HTML_ELEMENTS is handled as an unparsed element
// Order of events: begin callback(s), recursion into the children, end callback.
void CGroupHTML::htmlElement(xmlNode *node, int element_number)
{
	SGML_dtd *HTML_DTD = HTML_dtd ();

	const bool isKnownTag = element_number < HTML_ELEMENTS;

	if (!isKnownTag)
	{
		beginUnparsedElement((const char *)(node->name), xmlStrlen(node->name));
	}
	else
	{
		CXMLAutoPtr ptr;
		// attributes in the layout expected by addLink() / beginElement()
		BOOL present[MAX_ATTRIBUTES];
		const char *value[MAX_ATTRIBUTES];
		// owns the attribute text so value[] stays valid after the xmlChar buffer is released
		std::string strvalues[MAX_ATTRIBUTES];

		const uint nbAttributes = std::min(MAX_ATTRIBUTES, HTML_DTD->tags[element_number].number_of_attributes);
		for (uint attr = 0; attr < nbAttributes; ++attr)
		{
			// the DTD attribute name is lower-cased before the lookup on the node
			const std::string attrName = toLower(std::string(HTML_DTD->tags[element_number].attributes[attr].name));

			ptr = xmlGetProp(node, (const xmlChar *)attrName.c_str());
			if (!ptr)
			{
				value[attr] = NULL;
				present[attr] = false;
				continue;
			}

			strvalues[attr] = (const char *)(ptr);
			value[attr] = strvalues[attr].c_str();
			present[attr] = true;
		}

		// anchors register their link before the generic begin callback
		if (element_number == HTML_A)
			addLink(element_number, present, value);

		beginElement(element_number, present, value);
	}

	// recursive - text content / child nodes
	htmlWalkDOM(node->children);

	// closing tag
	if (isKnownTag)
		endElement(element_number);
	else
		endUnparsedElement((const char *)(node->name), xmlStrlen(node->name));
}
|
||||||
|
|
||||||
|
// ***************************************************************************
|
||||||
|
// recursive function to walk html document
|
||||||
|
void CGroupHTML::htmlWalkDOM(xmlNode *a_node)
|
||||||
|
{
|
||||||
|
SGML_dtd *HTML_DTD = HTML_dtd ();
|
||||||
|
|
||||||
|
uint element_number;
|
||||||
|
xmlNode *node = a_node;
|
||||||
|
while(node)
|
||||||
|
{
|
||||||
|
if (node->type == XML_TEXT_NODE)
|
||||||
|
{
|
||||||
|
addText((const char *)(node->content), xmlStrlen(node->content));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
if (node->type == XML_ELEMENT_NODE)
|
||||||
|
{
|
||||||
|
// find libwww tag
|
||||||
|
for(element_number = 0; element_number<HTML_ELEMENTS; ++element_number)
|
||||||
|
{
|
||||||
|
if (xmlStrncasecmp(node->name, (const xmlChar *)HTML_DTD->tags[element_number].name, xmlStrlen(node->name)) == 0)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
htmlElement(node, element_number);
|
||||||
|
}
|
||||||
|
|
||||||
|
// move into next sibling
|
||||||
|
node = node->next;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ***************************************************************************
|
||||||
|
bool CGroupHTML::parseHtml(std::string htmlString)
|
||||||
|
{
|
||||||
|
htmlParserCtxtPtr parser = htmlCreatePushParserCtxt(NULL, NULL, NULL, 0, NULL, XML_CHAR_ENCODING_NONE);
|
||||||
|
if (!parser)
|
||||||
|
{
|
||||||
|
nlwarning("Creating html parser context failed");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
htmlCtxtUseOptions(parser, HTML_PARSE_NOBLANKS | HTML_PARSE_NOERROR | HTML_PARSE_NOWARNING | HTML_PARSE_NONET);
|
||||||
|
|
||||||
|
htmlParseChunk(parser, htmlString.c_str(), htmlString.size(), 0);
|
||||||
|
htmlParseChunk(parser, "", 0, 1);
|
||||||
|
|
||||||
|
bool success = true;
|
||||||
|
if (parser->myDoc)
|
||||||
|
{
|
||||||
|
xmlNode *root = xmlDocGetRootElement(parser->myDoc);
|
||||||
|
if (root)
|
||||||
|
{
|
||||||
|
htmlWalkDOM(root);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
nlwarning("html root node failed");
|
||||||
|
success = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
nlwarning("htmlstring parsing failed");
|
||||||
|
success = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
htmlFreeParserCtxt(parser);
|
||||||
|
return success;
|
||||||
|
}
|
||||||
|
|
||||||
|
// ***************************************************************************
|
||||||
|
// Lua binding for parseHtml(): expects exactly one string argument holding the
// html to parse. The result of parseHtml() is not reported back to Lua; the
// function always returns 0.
int CGroupHTML::luaParseHtml(CLuaState &ls)
{
	const char *luaName = "parseHtml";

	// validate the call before touching the Lua stack
	CLuaIHM::checkArgCount(ls, luaName, 1);
	CLuaIHM::checkArgType(ls, luaName, 1, LUA_TSTRING);

	const std::string markup = ls.toString(1);
	parseHtml(markup);

	return 0;
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
|
@ -322,7 +322,10 @@ namespace NLGUI
|
||||||
const char ** value)
|
const char ** value)
|
||||||
{
|
{
|
||||||
// Do the work in the class
|
// Do the work in the class
|
||||||
me->Parent->addLink (element_number, attribute_number, anchor, present, value);
|
if (element_number == HTML_A)
|
||||||
|
{
|
||||||
|
me->Parent->addLink (element_number, present, value);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// ***************************************************************************
|
// ***************************************************************************
|
||||||
|
|
Loading…
Reference in a new issue