1 <?xml version=
"1.0" encoding=
"ISO-8859-1"?>
2 <!DOCTYPE html PUBLIC
"-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
3 <html xmlns=
"http://www.w3.org/1999/xhtml"><head><meta http-equiv=
"Content-Type" content=
"text/html; charset=ISO-8859-1" /><link rel=
"SHORTCUT ICON" href=
"/favicon.ico" /><style type=
"text/css">
4 TD {font-family: Verdana,Arial,Helvetica}
5 BODY {font-family: Verdana,Arial,Helvetica; margin-top:
2em; margin-left:
0em; margin-right:
0em}
6 H1 {font-family: Verdana,Arial,Helvetica}
7 H2 {font-family: Verdana,Arial,Helvetica}
8 H3 {font-family: Verdana,Arial,Helvetica}
9 A:link, A:visited, A:active { text-decoration: underline }
10 </style><style type=
"text/css">
11 div.deprecated pre.programlisting {border-style: double;border-color:red}
12 pre.programlisting {border-style: double;background: #EECFA1}
13 </style><title>Module HTMLparser from libxml2
</title></head><body bgcolor=
"#8b7765" text=
"#000000" link=
"#a06060" vlink=
"#000000"><table border=
"0" width=
"100%" cellpadding=
"5" cellspacing=
"0" align=
"center"><tr><td width=
"120"><a href=
"http://swpat.ffii.org/"><img src=
"../epatents.png" alt=
"Action against software patents" /></a></td><td width=
"180"><a href=
"http://www.gnome.org/"><img src=
"../gnome2.png" alt=
"Gnome2 Logo" /></a><a href=
"http://www.w3.org/Status"><img src=
"../w3c.png" alt=
"W3C Logo" /></a><a href=
"http://www.redhat.com/"><img src=
"../redhat.gif" alt=
"Red Hat Logo" /></a><div align=
"left"><a href=
"http://xmlsoft.org/"><img src=
"../Libxml2-Logo-180x168.gif" alt=
"Made with Libxml2 Logo" /></a></div></td><td><table border=
"0" width=
"90%" cellpadding=
"2" cellspacing=
"0" align=
"center" bgcolor=
"#000000"><tr><td><table width=
"100%" border=
"0" cellspacing=
"1" cellpadding=
"3" bgcolor=
"#fffacd"><tr><td align=
"center"><h1></h1><h2>Module HTMLparser from libxml2
</h2></td></tr></table></td></tr></table></td></tr></table><table border=
"0" cellpadding=
"4" cellspacing=
"0" width=
"100%" align=
"center"><tr><td bgcolor=
"#8b7765"><table border=
"0" cellspacing=
"0" cellpadding=
"2" width=
"100%"><tr><td valign=
"top" width=
"200" bgcolor=
"#8b7765"><table border=
"0" cellspacing=
"0" cellpadding=
"1" width=
"100%" bgcolor=
"#000000"><tr><td><table width=
"100%" border=
"0" cellspacing=
"1" cellpadding=
"3"><tr><td colspan=
"1" bgcolor=
"#eecfa1" align=
"center"><center><b>API Menu
</b></center></td></tr><tr><td bgcolor=
"#fffacd"><form action=
"../search.php" enctype=
"application/x-www-form-urlencoded" method=
"get"><input name=
"query" type=
"text" size=
"20" value=
"" /><input name=
"submit" type=
"submit" value=
"Search ..." /></form><ul><li><a style=
"font-weight:bold" href=
"../index.html">Main Menu
</a></li><li><a style=
"font-weight:bold" href=
"../docs.html">Developer Menu
</a></li><li><a style=
"font-weight:bold" href=
"../examples/index.html">Code Examples
</a></li><li><a style=
"font-weight:bold" href=
"index.html">API Menu
</a></li><li><a href=
"libxml-parser.html">Parser API
</a></li><li><a href=
"libxml-tree.html">Tree API
</a></li><li><a href=
"libxml-xmlreader.html">Reader API
</a></li><li><a href=
"../guidelines.html">XML Guidelines
</a></li><li><a href=
"../ChangeLog.html">ChangeLog
</a></li></ul></td></tr></table><table width=
"100%" border=
"0" cellspacing=
"1" cellpadding=
"3"><tr><td colspan=
"1" bgcolor=
"#eecfa1" align=
"center"><center><b>API Indexes
</b></center></td></tr><tr><td bgcolor=
"#fffacd"><ul><li><a href=
"../APIchunk0.html">Alphabetic
</a></li><li><a href=
"../APIconstructors.html">Constructors
</a></li><li><a href=
"../APIfunctions.html">Functions/Types
</a></li><li><a href=
"../APIfiles.html">Modules
</a></li><li><a href=
"../APIsymbols.html">Symbols
</a></li></ul></td></tr></table><table width=
"100%" border=
"0" cellspacing=
"1" cellpadding=
"3"><tr><td colspan=
"1" bgcolor=
"#eecfa1" align=
"center"><center><b>Related links
</b></center></td></tr><tr><td bgcolor=
"#fffacd"><ul><li><a href=
"http://mail.gnome.org/archives/xml/">Mail archive
</a></li><li><a href=
"http://xmlsoft.org/XSLT/">XSLT libxslt
</a></li><li><a href=
"http://phd.cs.unibo.it/gdome2/">DOM gdome2
</a></li><li><a href=
"http://www.aleksey.com/xmlsec/">XML-DSig xmlsec
</a></li><li><a href=
"ftp://xmlsoft.org/">FTP
</a></li><li><a href=
"http://www.zlatkovic.com/projects/libxml/">Windows binaries
</a></li><li><a href=
"http://www.blastwave.org/packages.php/libxml2">Solaris binaries
</a></li><li><a href=
"http://www.explain.com.au/oss/libxml2xslt.html">MacOsX binaries
</a></li><li><a href=
"http://libxmlplusplus.sourceforge.net/">C++ bindings
</a></li><li><a href=
"http://www.zend.com/php5/articles/php5-xmlphp.php#Heading4">PHP bindings
</a></li><li><a href=
"http://sourceforge.net/projects/libxml2-pas/">Pascal bindings
</a></li><li><a href=
"http://rubyforge.org/projects/xml-tools/">Ruby bindings
</a></li><li><a href=
"http://tclxml.sourceforge.net/">Tcl bindings
</a></li><li><a href=
"http://bugzilla.gnome.org/buglist.cgi?product=libxml2">Bug Tracker
</a></li></ul></td></tr></table></td></tr></table></td><td valign=
"top" bgcolor=
"#8b7765"><table border=
"0" cellspacing=
"0" cellpadding=
"1" width=
"100%"><tr><td><table border=
"0" cellspacing=
"0" cellpadding=
"1" width=
"100%" bgcolor=
"#000000"><tr><td><table border=
"0" cellpadding=
"3" cellspacing=
"1" width=
"100%"><tr><td bgcolor=
"#fffacd"><table class=
"navigation" width=
"100%" summary=
"Navigation header" cellpadding=
"2" cellspacing=
"2"><tr valign=
"middle"><td><a accesskey=
"p" href=
"libxml-DOCBparser.html"><img src=
"left.png" width=
"24" height=
"24" border=
"0" alt=
"Prev" /></a></td><th align=
"left"><a href=
"libxml-DOCBparser.html">DOCBparser
</a></th><td><a accesskey=
"u" href=
"index.html"><img src=
"up.png" width=
"24" height=
"24" border=
"0" alt=
"Up" /></a></td><th align=
"left"><a href=
"index.html">API documentation
</a></th><td><a accesskey=
"h" href=
"../index.html"><img src=
"home.png" width=
"24" height=
"24" border=
"0" alt=
"Home" /></a></td><th align=
"center"><a href=
"../index.html">The XML C parser and toolkit of Gnome
</a></th><th align=
"right"><a href=
"libxml-HTMLtree.html">HTMLtree
</a></th><td><a accesskey=
"n" href=
"libxml-HTMLtree.html"><img src=
"right.png" width=
"24" height=
"24" border=
"0" alt=
"Next" /></a></td></tr></table><p>this module implements an HTML
4.0 non-verifying parser with an API compatible with that of the XML parser. It should be able to parse
"real world" HTML, even if severely broken from a specification point of view.
</p><h2>Table of Contents
</h2><pre class=
"programlisting">#define
<a href=
"#htmlDefaultSubelement">htmlDefaultSubelement
</a></pre><pre class=
"programlisting">#define
<a href=
"#htmlElementAllowedHereDesc">htmlElementAllowedHereDesc
</a></pre><pre class=
"programlisting">#define
<a href=
"#htmlRequiredAttrs">htmlRequiredAttrs
</a></pre><pre class=
"programlisting">Typedef
<a href=
"libxml-tree.html#xmlDocPtr">xmlDocPtr
</a> <a name=
"htmlDocPtr" id=
"htmlDocPtr">htmlDocPtr
</a>
14 </pre><pre class=
"programlisting">Structure
<a href=
"#htmlElemDesc">htmlElemDesc
</a><br />struct _htmlElemDesc
15 </pre><pre class=
"programlisting">Typedef
<a href=
"libxml-HTMLparser.html#htmlElemDesc">htmlElemDesc
</a> *
<a name=
"htmlElemDescPtr" id=
"htmlElemDescPtr">htmlElemDescPtr
</a>
16 </pre><pre class=
"programlisting">Structure
<a href=
"#htmlEntityDesc">htmlEntityDesc
</a><br />struct _htmlEntityDesc
17 </pre><pre class=
"programlisting">Typedef
<a href=
"libxml-HTMLparser.html#htmlEntityDesc">htmlEntityDesc
</a> *
<a name=
"htmlEntityDescPtr" id=
"htmlEntityDescPtr">htmlEntityDescPtr
</a>
18 </pre><pre class=
"programlisting">Typedef
<a href=
"libxml-tree.html#xmlNodePtr">xmlNodePtr
</a> <a name=
"htmlNodePtr" id=
"htmlNodePtr">htmlNodePtr
</a>
19 </pre><pre class=
"programlisting">Typedef
<a href=
"libxml-tree.html#xmlParserCtxt">xmlParserCtxt
</a> <a name=
"htmlParserCtxt" id=
"htmlParserCtxt">htmlParserCtxt
</a>
20 </pre><pre class=
"programlisting">Typedef
<a href=
"libxml-tree.html#xmlParserCtxtPtr">xmlParserCtxtPtr
</a> <a name=
"htmlParserCtxtPtr" id=
"htmlParserCtxtPtr">htmlParserCtxtPtr
</a>
21 </pre><pre class=
"programlisting">Typedef
<a href=
"libxml-tree.html#xmlParserInput">xmlParserInput
</a> <a name=
"htmlParserInput" id=
"htmlParserInput">htmlParserInput
</a>
22 </pre><pre class=
"programlisting">Typedef
<a href=
"libxml-tree.html#xmlParserInputPtr">xmlParserInputPtr
</a> <a name=
"htmlParserInputPtr" id=
"htmlParserInputPtr">htmlParserInputPtr
</a>
23 </pre><pre class=
"programlisting">Typedef
<a href=
"libxml-parser.html#xmlParserNodeInfo">xmlParserNodeInfo
</a> <a name=
"htmlParserNodeInfo" id=
"htmlParserNodeInfo">htmlParserNodeInfo
</a>
24 </pre><pre class=
"programlisting">Enum
<a href=
"#htmlParserOption">htmlParserOption
</a>
25 </pre><pre class=
"programlisting">Typedef
<a href=
"libxml-tree.html#xmlSAXHandler">xmlSAXHandler
</a> <a name=
"htmlSAXHandler" id=
"htmlSAXHandler">htmlSAXHandler
</a>
26 </pre><pre class=
"programlisting">Typedef
<a href=
"libxml-tree.html#xmlSAXHandlerPtr">xmlSAXHandlerPtr
</a> <a name=
"htmlSAXHandlerPtr" id=
"htmlSAXHandlerPtr">htmlSAXHandlerPtr
</a>
27 </pre><pre class=
"programlisting">Enum
<a href=
"#htmlStatus">htmlStatus
</a>
28 </pre><pre class=
"programlisting">int
<a href=
"#UTF8ToHtml">UTF8ToHtml
</a> (unsigned char * out,
<br /> int * outlen,
<br /> const unsigned char * in,
<br /> int * inlen)
</pre>
29 <pre class=
"programlisting"><a href=
"libxml-HTMLparser.html#htmlStatus">htmlStatus
</a> <a href=
"#htmlAttrAllowed">htmlAttrAllowed
</a> (const
<a href=
"libxml-HTMLparser.html#htmlElemDesc">htmlElemDesc
</a> * elt,
<br /> const
<a href=
"libxml-xmlstring.html#xmlChar">xmlChar
</a> * attr,
<br /> int legacy)
</pre>
30 <pre class=
"programlisting">int
<a href=
"#htmlAutoCloseTag">htmlAutoCloseTag
</a> (
<a href=
"libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr
</a> doc,
<br /> const
<a href=
"libxml-xmlstring.html#xmlChar">xmlChar
</a> * name,
<br /> <a href=
"libxml-HTMLparser.html#htmlNodePtr">htmlNodePtr
</a> elem)
</pre>
31 <pre class=
"programlisting"><a href=
"libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr
</a> <a href=
"#htmlCreateMemoryParserCtxt">htmlCreateMemoryParserCtxt
</a> (const char * buffer,
<br /> int size)
</pre>
32 <pre class=
"programlisting"><a href=
"libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr
</a> <a href=
"#htmlCreatePushParserCtxt">htmlCreatePushParserCtxt
</a> (
<a href=
"libxml-HTMLparser.html#htmlSAXHandlerPtr">htmlSAXHandlerPtr
</a> sax,
<br /> void * user_data,
<br /> const char * chunk,
<br /> int size,
<br /> const char * filename,
<br /> <a href=
"libxml-encoding.html#xmlCharEncoding">xmlCharEncoding
</a> enc)
</pre>
33 <pre class=
"programlisting"><a href=
"libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr
</a> <a href=
"#htmlCtxtReadDoc">htmlCtxtReadDoc
</a> (
<a href=
"libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr
</a> ctxt,
<br /> const
<a href=
"libxml-xmlstring.html#xmlChar">xmlChar
</a> * cur,
<br /> const char * URL,
<br /> const char * encoding,
<br /> int options)
</pre>
34 <pre class=
"programlisting"><a href=
"libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr
</a> <a href=
"#htmlCtxtReadFd">htmlCtxtReadFd
</a> (
<a href=
"libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr
</a> ctxt,
<br /> int fd,
<br /> const char * URL,
<br /> const char * encoding,
<br /> int options)
</pre>
35 <pre class=
"programlisting"><a href=
"libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr
</a> <a href=
"#htmlCtxtReadFile">htmlCtxtReadFile
</a> (
<a href=
"libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr
</a> ctxt,
<br /> const char * filename,
<br /> const char * encoding,
<br /> int options)
</pre>
36 <pre class=
"programlisting"><a href=
"libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr
</a> <a href=
"#htmlCtxtReadIO">htmlCtxtReadIO
</a> (
<a href=
"libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr
</a> ctxt,
<br /> <a href=
"libxml-xmlIO.html#xmlInputReadCallback">xmlInputReadCallback
</a> ioread,
<br /> <a href=
"libxml-xmlIO.html#xmlInputCloseCallback">xmlInputCloseCallback
</a> ioclose,
<br /> void * ioctx,
<br /> const char * URL,
<br /> const char * encoding,
<br /> int options)
</pre>
37 <pre class=
"programlisting"><a href=
"libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr
</a> <a href=
"#htmlCtxtReadMemory">htmlCtxtReadMemory
</a> (
<a href=
"libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr
</a> ctxt,
<br /> const char * buffer,
<br /> int size,
<br /> const char * URL,
<br /> const char * encoding,
<br /> int options)
</pre>
38 <pre class=
"programlisting">void
<a href=
"#htmlCtxtReset">htmlCtxtReset
</a> (
<a href=
"libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr
</a> ctxt)
</pre>
39 <pre class=
"programlisting">int
<a href=
"#htmlCtxtUseOptions">htmlCtxtUseOptions
</a> (
<a href=
"libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr
</a> ctxt,
<br /> int options)
</pre>
40 <pre class=
"programlisting">int
<a href=
"#htmlElementAllowedHere">htmlElementAllowedHere
</a> (const
<a href=
"libxml-HTMLparser.html#htmlElemDesc">htmlElemDesc
</a> * parent,
<br /> const
<a href=
"libxml-xmlstring.html#xmlChar">xmlChar
</a> * elt)
</pre>
41 <pre class=
"programlisting"><a href=
"libxml-HTMLparser.html#htmlStatus">htmlStatus
</a> <a href=
"#htmlElementStatusHere">htmlElementStatusHere
</a> (const
<a href=
"libxml-HTMLparser.html#htmlElemDesc">htmlElemDesc
</a> * parent,
<br /> const
<a href=
"libxml-HTMLparser.html#htmlElemDesc">htmlElemDesc
</a> * elt)
</pre>
42 <pre class=
"programlisting">int
<a href=
"#htmlEncodeEntities">htmlEncodeEntities
</a> (unsigned char * out,
<br /> int * outlen,
<br /> const unsigned char * in,
<br /> int * inlen,
<br /> int quoteChar)
</pre>
43 <pre class=
"programlisting">const
<a href=
"libxml-HTMLparser.html#htmlEntityDesc">htmlEntityDesc
</a> *
<a href=
"#htmlEntityLookup">htmlEntityLookup
</a> (const
<a href=
"libxml-xmlstring.html#xmlChar">xmlChar
</a> * name)
</pre>
44 <pre class=
"programlisting">const
<a href=
"libxml-HTMLparser.html#htmlEntityDesc">htmlEntityDesc
</a> *
<a href=
"#htmlEntityValueLookup">htmlEntityValueLookup
</a> (unsigned int value)
</pre>
45 <pre class=
"programlisting">void
<a href=
"#htmlFreeParserCtxt">htmlFreeParserCtxt
</a> (
<a href=
"libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr
</a> ctxt)
</pre>
46 <pre class=
"programlisting">int
<a href=
"#htmlHandleOmittedElem">htmlHandleOmittedElem
</a> (int val)
</pre>
47 <pre class=
"programlisting">int
<a href=
"#htmlIsAutoClosed">htmlIsAutoClosed
</a> (
<a href=
"libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr
</a> doc,
<br /> <a href=
"libxml-HTMLparser.html#htmlNodePtr">htmlNodePtr
</a> elem)
</pre>
48 <pre class=
"programlisting">int
<a href=
"#htmlIsScriptAttribute">htmlIsScriptAttribute
</a> (const
<a href=
"libxml-xmlstring.html#xmlChar">xmlChar
</a> * name)
</pre>
49 <pre class=
"programlisting"><a href=
"libxml-HTMLparser.html#htmlStatus">htmlStatus
</a> <a href=
"#htmlNodeStatus">htmlNodeStatus
</a> (const
<a href=
"libxml-HTMLparser.html#htmlNodePtr">htmlNodePtr
</a> node,
<br /> int legacy)
</pre>
50 <pre class=
"programlisting">int
<a href=
"#htmlParseCharRef">htmlParseCharRef
</a> (
<a href=
"libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr
</a> ctxt)
</pre>
51 <pre class=
"programlisting">int
<a href=
"#htmlParseChunk">htmlParseChunk
</a> (
<a href=
"libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr
</a> ctxt,
<br /> const char * chunk,
<br /> int size,
<br /> int terminate)
</pre>
52 <pre class=
"programlisting"><a href=
"libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr
</a> <a href=
"#htmlParseDoc">htmlParseDoc
</a> (
<a href=
"libxml-xmlstring.html#xmlChar">xmlChar
</a> * cur,
<br /> const char * encoding)
</pre>
53 <pre class=
"programlisting">int
<a href=
"#htmlParseDocument">htmlParseDocument
</a> (
<a href=
"libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr
</a> ctxt)
</pre>
54 <pre class=
"programlisting">void
<a href=
"#htmlParseElement">htmlParseElement
</a> (
<a href=
"libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr
</a> ctxt)
</pre>
55 <pre class=
"programlisting">const
<a href=
"libxml-HTMLparser.html#htmlEntityDesc">htmlEntityDesc
</a> *
<a href=
"#htmlParseEntityRef">htmlParseEntityRef
</a> (
<a href=
"libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr
</a> ctxt,
<br /> const
<a href=
"libxml-xmlstring.html#xmlChar">xmlChar
</a> ** str)
</pre>
56 <pre class=
"programlisting"><a href=
"libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr
</a> <a href=
"#htmlParseFile">htmlParseFile
</a> (const char * filename,
<br /> const char * encoding)
</pre>
57 <pre class=
"programlisting"><a href=
"libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr
</a> <a href=
"#htmlReadDoc">htmlReadDoc
</a> (const
<a href=
"libxml-xmlstring.html#xmlChar">xmlChar
</a> * cur,
<br /> const char * URL,
<br /> const char * encoding,
<br /> int options)
</pre>
58 <pre class=
"programlisting"><a href=
"libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr
</a> <a href=
"#htmlReadFd">htmlReadFd
</a> (int fd,
<br /> const char * URL,
<br /> const char * encoding,
<br /> int options)
</pre>
59 <pre class=
"programlisting"><a href=
"libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr
</a> <a href=
"#htmlReadFile">htmlReadFile
</a> (const char * filename,
<br /> const char * encoding,
<br /> int options)
</pre>
60 <pre class=
"programlisting"><a href=
"libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr
</a> <a href=
"#htmlReadIO">htmlReadIO
</a> (
<a href=
"libxml-xmlIO.html#xmlInputReadCallback">xmlInputReadCallback
</a> ioread,
<br /> <a href=
"libxml-xmlIO.html#xmlInputCloseCallback">xmlInputCloseCallback
</a> ioclose,
<br /> void * ioctx,
<br /> const char * URL,
<br /> const char * encoding,
<br /> int options)
</pre>
61 <pre class=
"programlisting"><a href=
"libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr
</a> <a href=
"#htmlReadMemory">htmlReadMemory
</a> (const char * buffer,
<br /> int size,
<br /> const char * URL,
<br /> const char * encoding,
<br /> int options)
</pre>
62 <pre class=
"programlisting"><a href=
"libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr
</a> <a href=
"#htmlSAXParseDoc">htmlSAXParseDoc
</a> (
<a href=
"libxml-xmlstring.html#xmlChar">xmlChar
</a> * cur,
<br /> const char * encoding,
<br /> <a href=
"libxml-HTMLparser.html#htmlSAXHandlerPtr">htmlSAXHandlerPtr
</a> sax,
<br /> void * userData)
</pre>
63 <pre class=
"programlisting"><a href=
"libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr
</a> <a href=
"#htmlSAXParseFile">htmlSAXParseFile
</a> (const char * filename,
<br /> const char * encoding,
<br /> <a href=
"libxml-HTMLparser.html#htmlSAXHandlerPtr">htmlSAXHandlerPtr
</a> sax,
<br /> void * userData)
</pre>
64 <pre class=
"programlisting">const
<a href=
"libxml-HTMLparser.html#htmlElemDesc">htmlElemDesc
</a> *
<a href=
"#htmlTagLookup">htmlTagLookup
</a> (const
<a href=
"libxml-xmlstring.html#xmlChar">xmlChar
</a> * tag)
</pre>
66 <h3><a name=
"htmlDefaultSubelement" id=
"htmlDefaultSubelement"></a>Macro: htmlDefaultSubelement
</h3><pre>#define htmlDefaultSubelement
</pre><p>Returns the default subelement for this element
</p>
67 <h3><a name=
"htmlElementAllowedHereDesc" id=
"htmlElementAllowedHereDesc"></a>Macro: htmlElementAllowedHereDesc
</h3><pre>#define htmlElementAllowedHereDesc
</pre><p>Checks whether an HTML element description may be a direct child of the specified element. Returns
1 if allowed;
0 otherwise.
</p>
68 <h3><a name=
"htmlRequiredAttrs" id=
"htmlRequiredAttrs"></a>Macro: htmlRequiredAttrs
</h3><pre>#define htmlRequiredAttrs
</pre><p>Returns the attributes required for the specified element.
</p>
69 <h3><a name=
"htmlElemDesc" id=
"htmlElemDesc">Structure htmlElemDesc
</a></h3><pre class=
"programlisting">Structure htmlElemDesc
<br />struct _htmlElemDesc {
70 const char * name : The tag name
71 char startTag : Whether the start tag can be implied
72 char endTag : Whether the end tag can be implied
73 char saveEndTag : Whether the end tag should be saved
74 char empty : Is this an empty element ?
75 char depr : Is this a deprecated element ?
76 char dtd :
1: only in Loose DTD,
2: only Frameset
77 char isinline : is this a block
0 or inline
1 element
78 const char * desc : the description (NRK Jan.2003: new fields encapsulating HTML structure follow)
79 const char ** subelts : allowed sub-elements of this element
80 const char * defaultsubelt : subelement for suggested auto-repair if necessary or NULL
81 const char ** attrs_opt : Optional Attributes
82 const char ** attrs_depr : Additional deprecated attributes
83 const char ** attrs_req : Required attributes
84 }
</pre><h3><a name=
"htmlEntityDesc" id=
"htmlEntityDesc">Structure htmlEntityDesc
</a></h3><pre class=
"programlisting">Structure htmlEntityDesc
<br />struct _htmlEntityDesc {
85 unsigned int value : the UNICODE value for the character
86 const char * name : The entity name
87 const char * desc : the description
88 }
</pre><h3>Enum
<a name=
"htmlParserOption" id=
"htmlParserOption">htmlParserOption
</a></h3><pre class=
"programlisting">Enum htmlParserOption {
89 <a name=
"HTML_PARSE_NOERROR" id=
"HTML_PARSE_NOERROR">HTML_PARSE_NOERROR
</a> =
32 : suppress error reports
90 <a name=
"HTML_PARSE_NOWARNING" id=
"HTML_PARSE_NOWARNING">HTML_PARSE_NOWARNING
</a> =
64 : suppress warning reports
91 <a name=
"HTML_PARSE_PEDANTIC" id=
"HTML_PARSE_PEDANTIC">HTML_PARSE_PEDANTIC
</a> =
128 : pedantic error reporting
92 <a name=
"HTML_PARSE_NOBLANKS" id=
"HTML_PARSE_NOBLANKS">HTML_PARSE_NOBLANKS
</a> =
256 : remove blank nodes
93 <a name=
"HTML_PARSE_NONET" id=
"HTML_PARSE_NONET">HTML_PARSE_NONET
</a> =
2048 : Forbid network access
95 </pre><h3>Enum
<a name=
"htmlStatus" id=
"htmlStatus">htmlStatus
</a></h3><pre class=
"programlisting">Enum htmlStatus {
96 <a name=
"HTML_NA" id=
"HTML_NA">HTML_NA
</a> =
0 : something we don't check at all
97 <a name=
"HTML_INVALID" id=
"HTML_INVALID">HTML_INVALID
</a> =
1
98 <a name=
"HTML_DEPRECATED" id=
"HTML_DEPRECATED">HTML_DEPRECATED
</a> =
2
99 <a name=
"HTML_VALID" id=
"HTML_VALID">HTML_VALID
</a> =
4
100 <a name=
"HTML_REQUIRED" id=
"HTML_REQUIRED">HTML_REQUIRED
</a> =
12 : VALID bit set so (
&amp; <a href=
"libxml-HTMLparser.html#HTML_VALID">HTML_VALID
</a> ) is TRUE
102 </pre><h3><a name=
"UTF8ToHtml" id=
"UTF8ToHtml"></a>Function: UTF8ToHtml
</h3><pre class=
"programlisting">int UTF8ToHtml (unsigned char * out,
<br /> int * outlen,
<br /> const unsigned char * in,
<br /> int * inlen)
<br />
103 </pre><p>Take a block of UTF-
8 chars in and try to convert it to an ASCII plus HTML entities block of chars out.
</p>
104 <div class=
"variablelist"><table border=
"0"><col align=
"left" /><tbody><tr><td><span class=
"term"><i><tt>out
</tt></i>:
</span></td><td>a pointer to an array of bytes to store the result
</td></tr><tr><td><span class=
"term"><i><tt>outlen
</tt></i>:
</span></td><td>the length of @out
</td></tr><tr><td><span class=
"term"><i><tt>in
</tt></i>:
</span></td><td>a pointer to an array of UTF-
8 chars
</td></tr><tr><td><span class=
"term"><i><tt>inlen
</tt></i>:
</span></td><td>the length of @in
</td></tr><tr><td><span class=
"term"><i><tt>Returns
</tt></i>:
</span></td><td>0 if success, -
2 if the transcoding fails, or -
1 otherwise. The value of @inlen after return is the number of octets consumed if the return value is positive, else unpredictable. The value of @outlen after return is the number of octets produced.
</td></tr></tbody></table></div><h3><a name=
"htmlAttrAllowed" id=
"htmlAttrAllowed"></a>Function: htmlAttrAllowed
</h3><pre class=
"programlisting"><a href=
"libxml-HTMLparser.html#htmlStatus">htmlStatus
</a> htmlAttrAllowed (const
<a href=
"libxml-HTMLparser.html#htmlElemDesc">htmlElemDesc
</a> * elt,
<br /> const
<a href=
"libxml-xmlstring.html#xmlChar">xmlChar
</a> * attr,
<br /> int legacy)
<br />
105 </pre><p>Checks whether an
<a href=
"libxml-SAX.html#attribute">attribute
</a> is valid for an element. Has full knowledge of Required and Deprecated attributes.
</p>
106 <div class=
"variablelist"><table border=
"0"><col align=
"left" /><tbody><tr><td><span class=
"term"><i><tt>elt
</tt></i>:
</span></td><td>HTML element
</td></tr><tr><td><span class=
"term"><i><tt>attr
</tt></i>:
</span></td><td>HTML
<a href=
"libxml-SAX.html#attribute">attribute
</a></td></tr><tr><td><span class=
"term"><i><tt>legacy
</tt></i>:
</span></td><td>whether to allow deprecated attributes
</td></tr><tr><td><span class=
"term"><i><tt>Returns
</tt></i>:
</span></td><td>one of HTML_REQUIRED, HTML_VALID, HTML_DEPRECATED,
<a href=
"libxml-HTMLparser.html#HTML_INVALID">HTML_INVALID
</a></td></tr></tbody></table></div><h3><a name=
"htmlAutoCloseTag" id=
"htmlAutoCloseTag"></a>Function: htmlAutoCloseTag
</h3><pre class=
"programlisting">int htmlAutoCloseTag (
<a href=
"libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr
</a> doc,
<br /> const
<a href=
"libxml-xmlstring.html#xmlChar">xmlChar
</a> * name,
<br /> <a href=
"libxml-HTMLparser.html#htmlNodePtr">htmlNodePtr
</a> elem)
<br />
107 </pre><p>The HTML DTD allows a tag to implicitly close other tags. The list is kept in htmlStartClose array. This function checks if the element or one of its children would autoclose the given tag.
</p>
108 <div class=
"variablelist"><table border=
"0"><col align=
"left" /><tbody><tr><td><span class=
"term"><i><tt>doc
</tt></i>:
</span></td><td>the HTML document
</td></tr><tr><td><span class=
"term"><i><tt>name
</tt></i>:
</span></td><td>The tag name
</td></tr><tr><td><span class=
"term"><i><tt>elem
</tt></i>:
</span></td><td>the HTML element
</td></tr><tr><td><span class=
"term"><i><tt>Returns
</tt></i>:
</span></td><td>1 if autoclose,
0 otherwise
</td></tr></tbody></table></div><h3><a name=
"htmlCreateMemoryParserCtxt" id=
"htmlCreateMemoryParserCtxt"></a>Function: htmlCreateMemoryParserCtxt
</h3><pre class=
"programlisting"><a href=
"libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr
</a> htmlCreateMemoryParserCtxt (const char * buffer,
<br /> int size)
<br />
109 </pre><p>Create a parser context for an HTML in-memory document.
</p>
110 <div class=
"variablelist"><table border=
"0"><col align=
"left" /><tbody><tr><td><span class=
"term"><i><tt>buffer
</tt></i>:
</span></td><td>a pointer to a char array
</td></tr><tr><td><span class=
"term"><i><tt>size
</tt></i>:
</span></td><td>the size of the array
</td></tr><tr><td><span class=
"term"><i><tt>Returns
</tt></i>:
</span></td><td>the new parser context or NULL
</td></tr></tbody></table></div><h3><a name=
"htmlCreatePushParserCtxt" id=
"htmlCreatePushParserCtxt"></a>Function: htmlCreatePushParserCtxt
</h3><pre class=
"programlisting"><a href=
"libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr
</a> htmlCreatePushParserCtxt (
<a href=
"libxml-HTMLparser.html#htmlSAXHandlerPtr">htmlSAXHandlerPtr
</a> sax,
<br /> void * user_data,
<br /> const char * chunk,
<br /> int size,
<br /> const char * filename,
<br /> <a href=
"libxml-encoding.html#xmlCharEncoding">xmlCharEncoding
</a> enc)
<br />
111 </pre><p>Create a parser context for using the HTML parser in push mode. The value of @filename is used for fetching external entities and error/warning reports.
</p>
112 <div class=
"variablelist"><table border=
"0"><col align=
"left" /><tbody><tr><td><span class=
"term"><i><tt>sax
</tt></i>:
</span></td><td>a SAX handler
</td></tr><tr><td><span class=
"term"><i><tt>user_data
</tt></i>:
</span></td><td>The user data returned on SAX callbacks
</td></tr><tr><td><span class=
"term"><i><tt>chunk
</tt></i>:
</span></td><td>a pointer to an array of chars
</td></tr><tr><td><span class=
"term"><i><tt>size
</tt></i>:
</span></td><td>number of chars in the array
</td></tr><tr><td><span class=
"term"><i><tt>filename
</tt></i>:
</span></td><td>an optional file name or URI
</td></tr><tr><td><span class=
"term"><i><tt>enc
</tt></i>:
</span></td><td>an optional encoding
</td></tr><tr><td><span class=
"term"><i><tt>Returns
</tt></i>:
</span></td><td>the new parser context or NULL
</td></tr></tbody></table></div><h3><a name=
"htmlCtxtReadDoc" id=
"htmlCtxtReadDoc"></a>Function: htmlCtxtReadDoc
</h3><pre class=
"programlisting"><a href=
"libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr
</a> htmlCtxtReadDoc (
<a href=
"libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr
</a> ctxt,
<br /> const
<a href=
"libxml-xmlstring.html#xmlChar">xmlChar
</a> * cur,
<br /> const char * URL,
<br /> const char * encoding,
<br /> int options)
<br />
113 </pre><p>Parse an HTML in-memory document and build a tree. This reuses the existing @ctxt parser context.
</p>
114 <div class=
"variablelist"><table border=
"0"><col align=
"left" /><tbody><tr><td><span class=
"term"><i><tt>ctxt
</tt></i>:
</span></td><td>an HTML parser context
</td></tr><tr><td><span class=
"term"><i><tt>cur
</tt></i>:
</span></td><td>a pointer to a zero terminated string
</td></tr><tr><td><span class=
"term"><i><tt>URL
</tt></i>:
</span></td><td>the base URL to use for the document
</td></tr><tr><td><span class=
"term"><i><tt>encoding
</tt></i>:
</span></td><td>the document encoding, or NULL
</td></tr><tr><td><span class=
"term"><i><tt>options
</tt></i>:
</span></td><td>a combination of htmlParserOption(s)
</td></tr><tr><td><span class=
"term"><i><tt>Returns
</tt></i>:
</span></td><td>the resulting document tree
</td></tr></tbody></table></div><h3><a name=
"htmlCtxtReadFd" id=
"htmlCtxtReadFd"></a>Function: htmlCtxtReadFd
</h3><pre class=
"programlisting"><a href=
"libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr
</a> htmlCtxtReadFd (
<a href=
"libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr
</a> ctxt,
<br /> int fd,
<br /> const char * URL,
<br /> const char * encoding,
<br /> int options)
<br />
115 </pre><p>Parse an HTML document from a file descriptor and build a tree. This reuses the existing @ctxt parser context.
</p>
116 <div class=
"variablelist"><table border=
"0"><col align=
"left" /><tbody><tr><td><span class=
"term"><i><tt>ctxt
</tt></i>:
</span></td><td>an HTML parser context
</td></tr><tr><td><span class=
"term"><i><tt>fd
</tt></i>:
</span></td><td>an open file descriptor
</td></tr><tr><td><span class=
"term"><i><tt>URL
</tt></i>:
</span></td><td>the base URL to use for the document
</td></tr><tr><td><span class=
"term"><i><tt>encoding
</tt></i>:
</span></td><td>the document encoding, or NULL
</td></tr><tr><td><span class=
"term"><i><tt>options
</tt></i>:
</span></td><td>a combination of htmlParserOption(s)
</td></tr><tr><td><span class=
"term"><i><tt>Returns
</tt></i>:
</span></td><td>the resulting document tree
</td></tr></tbody></table></div><h3><a name=
"htmlCtxtReadFile" id=
"htmlCtxtReadFile"></a>Function: htmlCtxtReadFile
</h3><pre class=
"programlisting"><a href=
"libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr
</a> htmlCtxtReadFile (
<a href=
"libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr
</a> ctxt,
<br /> const char * filename,
<br /> const char * encoding,
<br /> int options)
<br />
117 </pre><p>Parse an HTML file from the filesystem or the network. This reuses the existing @ctxt parser context.
</p>
118 <div class=
"variablelist"><table border=
"0"><col align=
"left" /><tbody><tr><td><span class=
"term"><i><tt>ctxt
</tt></i>:
</span></td><td>an HTML parser context
</td></tr><tr><td><span class=
"term"><i><tt>filename
</tt></i>:
</span></td><td>a file or URL
</td></tr><tr><td><span class=
"term"><i><tt>encoding
</tt></i>:
</span></td><td>the document encoding, or NULL
</td></tr><tr><td><span class=
"term"><i><tt>options
</tt></i>:
</span></td><td>a combination of htmlParserOption(s)
</td></tr><tr><td><span class=
"term"><i><tt>Returns
</tt></i>:
</span></td><td>the resulting document tree
</td></tr></tbody></table></div><h3><a name=
"htmlCtxtReadIO" id=
"htmlCtxtReadIO"></a>Function: htmlCtxtReadIO
</h3><pre class=
"programlisting"><a href=
"libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr
</a> htmlCtxtReadIO (
<a href=
"libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr
</a> ctxt,
<br /> <a href=
"libxml-xmlIO.html#xmlInputReadCallback">xmlInputReadCallback
</a> ioread,
<br /> <a href=
"libxml-xmlIO.html#xmlInputCloseCallback">xmlInputCloseCallback
</a> ioclose,
<br /> void * ioctx,
<br /> const char * URL,
<br /> const char * encoding,
<br /> int options)
<br />
119 </pre><p>parse an HTML document from I/O functions and source and build a tree. This reuses the existing @ctxt parser context
</p>
120 <div class=
"variablelist"><table border=
"0"><col align=
"left" /><tbody><tr><td><span class=
"term"><i><tt>ctxt
</tt></i>:
</span></td><td>an HTML parser context
</td></tr><tr><td><span class=
"term"><i><tt>ioread
</tt></i>:
</span></td><td>an I/O read function
</td></tr><tr><td><span class=
"term"><i><tt>ioclose
</tt></i>:
</span></td><td>an I/O close function
</td></tr><tr><td><span class=
"term"><i><tt>ioctx
</tt></i>:
</span></td><td>an I/O handler
</td></tr><tr><td><span class=
"term"><i><tt>URL
</tt></i>:
</span></td><td>the base URL to use for the document
</td></tr><tr><td><span class=
"term"><i><tt>encoding
</tt></i>:
</span></td><td>the document encoding, or NULL
</td></tr><tr><td><span class=
"term"><i><tt>options
</tt></i>:
</span></td><td>a combination of htmlParserOption(s)
</td></tr><tr><td><span class=
"term"><i><tt>Returns
</tt></i>:
</span></td><td>the resulting document tree
</td></tr></tbody></table></div><h3><a name=
"htmlCtxtReadMemory" id=
"htmlCtxtReadMemory"></a>Function: htmlCtxtReadMemory
</h3><pre class=
"programlisting"><a href=
"libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr
</a> htmlCtxtReadMemory (
<a href=
"libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr
</a> ctxt,
<br /> const char * buffer,
<br /> int size,
<br /> const char * URL,
<br /> const char * encoding,
<br /> int options)
<br />
121 </pre><p>parse an HTML in-memory document and build a tree. This reuses the existing @ctxt parser context
</p>
122 <div class=
"variablelist"><table border=
"0"><col align=
"left" /><tbody><tr><td><span class=
"term"><i><tt>ctxt
</tt></i>:
</span></td><td>an HTML parser context
</td></tr><tr><td><span class=
"term"><i><tt>buffer
</tt></i>:
</span></td><td>a pointer to a char array
</td></tr><tr><td><span class=
"term"><i><tt>size
</tt></i>:
</span></td><td>the size of the array
</td></tr><tr><td><span class=
"term"><i><tt>URL
</tt></i>:
</span></td><td>the base URL to use for the document
</td></tr><tr><td><span class=
"term"><i><tt>encoding
</tt></i>:
</span></td><td>the document encoding, or NULL
</td></tr><tr><td><span class=
"term"><i><tt>options
</tt></i>:
</span></td><td>a combination of htmlParserOption(s)
</td></tr><tr><td><span class=
"term"><i><tt>Returns
</tt></i>:
</span></td><td>the resulting document tree
</td></tr></tbody></table></div><h3><a name=
"htmlCtxtReset" id=
"htmlCtxtReset"></a>Function: htmlCtxtReset
</h3><pre class=
"programlisting">void htmlCtxtReset (
<a href=
"libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr
</a> ctxt)
<br />
123 </pre><p>Reset a parser context
</p>
124 <div class=
"variablelist"><table border=
"0"><col align=
"left" /><tbody><tr><td><span class=
"term"><i><tt>ctxt
</tt></i>:
</span></td><td>an HTML parser context
</td></tr></tbody></table></div><h3><a name=
"htmlCtxtUseOptions" id=
"htmlCtxtUseOptions"></a>Function: htmlCtxtUseOptions
</h3><pre class=
"programlisting">int htmlCtxtUseOptions (
<a href=
"libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr
</a> ctxt,
<br /> int options)
<br />
125 </pre><p>Applies the options to the parser context
</p>
126 <div class=
"variablelist"><table border=
"0"><col align=
"left" /><tbody><tr><td><span class=
"term"><i><tt>ctxt
</tt></i>:
</span></td><td>an HTML parser context
</td></tr><tr><td><span class=
"term"><i><tt>options
</tt></i>:
</span></td><td>a combination of htmlParserOption(s)
</td></tr><tr><td><span class=
"term"><i><tt>Returns
</tt></i>:
</span></td><td>0 in case of success, the set of unknown or unimplemented options in case of error.
</td></tr></tbody></table></div><h3><a name=
"htmlElementAllowedHere" id=
"htmlElementAllowedHere"></a>Function: htmlElementAllowedHere
</h3><pre class=
"programlisting">int htmlElementAllowedHere (const
<a href=
"libxml-HTMLparser.html#htmlElemDesc">htmlElemDesc
</a> * parent,
<br /> const
<a href=
"libxml-xmlstring.html#xmlChar">xmlChar
</a> * elt)
<br />
127 </pre><p>Checks whether an HTML element may be a direct child of a parent element. Note - doesn't check for deprecated elements
</p>
128 <div class=
"variablelist"><table border=
"0"><col align=
"left" /><tbody><tr><td><span class=
"term"><i><tt>parent
</tt></i>:
</span></td><td>HTML parent element
</td></tr><tr><td><span class=
"term"><i><tt>elt
</tt></i>:
</span></td><td>HTML element
</td></tr><tr><td><span class=
"term"><i><tt>Returns
</tt></i>:
</span></td><td>1 if allowed;
0 otherwise.
</td></tr></tbody></table></div><h3><a name=
"htmlElementStatusHere" id=
"htmlElementStatusHere"></a>Function: htmlElementStatusHere
</h3><pre class=
"programlisting"><a href=
"libxml-HTMLparser.html#htmlStatus">htmlStatus
</a> htmlElementStatusHere (const
<a href=
"libxml-HTMLparser.html#htmlElemDesc">htmlElemDesc
</a> * parent,
<br /> const
<a href=
"libxml-HTMLparser.html#htmlElemDesc">htmlElemDesc
</a> * elt)
<br />
129 </pre><p>Checks whether an HTML element may be a direct child of a parent element, and if so whether it is valid or deprecated.
</p>
130 <div class=
"variablelist"><table border=
"0"><col align=
"left" /><tbody><tr><td><span class=
"term"><i><tt>parent
</tt></i>:
</span></td><td>HTML parent element
</td></tr><tr><td><span class=
"term"><i><tt>elt
</tt></i>:
</span></td><td>HTML element
</td></tr><tr><td><span class=
"term"><i><tt>Returns
</tt></i>:
</span></td><td>one of HTML_VALID, HTML_DEPRECATED,
<a href=
"libxml-HTMLparser.html#HTML_INVALID">HTML_INVALID
</a></td></tr></tbody></table></div><h3><a name=
"htmlEncodeEntities" id=
"htmlEncodeEntities"></a>Function: htmlEncodeEntities
</h3><pre class=
"programlisting">int htmlEncodeEntities (unsigned char * out,
<br /> int * outlen,
<br /> const unsigned char * in,
<br /> int * inlen,
<br /> int quoteChar)
<br />
131 </pre><p>Take a block of UTF-
8 chars in and try to convert it to an ASCII plus HTML entities block of chars out.
</p>
132 <div class=
"variablelist"><table border=
"0"><col align=
"left" /><tbody><tr><td><span class=
"term"><i><tt>out
</tt></i>:
</span></td><td>a pointer to an array of bytes to store the result
</td></tr><tr><td><span class=
"term"><i><tt>outlen
</tt></i>:
</span></td><td>the length of @out
</td></tr><tr><td><span class=
"term"><i><tt>in
</tt></i>:
</span></td><td>a pointer to an array of UTF-
8 chars
</td></tr><tr><td><span class=
"term"><i><tt>inlen
</tt></i>:
</span></td><td>the length of @in
</td></tr><tr><td><span class=
"term"><i><tt>quoteChar
</tt></i>:
</span></td><td>the quote character to escape (' or
") or zero.</td></tr><tr><td><span class="term
"><i><tt>Returns</tt></i>:</span></td><td>0 if success, -2 if the transcoding fails, or -1 otherwise. The value of @inlen after return is the number of octets consumed if the return value is positive, else unpredictable. The value of @outlen after return is the number of octets produced.</td></tr></tbody></table></div><h3><a name="htmlEntityLookup
" id="htmlEntityLookup
"></a>Function: htmlEntityLookup</h3><pre class="programlisting
">const <a href="libxml-HTMLparser.html#htmlEntityDesc
">htmlEntityDesc</a> * htmlEntityLookup (const <a href="libxml-xmlstring.html#xmlChar
">xmlChar</a> * name)<br />
133 </pre><p>Lookup the given entity in EntitiesTable. TODO: the linear scan is really ugly, a hash table is really needed.</p>
134 <div class="variablelist
"><table border="0"><col align="left
" /><tbody><tr><td><span class="term
"><i><tt>name</tt></i>:</span></td><td>the entity name</td></tr><tr><td><span class="term
"><i><tt>Returns</tt></i>:</span></td><td>the associated <a href="libxml-HTMLparser.html#htmlEntityDescPtr
">htmlEntityDescPtr</a> if found, NULL otherwise.</td></tr></tbody></table></div><h3><a name="htmlEntityValueLookup
" id="htmlEntityValueLookup
"></a>Function: htmlEntityValueLookup</h3><pre class="programlisting
">const <a href="libxml-HTMLparser.html#htmlEntityDesc
">htmlEntityDesc</a> * htmlEntityValueLookup (unsigned int value)<br />
135 </pre><p>Lookup the given entity in EntitiesTable. TODO: the linear scan is really ugly, a hash table is really needed.</p>
136 <div class="variablelist
"><table border="0"><col align="left
" /><tbody><tr><td><span class="term
"><i><tt>value</tt></i>:</span></td><td>the entity's unicode value</td></tr><tr><td><span class="term
"><i><tt>Returns</tt></i>:</span></td><td>the associated <a href="libxml-HTMLparser.html#htmlEntityDescPtr
">htmlEntityDescPtr</a> if found, NULL otherwise.</td></tr></tbody></table></div><h3><a name="htmlFreeParserCtxt
" id="htmlFreeParserCtxt
"></a>Function: htmlFreeParserCtxt</h3><pre class="programlisting
">void htmlFreeParserCtxt (<a href="libxml-HTMLparser.html#htmlParserCtxtPtr
">htmlParserCtxtPtr</a> ctxt)<br />
137 </pre><p>Free all the memory used by a parser context. However the parsed document in ctxt->myDoc is not freed.</p>
138 <div class="variablelist
"><table border="0"><col align="left
" /><tbody><tr><td><span class="term
"><i><tt>ctxt</tt></i>:</span></td><td>an HTML parser context</td></tr></tbody></table></div><h3><a name="htmlHandleOmittedElem
" id="htmlHandleOmittedElem
"></a>Function: htmlHandleOmittedElem</h3><pre class="programlisting
">int htmlHandleOmittedElem (int val)<br />
139 </pre><p>Set and return the previous value for handling HTML omitted tags.</p>
140 <div class="variablelist
"><table border="0"><col align="left
" /><tbody><tr><td><span class="term
"><i><tt>val</tt></i>:</span></td><td>int 0 or 1</td></tr><tr><td><span class="term
"><i><tt>Returns</tt></i>:</span></td><td>the previous value: 0 for no handling, 1 for auto insertion.</td></tr></tbody></table></div><h3><a name="htmlIsAutoClosed
" id="htmlIsAutoClosed
"></a>Function: htmlIsAutoClosed</h3><pre class="programlisting
">int htmlIsAutoClosed (<a href="libxml-HTMLparser.html#htmlDocPtr
">htmlDocPtr</a> doc, <br /> <a href="libxml-HTMLparser.html#htmlNodePtr
">htmlNodePtr</a> elem)<br />
141 </pre><p>The HTML DTD allows a tag to implicitly close other tags. The list is kept in htmlStartClose array. This function checks if a tag is autoclosed by one of its children.</p>
142 <div class="variablelist
"><table border="0"><col align="left
" /><tbody><tr><td><span class="term
"><i><tt>doc</tt></i>:</span></td><td>the HTML document</td></tr><tr><td><span class="term
"><i><tt>elem</tt></i>:</span></td><td>the HTML element</td></tr><tr><td><span class="term
"><i><tt>Returns</tt></i>:</span></td><td>1 if autoclosed, 0 otherwise</td></tr></tbody></table></div><h3><a name="htmlIsScriptAttribute
" id="htmlIsScriptAttribute
"></a>Function: htmlIsScriptAttribute</h3><pre class="programlisting
">int htmlIsScriptAttribute (const <a href="libxml-xmlstring.html#xmlChar
">xmlChar</a> * name)<br />
143 </pre><p>Check if an <a href="libxml-SAX.html#attribute
">attribute</a> is of content type Script</p>
144 <div class="variablelist
"><table border="0"><col align="left
" /><tbody><tr><td><span class="term
"><i><tt>name</tt></i>:</span></td><td>an <a href="libxml-SAX.html#attribute
">attribute</a> name</td></tr><tr><td><span class="term
"><i><tt>Returns</tt></i>:</span></td><td>1 if the <a href="libxml-SAX.html#attribute
">attribute</a> is a script, 0 otherwise</td></tr></tbody></table></div><h3><a name="htmlNodeStatus
" id="htmlNodeStatus
"></a>Function: htmlNodeStatus</h3><pre class="programlisting
"><a href="libxml-HTMLparser.html#htmlStatus
">htmlStatus</a> htmlNodeStatus (const <a href="libxml-HTMLparser.html#htmlNodePtr
">htmlNodePtr</a> node, <br /> int legacy)<br />
145 </pre><p>Checks whether the tree node is valid. Experimental (the author only uses the HTML enhancements in a SAX parser)</p>
146 <div class="variablelist
"><table border="0"><col align="left
" /><tbody><tr><td><span class="term
"><i><tt>node</tt></i>:</span></td><td>an <a href="libxml-HTMLparser.html#htmlNodePtr
">htmlNodePtr</a> in a tree</td></tr><tr><td><span class="term
"><i><tt>legacy</tt></i>:</span></td><td>whether to allow deprecated elements (YES is faster here for Element nodes)</td></tr><tr><td><span class="term
"><i><tt>Returns</tt></i>:</span></td><td>for Element nodes, a return from <a href="libxml-HTMLparser.html#htmlElementAllowedHere
">htmlElementAllowedHere</a> (if legacy allowed) or <a href="libxml-HTMLparser.html#htmlElementStatusHere
">htmlElementStatusHere</a> (otherwise); for Attribute nodes, a return from <a href="libxml-HTMLparser.html#htmlAttrAllowed
">htmlAttrAllowed</a>; for other nodes, <a href="libxml-HTMLparser.html#HTML_NA
">HTML_NA</a> (no checks performed)</td></tr></tbody></table></div><h3><a name="htmlParseCharRef
" id="htmlParseCharRef
"></a>Function: htmlParseCharRef</h3><pre class="programlisting
">int htmlParseCharRef (<a href="libxml-HTMLparser.html#htmlParserCtxtPtr
">htmlParserCtxtPtr</a> ctxt)<br />
147 </pre><p>parse Reference declarations [66] CharRef ::= '&amp;#' [0-9]+ ';' | '&amp;#x' [0-9a-fA-F]+ ';'</p>
148 <div class="variablelist
"><table border="0"><col align="left
" /><tbody><tr><td><span class="term
"><i><tt>ctxt</tt></i>:</span></td><td>an HTML parser context</td></tr><tr><td><span class="term
"><i><tt>Returns</tt></i>:</span></td><td>the value parsed (as an int)</td></tr></tbody></table></div><h3><a name="htmlParseChunk
" id="htmlParseChunk
"></a>Function: htmlParseChunk</h3><pre class="programlisting
">int htmlParseChunk (<a href="libxml-HTMLparser.html#htmlParserCtxtPtr
">htmlParserCtxtPtr</a> ctxt, <br /> const char * chunk, <br /> int size, <br /> int terminate)<br />
149 </pre><p>Parse a Chunk of memory</p>
150 <div class="variablelist
"><table border="0"><col align="left
" /><tbody><tr><td><span class="term
"><i><tt>ctxt</tt></i>:</span></td><td>an HTML parser context</td></tr><tr><td><span class="term
"><i><tt>chunk</tt></i>:</span></td><td>a char array</td></tr><tr><td><span class="term
"><i><tt>size</tt></i>:</span></td><td>the size in bytes of the chunk</td></tr><tr><td><span class="term
"><i><tt>terminate</tt></i>:</span></td><td>last chunk indicator</td></tr><tr><td><span class="term
"><i><tt>Returns</tt></i>:</span></td><td>zero if no error, the <a href="libxml-xmlerror.html#xmlParserErrors
">xmlParserErrors</a> otherwise.</td></tr></tbody></table></div><h3><a name="htmlParseDoc
" id="htmlParseDoc
"></a>Function: htmlParseDoc</h3><pre class="programlisting
"><a href="libxml-HTMLparser.html#htmlDocPtr
">htmlDocPtr</a> htmlParseDoc (<a href="libxml-xmlstring.html#xmlChar
">xmlChar</a> * cur, <br /> const char * encoding)<br />
151 </pre><p>parse an HTML in-memory document and build a tree.</p>
152 <div class="variablelist
"><table border="0"><col align="left
" /><tbody><tr><td><span class="term
"><i><tt>cur</tt></i>:</span></td><td>a pointer to an array of <a href="libxml-xmlstring.html#xmlChar
">xmlChar</a></td></tr><tr><td><span class="term
"><i><tt>encoding</tt></i>:</span></td><td>a free form C string describing the HTML document encoding, or NULL</td></tr><tr><td><span class="term
"><i><tt>Returns</tt></i>:</span></td><td>the resulting document tree</td></tr></tbody></table></div><h3><a name="htmlParseDocument
" id="htmlParseDocument
"></a>Function: htmlParseDocument</h3><pre class="programlisting
">int htmlParseDocument (<a href="libxml-HTMLparser.html#htmlParserCtxtPtr
">htmlParserCtxtPtr</a> ctxt)<br />
153 </pre><p>parse an HTML document (and build a tree if using the standard SAX interface).</p>
154 <div class="variablelist
"><table border="0"><col align="left
" /><tbody><tr><td><span class="term
"><i><tt>ctxt</tt></i>:</span></td><td>an HTML parser context</td></tr><tr><td><span class="term
"><i><tt>Returns</tt></i>:</span></td><td>0, or -1 in case of error. The parser context is augmented as a result of the parsing.</td></tr></tbody></table></div><h3><a name="htmlParseElement
" id="htmlParseElement
"></a>Function: htmlParseElement</h3><pre class="programlisting
">void htmlParseElement (<a href="libxml-HTMLparser.html#htmlParserCtxtPtr
">htmlParserCtxtPtr</a> ctxt)<br />
155 </pre><p>parse an HTML element, this is highly recursive [39] element ::= EmptyElemTag | STag content ETag [41] Attribute ::= Name Eq AttValue</p>
156 <div class="variablelist
"><table border="0"><col align="left
" /><tbody><tr><td><span class="term
"><i><tt>ctxt</tt></i>:</span></td><td>an HTML parser context</td></tr></tbody></table></div><h3><a name="htmlParseEntityRef
" id="htmlParseEntityRef
"></a>Function: htmlParseEntityRef</h3><pre class="programlisting
">const <a href="libxml-HTMLparser.html#htmlEntityDesc
">htmlEntityDesc</a> * htmlParseEntityRef (<a href="libxml-HTMLparser.html#htmlParserCtxtPtr
">htmlParserCtxtPtr</a> ctxt, <br /> const <a href="libxml-xmlstring.html#xmlChar
">xmlChar</a> ** str)<br />
157 </pre><p>parse an HTML ENTITY reference [68] EntityRef ::= '&amp;' Name ';'</p>
158 <div class="variablelist
"><table border="0"><col align="left
" /><tbody><tr><td><span class="term
"><i><tt>ctxt</tt></i>:</span></td><td>an HTML parser context</td></tr><tr><td><span class="term
"><i><tt>str</tt></i>:</span></td><td>location to store the entity name</td></tr><tr><td><span class="term
"><i><tt>Returns</tt></i>:</span></td><td>the associated <a href="libxml-HTMLparser.html#htmlEntityDescPtr
">htmlEntityDescPtr</a> if found, or NULL otherwise, if non-NULL *str will have to be freed by the caller.</td></tr></tbody></table></div><h3><a name="htmlParseFile
" id="htmlParseFile
"></a>Function: htmlParseFile</h3><pre class="programlisting
"><a href="libxml-HTMLparser.html#htmlDocPtr
">htmlDocPtr</a> htmlParseFile (const char * filename, <br /> const char * encoding)<br />
159 </pre><p>parse an HTML file and build a tree. Automatic support for ZLIB/Compress compressed documents is provided by default if found at compile-time.</p>
160 <div class="variablelist
"><table border="0"><col align="left
" /><tbody><tr><td><span class="term
"><i><tt>filename</tt></i>:</span></td><td>the filename</td></tr><tr><td><span class="term
"><i><tt>encoding</tt></i>:</span></td><td>a free form C string describing the HTML document encoding, or NULL</td></tr><tr><td><span class="term
"><i><tt>Returns</tt></i>:</span></td><td>the resulting document tree</td></tr></tbody></table></div><h3><a name="htmlReadDoc
" id="htmlReadDoc
"></a>Function: htmlReadDoc</h3><pre class="programlisting
"><a href="libxml-HTMLparser.html#htmlDocPtr
">htmlDocPtr</a> htmlReadDoc (const <a href="libxml-xmlstring.html#xmlChar
">xmlChar</a> * cur, <br /> const char * URL, <br /> const char * encoding, <br /> int options)<br />
161 </pre><p>parse an HTML in-memory document and build a tree.</p>
162 <div class="variablelist
"><table border="0"><col align="left
" /><tbody><tr><td><span class="term
"><i><tt>cur</tt></i>:</span></td><td>a pointer to a zero terminated string</td></tr><tr><td><span class="term
"><i><tt>URL</tt></i>:</span></td><td>the base URL to use for the document</td></tr><tr><td><span class="term
"><i><tt>encoding</tt></i>:</span></td><td>the document encoding, or NULL</td></tr><tr><td><span class="term
"><i><tt>options</tt></i>:</span></td><td>a combination of htmlParserOption(s)</td></tr><tr><td><span class="term
"><i><tt>Returns</tt></i>:</span></td><td>the resulting document tree</td></tr></tbody></table></div><h3><a name="htmlReadFd
" id="htmlReadFd
"></a>Function: htmlReadFd</h3><pre class="programlisting
"><a href="libxml-HTMLparser.html#htmlDocPtr
">htmlDocPtr</a> htmlReadFd (int fd, <br /> const char * URL, <br /> const char * encoding, <br /> int options)<br />
163 </pre><p>parse an HTML document from a file descriptor and build a tree.</p>
164 <div class="variablelist
"><table border="0"><col align="left
" /><tbody><tr><td><span class="term
"><i><tt>fd</tt></i>:</span></td><td>an open file descriptor</td></tr><tr><td><span class="term
"><i><tt>URL</tt></i>:</span></td><td>the base URL to use for the document</td></tr><tr><td><span class="term
"><i><tt>encoding</tt></i>:</span></td><td>the document encoding, or NULL</td></tr><tr><td><span class="term
"><i><tt>options</tt></i>:</span></td><td>a combination of htmlParserOption(s)</td></tr><tr><td><span class="term
"><i><tt>Returns</tt></i>:</span></td><td>the resulting document tree</td></tr></tbody></table></div><h3><a name="htmlReadFile
" id="htmlReadFile
"></a>Function: htmlReadFile</h3><pre class="programlisting
"><a href="libxml-HTMLparser.html#htmlDocPtr
">htmlDocPtr</a> htmlReadFile (const char * filename, <br /> const char * encoding, <br /> int options)<br />
165 </pre><p>parse an HTML file from the filesystem or the network.</p>
166 <div class="variablelist
"><table border="0"><col align="left
" /><tbody><tr><td><span class="term
"><i><tt>filename</tt></i>:</span></td><td>a file or URL</td></tr><tr><td><span class="term
"><i><tt>encoding</tt></i>:</span></td><td>the document encoding, or NULL</td></tr><tr><td><span class="term
"><i><tt>options</tt></i>:</span></td><td>a combination of htmlParserOption(s)</td></tr><tr><td><span class="term
"><i><tt>Returns</tt></i>:</span></td><td>the resulting document tree</td></tr></tbody></table></div><h3><a name="htmlReadIO
" id="htmlReadIO
"></a>Function: htmlReadIO</h3><pre class="programlisting
"><a href="libxml-HTMLparser.html#htmlDocPtr
">htmlDocPtr</a> htmlReadIO (<a href="libxml-xmlIO.html#xmlInputReadCallback
">xmlInputReadCallback</a> ioread, <br /> <a href="libxml-xmlIO.html#xmlInputCloseCallback
">xmlInputCloseCallback</a> ioclose, <br /> void * ioctx, <br /> const char * URL, <br /> const char * encoding, <br /> int options)<br />
167 </pre><p>parse an HTML document from I/O functions and source and build a tree.</p>
168 <div class="variablelist
"><table border="0"><col align="left
" /><tbody><tr><td><span class="term
"><i><tt>ioread</tt></i>:</span></td><td>an I/O read function</td></tr><tr><td><span class="term
"><i><tt>ioclose</tt></i>:</span></td><td>an I/O close function</td></tr><tr><td><span class="term
"><i><tt>ioctx</tt></i>:</span></td><td>an I/O handler</td></tr><tr><td><span class="term
"><i><tt>URL</tt></i>:</span></td><td>the base URL to use for the document</td></tr><tr><td><span class="term
"><i><tt>encoding</tt></i>:</span></td><td>the document encoding, or NULL</td></tr><tr><td><span class="term
"><i><tt>options</tt></i>:</span></td><td>a combination of htmlParserOption(s)</td></tr><tr><td><span class="term
"><i><tt>Returns</tt></i>:</span></td><td>the resulting document tree</td></tr></tbody></table></div><h3><a name="htmlReadMemory
" id="htmlReadMemory
"></a>Function: htmlReadMemory</h3><pre class="programlisting
"><a href="libxml-HTMLparser.html#htmlDocPtr
">htmlDocPtr</a> htmlReadMemory (const char * buffer, <br /> int size, <br /> const char * URL, <br /> const char * encoding, <br /> int options)<br />
169 </pre><p>parse an HTML in-memory document and build a tree.</p>
170 <div class="variablelist
"><table border="0"><col align="left
" /><tbody><tr><td><span class="term
"><i><tt>buffer</tt></i>:</span></td><td>a pointer to a char array</td></tr><tr><td><span class="term
"><i><tt>size</tt></i>:</span></td><td>the size of the array</td></tr><tr><td><span class="term
"><i><tt>URL</tt></i>:</span></td><td>the base URL to use for the document</td></tr><tr><td><span class="term
"><i><tt>encoding</tt></i>:</span></td><td>the document encoding, or NULL</td></tr><tr><td><span class="term
"><i><tt>options</tt></i>:</span></td><td>a combination of htmlParserOption(s)</td></tr><tr><td><span class="term
"><i><tt>Returns</tt></i>:</span></td><td>the resulting document tree</td></tr></tbody></table></div><h3><a name="htmlSAXParseDoc
" id="htmlSAXParseDoc
"></a>Function: htmlSAXParseDoc</h3><pre class="programlisting
"><a href="libxml-HTMLparser.html#htmlDocPtr
">htmlDocPtr</a> htmlSAXParseDoc (<a href="libxml-xmlstring.html#xmlChar
">xmlChar</a> * cur, <br /> const char * encoding, <br /> <a href="libxml-HTMLparser.html#htmlSAXHandlerPtr
">htmlSAXHandlerPtr</a> sax, <br /> void * userData)<br />
171 </pre><p>Parse an HTML in-memory document. If sax is not NULL, use the SAX callbacks to handle parse events. If sax is NULL, fallback to the default DOM behavior and return a tree.</p>
172 <div class="variablelist
"><table border="0"><col align="left
" /><tbody><tr><td><span class="term
"><i><tt>cur</tt></i>:</span></td><td>a pointer to an array of <a href="libxml-xmlstring.html#xmlChar
">xmlChar</a></td></tr><tr><td><span class="term
"><i><tt>encoding</tt></i>:</span></td><td>a free form C string describing the HTML document encoding, or NULL</td></tr><tr><td><span class="term
"><i><tt>sax</tt></i>:</span></td><td>the SAX handler block</td></tr><tr><td><span class="term
"><i><tt>userData</tt></i>:</span></td><td>if using SAX, this pointer will be provided on callbacks.</td></tr><tr><td><span class="term
"><i><tt>Returns</tt></i>:</span></td><td>the resulting document tree unless SAX is NULL or the document is not well formed.</td></tr></tbody></table></div><h3><a name="htmlSAXParseFile
" id="htmlSAXParseFile
"></a>Function: htmlSAXParseFile</h3><pre class="programlisting
"><a href="libxml-HTMLparser.html#htmlDocPtr
">htmlDocPtr</a> htmlSAXParseFile (const char * filename, <br /> const char * encoding, <br /> <a href="libxml-HTMLparser.html#htmlSAXHandlerPtr
">htmlSAXHandlerPtr</a> sax, <br /> void * userData)<br />
173 </pre><p>parse an HTML file and build a tree. Automatic support for ZLIB/Compress compressed documents is provided by default if found at compile-time. It uses the given SAX function block to handle the parsing callbacks. If sax is NULL, fall back to the default DOM tree building routines.</p>
174 <div class="variablelist
"><table border="0"><col align="left
" /><tbody><tr><td><span class="term
"><i><tt>filename</tt></i>:</span></td><td>the filename</td></tr><tr><td><span class="term
"><i><tt>encoding</tt></i>:</span></td><td>a free form C string describing the HTML document encoding, or NULL</td></tr><tr><td><span class="term
"><i><tt>sax</tt></i>:</span></td><td>the SAX handler block</td></tr><tr><td><span class="term
"><i><tt>userData</tt></i>:</span></td><td>if using SAX, this pointer will be provided on callbacks.</td></tr><tr><td><span class="term
"><i><tt>Returns</tt></i>:</span></td><td>the resulting document tree unless SAX is NULL or the document is not well formed.</td></tr></tbody></table></div><h3><a name="htmlTagLookup
" id="htmlTagLookup
"></a>Function: htmlTagLookup</h3><pre class="programlisting
">const <a href="libxml-HTMLparser.html#htmlElemDesc
">htmlElemDesc</a> * htmlTagLookup (const <a href="libxml-xmlstring.html#xmlChar
">xmlChar</a> * tag)<br />
175 </pre><p>Lookup the HTML tag in the ElementTable</p>
176 <div class="variablelist
"><table border="0"><col align="left
" /><tbody><tr><td><span class="term
"><i><tt>tag</tt></i>:</span></td><td>The tag name in lowercase</td></tr><tr><td><span class="term
"><i><tt>Returns</tt></i>:</span></td><td>the related <a href="libxml-HTMLparser.html#htmlElemDescPtr
">htmlElemDescPtr</a> or NULL if not found.</td></tr></tbody></table></div><p><a href="../bugs.html
">Daniel Veillard</a></p></td></tr></table></td></tr></table></td></tr></table></td></tr></table></td></tr></table></body></html>