public class HTMLParser
extends java.lang.Object
Modifier and Type | Field and Description |
---|---|
static java.lang.String |
DEFAULT_ATTR_LINK_TAG |
Constructor and Description |
---|
HTMLParser(android.content.Context context)
Constructor.
|
HTMLParser(java.lang.String document,
android.content.Context context)
Constructor.
|
Modifier and Type | Method and Description |
---|---|
void |
filterByTag(java.lang.String tag)
Keep only the content that is under the selected tag.
|
java.lang.String |
getContent()
Get the html document as a string.
|
org.jsoup.nodes.Element |
getDocument()
Return the document as an org.jsoup.nodes.Element instance.
|
java.lang.String |
getURLbyText(java.lang.String text)
Recover the URL in the 'href' attribute, matching the passed text.
|
java.lang.String |
getURLbyText(java.lang.String text,
java.lang.String attribute)
Recover the URL attribute matching the passed text.
|
void |
removeLinks()
Delete all the links (tag 'a') from the document.
|
void |
setBaseDocument(java.io.InputStream htmlBase,
java.lang.String charsetName,
java.lang.String baseUri)
Injects the document element into the base document.
|
void |
setContent(java.io.InputStream is)
Reset the content of the document.
|
void |
setContent(java.lang.String document)
Reset the content of the document.
|
public static final java.lang.String DEFAULT_ATTR_LINK_TAG
public HTMLParser(android.content.Context context)
context - The application context.
java.lang.IllegalArgumentException - if context is null.
public HTMLParser(java.lang.String document, android.content.Context context)
document - HTML content to manipulate.
context - The application context.
java.lang.IllegalArgumentException - if context is null.
public java.lang.String getURLbyText(java.lang.String text) throws java.lang.Exception
text - where to search the URL.
java.lang.Exception
public java.lang.String getURLbyText(java.lang.String text, java.lang.String attribute) throws java.lang.Exception
text - where to search the URL.
attribute - attribute that owns the URL ('href' in most cases).
java.lang.IllegalArgumentException - if text or attribute is null.
java.lang.NoSuchFieldException
java.lang.Exception
public void filterByTag(java.lang.String tag) throws java.lang.Exception
tag - selected tag.
java.lang.IllegalArgumentException - if tag is null.
java.lang.NoSuchFieldException
java.lang.Exception
public void removeLinks()
public void setBaseDocument(java.io.InputStream htmlBase, java.lang.String charsetName, java.lang.String baseUri) throws java.lang.Exception
htmlBase - base HTML document (it must have a basic HTML structure: html/body).
charsetName - charset to use, for example 'UTF-8'.
baseUri - URI of the base document.
java.lang.IllegalArgumentException - if htmlBase or baseUri is null.
java.lang.Exception
public java.lang.String getContent()
public void setContent(java.lang.String document)
document - The HTML document to parse.
java.lang.IllegalArgumentException - if document is null.
public void setContent(java.io.InputStream is) throws java.io.IOException
is - input stream supplying the new content of the document.
java.io.IOException
public org.jsoup.nodes.Element getDocument()