Public Member Functions | |
__construct (InputStream $stream) | |
suppressWhitespaces ($isSuppressWhitespaces) | |
lowercaseAttributes ($isLowercaseAttributes) | |
lowercaseTags ($isLowercaseTags) | |
nextToken () | |
getErrors () | |
isInlineTag ($id) | |
Static Public Member Functions | |
static | create (InputStream $stream) |
static | isIdFirstChar ($char) |
static | isIdChar ($char) |
static | isValidId ($id) |
static | isSpacerChar ($char) |
static | removeWhitespaces (Cdata $cdata) |
Public Attributes | |
const | INITIAL_STATE = 1 |
const | START_TAG_STATE = 2 |
const | END_TAG_STATE = 3 |
const | INSIDE_TAG_STATE = 4 |
const | ATTR_NAME_STATE = 5 |
const | WAITING_EQUAL_SIGN_STATE = 6 |
const | ATTR_VALUE_STATE = 7 |
const | CDATA_STATE = 8 |
const | COMMENT_STATE = 9 |
const | INLINE_TAG_STATE = 10 |
const | EXTERNAL_TAG_STATE = 11 |
const | DOCTYPE_TAG_STATE = 12 |
const | FINAL_STATE = 42 |
const | SPACER_MASK = '[ \r\n\t]' |
const | ID_FIRST_CHAR_MASK = '[A-Za-z]' |
const | ID_CHAR_MASK = '[-_:.A-Za-z0-9]' |
Private Member Functions | |
getNextChar () | |
getChars ($count) | |
mark () | |
reset () | |
skip ($count) | |
lookAhead ($count) | |
skipString ($string, $skipSpaces=false) | |
makeTag () | |
setupTag (SgmlTag $tag) | |
handleState () | |
dumpBuffer () | |
checkSpecialTagState () | |
outsideTagState () | |
createOpenTag () | |
startTagState () | |
dumpEndTag () | |
endTagState () | |
insideTagState () | |
dumpAttribute () | |
attrNameState () | |
waitingEqualSignState () | |
attrValueState () | |
inlineTagState () | |
cdataState () | |
getComment () | |
commentState () | |
externalTagState () | |
doctypeTagState () | |
getContentToSubstring ($substring, $ignoreCase=false) | |
Uses the Knuth-Morris-Pratt algorithm. | |
getTextualPosition () | |
warning ($message) | |
error ($message) | |
Static Private Member Functions | |
static | optionalLowercase ($string, $ignoreCase) |
Private Attributes | |
$inlineTags = array('style', 'script', 'textarea') | |
$stream = null | |
$char = null | |
$line = 1 | |
$linePosition = 1 | |
$previousChar = null | |
$mark = null | |
$state = self::INITIAL_STATE | |
$tags = array() | |
$errors = array() | |
$buffer = null | |
$tagId = null | |
$tag = null | |
$completeTag = null | |
$previousTag = null | |
$attrName = null | |
$attrValue = null | |
$insideQuote = null | |
$substringFound = false | |
$suppressWhitespaces = false | |
$lowercaseAttributes = false | |
$lowercaseTags = false |
Definition at line 15 of file HtmlTokenizer.class.php.
HtmlTokenizer::__construct | ( | InputStream $ | stream | ) |
Definition at line 73 of file HtmlTokenizer.class.php.
References getNextChar().
HtmlTokenizer::attrNameState | ( | ) | [private] |
Definition at line 800 of file HtmlTokenizer.class.php.
References $char, dumpAttribute(), error(), getNextChar(), Assert::isNotNull(), Assert::isNull(), Assert::isTrue(), and makeTag().
Referenced by handleState().
HtmlTokenizer::attrValueState | ( | ) | [private] |
Definition at line 927 of file HtmlTokenizer.class.php.
References dumpAttribute(), error(), getNextChar(), Assert::isNotNull(), Assert::isNull(), Assert::isTrue(), makeTag(), mark(), reset(), and warning().
Referenced by handleState().
HtmlTokenizer::cdataState | ( | ) | [private] |
Definition at line 1113 of file HtmlTokenizer.class.php.
References create(), error(), getContentToSubstring(), Assert::isNull(), and makeTag().
Referenced by handleState().
HtmlTokenizer::checkSpecialTagState | ( | ) | [private] |
Definition at line 427 of file HtmlTokenizer.class.php.
References $state, $tag, and skipString().
Referenced by outsideTagState().
HtmlTokenizer::commentState | ( | ) | [private] |
Definition at line 1164 of file HtmlTokenizer.class.php.
References SgmlIgnoredTag::comment(), Cdata::create(), getComment(), Assert::isNull(), and makeTag().
Referenced by handleState().
static HtmlTokenizer::create | ( | InputStream $ | stream | ) | [static] |
Definition at line 83 of file HtmlTokenizer.class.php.
Referenced by cdataState(), dumpBuffer(), and dumpEndTag().
HtmlTokenizer::createOpenTag | ( | ) | [private] |
Definition at line 523 of file HtmlTokenizer.class.php.
References SgmlOpenTag::create(), error(), lowercaseTags(), and setupTag().
Referenced by startTagState().
HtmlTokenizer::doctypeTagState | ( | ) | [private] |
Definition at line 1216 of file HtmlTokenizer.class.php.
References Cdata::create(), error(), getContentToSubstring(), Assert::isTrue(), and makeTag().
Referenced by handleState().
HtmlTokenizer::dumpAttribute | ( | ) | [private] |
Definition at line 778 of file HtmlTokenizer.class.php.
References error(), and warning().
Referenced by attrNameState(), attrValueState(), and waitingEqualSignState().
HtmlTokenizer::dumpBuffer | ( | ) | [private] |
Definition at line 414 of file HtmlTokenizer.class.php.
References create(), and makeTag().
Referenced by inlineTagState(), and outsideTagState().
HtmlTokenizer::dumpEndTag | ( | ) | [private] |
Definition at line 622 of file HtmlTokenizer.class.php.
References create(), error(), lowercaseTags(), makeTag(), and warning().
Referenced by endTagState().
HtmlTokenizer::endTagState | ( | ) | [private] |
Definition at line 644 of file HtmlTokenizer.class.php.
References dumpEndTag(), error(), getNextChar(), Assert::isNull(), and Assert::isTrue().
Referenced by handleState().
HtmlTokenizer::error | ( | $ | message | ) | [private] |
Definition at line 1326 of file HtmlTokenizer.class.php.
Referenced by attrNameState(), attrValueState(), cdataState(), createOpenTag(), doctypeTagState(), dumpAttribute(), dumpEndTag(), endTagState(), externalTagState(), getComment(), inlineTagState(), insideTagState(), startTagState(), and waitingEqualSignState().
HtmlTokenizer::externalTagState | ( | ) | [private] |
Definition at line 1183 of file HtmlTokenizer.class.php.
References Cdata::create(), error(), getContentToSubstring(), Assert::isTrue(), makeTag(), mark(), and reset().
Referenced by handleState().
HtmlTokenizer::getChars | ( | $ | count | ) | [private] |
Definition at line 234 of file HtmlTokenizer.class.php.
References getNextChar().
Referenced by skipString().
HtmlTokenizer::getComment | ( | ) | [private] |
Definition at line 1137 of file HtmlTokenizer.class.php.
References error(), getContentToSubstring(), mark(), and reset().
Referenced by commentState().
HtmlTokenizer::getContentToSubstring | ( | $ | substring, | |
$ | ignoreCase = false | |||
) | [private] |
Uses the Knuth-Morris-Pratt algorithm.
If $substring is not found, returns the whole remaining content.
Definition at line 1239 of file HtmlTokenizer.class.php.
References $buffer, $char, getNextChar(), and optionalLowercase().
Referenced by cdataState(), doctypeTagState(), externalTagState(), getComment(), and inlineTagState().
HtmlTokenizer::getErrors | ( | ) |
Definition at line 145 of file HtmlTokenizer.class.php.
HtmlTokenizer::getNextChar | ( | ) | [private] |
Definition at line 212 of file HtmlTokenizer.class.php.
Referenced by __construct(), attrNameState(), attrValueState(), endTagState(), getChars(), getContentToSubstring(), inlineTagState(), insideTagState(), outsideTagState(), skip(), skipString(), startTagState(), and waitingEqualSignState().
HtmlTokenizer::getTextualPosition | ( | ) | [private] |
Definition at line 1301 of file HtmlTokenizer.class.php.
HtmlTokenizer::handleState | ( | ) | [private] |
Definition at line 364 of file HtmlTokenizer.class.php.
References attrNameState(), attrValueState(), cdataState(), commentState(), doctypeTagState(), endTagState(), externalTagState(), inlineTagState(), insideTagState(), isInlineTag(), outsideTagState(), startTagState(), and waitingEqualSignState().
Referenced by nextToken().
HtmlTokenizer::inlineTagState | ( | ) | [private] |
TODO: some browsers expect cdata and parse it as well. TODO: browsers handle comments in a more complex way; figure it out.
Definition at line 1044 of file HtmlTokenizer.class.php.
References dumpBuffer(), error(), getContentToSubstring(), getNextChar(), Assert::isNull(), and skipString().
Referenced by handleState().
HtmlTokenizer::insideTagState | ( | ) | [private] |
Definition at line 703 of file HtmlTokenizer.class.php.
References $char, error(), getNextChar(), Assert::isNotNull(), Assert::isNull(), Assert::isTrue(), and makeTag().
Referenced by handleState().
static HtmlTokenizer::isIdChar | ( | $ | char | ) | [static] |
Definition at line 155 of file HtmlTokenizer.class.php.
References $char.
static HtmlTokenizer::isIdFirstChar | ( | $ | char | ) | [static] |
Definition at line 150 of file HtmlTokenizer.class.php.
References $char.
HtmlTokenizer::isInlineTag | ( | $ | id | ) |
Definition at line 199 of file HtmlTokenizer.class.php.
Referenced by handleState().
static HtmlTokenizer::isSpacerChar | ( | $ | char | ) | [static] |
Definition at line 170 of file HtmlTokenizer.class.php.
References $char.
static HtmlTokenizer::isValidId | ( | $ | id | ) | [static] |
Definition at line 160 of file HtmlTokenizer.class.php.
HtmlTokenizer::lookAhead | ( | $ | count | ) | [private] |
Definition at line 292 of file HtmlTokenizer.class.php.
HtmlTokenizer::lowercaseAttributes | ( | $ | isLowercaseAttributes | ) |
Definition at line 103 of file HtmlTokenizer.class.php.
References Assert::isBoolean().
HtmlTokenizer::lowercaseTags | ( | $ | isLowercaseTags | ) |
Definition at line 115 of file HtmlTokenizer.class.php.
References Assert::isBoolean().
Referenced by createOpenTag(), and dumpEndTag().
HtmlTokenizer::makeTag | ( | ) | [private] |
Definition at line 328 of file HtmlTokenizer.class.php.
References Assert::isNotNull(), Assert::isNull(), and suppressWhitespaces().
Referenced by attrNameState(), attrValueState(), cdataState(), commentState(), doctypeTagState(), dumpBuffer(), dumpEndTag(), externalTagState(), insideTagState(), startTagState(), and waitingEqualSignState().
HtmlTokenizer::mark | ( | ) | [private] |
Definition at line 252 of file HtmlTokenizer.class.php.
Referenced by attrValueState(), externalTagState(), getComment(), reset(), and skipString().
HtmlTokenizer::nextToken | ( | ) |
Definition at line 127 of file HtmlTokenizer.class.php.
References handleState().
static HtmlTokenizer::optionalLowercase | ( | $ | string, | |
$ | ignoreCase | |||
) | [static, private] |
Definition at line 204 of file HtmlTokenizer.class.php.
Referenced by getContentToSubstring().
HtmlTokenizer::outsideTagState | ( | ) | [private] |
Definition at line 447 of file HtmlTokenizer.class.php.
References checkSpecialTagState(), dumpBuffer(), getNextChar(), Assert::isNull(), Assert::isUnreachable(), and warning().
Referenced by handleState().
static HtmlTokenizer::removeWhitespaces | ( | Cdata $ | cdata | ) | [static] |
Definition at line 175 of file HtmlTokenizer.class.php.
References Cdata::getData(), and Cdata::setData().
HtmlTokenizer::reset | ( | ) | [private] |
Definition at line 267 of file HtmlTokenizer.class.php.
References Assert::isNotNull(), and mark().
Referenced by attrValueState(), externalTagState(), getComment(), and skipString().
HtmlTokenizer::setupTag | ( | SgmlTag $ | tag | ) | [private] |
Definition at line 352 of file HtmlTokenizer.class.php.
References Assert::isNotNull(), Assert::isNull(), and SgmlTag::setId().
Referenced by createOpenTag(), and startTagState().
HtmlTokenizer::skip | ( | $ | count | ) | [private] |
Definition at line 284 of file HtmlTokenizer.class.php.
References getNextChar().
HtmlTokenizer::skipString | ( | $ | string, | |
$ | skipSpaces = false | |||
) | [private] |
Definition at line 303 of file HtmlTokenizer.class.php.
References getChars(), getNextChar(), mark(), and reset().
Referenced by checkSpecialTagState(), and inlineTagState().
HtmlTokenizer::startTagState | ( | ) | [private] |
Definition at line 534 of file HtmlTokenizer.class.php.
References $char, SgmlIgnoredTag::create(), createOpenTag(), error(), getNextChar(), Assert::isNotNull(), Assert::isNull(), makeTag(), and setupTag().
Referenced by handleState().
HtmlTokenizer::suppressWhitespaces | ( | $ | isSuppressWhitespaces | ) |
Definition at line 91 of file HtmlTokenizer.class.php.
References Assert::isBoolean().
Referenced by makeTag().
HtmlTokenizer::waitingEqualSignState | ( | ) | [private] |
Definition at line 879 of file HtmlTokenizer.class.php.
References dumpAttribute(), error(), getNextChar(), Assert::isNotNull(), Assert::isNull(), Assert::isTrue(), and makeTag().
Referenced by handleState().
HtmlTokenizer::warning | ( | $ | message | ) | [private] |
Definition at line 1315 of file HtmlTokenizer.class.php.
Referenced by attrValueState(), dumpAttribute(), dumpEndTag(), and outsideTagState().
HtmlTokenizer::$attrName = null [private] |
Definition at line 63 of file HtmlTokenizer.class.php.
HtmlTokenizer::$attrValue = null [private] |
Definition at line 64 of file HtmlTokenizer.class.php.
HtmlTokenizer::$buffer = null [private] |
Definition at line 55 of file HtmlTokenizer.class.php.
Referenced by getContentToSubstring().
HtmlTokenizer::$char = null [private] |
Definition at line 41 of file HtmlTokenizer.class.php.
Referenced by attrNameState(), getContentToSubstring(), insideTagState(), isIdChar(), isIdFirstChar(), isSpacerChar(), and startTagState().
HtmlTokenizer::$completeTag = null [private] |
Definition at line 60 of file HtmlTokenizer.class.php.
HtmlTokenizer::$errors = array() [private] |
Definition at line 53 of file HtmlTokenizer.class.php.
HtmlTokenizer::$inlineTags = array('style', 'script', 'textarea') [private] |
Definition at line 37 of file HtmlTokenizer.class.php.
HtmlTokenizer::$insideQuote = null [private] |
Definition at line 65 of file HtmlTokenizer.class.php.
HtmlTokenizer::$line = 1 [private] |
Definition at line 44 of file HtmlTokenizer.class.php.
HtmlTokenizer::$linePosition = 1 [private] |
Definition at line 45 of file HtmlTokenizer.class.php.
HtmlTokenizer::$lowercaseAttributes = false [private] |
Definition at line 70 of file HtmlTokenizer.class.php.
HtmlTokenizer::$lowercaseTags = false [private] |
Definition at line 71 of file HtmlTokenizer.class.php.
HtmlTokenizer::$mark = null [private] |
Definition at line 48 of file HtmlTokenizer.class.php.
HtmlTokenizer::$previousChar = null [private] |
Definition at line 46 of file HtmlTokenizer.class.php.
HtmlTokenizer::$previousTag = null [private] |
Definition at line 61 of file HtmlTokenizer.class.php.
HtmlTokenizer::$state = self::INITIAL_STATE [private] |
Definition at line 50 of file HtmlTokenizer.class.php.
Referenced by checkSpecialTagState().
HtmlTokenizer::$stream = null [private] |
Definition at line 39 of file HtmlTokenizer.class.php.
HtmlTokenizer::$substringFound = false [private] |
Definition at line 67 of file HtmlTokenizer.class.php.
HtmlTokenizer::$suppressWhitespaces = false [private] |
Definition at line 69 of file HtmlTokenizer.class.php.
HtmlTokenizer::$tag = null [private] |
Definition at line 59 of file HtmlTokenizer.class.php.
Referenced by checkSpecialTagState().
HtmlTokenizer::$tagId = null [private] |
Definition at line 57 of file HtmlTokenizer.class.php.
HtmlTokenizer::$tags = array() [private] |
Definition at line 52 of file HtmlTokenizer.class.php.
const HtmlTokenizer::ATTR_NAME_STATE = 5 |
Definition at line 21 of file HtmlTokenizer.class.php.
const HtmlTokenizer::ATTR_VALUE_STATE = 7 |
Definition at line 23 of file HtmlTokenizer.class.php.
const HtmlTokenizer::CDATA_STATE = 8 |
Definition at line 25 of file HtmlTokenizer.class.php.
const HtmlTokenizer::COMMENT_STATE = 9 |
Definition at line 26 of file HtmlTokenizer.class.php.
const HtmlTokenizer::DOCTYPE_TAG_STATE = 12 |
Definition at line 29 of file HtmlTokenizer.class.php.
const HtmlTokenizer::END_TAG_STATE = 3 |
Definition at line 19 of file HtmlTokenizer.class.php.
const HtmlTokenizer::EXTERNAL_TAG_STATE = 11 |
Definition at line 28 of file HtmlTokenizer.class.php.
const HtmlTokenizer::FINAL_STATE = 42 |
Definition at line 31 of file HtmlTokenizer.class.php.
const HtmlTokenizer::ID_CHAR_MASK = '[-_:.A-Za-z0-9]' |
Definition at line 35 of file HtmlTokenizer.class.php.
const HtmlTokenizer::ID_FIRST_CHAR_MASK = '[A-Za-z]' |
Definition at line 34 of file HtmlTokenizer.class.php.
const HtmlTokenizer::INITIAL_STATE = 1 |
Definition at line 17 of file HtmlTokenizer.class.php.
const HtmlTokenizer::INLINE_TAG_STATE = 10 |
Definition at line 27 of file HtmlTokenizer.class.php.
const HtmlTokenizer::INSIDE_TAG_STATE = 4 |
Definition at line 20 of file HtmlTokenizer.class.php.
const HtmlTokenizer::SPACER_MASK = '[ \r\n\t]' |
Definition at line 33 of file HtmlTokenizer.class.php.
const HtmlTokenizer::START_TAG_STATE = 2 |
Definition at line 18 of file HtmlTokenizer.class.php.
const HtmlTokenizer::WAITING_EQUAL_SIGN_STATE = 6 |
Definition at line 22 of file HtmlTokenizer.class.php.