Go to the documentation of this file.00001 <?php
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
/**
 * Splits an OQL query string into a flat list of tokens and exposes a
 * movable cursor (index, current token, previous token) over that list.
 *
 * Tokens are built through OqlToken::make() and carry the line number
 * (1-based) and the in-line position (0-based, counted in characters) at
 * which they start.
 */
final class OqlTokenizer
{
    private $tokens = array();
    private $tokensCount = 0;
    private $token = null;
    private $prevToken = null;
    private $index = -1;

    // Regular expression fragments, keyed by token type. tokenize() joins
    // them into one alternation with a capturing group per entry, in this
    // order, and derives the token type from the index of the group that
    // matched; the OqlToken type constants are therefore expected to equal
    // the 1-based position of their mask in this list.
    private static $masks = array(
        OqlToken::NEW_LINE =>
            '\n',

        // "double", 'single' or `backtick` quoted, with backslash escapes
        OqlToken::STRING =>
            '"[^"\\\]*(?:\\\.[^"\\\]*)*"|\'[^\'\\\]*(?:\\\.[^\'\\\]*)*\'|`[^`\\\]*(?:\\\.[^`\\\]*)*`',

        // unsigned integer or float, optional exponent
        OqlToken::NUMBER =>
            '(?:\b[\d]+)?\.?[\d]+(?:[eE][-+]?[\d]+)?\b',

        OqlToken::BOOLEAN =>
            '\b(?:true|false)\b',

        OqlToken::NULL =>
            '\bnull\b',

        // positional placeholder: $1, $2, ...
        OqlToken::SUBSTITUTION =>
            '\$[\d]+',

        // two-word keywords may be separated by any whitespace, including
        // line breaks
        OqlToken::KEYWORD =>
            '\b(?:as|distinct|from|where|not|and|or|in|like|ilike|similar\s+to|between|is|group\s+by|order\s+by|asc|desc|having|limit|offset)\b',

        OqlToken::AGGREGATE_FUNCTION =>
            '\b(?:sum|avg|min|max|count)\b',

        // dotted path of names; every segment may be a single character
        // (the previous mask demanded two or more characters after a dot,
        // so "a.b" was split into "a" and "b" and the dot was lost)
        OqlToken::IDENTIFIER =>
            '\b[a-zA-Z_][a-zA-Z\d_]*(?:\.[a-zA-Z_][a-zA-Z\d_]*)*\b',

        OqlToken::PARENTHESES =>
            '[\(\)]',

        OqlToken::PUNCTUATION =>
            ',',

        OqlToken::COMPARISON_OPERATOR =>
            '>\=|<\=|<>|>|<|\!\=|\=',

        OqlToken::ARITHMETIC_OPERATOR =>
            '\+|\-|\/|\*'
    );

    /**
     * Tokenizes the given query immediately; the cursor starts before the
     * first token (index -1).
     *
     * @param string $string query text
     */
    public function __construct($string)
    {
        $this->tokenize($string);
    }

    /**
     * @return array all tokens in source order
     */
    public function getList()
    {
        return $this->tokens;
    }

    /**
     * Line of the current token, or of the previous one when there is no
     * current token.
     *
     * @return mixed whatever the token's getLine() returns, or null when
     *               neither token is available
     */
    public function getLine()
    {
        $token = $this->getCurrentOrPrevious();

        return $token ? $token->getLine() : null;
    }

    /**
     * Position of the current token, or of the previous one when there is
     * no current token.
     *
     * @return mixed whatever the token's getPosition() returns, or null
     *               when neither token is available
     */
    public function getPosition()
    {
        $token = $this->getCurrentOrPrevious();

        return $token ? $token->getPosition() : null;
    }

    /**
     * @return int cursor index; -1 means "before the first token"
     */
    public function getIndex()
    {
        return $this->index;
    }

    /**
     * Moves the cursor. The index is clamped to the range
     * [-1, tokensCount - 1]; current and previous tokens are refreshed
     * from the clamped index.
     *
     * @param int $index
     * @return OqlTokenizer $this
     */
    public function setIndex($index)
    {
        if ($index > $this->tokensCount - 1) {
            $index = $this->tokensCount - 1;

        } elseif ($index < -1) {
            $index = -1;
        }

        $this->index = $index;
        $this->token = $this->getByIndex($this->index);
        $this->prevToken = $this->getByIndex($this->index - 1);

        return $this;
    }

    /**
     * @return mixed current token, or null when there is none
     */
    public function get()
    {
        return $this->token;
    }

    /**
     * Advances the cursor by one (clamped at the last token).
     *
     * @return mixed the token at the new cursor position, or null
     */
    public function next()
    {
        $this->setIndex($this->index + 1);

        return $this->token;
    }

    /**
     * Moves the cursor back by one (clamped at -1).
     *
     * @return mixed the token at the new cursor position, or null
     */
    public function back()
    {
        $this->setIndex($this->index - 1);

        return $this->token;
    }

    /**
     * Returns the token that follows the cursor without moving the index.
     *
     * NOTE(review): this is not a pure look-ahead. It overwrites the
     * current token with the peeked one (remembering the old current token
     * as previous), so get(), getLine() and getPosition() report the
     * peeked token until the next setIndex()/next()/back() call. Kept as
     * is because callers may rely on it - confirm before changing.
     *
     * @return mixed the following token, or null at the end of the list
     */
    public function peek()
    {
        if ($this->token)
            $this->prevToken = $this->token;

        return $this->token = $this->getByIndex($this->index + 1);
    }

    /**
     * @return mixed the current token if set, otherwise the previous one
     *               (which may be null as well)
     */
    private function getCurrentOrPrevious()
    {
        return $this->token ? $this->token : $this->prevToken;
    }

    /**
     * @param int $index
     * @return mixed token stored at $index, or null when out of range
     */
    private function getByIndex($index)
    {
        return isset($this->tokens[$index]) ? $this->tokens[$index] : null;
    }

    /**
     * Scans $string and fills the token list.
     *
     * preg_match_all() reports byte offsets, while token positions are
     * kept in characters; the difference accumulated by multibyte string
     * literals seen so far is subtracted from every offset.
     *
     * @param string $string
     * @return OqlTokenizer $this
     */
    private function tokenize($string)
    {
        Assert::isString($string);

        // total number of "extra" bytes in the input; zero means the input
        // is single-byte and no offset correction is needed
        $maxMultibyteDelta = strlen($string) - mb_strlen($string);
        $isMultibyte = $maxMultibyteDelta > 0;

        $pattern = '/('.implode(')|(', self::$masks).')/is';
        if ($isMultibyte)
            $pattern .= 'u';

        // initialised up front so the loop below always sees an array,
        // even if matching fails
        $matches = array();

        preg_match_all(
            $pattern,
            $string,
            $matches,
            PREG_SET_ORDER | PREG_OFFSET_CAPTURE
        );

        $line = 1;
        $lineStart = 0;
        $multibyteDelta = 0;

        foreach ($matches as $match) {
            // trailing unmatched groups are not reported, so the last
            // group present is the one that matched
            $type = count($match) - 1;
            $offset = $match[0][1] - $multibyteDelta;

            if ($type == OqlToken::NEW_LINE) {
                $line++;
                $lineStart = $offset + 1;
                continue;
            }

            $value = $match[0][0];
            $position = $offset - $lineStart;

            $this->tokens[] =
                OqlToken::make(
                    self::importTokenValue($value, $type),
                    $value,
                    $type,
                    $line,
                    $position
                );

            // A token may itself span lines (a two-word keyword split
            // across lines, or a multi-line string literal). Account for
            // every line break inside it and restart the line right after
            // the last one, so following tokens get correct coordinates.
            $innerBreaks = substr_count($value, "\n");
            if ($innerBreaks > 0) {
                $line += $innerBreaks;
                $lineStart = $offset + mb_strrpos($value, "\n") + 1;
            }

            // only string literals can contain multibyte characters
            if ($isMultibyte && $type == OqlToken::STRING) {
                $multibyteDelta += (strlen($value) - mb_strlen($value));

                // every multibyte character has been consumed: stop
                // measuring the remaining strings
                if ($multibyteDelta >= $maxMultibyteDelta)
                    $isMultibyte = false;
            }
        }

        $this->tokensCount = count($this->tokens);

        return $this;
    }

    /**
     * Converts the raw matched text into the value stored in the token.
     *
     * @param string $value raw text exactly as matched
     * @param int $type token type (index of the mask that matched)
     * @return mixed converted value; the raw text for types that need no
     *               conversion
     */
    private static function importTokenValue($value, $type)
    {
        switch ($type) {
            case OqlToken::STRING:
                // strip the surrounding quotes and unescape the quote
                // character that was used as delimiter
                $quote = mb_substr($value, 0, 1);

                return mb_ereg_replace(
                    '\\\\'.$quote,
                    $quote,
                    mb_substr($value, 1, mb_strlen($value) - 2)
                );

            case OqlToken::NUMBER:
                return floatval($value);

            case OqlToken::BOOLEAN:
                return strtolower($value) !== 'false';

            case OqlToken::NULL:
                return 'null';

            case OqlToken::AGGREGATE_FUNCTION:
                return strtolower($value);

            case OqlToken::SUBSTITUTION:
                // drop the leading "$"
                return intval(substr($value, 1));

            case OqlToken::KEYWORD:
                // normalise "group   by" / "group\nby" to "group by"
                return strtolower(
                    preg_replace('/\s+/', ' ', $value)
                );

            case OqlToken::COMPARISON_OPERATOR:
                return $value === '<>' ? BinaryExpression::NOT_EQUALS : $value;
        }

        return $value;
    }
}
00281 ?>