<?php

/**
 * Validates and repairs an (X)HTML fragment with the PHP tidy extension.
 *
 * The fragment is wrapped between a configurable document header and a
 * closing "</body></html>" footer, parsed by tidy, and then replaced by the
 * repaired body. Tidy's diagnostics are collected as HTML-escaped text with
 * line numbers shifted so that they refer to the fragment, not to the
 * wrapped document.
 *
 * All setters return $this so that calls can be chained.
 */
final class TidyValidator
{
    /** Fragment to validate; replaced by the repaired fragment afterwards. */
    private $content = null;

    /** Diagnostics of the last validation, or null when there were none. */
    private $messages = null;

    /** Tidy error count of the last validation (null before the first run). */
    private $errorCount = null;

    /** Tidy warning count of the last validation (null before the first run). */
    private $warningCount = null;

    /** Options passed to tidy_parse_string() as its configuration. */
    private $config = array(
        'output-xhtml' => true,
        'doctype' => 'strict',
        'wrap' => 0,
        'quote-marks' => true,
        'drop-empty-paras' => true
    );

    /** Markup placed before the fragment; it must end with the "<body>" tag. */
    private $header = '
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title></title>
</head>
<body>';

    /** Number of lines in $header; subtracted from reported line numbers. */
    private $headerLines = 7;

    /** Encoding name passed to tidy_parse_string(). */
    private $encoding = 'utf8';

    /**
     * Creates a validator with the default configuration.
     *
     * @return TidyValidator
     */
    public static function create()
    {
        return new self;
    }

    /**
     * Sets the fragment to be validated.
     *
     * @param string $content markup that belongs inside the body element
     * @return TidyValidator
     */
    public function setContent($content)
    {
        $this->content = $content;

        return $this;
    }

    /**
     * Returns the current fragment; after validateContent() has run this is
     * the repaired fragment.
     */
    public function getContent()
    {
        return $this->content;
    }

    /**
     * Returns the diagnostics of the last validation, or null if none.
     */
    public function getMessages()
    {
        return $this->messages;
    }

    /**
     * Replaces the whole tidy configuration (it is not merged with the
     * defaults).
     *
     * @param array $config tidy options
     * @return TidyValidator
     */
    public function setConfig($config)
    {
        $this->config = $config;

        return $this;
    }

    public function getConfig()
    {
        return $this->config;
    }

    /**
     * Sets the markup placed before the fragment and recomputes the number
     * of header lines used to shift reported line numbers.
     *
     * @param string $header
     * @return TidyValidator
     */
    public function setHeader($header)
    {
        $this->header = $header;
        $this->headerLines = count(explode("\n", $header));

        return $this;
    }

    public function getHeader()
    {
        return $this->header;
    }

    /**
     * Sets the encoding name passed to tidy.
     *
     * @param string $encoding
     * @return TidyValidator
     */
    public function setEncoding($encoding)
    {
        $this->encoding = $encoding;

        return $this;
    }

    public function getEncoding()
    {
        return $this->encoding;
    }

    public function getErrorCount()
    {
        return $this->errorCount;
    }

    public function getWarningCount()
    {
        return $this->warningCount;
    }

    /**
     * Runs tidy over the fragment, stores the diagnostics and the counters,
     * and replaces the stored content with the repaired fragment.
     *
     * When the repaired fragment differs from the input in more than
     * whitespace, a review notice is added: a paragraph notice if the number
     * of plain <p> or </p> tags changed, otherwise a generic notice (the
     * latter only when tidy reported nothing).
     *
     * Nothing happens when neither the argument nor the stored content
     * holds any text.
     *
     * @param string|null $content fragment to validate; when omitted the
     *        content set through setContent() is used
     * @return TidyValidator
     */
    public function validateContent($content = null)
    {
        // NOTE(review): apart from the en dash, every entry below maps a
        // character to itself as this file is written, so strtr() only turns
        // en dashes into em dashes. Confirm the replacement values were not
        // meant to be HTML entities.
        static $symbols = array(
            '…' => '…',
            '™' => '™',
            '©' => '©',
            '№' => '№',
            '—' => '—',
            '–' => '—',
            '«' => '«',
            '»' => '»',
            '„' => '„',
            '“' => '“',
            '•' => '•',
            '®' => '®',
            '¼' => '¼',
            '½' => '½',
            '¾' => '¾',
            '±' => '±'
        );

        if ($this->hasContent($content)) {
            $this->setContent($content);
        } elseif (!$this->hasContent($this->getContent())) {
            return $this;
        }

        $tidy = tidy_parse_string(
            $this->getHeader()."\n".$this->getContent()."\n</body></html>",
            $this->getConfig(),
            $this->getEncoding()
        );

        // tidy_parse_string() reports failure by returning false
        Assert::isTrue($tidy !== false);

        $this->errorCount = tidy_error_count($tidy);
        $this->warningCount = tidy_warning_count($tidy);

        $out = $this->formatMessages(tidy_get_error_buffer($tidy));

        $tidy->cleanRepair();

        $bodyMatch = array();

        preg_match(
            '/<body>(.*)<\/body>/s',
            tidy_get_output($tidy),
            $bodyMatch
        );

        Assert::isTrue(isset($bodyMatch[1]));

        $repaired = strtr($bodyMatch[1], $symbols);

        // compare the texts themselves: a checksum comparison could miss a
        // change on collision
        $changed =
            $this->stripWhitespace($this->getContent())
            !== $this->stripWhitespace($repaired);

        if ($changed) {
            $openTag = '<[\t ]*p[\t ]*>';
            $closeTag = '<[\t ]*\/[\t ]*p[\t ]*>';

            $paragraphsChanged =
                (
                    $this->countTags($openTag, $this->getContent())
                    != $this->countTags($openTag, $repaired)
                ) || (
                    $this->countTags($closeTag, $this->getContent())
                    != $this->countTags($closeTag, $repaired)
                );

            if ($paragraphsChanged) {
                $out =
                    ($out === null ? '' : $out."\n\n")
                    .'Paragraphs have been changed, please review content';
            } elseif (!$out) {
                $out = 'Content has been changed, please review';
            }
        }

        $this->messages = $out;
        $this->content = $repaired;

        return $this;
    }

    /**
     * Tells whether a value holds text to validate. Same as a truthiness
     * test, except that the string "0" counts as content.
     */
    private function hasContent($value)
    {
        return $value === '0' || (bool) $value;
    }

    /**
     * Removes tabs, line breaks, NUL bytes and spaces from a text.
     */
    private function stripWhitespace($text)
    {
        return preg_replace('/[\t\n\r\0 ]/', '', $text);
    }

    /**
     * Converts tidy's raw error buffer into the message text.
     *
     * The buffer is HTML-escaped and split into lines. A line of the form
     * "<word> <number> <word> <rest>" is rewritten as
     * "line <number - header lines> column <rest>"; any other non-blank line
     * is kept as is, and blank lines are dropped.
     *
     * @param string|false $rawMessages
     * @return string|null null when there is nothing to report
     */
    private function formatMessages($rawMessages)
    {
        if (empty($rawMessages)) {
            return null;
        }

        $lines = array();

        foreach (explode("\n", htmlspecialchars($rawMessages)) as $string) {
            if (trim($string) === '') {
                continue;
            }

            $parts = explode(' ', $string, 4);

            if (count($parts) === 4 && is_numeric($parts[1])) {
                $lines[] =
                    'line '
                    .($parts[1] - $this->headerLines)
                    .' column '.$parts[3];
            } else {
                // not a positional message (e.g. a summary line)
                $lines[] = $string;
            }
        }

        return $lines ? implode("\n", $lines) : null;
    }

    /**
     * Counts case-insensitive matches of a tag pattern in a text.
     *
     * @param string $tag regular expression body without delimiters
     * @param string $text
     * @return int
     */
    private function countTags($tag, $text)
    {
        // preg_match_all() returns the number of full matches, or false on
        // error, which is reported as 0
        return (int) preg_match_all("/$tag/i", $text);
    }
}
?>