SafeUtf8Filter.class.php

Go to the documentation of this file.
00001 <?php
00002 /***************************************************************************
00003  *   Copyright (C) 2007 by Ivan Y. Khvostishkov                            *
00004  *                                                                         *
00005  *   This program is free software; you can redistribute it and/or modify  *
00006  *   it under the terms of the GNU Lesser General Public License as        *
00007  *   published by the Free Software Foundation; either version 3 of the    *
00008  *   License, or (at your option) any later version.                       *
00009  *                                                                         *
00010  ***************************************************************************/
00011 
/**
 * Filter that keeps only the well-formed UTF-8 byte sequences of a string
 * and silently drops every other byte.
 */
final class SafeUtf8Filter extends BaseFilter
{
    /**
     * Returns the instance that Singleton::getInstance() hands out for
     * this class name.
     *
     * NOTE(review): Singleton is declared outside this file; presumably it
     * yields one shared SafeUtf8Filter per process -- confirm there.
     *
     * @return SafeUtf8Filter (assumed; whatever Singleton::getInstance() returns)
     */
    public static function me()
    {
        return Singleton::getInstance(__CLASS__);
    }

    /**
     * Removes bytes that are not part of a well-formed UTF-8 sequence.
     *
     * The input is scanned byte-wise (the pattern deliberately has no /u
     * modifier); every sequence accepted by the pattern is collected in
     * order and the pieces are concatenated back together. Bytes that do
     * not start an accepted sequence are skipped.
     *
     * @param string $value raw byte string to clean
     *
     * @return string|null the cleaned string ('' when nothing matched), or
     *                     null when preg_match_all() reports a failure or
     *                     produced no match list
     */
    public function apply($value)
    {
        $matches = null;

        // Pattern taken from the W3C validator; under /x the spacing
        // between the byte classes is insignificant.
        $found = preg_match_all(
            '/[\x00-\x7F]                         ' # ASCII
            .'| [\xC2-\xDF]        [\x80-\xBF]    ' # non-overlong 2-byte sequences
            .'|  \xE0[\xA0-\xBF]   [\x80-\xBF]    ' # excluding overlongs
            .'| [\xE1-\xEC\xEE\xEF][\x80-\xBF]{2} ' # straight 3-byte sequences
            .'|  \xED[\x80-\x9F]   [\x80-\xBF]    ' # excluding surrogates
            .'|  \xF0[\x90-\xBF]   [\x80-\xBF]{2} ' # planes 1-3
            .'| [\xF1-\xF3]        [\x80-\xBF]{3} ' # planes 4-15
            .'|  \xF4[\x80-\x8F][\x80-\xBF]{2}    ' # plane 16
            .'/x',
            $value,
            $matches
        );

        // A false result means the match itself failed; do not glue
        // together a match list that may be incomplete.
        if ($found === false || !isset($matches[0]))
            return null;

        // Explicit empty-string glue: passing null as the separator is
        // deprecated since PHP 8.1.
        return implode('', $matches[0]);
    }
}
00050 ?>