From f45a286b8d62f710b519a98c7d4b75a0c34d5d10 Mon Sep 17 00:00:00 2001 From: Andrew Dolgov Date: Mon, 22 Jun 2009 13:56:49 +0400 Subject: strip_tags_long: use htmlpurifier to properly reformat html content --- lib/htmlpurifier/library/HTMLPurifier/Config.php | 497 +++++++++++++++++++++++ 1 file changed, 497 insertions(+) create mode 100755 lib/htmlpurifier/library/HTMLPurifier/Config.php (limited to 'lib/htmlpurifier/library/HTMLPurifier/Config.php') diff --git a/lib/htmlpurifier/library/HTMLPurifier/Config.php b/lib/htmlpurifier/library/HTMLPurifier/Config.php new file mode 100755 index 000000000..f8e1f7804 --- /dev/null +++ b/lib/htmlpurifier/library/HTMLPurifier/Config.php @@ -0,0 +1,497 @@ +plist = new HTMLPurifier_PropertyList($definition->defaultPlist); + $this->def = $definition; // keep a copy around for checking + $this->parser = new HTMLPurifier_VarParser_Flexible(); + } + + /** + * Convenience constructor that creates a config object based on a mixed var + * @param mixed $config Variable that defines the state of the config + * object. Can be: a HTMLPurifier_Config() object, + * an array of directives based on loadArray(), + * or a string filename of an ini file. + * @param HTMLPurifier_ConfigSchema Schema object + * @return Configured HTMLPurifier_Config object + */ + public static function create($config, $schema = null) { + if ($config instanceof HTMLPurifier_Config) { + // pass-through + return $config; + } + if (!$schema) { + $ret = HTMLPurifier_Config::createDefault(); + } else { + $ret = new HTMLPurifier_Config($schema); + } + if (is_string($config)) $ret->loadIni($config); + elseif (is_array($config)) $ret->loadArray($config); + return $ret; + } + + /** + * Convenience constructor that creates a default configuration object. + * @return Default HTMLPurifier_Config object. + */ + public static function createDefault() { + $definition = HTMLPurifier_ConfigSchema::instance(); + $config = new HTMLPurifier_Config($definition); + return $config; + } + + /** + * Retreives a value from the configuration. + * @param $namespace String namespace + * @param $key String key + */ + public function get($namespace, $key) { + if (!$this->finalized) $this->autoFinalize ? $this->finalize() : $this->plist->squash(true); + if (!isset($this->def->info[$namespace][$key])) { + // can't add % due to SimpleTest bug + trigger_error('Cannot retrieve value of undefined directive ' . htmlspecialchars("$namespace.$key"), + E_USER_WARNING); + return; + } + if (isset($this->def->info[$namespace][$key]->isAlias)) { + $d = $this->def->info[$namespace][$key]; + trigger_error('Cannot get value from aliased directive, use real name ' . $d->namespace . '.' . $d->name, + E_USER_ERROR); + return; + } + return $this->plist->get("$namespace.$key"); + } + + /** + * Retreives an array of directives to values from a given namespace + * @param $namespace String namespace + */ + public function getBatch($namespace) { + if (!$this->finalized) $this->autoFinalize ? $this->finalize() : $this->plist->squash(true); + if (!isset($this->def->info[$namespace])) { + trigger_error('Cannot retrieve undefined namespace ' . htmlspecialchars($namespace), + E_USER_WARNING); + return; + } + $full = $this->getAll(); + return $full[$namespace]; + } + + /** + * Returns a md5 signature of a segment of the configuration object + * that uniquely identifies that particular configuration + * @note Revision is handled specially and is removed from the batch + * before processing! + * @param $namespace Namespace to get serial for + */ + public function getBatchSerial($namespace) { + if (empty($this->serials[$namespace])) { + $batch = $this->getBatch($namespace); + unset($batch['DefinitionRev']); + $this->serials[$namespace] = md5(serialize($batch)); + } + return $this->serials[$namespace]; + } + + /** + * Returns a md5 signature for the entire configuration object + * that uniquely identifies that particular configuration + */ + public function getSerial() { + if (empty($this->serial)) { + $this->serial = md5(serialize($this->getAll())); + } + return $this->serial; + } + + /** + * Retrieves all directives, organized by namespace + */ + public function getAll() { + if (!$this->finalized) $this->autoFinalize ? $this->finalize() : $this->plist->squash(true); + $ret = array(); + foreach ($this->plist->squash() as $name => $value) { + list($ns, $key) = explode('.', $name, 2); + $ret[$ns][$key] = $value; + } + return $ret; + } + + /** + * Sets a value to configuration. + * @param $namespace String namespace + * @param $key String key + * @param $value Mixed value + */ + public function set($namespace, $key, $value, $from_alias = false) { + if ($this->isFinalized('Cannot set directive after finalization')) return; + if (!isset($this->def->info[$namespace][$key])) { + trigger_error('Cannot set undefined directive ' . htmlspecialchars("$namespace.$key") . ' to value', + E_USER_WARNING); + return; + } + $def = $this->def->info[$namespace][$key]; + + if (isset($def->isAlias)) { + if ($from_alias) { + trigger_error('Double-aliases not allowed, please fix '. + 'ConfigSchema bug with' . "$namespace.$key", E_USER_ERROR); + return; + } + $this->set($new_ns = $def->namespace, + $new_dir = $def->name, + $value, true); + trigger_error("$namespace.$key is an alias, preferred directive name is $new_ns.$new_dir", E_USER_NOTICE); + return; + } + + // Raw type might be negative when using the fully optimized form + // of stdclass, which indicates allow_null == true + $rtype = is_int($def) ? $def : $def->type; + if ($rtype < 0) { + $type = -$rtype; + $allow_null = true; + } else { + $type = $rtype; + $allow_null = isset($def->allow_null); + } + + try { + $value = $this->parser->parse($value, $type, $allow_null); + } catch (HTMLPurifier_VarParserException $e) { + trigger_error('Value for ' . "$namespace.$key" . ' is of invalid type, should be ' . HTMLPurifier_VarParser::getTypeName($type), E_USER_WARNING); + return; + } + if (is_string($value) && is_object($def)) { + // resolve value alias if defined + if (isset($def->aliases[$value])) { + $value = $def->aliases[$value]; + } + // check to see if the value is allowed + if (isset($def->allowed) && !isset($def->allowed[$value])) { + trigger_error('Value not supported, valid values are: ' . + $this->_listify($def->allowed), E_USER_WARNING); + return; + } + } + $this->plist->set("$namespace.$key", $value); + + // reset definitions if the directives they depend on changed + // this is a very costly process, so it's discouraged + // with finalization + if ($namespace == 'HTML' || $namespace == 'CSS') { + $this->definitions[$namespace] = null; + } + + $this->serials[$namespace] = false; + } + + /** + * Convenience function for error reporting + */ + private function _listify($lookup) { + $list = array(); + foreach ($lookup as $name => $b) $list[] = $name; + return implode(', ', $list); + } + + /** + * Retrieves object reference to the HTML definition. + * @param $raw Return a copy that has not been setup yet. Must be + * called before it's been setup, otherwise won't work. + */ + public function getHTMLDefinition($raw = false) { + return $this->getDefinition('HTML', $raw); + } + + /** + * Retrieves object reference to the CSS definition + * @param $raw Return a copy that has not been setup yet. Must be + * called before it's been setup, otherwise won't work. + */ + public function getCSSDefinition($raw = false) { + return $this->getDefinition('CSS', $raw); + } + + /** + * Retrieves a definition + * @param $type Type of definition: HTML, CSS, etc + * @param $raw Whether or not definition should be returned raw + */ + public function getDefinition($type, $raw = false) { + if (!$this->finalized) $this->autoFinalize ? $this->finalize() : $this->plist->squash(true); + $factory = HTMLPurifier_DefinitionCacheFactory::instance(); + $cache = $factory->create($type, $this); + if (!$raw) { + // see if we can quickly supply a definition + if (!empty($this->definitions[$type])) { + if (!$this->definitions[$type]->setup) { + $this->definitions[$type]->setup($this); + $cache->set($this->definitions[$type], $this); + } + return $this->definitions[$type]; + } + // memory check missed, try cache + $this->definitions[$type] = $cache->get($this); + if ($this->definitions[$type]) { + // definition in cache, return it + return $this->definitions[$type]; + } + } elseif ( + !empty($this->definitions[$type]) && + !$this->definitions[$type]->setup + ) { + // raw requested, raw in memory, quick return + return $this->definitions[$type]; + } + // quick checks failed, let's create the object + if ($type == 'HTML') { + $this->definitions[$type] = new HTMLPurifier_HTMLDefinition(); + } elseif ($type == 'CSS') { + $this->definitions[$type] = new HTMLPurifier_CSSDefinition(); + } elseif ($type == 'URI') { + $this->definitions[$type] = new HTMLPurifier_URIDefinition(); + } else { + throw new HTMLPurifier_Exception("Definition of $type type not supported"); + } + // quick abort if raw + if ($raw) { + if (is_null($this->get($type, 'DefinitionID'))) { + // fatally error out if definition ID not set + throw new HTMLPurifier_Exception("Cannot retrieve raw version without specifying %$type.DefinitionID"); + } + return $this->definitions[$type]; + } + // set it up + $this->definitions[$type]->setup($this); + // save in cache + $cache->set($this->definitions[$type], $this); + return $this->definitions[$type]; + } + + /** + * Loads configuration values from an array with the following structure: + * Namespace.Directive => Value + * @param $config_array Configuration associative array + */ + public function loadArray($config_array) { + if ($this->isFinalized('Cannot load directives after finalization')) return; + foreach ($config_array as $key => $value) { + $key = str_replace('_', '.', $key); + if (strpos($key, '.') !== false) { + // condensed form + list($namespace, $directive) = explode('.', $key); + $this->set($namespace, $directive, $value); + } else { + $namespace = $key; + $namespace_values = $value; + foreach ($namespace_values as $directive => $value) { + $this->set($namespace, $directive, $value); + } + } + } + } + + /** + * Returns a list of array(namespace, directive) for all directives + * that are allowed in a web-form context as per an allowed + * namespaces/directives list. + * @param $allowed List of allowed namespaces/directives + */ + public static function getAllowedDirectivesForForm($allowed, $schema = null) { + if (!$schema) { + $schema = HTMLPurifier_ConfigSchema::instance(); + } + if ($allowed !== true) { + if (is_string($allowed)) $allowed = array($allowed); + $allowed_ns = array(); + $allowed_directives = array(); + $blacklisted_directives = array(); + foreach ($allowed as $ns_or_directive) { + if (strpos($ns_or_directive, '.') !== false) { + // directive + if ($ns_or_directive[0] == '-') { + $blacklisted_directives[substr($ns_or_directive, 1)] = true; + } else { + $allowed_directives[$ns_or_directive] = true; + } + } else { + // namespace + $allowed_ns[$ns_or_directive] = true; + } + } + } + $ret = array(); + foreach ($schema->info as $ns => $keypairs) { + foreach ($keypairs as $directive => $def) { + if ($allowed !== true) { + if (isset($blacklisted_directives["$ns.$directive"])) continue; + if (!isset($allowed_directives["$ns.$directive"]) && !isset($allowed_ns[$ns])) continue; + } + if (isset($def->isAlias)) continue; + if ($directive == 'DefinitionID' || $directive == 'DefinitionRev') continue; + $ret[] = array($ns, $directive); + } + } + return $ret; + } + + /** + * Loads configuration values from $_GET/$_POST that were posted + * via ConfigForm + * @param $array $_GET or $_POST array to import + * @param $index Index/name that the config variables are in + * @param $allowed List of allowed namespaces/directives + * @param $mq_fix Boolean whether or not to enable magic quotes fix + * @param $schema Instance of HTMLPurifier_ConfigSchema to use, if not global copy + */ + public static function loadArrayFromForm($array, $index = false, $allowed = true, $mq_fix = true, $schema = null) { + $ret = HTMLPurifier_Config::prepareArrayFromForm($array, $index, $allowed, $mq_fix, $schema); + $config = HTMLPurifier_Config::create($ret, $schema); + return $config; + } + + /** + * Merges in configuration values from $_GET/$_POST to object. NOT STATIC. + * @note Same parameters as loadArrayFromForm + */ + public function mergeArrayFromForm($array, $index = false, $allowed = true, $mq_fix = true) { + $ret = HTMLPurifier_Config::prepareArrayFromForm($array, $index, $allowed, $mq_fix, $this->def); + $this->loadArray($ret); + } + + /** + * Prepares an array from a form into something usable for the more + * strict parts of HTMLPurifier_Config + */ + public static function prepareArrayFromForm($array, $index = false, $allowed = true, $mq_fix = true, $schema = null) { + if ($index !== false) $array = (isset($array[$index]) && is_array($array[$index])) ? $array[$index] : array(); + $mq = $mq_fix && function_exists('get_magic_quotes_gpc') && get_magic_quotes_gpc(); + + $allowed = HTMLPurifier_Config::getAllowedDirectivesForForm($allowed, $schema); + $ret = array(); + foreach ($allowed as $key) { + list($ns, $directive) = $key; + $skey = "$ns.$directive"; + if (!empty($array["Null_$skey"])) { + $ret[$ns][$directive] = null; + continue; + } + if (!isset($array[$skey])) continue; + $value = $mq ? stripslashes($array[$skey]) : $array[$skey]; + $ret[$ns][$directive] = $value; + } + return $ret; + } + + /** + * Loads configuration values from an ini file + * @param $filename Name of ini file + */ + public function loadIni($filename) { + if ($this->isFinalized('Cannot load directives after finalization')) return; + $array = parse_ini_file($filename, true); + $this->loadArray($array); + } + + /** + * Checks whether or not the configuration object is finalized. + * @param $error String error message, or false for no error + */ + public function isFinalized($error = false) { + if ($this->finalized && $error) { + trigger_error($error, E_USER_ERROR); + } + return $this->finalized; + } + + /** + * Finalizes configuration only if auto finalize is on and not + * already finalized + */ + public function autoFinalize() { + if (!$this->finalized && $this->autoFinalize) $this->finalize(); + } + + /** + * Finalizes a configuration object, prohibiting further change + */ + public function finalize() { + $this->finalized = true; + } + +} + +// vim: et sw=4 sts=4 -- cgit v1.2.3