From f4f0f80d2118437e5047ba266f92d7acb3c38fb7 Mon Sep 17 00:00:00 2001 From: Andrew Dolgov Date: Mon, 11 Apr 2011 16:41:01 +0400 Subject: update HTMLPurifier; enable embedded flash video in articles --- lib/htmlpurifier/library/HTMLPurifier/Config.php | 384 ++++++++++++++++++----- 1 file changed, 298 insertions(+), 86 deletions(-) mode change 100755 => 100644 lib/htmlpurifier/library/HTMLPurifier/Config.php (limited to 'lib/htmlpurifier/library/HTMLPurifier/Config.php') diff --git a/lib/htmlpurifier/library/HTMLPurifier/Config.php b/lib/htmlpurifier/library/HTMLPurifier/Config.php old mode 100755 new mode 100644 index f8e1f7804..b6551398f --- a/lib/htmlpurifier/library/HTMLPurifier/Config.php +++ b/lib/htmlpurifier/library/HTMLPurifier/Config.php @@ -20,7 +20,7 @@ class HTMLPurifier_Config /** * HTML Purifier's version */ - public $version = '3.3.0'; + public $version = '4.3.0'; /** * Bool indicator whether or not to automatically finalize @@ -68,12 +68,31 @@ class HTMLPurifier_Config */ protected $plist; + /** + * Whether or not a set is taking place due to an + * alias lookup. + */ + private $aliasMode; + + /** + * Set to false if you do not want line and file numbers in errors + * (useful when unit testing). This will also compress some errors + * and exceptions. + */ + public $chatty = true; + + /** + * Current lock; only gets to this namespace are allowed. + */ + private $lock; + /** * @param $definition HTMLPurifier_ConfigSchema that defines what directives * are allowed. */ - public function __construct($definition) { - $this->plist = new HTMLPurifier_PropertyList($definition->defaultPlist); + public function __construct($definition, $parent = null) { + $parent = $parent ? $parent : $definition->defaultPlist; + $this->plist = new HTMLPurifier_PropertyList($parent); $this->def = $definition; // keep a copy around for checking $this->parser = new HTMLPurifier_VarParser_Flexible(); } @@ -102,6 +121,16 @@ class HTMLPurifier_Config return $ret; } + /** + * Creates a new config object that inherits from a previous one. + * @param HTMLPurifier_Config $config Configuration object to inherit + * from. + * @return HTMLPurifier_Config object with $config as its parent. + */ + public static function inherit(HTMLPurifier_Config $config) { + return new HTMLPurifier_Config($config->def, $config->plist); + } + /** * Convenience constructor that creates a default configuration object. * @return Default HTMLPurifier_Config object. @@ -114,24 +143,34 @@ class HTMLPurifier_Config /** * Retreives a value from the configuration. - * @param $namespace String namespace * @param $key String key */ - public function get($namespace, $key) { - if (!$this->finalized) $this->autoFinalize ? $this->finalize() : $this->plist->squash(true); - if (!isset($this->def->info[$namespace][$key])) { + public function get($key, $a = null) { + if ($a !== null) { + $this->triggerError("Using deprecated API: use \$config->get('$key.$a') instead", E_USER_WARNING); + $key = "$key.$a"; + } + if (!$this->finalized) $this->autoFinalize(); + if (!isset($this->def->info[$key])) { // can't add % due to SimpleTest bug - trigger_error('Cannot retrieve value of undefined directive ' . htmlspecialchars("$namespace.$key"), + $this->triggerError('Cannot retrieve value of undefined directive ' . htmlspecialchars($key), E_USER_WARNING); return; } - if (isset($this->def->info[$namespace][$key]->isAlias)) { - $d = $this->def->info[$namespace][$key]; - trigger_error('Cannot get value from aliased directive, use real name ' . $d->namespace . '.' . $d->name, + if (isset($this->def->info[$key]->isAlias)) { + $d = $this->def->info[$key]; + $this->triggerError('Cannot get value from aliased directive, use real name ' . $d->key, E_USER_ERROR); return; } - return $this->plist->get("$namespace.$key"); + if ($this->lock) { + list($ns) = explode('.', $key); + if ($ns !== $this->lock) { + $this->triggerError('Cannot get value of namespace ' . $ns . ' when lock for ' . $this->lock . ' is active, this probably indicates a Definition setup method is accessing directives that are not within its namespace', E_USER_ERROR); + return; + } + } + return $this->plist->get($key); } /** @@ -139,13 +178,13 @@ class HTMLPurifier_Config * @param $namespace String namespace */ public function getBatch($namespace) { - if (!$this->finalized) $this->autoFinalize ? $this->finalize() : $this->plist->squash(true); - if (!isset($this->def->info[$namespace])) { - trigger_error('Cannot retrieve undefined namespace ' . htmlspecialchars($namespace), + if (!$this->finalized) $this->autoFinalize(); + $full = $this->getAll(); + if (!isset($full[$namespace])) { + $this->triggerError('Cannot retrieve undefined namespace ' . htmlspecialchars($namespace), E_USER_WARNING); return; } - $full = $this->getAll(); return $full[$namespace]; } @@ -178,9 +217,10 @@ class HTMLPurifier_Config /** * Retrieves all directives, organized by namespace + * @warning This is a pretty inefficient function, avoid if you can */ public function getAll() { - if (!$this->finalized) $this->autoFinalize ? $this->finalize() : $this->plist->squash(true); + if (!$this->finalized) $this->autoFinalize(); $ret = array(); foreach ($this->plist->squash() as $name => $value) { list($ns, $key) = explode('.', $name, 2); @@ -191,29 +231,37 @@ class HTMLPurifier_Config /** * Sets a value to configuration. - * @param $namespace String namespace * @param $key String key * @param $value Mixed value */ - public function set($namespace, $key, $value, $from_alias = false) { + public function set($key, $value, $a = null) { + if (strpos($key, '.') === false) { + $namespace = $key; + $directive = $value; + $value = $a; + $key = "$key.$directive"; + $this->triggerError("Using deprecated API: use \$config->set('$key', ...) instead", E_USER_NOTICE); + } else { + list($namespace) = explode('.', $key); + } if ($this->isFinalized('Cannot set directive after finalization')) return; - if (!isset($this->def->info[$namespace][$key])) { - trigger_error('Cannot set undefined directive ' . htmlspecialchars("$namespace.$key") . ' to value', + if (!isset($this->def->info[$key])) { + $this->triggerError('Cannot set undefined directive ' . htmlspecialchars($key) . ' to value', E_USER_WARNING); return; } - $def = $this->def->info[$namespace][$key]; + $def = $this->def->info[$key]; if (isset($def->isAlias)) { - if ($from_alias) { - trigger_error('Double-aliases not allowed, please fix '. - 'ConfigSchema bug with' . "$namespace.$key", E_USER_ERROR); + if ($this->aliasMode) { + $this->triggerError('Double-aliases not allowed, please fix '. + 'ConfigSchema bug with' . $key, E_USER_ERROR); return; } - $this->set($new_ns = $def->namespace, - $new_dir = $def->name, - $value, true); - trigger_error("$namespace.$key is an alias, preferred directive name is $new_ns.$new_dir", E_USER_NOTICE); + $this->aliasMode = true; + $this->set($def->key, $value); + $this->aliasMode = false; + $this->triggerError("$key is an alias, preferred directive name is {$def->key}", E_USER_NOTICE); return; } @@ -231,7 +279,7 @@ class HTMLPurifier_Config try { $value = $this->parser->parse($value, $type, $allow_null); } catch (HTMLPurifier_VarParserException $e) { - trigger_error('Value for ' . "$namespace.$key" . ' is of invalid type, should be ' . HTMLPurifier_VarParser::getTypeName($type), E_USER_WARNING); + $this->triggerError('Value for ' . $key . ' is of invalid type, should be ' . HTMLPurifier_VarParser::getTypeName($type), E_USER_WARNING); return; } if (is_string($value) && is_object($def)) { @@ -241,17 +289,17 @@ class HTMLPurifier_Config } // check to see if the value is allowed if (isset($def->allowed) && !isset($def->allowed[$value])) { - trigger_error('Value not supported, valid values are: ' . + $this->triggerError('Value not supported, valid values are: ' . $this->_listify($def->allowed), E_USER_WARNING); return; } } - $this->plist->set("$namespace.$key", $value); + $this->plist->set($key, $value); // reset definitions if the directives they depend on changed // this is a very costly process, so it's discouraged // with finalization - if ($namespace == 'HTML' || $namespace == 'CSS') { + if ($namespace == 'HTML' || $namespace == 'CSS' || $namespace == 'URI') { $this->definitions[$namespace] = null; } @@ -271,74 +319,203 @@ class HTMLPurifier_Config * Retrieves object reference to the HTML definition. * @param $raw Return a copy that has not been setup yet. Must be * called before it's been setup, otherwise won't work. + * @param $optimized If true, this method may return null, to + * indicate that a cached version of the modified + * definition object is available and no further edits + * are necessary. Consider using + * maybeGetRawHTMLDefinition, which is more explicitly + * named, instead. */ - public function getHTMLDefinition($raw = false) { - return $this->getDefinition('HTML', $raw); + public function getHTMLDefinition($raw = false, $optimized = false) { + return $this->getDefinition('HTML', $raw, $optimized); } /** * Retrieves object reference to the CSS definition * @param $raw Return a copy that has not been setup yet. Must be * called before it's been setup, otherwise won't work. + * @param $optimized If true, this method may return null, to + * indicate that a cached version of the modified + * definition object is available and no further edits + * are necessary. Consider using + * maybeGetRawCSSDefinition, which is more explicitly + * named, instead. */ - public function getCSSDefinition($raw = false) { - return $this->getDefinition('CSS', $raw); + public function getCSSDefinition($raw = false, $optimized = false) { + return $this->getDefinition('CSS', $raw, $optimized); + } + + /** + * Retrieves object reference to the URI definition + * @param $raw Return a copy that has not been setup yet. Must be + * called before it's been setup, otherwise won't work. + * @param $optimized If true, this method may return null, to + * indicate that a cached version of the modified + * definition object is available and no further edits + * are necessary. Consider using + * maybeGetRawURIDefinition, which is more explicitly + * named, instead. + */ + public function getURIDefinition($raw = false, $optimized = false) { + return $this->getDefinition('URI', $raw, $optimized); } /** * Retrieves a definition * @param $type Type of definition: HTML, CSS, etc * @param $raw Whether or not definition should be returned raw + * @param $optimized Only has an effect when $raw is true. Whether + * or not to return null if the result is already present in + * the cache. This is off by default for backwards + * compatibility reasons, but you need to do things this + * way in order to ensure that caching is done properly. + * Check out enduser-customize.html for more details. + * We probably won't ever change this default, as much as the + * maybe semantics is the "right thing to do." */ - public function getDefinition($type, $raw = false) { - if (!$this->finalized) $this->autoFinalize ? $this->finalize() : $this->plist->squash(true); + public function getDefinition($type, $raw = false, $optimized = false) { + if ($optimized && !$raw) { + throw new HTMLPurifier_Exception("Cannot set optimized = true when raw = false"); + } + if (!$this->finalized) $this->autoFinalize(); + // temporarily suspend locks, so we can handle recursive definition calls + $lock = $this->lock; + $this->lock = null; $factory = HTMLPurifier_DefinitionCacheFactory::instance(); $cache = $factory->create($type, $this); + $this->lock = $lock; if (!$raw) { - // see if we can quickly supply a definition + // full definition + // --------------- + // check if definition is in memory + if (!empty($this->definitions[$type])) { + $def = $this->definitions[$type]; + // check if the definition is setup + if ($def->setup) { + return $def; + } else { + $def->setup($this); + if ($def->optimized) $cache->add($def, $this); + return $def; + } + } + // check if definition is in cache + $def = $cache->get($this); + if ($def) { + // definition in cache, save to memory and return it + $this->definitions[$type] = $def; + return $def; + } + // initialize it + $def = $this->initDefinition($type); + // set it up + $this->lock = $type; + $def->setup($this); + $this->lock = null; + // save in cache + $cache->add($def, $this); + // return it + return $def; + } else { + // raw definition + // -------------- + // check preconditions + $def = null; + if ($optimized) { + if (is_null($this->get($type . '.DefinitionID'))) { + // fatally error out if definition ID not set + throw new HTMLPurifier_Exception("Cannot retrieve raw version without specifying %$type.DefinitionID"); + } + } if (!empty($this->definitions[$type])) { - if (!$this->definitions[$type]->setup) { - $this->definitions[$type]->setup($this); - $cache->set($this->definitions[$type], $this); + $def = $this->definitions[$type]; + if ($def->setup && !$optimized) { + $extra = $this->chatty ? " (try moving this code block earlier in your initialization)" : ""; + throw new HTMLPurifier_Exception("Cannot retrieve raw definition after it has already been setup" . $extra); + } + if ($def->optimized === null) { + $extra = $this->chatty ? " (try flushing your cache)" : ""; + throw new HTMLPurifier_Exception("Optimization status of definition is unknown" . $extra); + } + if ($def->optimized !== $optimized) { + $msg = $optimized ? "optimized" : "unoptimized"; + $extra = $this->chatty ? " (this backtrace is for the first inconsistent call, which was for a $msg raw definition)" : ""; + throw new HTMLPurifier_Exception("Inconsistent use of optimized and unoptimized raw definition retrievals" . $extra); } - return $this->definitions[$type]; } - // memory check missed, try cache - $this->definitions[$type] = $cache->get($this); - if ($this->definitions[$type]) { - // definition in cache, return it - return $this->definitions[$type]; + // check if definition was in memory + if ($def) { + if ($def->setup) { + // invariant: $optimized === true (checked above) + return null; + } else { + return $def; + } + } + // if optimized, check if definition was in cache + // (because we do the memory check first, this formulation + // is prone to cache slamming, but I think + // guaranteeing that either /all/ of the raw + // setup code or /none/ of it is run is more important.) + if ($optimized) { + // This code path only gets run once; once we put + // something in $definitions (which is guaranteed by the + // trailing code), we always short-circuit above. + $def = $cache->get($this); + if ($def) { + // save the full definition for later, but don't + // return it yet + $this->definitions[$type] = $def; + return null; + } + } + // check invariants for creation + if (!$optimized) { + if (!is_null($this->get($type . '.DefinitionID'))) { + if ($this->chatty) { + $this->triggerError("Due to a documentation error in previous version of HTML Purifier, your definitions are not being cached. If this is OK, you can remove the %$type.DefinitionRev and %$type.DefinitionID declaration. Otherwise, modify your code to use maybeGetRawDefinition, and test if the returned value is null before making any edits (if it is null, that means that a cached version is available, and no raw operations are necessary). See Customize for more details", E_USER_WARNING); + } else { + $this->triggerError("Useless DefinitionID declaration", E_USER_WARNING); + } + } } - } elseif ( - !empty($this->definitions[$type]) && - !$this->definitions[$type]->setup - ) { - // raw requested, raw in memory, quick return - return $this->definitions[$type]; + // initialize it + $def = $this->initDefinition($type); + $def->optimized = $optimized; + return $def; } + throw new HTMLPurifier_Exception("The impossible happened!"); + } + + private function initDefinition($type) { // quick checks failed, let's create the object if ($type == 'HTML') { - $this->definitions[$type] = new HTMLPurifier_HTMLDefinition(); + $def = new HTMLPurifier_HTMLDefinition(); } elseif ($type == 'CSS') { - $this->definitions[$type] = new HTMLPurifier_CSSDefinition(); + $def = new HTMLPurifier_CSSDefinition(); } elseif ($type == 'URI') { - $this->definitions[$type] = new HTMLPurifier_URIDefinition(); + $def = new HTMLPurifier_URIDefinition(); } else { throw new HTMLPurifier_Exception("Definition of $type type not supported"); } - // quick abort if raw - if ($raw) { - if (is_null($this->get($type, 'DefinitionID'))) { - // fatally error out if definition ID not set - throw new HTMLPurifier_Exception("Cannot retrieve raw version without specifying %$type.DefinitionID"); - } - return $this->definitions[$type]; - } - // set it up - $this->definitions[$type]->setup($this); - // save in cache - $cache->set($this->definitions[$type], $this); - return $this->definitions[$type]; + $this->definitions[$type] = $def; + return $def; + } + + public function maybeGetRawDefinition($name) { + return $this->getDefinition($name, true, true); + } + + public function maybeGetRawHTMLDefinition() { + return $this->getDefinition('HTML', true, true); + } + + public function maybeGetRawCSSDefinition() { + return $this->getDefinition('CSS', true, true); + } + + public function maybeGetRawURIDefinition() { + return $this->getDefinition('URI', true, true); } /** @@ -351,14 +528,12 @@ class HTMLPurifier_Config foreach ($config_array as $key => $value) { $key = str_replace('_', '.', $key); if (strpos($key, '.') !== false) { - // condensed form - list($namespace, $directive) = explode('.', $key); - $this->set($namespace, $directive, $value); + $this->set($key, $value); } else { $namespace = $key; $namespace_values = $value; foreach ($namespace_values as $directive => $value) { - $this->set($namespace, $directive, $value); + $this->set($namespace .'.'. $directive, $value); } } } @@ -394,16 +569,15 @@ class HTMLPurifier_Config } } $ret = array(); - foreach ($schema->info as $ns => $keypairs) { - foreach ($keypairs as $directive => $def) { - if ($allowed !== true) { - if (isset($blacklisted_directives["$ns.$directive"])) continue; - if (!isset($allowed_directives["$ns.$directive"]) && !isset($allowed_ns[$ns])) continue; - } - if (isset($def->isAlias)) continue; - if ($directive == 'DefinitionID' || $directive == 'DefinitionRev') continue; - $ret[] = array($ns, $directive); + foreach ($schema->info as $key => $def) { + list($ns, $directive) = explode('.', $key, 2); + if ($allowed !== true) { + if (isset($blacklisted_directives["$ns.$directive"])) continue; + if (!isset($allowed_directives["$ns.$directive"]) && !isset($allowed_ns[$ns])) continue; } + if (isset($def->isAlias)) continue; + if ($directive == 'DefinitionID' || $directive == 'DefinitionRev') continue; + $ret[] = array($ns, $directive); } return $ret; } @@ -472,7 +646,7 @@ class HTMLPurifier_Config */ public function isFinalized($error = false) { if ($this->finalized && $error) { - trigger_error($error, E_USER_ERROR); + $this->triggerError($error, E_USER_ERROR); } return $this->finalized; } @@ -482,7 +656,11 @@ class HTMLPurifier_Config * already finalized */ public function autoFinalize() { - if (!$this->finalized && $this->autoFinalize) $this->finalize(); + if ($this->autoFinalize) { + $this->finalize(); + } else { + $this->plist->squash(true); + } } /** @@ -490,6 +668,40 @@ class HTMLPurifier_Config */ public function finalize() { $this->finalized = true; + unset($this->parser); + } + + /** + * Produces a nicely formatted error message by supplying the + * stack frame information OUTSIDE of HTMLPurifier_Config. + */ + protected function triggerError($msg, $no) { + // determine previous stack frame + $extra = ''; + if ($this->chatty) { + $trace = debug_backtrace(); + // zip(tail(trace), trace) -- but PHP is not Haskell har har + for ($i = 0, $c = count($trace); $i < $c - 1; $i++) { + if ($trace[$i + 1]['class'] === 'HTMLPurifier_Config') { + continue; + } + $frame = $trace[$i]; + $extra = " invoked on line {$frame['line']} in file {$frame['file']}"; + break; + } + } + trigger_error($msg . $extra, $no); + } + + /** + * Returns a serialized form of the configuration object that can + * be reconstituted. + */ + public function serialize() { + $this->getDefinition('HTML'); + $this->getDefinition('CSS'); + $this->getDefinition('URI'); + return serialize($this); } } -- cgit v1.2.3