From f45a286b8d62f710b519a98c7d4b75a0c34d5d10 Mon Sep 17 00:00:00 2001 From: Andrew Dolgov Date: Mon, 22 Jun 2009 13:56:49 +0400 Subject: strip_tags_long: use htmlpurifier to properly reformat html content --- .../library/HTMLPurifier/AttrDef/HTML/Bool.php | 28 +++++++++ .../library/HTMLPurifier/AttrDef/HTML/Color.php | 32 ++++++++++ .../HTMLPurifier/AttrDef/HTML/FrameTarget.php | 21 +++++++ .../library/HTMLPurifier/AttrDef/HTML/ID.php | 70 ++++++++++++++++++++++ .../library/HTMLPurifier/AttrDef/HTML/Length.php | 41 +++++++++++++ .../HTMLPurifier/AttrDef/HTML/LinkTypes.php | 53 ++++++++++++++++ .../HTMLPurifier/AttrDef/HTML/MultiLength.php | 41 +++++++++++++ .../library/HTMLPurifier/AttrDef/HTML/Nmtokens.php | 48 +++++++++++++++ .../library/HTMLPurifier/AttrDef/HTML/Pixels.php | 48 +++++++++++++++ 9 files changed, 382 insertions(+) create mode 100755 lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/Bool.php create mode 100755 lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/Color.php create mode 100755 lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/FrameTarget.php create mode 100755 lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/ID.php create mode 100755 lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/Length.php create mode 100755 lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/LinkTypes.php create mode 100755 lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/MultiLength.php create mode 100755 lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/Nmtokens.php create mode 100755 lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/Pixels.php (limited to 'lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML') diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/Bool.php b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/Bool.php new file mode 100755 index 000000000..e06987eb8 --- /dev/null +++ b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/Bool.php @@ -0,0 +1,28 @@ +name = $name;} + + public function validate($string, $config, $context) { + if (empty($string)) return false; + return $this->name; + } + + /** + * @param $string Name of attribute + */ + public function make($string) { + return new HTMLPurifier_AttrDef_HTML_Bool($string); + } + +} + +// vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/Color.php b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/Color.php new file mode 100755 index 000000000..5311a3c61 --- /dev/null +++ b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/Color.php @@ -0,0 +1,32 @@ +get('Core', 'ColorKeywords'); + + $string = trim($string); + + if (empty($string)) return false; + if (isset($colors[$string])) return $colors[$string]; + if ($string[0] === '#') $hex = substr($string, 1); + else $hex = $string; + + $length = strlen($hex); + if ($length !== 3 && $length !== 6) return false; + if (!ctype_xdigit($hex)) return false; + if ($length === 3) $hex = $hex[0].$hex[0].$hex[1].$hex[1].$hex[2].$hex[2]; + + return "#$hex"; + + } + +} + +// vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/FrameTarget.php b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/FrameTarget.php new file mode 100755 index 000000000..bd281a89f --- /dev/null +++ b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/FrameTarget.php @@ -0,0 +1,21 @@ +valid_values === false) $this->valid_values = $config->get('Attr', 'AllowedFrameTargets'); + return parent::validate($string, $config, $context); + } + +} + +// vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/ID.php b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/ID.php new file mode 100755 index 000000000..7c5c169c2 --- /dev/null +++ b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/ID.php @@ -0,0 +1,70 @@ +get('Attr', 'EnableID')) return false; + + $id = trim($id); // trim it first + + if ($id === '') return false; + + $prefix = $config->get('Attr', 'IDPrefix'); + if ($prefix !== '') { + $prefix .= $config->get('Attr', 'IDPrefixLocal'); + // prevent re-appending the prefix + if (strpos($id, $prefix) !== 0) $id = $prefix . $id; + } elseif ($config->get('Attr', 'IDPrefixLocal') !== '') { + trigger_error('%Attr.IDPrefixLocal cannot be used unless '. + '%Attr.IDPrefix is set', E_USER_WARNING); + } + + //if (!$this->ref) { + $id_accumulator =& $context->get('IDAccumulator'); + if (isset($id_accumulator->ids[$id])) return false; + //} + + // we purposely avoid using regex, hopefully this is faster + + if (ctype_alpha($id)) { + $result = true; + } else { + if (!ctype_alpha(@$id[0])) return false; + $trim = trim( // primitive style of regexps, I suppose + $id, + 'A..Za..z0..9:-._' + ); + $result = ($trim === ''); + } + + $regexp = $config->get('Attr', 'IDBlacklistRegexp'); + if ($regexp && preg_match($regexp, $id)) { + return false; + } + + if (/*!$this->ref && */$result) $id_accumulator->add($id); + + // if no change was made to the ID, return the result + // else, return the new id if stripping whitespace made it + // valid, or return false. + return $result ? $id : false; + + } + +} + +// vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/Length.php b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/Length.php new file mode 100755 index 000000000..a242f9c23 --- /dev/null +++ b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/Length.php @@ -0,0 +1,41 @@ + 100) return '100%'; + + return ((string) $points) . '%'; + + } + +} + +// vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/LinkTypes.php b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/LinkTypes.php new file mode 100755 index 000000000..8a0da0c89 --- /dev/null +++ b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/LinkTypes.php @@ -0,0 +1,53 @@ + 'AllowedRel', + 'rev' => 'AllowedRev' + ); + if (!isset($configLookup[$name])) { + trigger_error('Unrecognized attribute name for link '. + 'relationship.', E_USER_ERROR); + return; + } + $this->name = $configLookup[$name]; + } + + public function validate($string, $config, $context) { + + $allowed = $config->get('Attr', $this->name); + if (empty($allowed)) return false; + + $string = $this->parseCDATA($string); + $parts = explode(' ', $string); + + // lookup to prevent duplicates + $ret_lookup = array(); + foreach ($parts as $part) { + $part = strtolower(trim($part)); + if (!isset($allowed[$part])) continue; + $ret_lookup[$part] = true; + } + + if (empty($ret_lookup)) return false; + $string = implode(' ', array_keys($ret_lookup)); + + return $string; + + } + +} + +// vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/MultiLength.php b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/MultiLength.php new file mode 100755 index 000000000..c72fc76e4 --- /dev/null +++ b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/MultiLength.php @@ -0,0 +1,41 @@ +max = $max; + } + + public function validate($string, $config, $context) { + + $string = trim($string); + if ($string === '0') return $string; + if ($string === '') return false; + $length = strlen($string); + if (substr($string, $length - 2) == 'px') { + $string = substr($string, 0, $length - 2); + } + if (!is_numeric($string)) return false; + $int = (int) $string; + + if ($int < 0) return '0'; + + // upper-bound value, extremely high values can + // crash operating systems, see + // WARNING, above link WILL crash you if you're using Windows + + if ($this->max !== null && $int > $this->max) return (string) $this->max; + + return (string) $int; + + } + + public function make($string) { + if ($string === '') $max = null; + else $max = (int) $string; + $class = get_class($this); + return new $class($max); + } + +} + +// vim: et sw=4 sts=4 -- cgit v1.2.3