summary | refs | log | tree | commit | diff
path: root/src/HTML5/Parser/UTF8Utils.php
diff options
context:
space:
mode:
Diffstat (limited to 'src/HTML5/Parser/UTF8Utils.php')
-rw-r--r-- src/HTML5/Parser/UTF8Utils.php 23
1 files changed, 11 insertions, 12 deletions
diff --git a/src/HTML5/Parser/UTF8Utils.php b/src/HTML5/Parser/UTF8Utils.php
index 451c155..77c2dfb 100644
--- a/src/HTML5/Parser/UTF8Utils.php
+++ b/src/HTML5/Parser/UTF8Utils.php
@@ -1,5 +1,7 @@
<?php
+
namespace Masterminds\HTML5\Parser;
+
/*
*
* Portions based on code from html5lib files with the following copyright:
@@ -30,11 +32,10 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
use Masterminds\HTML5\Exception;
/**
- * UTF-8 Utilities
+ * UTF-8 Utilities.
*/
class UTF8Utils
{
-
/**
* The Unicode replacement character.
*/
@@ -76,10 +77,8 @@ class UTF8Utils
* This has not yet been tested with character sets other than UTF-8.
* It should work with ISO-8859-1/-13 and standard Latin Win charsets.
*
- * @param string $data
- * The data to convert.
- * @param string $encoding
- * A valid encoding. Examples: http://www.php.net/manual/en/mbstring.supported-encodings.php
+ * @param string $data The data to convert
+ * @param string $encoding A valid encoding. Examples: http://www.php.net/manual/en/mbstring.supported-encodings.php
*
* @return string
*/
@@ -108,7 +107,7 @@ class UTF8Utils
$data = mb_convert_encoding($data, 'UTF-8', $encoding);
mb_substitute_character($save);
} // @todo Get iconv running in at least some environments if that is possible.
- elseif (function_exists('iconv') && $encoding != 'auto') {
+ elseif (function_exists('iconv') && 'auto' !== $encoding) {
// fprintf(STDOUT, "iconv found\n");
// iconv has the following behaviors:
// - Overlong representations are ignored.
@@ -122,7 +121,7 @@ class UTF8Utils
/*
* One leading U+FEFF BYTE ORDER MARK character must be ignored if any are present.
*/
- if (substr($data, 0, 3) === "\xEF\xBB\xBF") {
+ if ("\xEF\xBB\xBF" === substr($data, 0, 3)) {
$data = substr($data, 3);
}
@@ -132,9 +131,9 @@ class UTF8Utils
/**
* Checks for Unicode code points that are not valid in a document.
*
- * @param string $data A string to analyze.
+ * @param string $data a string to analyze
*
- * @return array An array of (string) error messages produced by the scanning.
+ * @return array an array of (string) error messages produced by the scanning
*/
public static function checkForIllegalCodepoints($data)
{
@@ -144,7 +143,7 @@ class UTF8Utils
/*
* All U+0000 null characters in the input must be replaced by U+FFFD REPLACEMENT CHARACTERs. Any occurrence of such a character is a parse error.
*/
- for ($i = 0, $count = substr_count($data, "\0"); $i < $count; $i ++) {
+ for ($i = 0, $count = substr_count($data, "\0"); $i < $count; ++$i) {
$errors[] = 'null-character';
}
@@ -166,7 +165,7 @@ class UTF8Utils
|
[\xF0-\xF4][\x8F-\xBF]\xBF[\xBE\xBF] # U+nFFFE and U+nFFFF (1 <= n <= 10_{16})
)/x', $data, $matches);
- for ($i = 0; $i < $count; $i ++) {
+ for ($i = 0; $i < $count; ++$i) {
$errors[] = 'invalid-codepoint';
}