summaryrefslogtreecommitdiff
path: root/lib/hyphen/hyphen.js
diff options
context:
space:
mode:
authorAndrew Dolgov <[email protected]>2020-09-14 14:08:20 +0300
committerAndrew Dolgov <[email protected]>2020-09-14 14:08:20 +0300
commit40d215d8bf746a02977ebbc4138138e2e1800ea9 (patch)
treec2230d941dfa8d133780d9a8ed66e38b60eeee13 /lib/hyphen/hyphen.js
parent3dd691939c9c8755b47a4761ec36660b7ec2c140 (diff)
experimental: add optional soft hyphenation
Diffstat (limited to 'lib/hyphen/hyphen.js')
-rw-r--r--lib/hyphen/hyphen.js458
1 files changed, 458 insertions, 0 deletions
diff --git a/lib/hyphen/hyphen.js b/lib/hyphen/hyphen.js
new file mode 100644
index 0000000..d5c2df3
--- /dev/null
+++ b/lib/hyphen/hyphen.js
@@ -0,0 +1,458 @@
+/** Text hyphenation in Javascript.
+ * Copyright (C) 2020 Yevhen Tiurin ([email protected])
+ * https://github.com/ytiurin/hyphen
+ *
+ * Released under the ISC license
+ * https://github.com/ytiurin/hyphen/blob/master/LICENSE
+ */
+(function (root, factory) {
+ if (typeof define === "function" && define.amd) {
+ // AMD. Register as an anonymous module.
+ define([], factory);
+ } else if (typeof module === "object" && module.exports) {
+ // Node. Does not work with strict CommonJS, but
+ // only CommonJS-like environments that support module.exports,
+ // like Node.
+ module.exports = factory();
+ } else {
+ // Browser globals (root is window)
+ root.createHyphenator = factory();
+ }
+})(this, function () {
+ var MIN_WORD_LENGTH_BOUNDRY = 5;
+
+ var SETTING_DEFAULT_ASYNC = false,
+ SETTING_DEFAULT_DEBUG = false,
+ SETTING_DEFAULT_HTML = false,
+ SETTING_DEFAULT_HYPH_CHAR = "\u00AD",
+ SETTING_NAME_ASYNC = "async",
+ SETTING_NAME_DEBUG = "debug",
+ SETTING_NAME_HTML = "html",
+ SETTING_NAME_HYPH_CHAR = "hyphenChar",
+ SETTING_NAME_MIN_WORD_LENGTH = "minWordLength";
+
+ var _global =
+ typeof global === "object"
+ ? global
+ : typeof window === "object"
+ ? window
+ : typeof this === "object"
+ ? this
+ : {};
+
+ function cloneObj(source) {
+ var target = {};
+ for (var key in source) {
+ target[key] = source[key];
+ }
+ return target;
+ }
+
+ function keyOrDefault(object, key, defaultValue) {
+ if (key in object) {
+ return object[key];
+ }
+ return defaultValue;
+ }
+
+ function exceptionsFromDefinition(patternsDefinition, hyphenChar) {
+ return patternsDefinition.exceptions.reduce(function (
+ exceptions,
+ exception
+ ) {
+ exceptions[exception.replace(/\-/g, "")] = exception.replace(
+ /\-/g,
+ hyphenChar
+ );
+ return exceptions;
+ },
+ {});
+ }
+
+ function createHyphenator(patternsDefinition, options) {
+ options = options || {};
+ var //
+ asyncMode = keyOrDefault(
+ options,
+ SETTING_NAME_ASYNC,
+ SETTING_DEFAULT_ASYNC
+ ),
+ caches = {},
+ debug = keyOrDefault(options, SETTING_NAME_DEBUG, SETTING_DEFAULT_DEBUG),
+ exceptions = {},
+ hyphenChar = keyOrDefault(
+ options,
+ SETTING_NAME_HYPH_CHAR,
+ SETTING_DEFAULT_HYPH_CHAR
+ ),
+ patterns = patternsDefinition.patterns.map(preprocessPattern),
+ minWordLength = Math.max(
+ options[SETTING_NAME_MIN_WORD_LENGTH] >> 0,
+ MIN_WORD_LENGTH_BOUNDRY
+ ),
+ skipHTML = keyOrDefault(options, SETTING_NAME_HTML, SETTING_DEFAULT_HTML);
+
+ // Prepare cache
+ var cacheKey = hyphenChar + minWordLength;
+ exceptions[cacheKey] = exceptionsFromDefinition(
+ patternsDefinition,
+ hyphenChar
+ );
+ caches[cacheKey] = cloneObj(exceptions[cacheKey]);
+
+ if (asyncMode && !("Promise" in _global)) {
+ throw new Error(
+ "Failed to create hyphenator: Could not find global Promise object, needed for hyphenator to work in async mode"
+ );
+ }
+
+ return function (text, options) {
+ options = options || {};
+
+ var localDebug = keyOrDefault(options, SETTING_NAME_DEBUG, debug),
+ localHyphenChar = keyOrDefault(
+ options,
+ SETTING_NAME_HYPH_CHAR,
+ hyphenChar
+ ),
+ localMinWordLength = Math.max(
+ options[SETTING_NAME_MIN_WORD_LENGTH] >> 0,
+ minWordLength
+ ),
+ cacheKey = localHyphenChar + localMinWordLength;
+
+ if (!exceptions[cacheKey]) {
+ exceptions[cacheKey] = exceptionsFromDefinition(
+ patternsDefinition,
+ localHyphenChar
+ );
+ }
+
+ if (!caches[cacheKey]) {
+ caches[cacheKey] = cloneObj(exceptions[cacheKey]);
+ }
+
+ return start(
+ text,
+ patterns,
+ caches[cacheKey],
+ localDebug,
+ localHyphenChar,
+ skipHTML,
+ localMinWordLength,
+ asyncMode
+ );
+ };
+ }
+ function createTextChunkReader(text, hyphenChar, skipHTML, minWordLength) {
+ function readNextTextChunk() {
+ var nextTextChunk = "";
+
+ shouldHyphenate = void 0;
+
+ chunkReader: while (nextCharIndex <= text.length) {
+ var //
+ nextChar = text.charAt(nextCharIndex++),
+ charIsLetter =
+ !!nextChar && !/\s|[\!-\@\[-\`\{-\~\u2013-\u203C]/.test(nextChar),
+ charIsAngleOpen = nextChar === "<",
+ charIsAngleClose = nextChar === ">",
+ charIsHyphen = nextChar === hyphenChar;
+
+ do {
+ if (state === STATE_READ_TAG) {
+ if (charIsAngleClose) {
+ state = STATE_RETURN_UNTOUCHED;
+ }
+ break;
+ }
+
+ if (charIsHyphen) {
+ shouldHyphenate = SHOULD_SKIP;
+ state = STATE_READ_WORD;
+ break;
+ }
+
+ if (charIsLetter) {
+ state = STATE_READ_WORD;
+ break;
+ }
+
+ if (state === STATE_READ_WORD) {
+ state = STATE_RETURN_WORD;
+ shouldHyphenate =
+ shouldHyphenate ||
+ (nextTextChunk.length >= minWordLength && SHOULD_HYPHENATE);
+ break;
+ }
+
+ shouldHyphenate = SHOULD_SKIP;
+ state = STATE_RETURN_UNTOUCHED;
+ } while (0);
+
+ if (
+ charIsAngleOpen &&
+ state !== STATE_RETURN_WORD &&
+ skipHTML &&
+ !isSpacelike(text.charAt(nextCharIndex))
+ ) {
+ shouldHyphenate = SHOULD_SKIP;
+ state = STATE_READ_TAG;
+ }
+
+ switch (state) {
+ case STATE_READ_TAG:
+ nextTextChunk += nextChar;
+ break;
+
+ case STATE_READ_WORD:
+ nextTextChunk += nextChar;
+ break;
+
+ case STATE_RETURN_UNTOUCHED:
+ nextTextChunk += nextChar;
+ break chunkReader;
+
+ case STATE_RETURN_WORD:
+ nextCharIndex--;
+ break chunkReader;
+ }
+ }
+ return nextTextChunk || void 0;
+ }
+
+ function shouldNextHyphenate() {
+ return shouldHyphenate === SHOULD_HYPHENATE;
+ }
+
+ var isSpacelike = RegExp.prototype.test.bind(/\s/);
+
+ var //
+ nextCharIndex = 0,
+ SHOULD_HYPHENATE = 1,
+ SHOULD_SKIP = 2,
+ shouldHyphenate,
+ STATE_READ_TAG = 1,
+ STATE_READ_WORD = 2,
+ STATE_RETURN_UNTOUCHED = 3,
+ STATE_RETURN_WORD = 4,
+ state;
+
+ return [readNextTextChunk, shouldNextHyphenate];
+ }
+ function hyphenateWord(text, patterns, debug, hyphenChar) {
+ var //
+ levels = new Array(text.length + 1),
+ loweredText = text.toLocaleLowerCase(),
+ p = [],
+ patternData,
+ patternIndex = 0;
+
+ for (var i = levels.length; i--; ) levels[i] = 0;
+
+ while ((patternData = patterns[patternIndex++])) {
+ var //
+ fromChar = 0,
+ endPattern = false;
+ while (!endPattern) {
+ var //
+ patternEntityIndex = loweredText.indexOf(patternData.text, fromChar),
+ patternFits =
+ patternEntityIndex > -1 &&
+ (patternData.stickToLeft ? patternEntityIndex === 0 : true) &&
+ (patternData.stickToRight
+ ? patternEntityIndex + patternData.text.length === text.length
+ : true);
+
+ if (patternFits) {
+ p.push(patternData.pattern + ">" + patternData.levels.join(""));
+
+ for (var i = 0; i < patternData.levels.length; i++)
+ levels[patternEntityIndex + i] = Math.max(
+ patternData.levels[i],
+ levels[patternEntityIndex + i]
+ );
+ }
+ if (patternEntityIndex > -1 && patternData.text.length > 0) {
+ fromChar = patternEntityIndex + patternData.text.length + 1;
+ } else {
+ endPattern = true;
+ }
+ }
+ }
+
+ levels[0] = levels[1] = levels[levels.length - 1] = levels[
+ levels.length - 2
+ ] = 0;
+
+ var //
+ hyphenatedText = "",
+ leveledText = "",
+ debugHyphenatedText = "";
+
+ for (var i = 0; i < levels.length; i++) {
+ hyphenatedText +=
+ (levels[i] % 2 === 1 ? hyphenChar : "") + text.charAt(i);
+ debugHyphenatedText += (levels[i] % 2 === 1 ? "-" : "") + text.charAt(i);
+ leveledText += (levels[i] > 0 ? levels[i] : "") + text.charAt(i);
+ }
+
+ if (debug)
+ console.log.apply(
+ console,
+ [text, "->"]
+ .concat(p)
+ .concat(["->"])
+ .concat(levels)
+ .concat(["->", leveledText])
+ .concat(["->", debugHyphenatedText])
+ );
+
+ return hyphenatedText;
+ }
+ function preprocessPattern(pattern) {
+ var //
+ patternCharIndex = 0,
+ patternChar,
+ patternData = {
+ pattern: pattern,
+ text: "",
+ levels: [],
+ stickToLeft: 0,
+ stickToRight: 0
+ },
+ states = { alphabet: 1, level: 2, stickToLeft: 3, stickToRight: 4 };
+
+ while ((patternChar = pattern.charAt(patternCharIndex++))) {
+ var //
+ charIsDot = patternChar === ".",
+ charIsNumber = !charIsDot && /\d/.test(patternChar),
+ state = charIsDot
+ ? patternCharIndex - 1 === 0
+ ? states.stickToLeft
+ : states.stickToRight
+ : charIsNumber
+ ? states.level
+ : states.alphabet;
+
+ switch (state) {
+ case states.alphabet:
+ !prevCharIsNumber && patternData.levels.push(0);
+ patternData.text += patternChar;
+ break;
+
+ case states.level:
+ patternData.levels.push(parseInt(patternChar));
+ break;
+
+ case states.stickToLeft:
+ patternData.stickToLeft = true;
+ break;
+
+ case states.stickToRight:
+ patternData.stickToRight = true;
+ break;
+ }
+
+ var prevCharIsNumber = charIsNumber;
+ }
+
+ return patternData;
+ }
+ function start(
+ text,
+ patterns,
+ cache,
+ debug,
+ hyphenChar,
+ skipHTML,
+ minWordLength,
+ isAsync
+ ) {
+ function done() {
+ allTime = new Date() - allTime;
+ resolveNewText(newText);
+
+ if (debug) {
+ console.log(
+ "----------------\nHyphenation stats: " +
+ processedN +
+ " text chunks processed, " +
+ hyphenatedN +
+ " words hyphenated"
+ );
+ console.log("Work time: " + workTime / 1000);
+ console.log("Wait time: " + (allTime - workTime) / 1000);
+ console.log("All time: " + allTime / 1000);
+ }
+ }
+
+ var //
+ newText = "",
+ nextTextChunk,
+ reader = createTextChunkReader(text, hyphenChar, skipHTML, minWordLength),
+ readNextTextChunk = reader[0],
+ shouldNextHyphenate = reader[1],
+ states = { hyphenateWord: 1, concatenate: 2 },
+ processedN = 0,
+ hyphenatedN = 0;
+
+ var //
+ allTime = new Date(),
+ workTime = 0;
+
+ var resolveNewText = function () {};
+
+ function nextTick() {
+ var loopStart = new Date();
+
+ while (
+ (!isAsync || new Date() - loopStart < 10) &&
+ (nextTextChunk = readNextTextChunk())
+ ) {
+ var state = shouldNextHyphenate()
+ ? states.hyphenateWord
+ : states.concatenate;
+
+ switch (state) {
+ case states.hyphenateWord:
+ if (!cache[nextTextChunk])
+ cache[nextTextChunk] = hyphenateWord(
+ nextTextChunk,
+ patterns,
+ debug,
+ hyphenChar
+ );
+
+ if (nextTextChunk !== cache[nextTextChunk]) hyphenatedN++;
+
+ nextTextChunk = cache[nextTextChunk];
+
+ case states.concatenate:
+ newText += nextTextChunk;
+ }
+
+ processedN++;
+ }
+ workTime += new Date() - loopStart;
+
+ if (!nextTextChunk) {
+ done();
+ } else {
+ setTimeout(nextTick);
+ }
+ }
+
+ if (isAsync) {
+ setTimeout(nextTick);
+ return new Promise(function (resolve) {
+ resolveNewText = resolve;
+ });
+ } else {
+ nextTick();
+ return newText;
+ }
+ }
+
+ return createHyphenator;
+});