Commit 65dc1a4

fix: Use tokenize instead of getTokenizer (#16)
* Use tokenize instead of getTokenizer
* Bug fix

1 parent: 8730de0
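
The substance of the change is a switch between kuromojin's two entry points: instead of resolving a tokenizer instance with getTokenizer() and calling its tokenizeForSentence() method on each sentence, the rule now calls tokenize(), which resolves directly to the token array. A rough sketch of the API difference (illustrative only, not code from this repo):

```js
// Before: resolve a (promise-wrapped) tokenizer instance, then tokenize
// each sentence yourself.
import { getTokenizer } from "kuromojin";

const before = await getTokenizer().then((tokenizer) => {
    return tokenizer.tokenizeForSentence("これは、例文です。");
});

// After: tokenize() hides the tokenizer handling behind one call that
// resolves directly to the tokens.
import { tokenize } from "kuromojin";

const after = await tokenize("これは、例文です。");
console.log(after.map((token) => token.surface_form).join("/"));
```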

1 file changed: 51 additions, 52 deletions


src/max-ten.js

@@ -1,7 +1,7 @@
 // LICENSE : MIT
 "use strict";
 import { RuleHelper } from "textlint-rule-helper";
-import { getTokenizer } from "kuromojin";
+import { tokenize } from "kuromojin";
 import { splitAST, Syntax as SentenceSyntax } from "sentence-splitter";
 import { StringSource } from "textlint-util-to-string";

@@ -103,59 +103,58 @@ module.exports = function (context, options = {}) {
                 2. sentence to tokens
                 3. check tokens
             */
-            return getTokenizer().then((tokenizer) => {
-                sentences.forEach((sentence) => {
-                    const source = new StringSource(sentence);
-                    const text = source.toString();
-                    const tokens = tokenizer.tokenizeForSentence(text);
-                    let currentTenCount = 0;
-                    let lastToken = null;
-                    tokens.forEach((token, index) => {
-                        const surface = token.surface_form;
-                        if (surface === touten) {
-                            // treat it as an exception when sandwiched between nouns
-                            const isSandwiched = isSandwichedMeishi({
-                                before: findSiblingMeaningToken({
-                                    tokens,
-                                    currentIndex: index,
-                                    direction: "prev"
-                                }),
-                                token: token,
-                                after: findSiblingMeaningToken({
-                                    tokens,
-                                    currentIndex: index,
-                                    direction: "next"
-                                })
-                            });
-                            // in strict mode, the exception does not apply
-                            if (!isStrict && isSandwiched) {
-                                return;
-                            }
-                            currentTenCount++;
-                            lastToken = token;
-                        }
-                        if (surface === kuten) {
-                            // reset
-                            currentTenCount = 0;
-                        }
-                        // report
-                        if (currentTenCount > maxLen) {
-                            const positionInSentence = source.originalIndexFromIndex(lastToken.word_position - 1);
-                            // relative index from Paragraph Node
-                            // Sentence start(relative) + word position(relative)
-                            const index = sentence.range[0] - node.range[0] + positionInSentence;
-                            const ruleError = new context.RuleError(
-                                `一つの文で"${touten}"を${maxLen + 1}つ以上使用しています`,
-                                {
-                                    index
-                                }
-                            );
-                            report(node, ruleError);
-                            currentTenCount = 0;
+            const checkSentence = async (sentence) => {
+                const source = new StringSource(sentence);
+                const text = source.toString();
+                const tokens = await tokenize(text);
+                let currentTenCount = 0;
+                let lastToken = null;
+                tokens.forEach((token, index) => {
+                    const surface = token.surface_form;
+                    if (surface === touten) {
+                        // treat it as an exception when sandwiched between nouns
+                        const isSandwiched = isSandwichedMeishi({
+                            before: findSiblingMeaningToken({
+                                tokens,
+                                currentIndex: index,
+                                direction: "prev"
+                            }),
+                            token: token,
+                            after: findSiblingMeaningToken({
+                                tokens,
+                                currentIndex: index,
+                                direction: "next"
+                            })
+                        });
+                        // in strict mode, the exception does not apply
+                        if (!isStrict && isSandwiched) {
+                            return;
                         }
-                    });
+                        currentTenCount++;
+                        lastToken = token;
+                    }
+                    if (surface === kuten) {
+                        // reset
+                        currentTenCount = 0;
+                    }
+                    // report
+                    if (currentTenCount > maxLen) {
+                        const positionInSentence = source.originalIndexFromIndex(lastToken.word_position - 1);
+                        // relative index from Paragraph Node
+                        // Sentence start(relative) + word position(relative)
+                        const index = sentence.range[0] - node.range[0] + positionInSentence;
+                        const ruleError = new context.RuleError(
+                            `一つの文で"${touten}"を${maxLen + 1}つ以上使用しています`,
+                            {
+                                index
+                            }
+                        );
+                        report(node, ruleError);
+                        currentTenCount = 0;
+                    }
                 });
-            });
+            };
+            return Promise.all(sentences.map(checkSentence));
         }
     };
 };
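
Beyond the API swap, the hunk restructures the control flow: the old version did all checks inside a single getTokenizer().then(...) over a synchronous sentences.forEach, while the new version wraps the per-sentence work in an async checkSentence function and returns Promise.all(sentences.map(checkSentence)), so the rule still hands textlint one promise that settles only after every sentence has been checked. A minimal, self-contained reduction of that pattern (names and sample data are hypothetical, not from the repo):

```js
// Hypothetical reduction of the new control flow: one async check per
// sentence, joined back into a single promise with Promise.all.
const checkSentence = async (sentence) => {
    // stand-in for "tokenize the sentence and count touten"
    console.log(`checking: ${sentence}`);
};

function lintParagraph(sentences) {
    // resolves only after every per-sentence check has finished
    return Promise.all(sentences.map(checkSentence));
}

lintParagraph(["一文目です。", "二文目は、少し、長いです。"]).then(() => {
    console.log("all sentences checked");
});
```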
