feat(rule): ignore \w言語

azu · azu · commit dd7f2396a081 · 2017-03-26T18:13:17.000+09:00
diff --git a/README.md b/README.md
@@ -49,8 +49,9 @@ textlint --rule ja-unnatural-alphabet README.md
 
 - `allow`: `string[]`
     - 無視するアルファベットの配列
-    - デフォルト: `["a", "i", "u", "e", "o", "n"]`
+    - デフォルト: `["a", "i", "u", "e", "o", "n", "/[a-zA-Zａ-ｚＡ-Ｚ]言語/"]`
     - デフォルトでは母音とnを除外している
+    - `"/正規表現/"` のような文字列もサポート
 
 ```json5
 {
diff --git a/package.json b/package.json
@@ -34,6 +34,7 @@
     "textlint-scripts": "^1.2.2"
   },
   "dependencies": {
+    "escape-string-regexp": "^1.0.5",
     "match-index": "^1.0.1",
     "regx": "^1.0.4"
   }
diff --git a/src/textlint-rule-ja-unnatural-alphabet.js b/src/textlint-rule-ja-unnatural-alphabet.js
@@ -1,10 +1,11 @@
 // MIT © 2017 azu
 "use strict";
+const escapeStringRegexp = require('escape-string-regexp');
 const matchCaptureGroupAll = require("match-index").matchCaptureGroupAll;
 const regx = require("regx").default;
 // IME的に入力されそうな文字列
 // 日本語 + 記号
-const japaneseRegExp = /(?:[々〇〻\u3400-\u4DBF\u4E00-\u9FFF\uF900-\uFAFF]|[\uD840-\uD87F][\uDC00-\uDFFF]|[ぁ-んァ-ヶー。、・−])/;
+const japaneseRegExp = /(?:[々〇〻\u3400-\u4DBF\u4E00-\u9FFF\uF900-\uFAFF]|[\uD840-\uD87F]|[\uFF00-\uFFEF]|[\uDC00-\uDFFF]|[ぁ-んァ-ヶー。、・−])/;
 // 半角/全角のアルファベットの正規表現
 const alphabetPattern = /([a-zA-Zａ-ｚＡ-Ｚ])/;
 /**
@@ -21,21 +22,59 @@ const matchUnnaturalAlphabet = (text) => {
     return matchCaptureGroupAll(text, unnaturalPattern);
 };
 
+/**
+ * Return true when `actual.index` lies within the span of any group in `exceptGroups` (from the group's `index` through `index + text.length`).
+ * @param {MatchCaptureGroup[]} exceptGroups - ignore ranges; each entry's `text` and `index` are read
+ * @param {MatchCaptureGroup} actual - candidate match; only its `index` is compared
+ * @returns {boolean} true if at least one group covers `actual.index`, otherwise false
+ */
+const isIgnoredRange = (exceptGroups, actual) => {
+    return exceptGroups.some(({ text, index }) => {
+        const endIndex = index + text.length; // position one past the last character of the group
+        return index <= actual.index && actual.index <= endIndex; // NOTE(review): `<=` also accepts the position just after the group - confirm whether `<` was intended
+    });
+};
+/**
+ * Convert every entry of `allowAlphabets` into a RegExp and collect the results of `matchCaptureGroupAll(input, pattern)` for all of them.
+ * @param {string} input - text to search
+ * @param {string[]} allowAlphabets - allowed words; an entry that starts and ends with "/" is used as regular-expression source, any other non-empty entry goes through `escapeStringRegexp` first
+ * @returns {MatchCaptureGroup[]} concatenated results for all patterns, in `allowAlphabets` order
+ */
+const createIgnoreRanges = (input, allowAlphabets) => {
+    // Turn each allow entry (string) into a RegExp
+    const patterns = allowAlphabets.map(allowWord => {
+        if (!allowWord) {
+            return /^$/; // falsy entry (e.g. ""): pattern that can only match an empty input
+        }
+        if (allowWord[0] === "/" && allowWord[allowWord.length - 1] === "/") { // "/source/" form; NOTE(review): a lone "/" also passes this test and yields empty source
+            const regExpString = allowWord.slice(1, allowWord.length - 1); // text between the two slashes
+            return new RegExp(`(${regExpString})`, "g"); // wrapped in one capture group, global flag
+        }
+        const escapeString = escapeStringRegexp(allowWord); // presumably escapes RegExp metacharacters so the word matches literally - see escape-string-regexp docs
+        return new RegExp(`(${escapeString})`, "g");
+    });
+    return patterns.reduce((total, pattern) => { // append each pattern's matches to one flat array
+        return total.concat(matchCaptureGroupAll(input, pattern));
+    }, []);
+};
+
 const defaultOptions = {
     // 無視するアルファベット
     // 例) ["X"]
-    // デフォルトでは母音とnを除外している
-    "allow": ["a", "i", "u", "e", "o", "n"]
+    // デフォルトでは母音とnと典型例を除外している
+    "allow": ["a", "i", "u", "e", "o", "n", "/[a-zA-Zａ-ｚＡ-Ｚ]言語/"]
 };
 const report = (context, options = {}) => {
     const { Syntax, RuleError, report, getSource } = context;
     const allowAlphabets = options.allow || defaultOptions.allow;
     return {
         [Syntax.Str](node){
             const text = getSource(node);
-            matchUnnaturalAlphabet(text).forEach(({ text, index }) => {
-                // 無視するアルファベットであるなら無視
-                if (allowAlphabets.indexOf(text) !== -1) {
+            const ignoreMatch = createIgnoreRanges(text, allowAlphabets);
+            matchUnnaturalAlphabet(text).forEach((actual) => {
+                const { text, index } = actual;
+                // 無視する単語を含んでいるなら無視
+                if (isIgnoredRange(ignoreMatch, actual)) {
                     return;
                 }
                 report(node, new RuleError(`不自然なアルファベットがあります: ${text}`, {
diff --git a/test/textlint-rule-ja-unnatural-alphabet-test.js b/test/textlint-rule-ja-unnatural-alphabet-test.js
@@ -10,6 +10,8 @@ tester.run("textlint-rule-ja-unnatural-alphabet", rule, {
         "リリース",
         "aiueo",
         "This is pen.",
+        "これはC言語",
+        "これはD言語",
         {
             text: "アンドロイドNは良し",
             options: {
@@ -33,6 +35,14 @@ tester.run("textlint-rule-ja-unnatural-alphabet", rule, {
                     message: "不自然なアルファベットがあります: ｋ"
                 }
             ]
+        },
+        {
+            text: "無駄なk脳",
+            errors: [
+                {
+                    message: "不自然なアルファベットがあります: k"
+                }
+            ]
         }
     ]
 });

Original file line number	Diff line number	Diff line change
`@@ -49,8 +49,9 @@ textlint --rule ja-unnatural-alphabet README.md`
`49`	`49`
`50`	`50`	- `allow`: `string[]`
`51`	`51`	`- 無視するアルファベットの配列`
`52`		- - デフォルト: `["a", "i", "u", "e", "o", "n"]`
	`52`	+ - デフォルト: `["a", "i", "u", "e", "o", "n", "/[a-zA-Zａ-ｚＡ-Ｚ]言語/"]`
`53`	`53`	`- デフォルトでは母音とnを除外している`
	`54`	+ - `"/正規表現/"` のような文字列もサポート
`54`	`55`
`55`	`56`	```json5
`56`	`57`	`{`
Original file line number	Diff line number	Diff line change
`@@ -34,6 +34,7 @@`
`34`	`34`	`"textlint-scripts": "^1.2.2"`
`35`	`35`	`},`
`36`	`36`	`"dependencies": {`
	`37`	`+ "escape-string-regexp": "^1.0.5",`
`37`	`38`	`"match-index": "^1.0.1",`
`38`	`39`	`"regx": "^1.0.4"`
`39`	`40`	`}`
Original file line number	Diff line number	Diff line change
`@@ -10,6 +10,8 @@ tester.run("textlint-rule-ja-unnatural-alphabet", rule, {`
`10`	`10`	`"リリース",`
`11`	`11`	`"aiueo",`
`12`	`12`	`"This is pen.",`
	`13`	`+ "これはC言語",`
	`14`	`+ "これはD言語",`
`13`	`15`	`{`
`14`	`16`	`text: "アンドロイドNは良し",`
`15`	`17`	`options: {`
`@@ -33,6 +35,14 @@ tester.run("textlint-rule-ja-unnatural-alphabet", rule, {`
`33`	`35`	`message: "不自然なアルファベットがあります: ｋ"`
`34`	`36`	`}`
`35`	`37`	`]`
	`38`	`+ },`
	`39`	`+ {`
	`40`	`+ text: "無駄なk脳",`
	`41`	`+ errors: [`
	`42`	`+ {`
	`43`	`+ message: "不自然なアルファベットがあります: k"`
	`44`	`+ }`
	`45`	`+ ]`
`36`	`46`	`}`
`37`	`47`	`]`
`38`	`48`	`});`