
Commit e74fcbe

feat(rule): recognize "には" as a single particle (助詞)
close #15
1 parent 4de3c24 commit e74fcbe


3 files changed: +61 −14 lines


src/no-doubled-joshi.js

Lines changed: 15 additions & 11 deletions
@@ -6,20 +6,21 @@ import {split as splitSentences, Syntax as SentenceSyntax} from "sentence-splitter";
 import StringSource from "textlint-util-to-string";
 import {
     is助詞Token, is読点Token,
-    createKeyFromKey, restoreToSurfaceFromKey
+    concatJoishiTokens,
+    createKeyFromKey,
+    restoreToSurfaceFromKey
 } from "./token-utils";
 /**
  * Create token map object
  * {
- *  "で": [token, token],
- *  "の": [token, token]
+ *  "は:助詞.係助詞": [token, token]
  * }
  * @param tokens
  * @returns {*}
  */
 function createSurfaceKeyMap(tokens) {
     // Only 助詞 (particle) tokens are collected here
-    return tokens.filter(is助詞Token).reduce((keyMap, token) => {
+    return tokens.reduce((keyMap, token) => {
         // "は:助詞.係助詞" : [token]
         const tokenKey = createKeyFromKey(token);
         if (!keyMap[tokenKey]) {
@@ -70,7 +71,7 @@ export default function(context, options = {}) {
     const isStrict = options.strict || defaultOptions.strict;
     const allow = options.allow || defaultOptions.allow;
     const separatorChars = options.separatorChars || defaultOptions.separatorChars;
-    const {Syntax, report, getSource, RuleError} = context;
+    const {Syntax, report, RuleError} = context;
     return {
         [Syntax.Paragraph](node){
             if (helper.isChildNode(node, [Syntax.Link, Syntax.Image, Syntax.BlockQuote, Syntax.Emphasis])) {
@@ -81,13 +82,16 @@ export default function(context, options = {}) {
             const isSentenceNode = node => {
                 return node.type === SentenceSyntax.Sentence;
             };
-            let sentences = splitSentences(text, {
+            const sentences = splitSentences(text, {
                 separatorChars: separatorChars
             }).filter(isSentenceNode);
             return getTokenizer().then(tokenizer => {
                 const checkSentence = (sentence) => {
-                    let tokens = tokenizer.tokenizeForSentence(sentence.raw);
-                    let countableTokens = tokens.filter(token => {
+                    const tokens = tokenizer.tokenizeForSentence(sentence.raw);
+                    // Treat 助詞 + 助詞 as a single particle
+                    // https://github.com/textlint-ja/textlint-rule-no-doubled-joshi/issues/15
+                    const concatTokens = concatJoishiTokens(tokens);
+                    const countableTokens = concatTokens.filter(token => {
                         if (isStrict) {
                             return is助詞Token(token);
                         }
@@ -96,14 +100,14 @@ export default function(context, options = {}) {
                         // https://github.com/azu/textlint-rule-no-doubled-joshi/issues/2
                         return is助詞Token(token) || is読点Token(token);
                     });
-                    let joshiTokenSurfaceKeyMap = createSurfaceKeyMap(countableTokens);
+                    const joshiTokenSurfaceKeyMap = createSurfaceKeyMap(countableTokens);
                     /*
                         # Data Structure

                         joshiTokens = [tokenA, tokenB, tokenC, tokenD, tokenE, tokenF]
                         joshiTokenSurfaceKeyMap = {
-                            "は:助詞.係助詞": [tokenA, tokenC, tokenE],
-                            "で:助詞.係助詞": [tokenB, tokenD, tokenF]
+                            "は:助詞.係助詞": [tokenA, tokenC, tokenE],
+                            "で:助詞.係助詞": [tokenB, tokenD, tokenF]
                         }
                      */
                     Object.keys(joshiTokenSurfaceKeyMap).forEach(key => {

src/token-utils.js

Lines changed: 32 additions & 3 deletions
@@ -2,19 +2,48 @@
 "use strict";
 // Whether the token is a 助詞 (particle)
 export const is助詞Token = (token) => {
-    return token.pos === "助詞";
+    // A merged token has a pos like "助詞助詞", so match by prefix
+    return token && /^助詞/.test(token.pos);
 };

 export const is読点Token = (token) => {
     return token.surface_form === "、" && token.pos === "名詞";
 };
-
+/**
+ * Merge bToken into aToken so that the pair is treated as a single token
+ * @param {Object} aToken
+ * @param {Object} bToken
+ * @returns {Object}
+ */
+const concatToken = (aToken, bToken) => {
+    aToken.surface_form += bToken.surface_form;
+    aToken.pos += bToken.pos;
+    aToken.pos_detail_1 += bToken.surface_form;
+    return aToken;
+};
+/**
+ * Return an array of tokens in which consecutive 助詞 + 助詞 tokens are merged back into one
+ * @param {Array} tokens
+ * @returns {Array}
+ */
+export const concatJoishiTokens = (tokens) => {
+    const newTokens = [];
+    tokens.forEach((token) => {
+        const prevToken = newTokens[newTokens.length - 1];
+        if (is助詞Token(token) && is助詞Token(prevToken)) {
+            newTokens[newTokens.length - 1] = concatToken(prevToken, token);
+        } else {
+            newTokens.push(token);
+        }
+    });
+    return newTokens;
+};
 // Build a key from a 助詞 token, using pos down to 品詞細分類1 (pos_detail_1)
 // http://www.unixuser.org/~euske/doc/postag/index.html#chasen
 // http://chasen.naist.jp/snapshot/ipadic/ipadic/doc/ipadic-ja.pdf
 export const createKeyFromKey = (token) => {
     // e.g.) "は:助詞.係助詞"
-    return `${token.surface_form}:${token.pos}.${token.pos_detail_1}`
+    return `${token.surface_form}:${token.pos}.${token.pos_detail_1}`;
 };
 // Restore the surface form from a key
 export const restoreToSurfaceFromKey = (key) => {
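The hunk above only defines the new helpers, so here is a minimal usage sketch. The token objects are hand-written in kuromoji's shape; the pos / pos_detail_1 values are assumptions for illustration, not output from an actual tokenizer run:

```js
import { concatJoishiTokens, createKeyFromKey } from "./token-utils";

// Hand-built tokens roughly in kuromoji's shape (values assumed for illustration)
const tokens = [
    { surface_form: "測る", pos: "動詞", pos_detail_1: "自立" },
    { surface_form: "に", pos: "助詞", pos_detail_1: "格助詞" },
    { surface_form: "は", pos: "助詞", pos_detail_1: "係助詞" }
];

const merged = concatJoishiTokens(tokens);
console.log(merged.length);               // 2 — "に" and "は" were merged into one token
console.log(merged[1].surface_form);      // "には"
console.log(merged[1].pos);               // "助詞助詞" — hence the prefix match in is助詞Token
console.log(createKeyFromKey(merged[1])); // "には:助詞助詞.格助詞は"
```

Note that concatToken mutates the first of the two tokens in place, and the `token &&` guard in is助詞Token covers the first iteration of concatJoishiTokens, where prevToken is still undefined.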

test/no-doubled-joshi-test.js

Lines changed: 14 additions & 0 deletions
@@ -17,6 +17,8 @@ tester.run("no-double-joshi", rule, {
         "ナイフで切断した後、ハンマーで破砕した。",
         // Duplication of the conjunctive particle て is allowed
         "まずは試していただいて",
+        // **に** and **には** are recognized as different particles
+        "そのため、文字列の長さを正確に測るにはある程度の妥協が必要になります。",
         // The first と is a case particle (格助詞), the second と is a conjunctive particle (接続助詞)
         "ターミナルで「test」**と**入力する**と**、画面に表示されます。",
         {
@@ -161,6 +163,18 @@ tester.run("no-double-joshi", rule, {
                 column: 38
             }
         ]
+    },
+    {
+        // に + は and に + は
+        // https://github.com/textlint-ja/textlint-rule-no-doubled-joshi/issues/15
+        text: "文字列にはそこには問題がある。",
+        errors: [
+            {
+                message: `一文に二回以上利用されている助詞 "には" がみつかりました。`,
+                line: 1,
+                column: 8
+            }
+        ]
     }
 ]
 });
