blob: 66c65d95810243e0a775692984547533bcc06191 [file] [log] [blame]
/*
* Copyright 2000-2010 JetBrains s.r.o.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.intellij.spellchecker.inspections;
import com.intellij.openapi.util.TextRange;
import com.intellij.spellchecker.util.Strings;
import com.intellij.util.Consumer;
import org.jetbrains.annotations.NonNls;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class IdentifierSplitter extends BaseSplitter {
private static final IdentifierSplitter INSTANCE = new IdentifierSplitter();
public static IdentifierSplitter getInstance() {
return INSTANCE;
}
@NonNls
private static final Pattern WORD = Pattern.compile("\\b\\p{L}*'?\\p{L}*");
@NonNls
private static final Pattern WORD_IN_QUOTES = Pattern.compile("'([^']*)'");
@Override
public void split(@Nullable String text, @NotNull TextRange range, Consumer<TextRange> consumer) {
if (text == null || range.getLength() < 1 || range.getStartOffset() < 0) {
return;
}
List<TextRange> extracted = excludeByPattern(text, range, WORD_IN_QUOTES, 1);
for (TextRange textRange : extracted) {
List<TextRange> words = splitByCase(text, textRange);
if (words.size() == 0) {
continue;
}
if (words.size() == 1) {
addWord(consumer, false, words.get(0));
continue;
}
boolean isCapitalized = Strings.isCapitalized(text, words.get(0));
boolean containsShortWord = containsShortWord(words);
if (isCapitalized && containsShortWord) {
continue;
}
boolean isAllWordsAreUpperCased = isAllWordsAreUpperCased(text, words);
for (TextRange word : words) {
boolean uc = Strings.isUpperCased(text, word);
boolean flag = (uc && !isAllWordsAreUpperCased);
Matcher matcher = WORD.matcher(text.substring(word.getStartOffset(), word.getEndOffset()));
if (matcher.find()) {
TextRange found = matcherRange(word, matcher);
addWord(consumer, flag, found);
}
}
}
}
@NotNull
private static List<TextRange> splitByCase(@NotNull String text, @NotNull TextRange range) {
//System.out.println("text = " + text + " range = " + range);
List<TextRange> result = new ArrayList<TextRange>();
int i = range.getStartOffset();
int s = -1;
int prevType = Character.MATH_SYMBOL;
while (i < range.getEndOffset()) {
final char ch = text.charAt(i);
if (ch >= '\u3040' && ch <= '\u309f' || // Hiragana
ch >= '\u30A0' && ch <= '\u30ff' || // Katakana
ch >= '\u4E00' && ch <= '\u9FFF' || // CJK Unified ideographs
ch >= '\uF900' && ch <= '\uFAFF' || // CJK Compatibility Ideographs
ch >= '\uFF00' && ch <= '\uFFEF' //Halfwidth and Fullwidth Forms of Katakana & Fullwidth ASCII variants
) {
if (s >= 0) {
add(text, result, i, s);
s = -1;
}
prevType = Character.MATH_SYMBOL;
++i;
continue;
}
final int type = Character.getType(ch);
if (type == Character.LOWERCASE_LETTER ||
type == Character.UPPERCASE_LETTER ||
type == Character.TITLECASE_LETTER ||
type == Character.OTHER_LETTER ||
type == Character.MODIFIER_LETTER ||
type == Character.OTHER_PUNCTUATION
) {
//letter
if (s < 0) {
//start
s = i;
}
else if (s >= 0 && type == Character.UPPERCASE_LETTER && prevType == Character.LOWERCASE_LETTER) {
//a|Camel
add(text, result, i, s);
s = i;
}
else if (i - s >= 1 && type == Character.LOWERCASE_LETTER && prevType == Character.UPPERCASE_LETTER) {
//CAPITALN|ext
add(text, result, i - 1, s);
s = i - 1;
}
}
else if (s >= 0) {
//non-letter
add(text, result, i, s);
s = -1;
}
prevType = type;
i++;
}
//remainder
if (s >= 0) {
add(text, result, i, s);
}
return result;
}
private static void add(String text, List<TextRange> result, int i, int s) {
if (i - s > 3) {
final TextRange textRange = new TextRange(s, i);
//System.out.println("textRange = " + textRange + " = "+ textRange.substring(text));
result.add(textRange);
}
}
}