| /* |
| * Copyright 2000-2010 JetBrains s.r.o. |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package com.intellij.spellchecker.inspections; |
| |
| import com.intellij.openapi.util.TextRange; |
| import com.intellij.spellchecker.util.Strings; |
| import com.intellij.util.Consumer; |
| import org.jetbrains.annotations.NonNls; |
| import org.jetbrains.annotations.NotNull; |
| import org.jetbrains.annotations.Nullable; |
| |
| import java.util.ArrayList; |
| import java.util.List; |
| import java.util.regex.Matcher; |
| import java.util.regex.Pattern; |
| |
| |
| public class IdentifierSplitter extends BaseSplitter { |
| private static final IdentifierSplitter INSTANCE = new IdentifierSplitter(); |
| |
| public static IdentifierSplitter getInstance() { |
| return INSTANCE; |
| } |
| |
| @NonNls |
| private static final Pattern WORD = Pattern.compile("\\b\\p{L}*'?\\p{L}*"); |
| |
| |
| @NonNls |
| private static final Pattern WORD_IN_QUOTES = Pattern.compile("'([^']*)'"); |
| |
| @Override |
| public void split(@Nullable String text, @NotNull TextRange range, Consumer<TextRange> consumer) { |
| if (text == null || range.getLength() < 1 || range.getStartOffset() < 0) { |
| return; |
| } |
| |
| List<TextRange> extracted = excludeByPattern(text, range, WORD_IN_QUOTES, 1); |
| |
| for (TextRange textRange : extracted) { |
| List<TextRange> words = splitByCase(text, textRange); |
| |
| if (words.size() == 0) { |
| continue; |
| } |
| |
| if (words.size() == 1) { |
| addWord(consumer, false, words.get(0)); |
| continue; |
| } |
| |
| boolean isCapitalized = Strings.isCapitalized(text, words.get(0)); |
| boolean containsShortWord = containsShortWord(words); |
| |
| if (isCapitalized && containsShortWord) { |
| continue; |
| } |
| |
| boolean isAllWordsAreUpperCased = isAllWordsAreUpperCased(text, words); |
| |
| for (TextRange word : words) { |
| boolean uc = Strings.isUpperCased(text, word); |
| boolean flag = (uc && !isAllWordsAreUpperCased); |
| Matcher matcher = WORD.matcher(text.substring(word.getStartOffset(), word.getEndOffset())); |
| if (matcher.find()) { |
| TextRange found = matcherRange(word, matcher); |
| addWord(consumer, flag, found); |
| } |
| } |
| } |
| } |
| |
| @NotNull |
| private static List<TextRange> splitByCase(@NotNull String text, @NotNull TextRange range) { |
| //System.out.println("text = " + text + " range = " + range); |
| List<TextRange> result = new ArrayList<TextRange>(); |
| int i = range.getStartOffset(); |
| int s = -1; |
| int prevType = Character.MATH_SYMBOL; |
| while (i < range.getEndOffset()) { |
| final char ch = text.charAt(i); |
| if (ch >= '\u3040' && ch <= '\u309f' || // Hiragana |
| ch >= '\u30A0' && ch <= '\u30ff' || // Katakana |
| ch >= '\u4E00' && ch <= '\u9FFF' || // CJK Unified ideographs |
| ch >= '\uF900' && ch <= '\uFAFF' || // CJK Compatibility Ideographs |
| ch >= '\uFF00' && ch <= '\uFFEF' //Halfwidth and Fullwidth Forms of Katakana & Fullwidth ASCII variants |
| ) { |
| if (s >= 0) { |
| add(text, result, i, s); |
| s = -1; |
| } |
| prevType = Character.MATH_SYMBOL; |
| ++i; |
| continue; |
| } |
| |
| final int type = Character.getType(ch); |
| if (type == Character.LOWERCASE_LETTER || |
| type == Character.UPPERCASE_LETTER || |
| type == Character.TITLECASE_LETTER || |
| type == Character.OTHER_LETTER || |
| type == Character.MODIFIER_LETTER || |
| type == Character.OTHER_PUNCTUATION |
| ) { |
| //letter |
| if (s < 0) { |
| //start |
| s = i; |
| } |
| else if (s >= 0 && type == Character.UPPERCASE_LETTER && prevType == Character.LOWERCASE_LETTER) { |
| //a|Camel |
| add(text, result, i, s); |
| s = i; |
| } |
| else if (i - s >= 1 && type == Character.LOWERCASE_LETTER && prevType == Character.UPPERCASE_LETTER) { |
| //CAPITALN|ext |
| add(text, result, i - 1, s); |
| s = i - 1; |
| } |
| } |
| else if (s >= 0) { |
| //non-letter |
| add(text, result, i, s); |
| s = -1; |
| } |
| prevType = type; |
| i++; |
| } |
| //remainder |
| if (s >= 0) { |
| add(text, result, i, s); |
| } |
| return result; |
| } |
| |
| private static void add(String text, List<TextRange> result, int i, int s) { |
| if (i - s > 3) { |
| final TextRange textRange = new TextRange(s, i); |
| //System.out.println("textRange = " + textRange + " = "+ textRange.substring(text)); |
| result.add(textRange); |
| } |
| } |
| } |