blob: 882e11aea7c4fd5ec693be3e85354471f6e93ddb [file] [log] [blame]
/*
* Copyright 2000-2014 JetBrains s.r.o.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.intellij.lexer;
import com.intellij.openapi.diagnostic.Logger;
import com.intellij.openapi.util.text.StringUtil;
import com.intellij.psi.StringEscapesTokenTypes;
import com.intellij.psi.tree.IElementType;
import org.jetbrains.annotations.NotNull;
/**
 * Lexer that splits the text of a string literal into plain chunks and escape sequences.
 * <p>
 * Plain chunks are reported with the token type supplied to the constructor; sequences that start
 * with a backslash are reported as one of the {@link StringEscapesTokenTypes} constants
 * (valid escape, invalid character escape or invalid unicode escape).
 * <p>
 * The lexer is stateful: {@link #getTokenType()} also maintains the "only escaped spaces seen so far"
 * flag that is used to recognize framing escaped spaces, so it is expected to be queried for each token.
 *
 * @author max
 */
public class StringLiteralLexer extends LexerBase {
  private static final Logger LOG = Logger.getInstance("#com.intellij.lexer.StringLiteralLexer");

  /** State in which the literal content is scanned; escape sequences are only expected in this state. */
  private static final short AFTER_FIRST_QUOTE = 1;
  /** Terminal state: once it is reached, only empty tokens are produced. */
  private static final short AFTER_LAST_QUOTE = 2;

  /** Quote character value meaning that the literal is not framed by quotes. */
  public static final char NO_QUOTE_CHAR = (char)-1;

  private CharSequence myBuffer;
  private int myStart;
  private int myEnd;
  private int myState;
  private int myLastState;
  private int myBufferEnd;
  private final char myQuoteChar;
  private final IElementType myOriginalLiteralToken;
  private final boolean myCanEscapeEolOrFramingSpaces;
  private final String myAdditionalValidEscapes;
  private boolean mySeenEscapedSpacesOnly;
  private final boolean myAllowOctal;
  private final boolean myAllowHex;

  /**
   * Creates a lexer without escaped end-of-line / framing-space support and without additional escapes.
   * Octal escapes are allowed, hex escapes are not.
   *
   * @param quoteChar            the character framing the literal, or {@link #NO_QUOTE_CHAR}
   * @param originalLiteralToken the token type reported for the non-escape parts of the literal
   */
  public StringLiteralLexer(char quoteChar, final IElementType originalLiteralToken) {
    this(quoteChar, originalLiteralToken, false, null);
  }

  /**
   * Creates a lexer with octal escapes allowed and hex escapes disallowed.
   *
   * @param quoteChar                   the character framing the literal, or {@link #NO_QUOTE_CHAR}
   * @param originalLiteralToken        the token type reported for the non-escape parts of the literal
   * @param canEscapeEolOrFramingSpaces true if following sequences are acceptable:
   *                                    '\' in the end of the buffer (meaning escaped end of line) or
   *                                    '\ ' (escaped space) in the beginning and in the end of the buffer
   *                                    (meaning escaped space, to avoid auto trimming on load)
   * @param additionalValidEscapes      characters that are also accepted as valid when they directly follow
   *                                    a backslash; may be {@code null}
   */
  public StringLiteralLexer(char quoteChar,
                            final IElementType originalLiteralToken,
                            boolean canEscapeEolOrFramingSpaces,
                            String additionalValidEscapes) {
    this(quoteChar, originalLiteralToken, canEscapeEolOrFramingSpaces, additionalValidEscapes, true, false);
  }

  /**
   * @param quoteChar                   the character framing the literal, or {@link #NO_QUOTE_CHAR}
   * @param originalLiteralToken        the token type reported for the non-escape parts of the literal
   * @param canEscapeEolOrFramingSpaces true if following sequences are acceptable:
   *                                    '\' in the end of the buffer (meaning escaped end of line) or
   *                                    '\ ' (escaped space) in the beginning and in the end of the buffer
   *                                    (meaning escaped space, to avoid auto trimming on load)
   * @param additionalValidEscapes      characters that are also accepted as valid when they directly follow
   *                                    a backslash; may be {@code null}
   * @param allowOctal                  true if a backslash followed by up to three octal digits is a valid escape
   * @param allowHex                    true if {@code \x} followed by two hex digits is a valid escape
   */
  public StringLiteralLexer(char quoteChar,
                            final IElementType originalLiteralToken,
                            boolean canEscapeEolOrFramingSpaces,
                            String additionalValidEscapes,
                            boolean allowOctal,
                            boolean allowHex) {
    myQuoteChar = quoteChar;
    myOriginalLiteralToken = originalLiteralToken;
    myCanEscapeEolOrFramingSpaces = canEscapeEolOrFramingSpaces;
    myAdditionalValidEscapes = additionalValidEscapes;
    myAllowOctal = allowOctal;
    myAllowHex = allowHex;
  }

  @Override
  public void start(@NotNull CharSequence buffer, int startOffset, int endOffset, int initialState) {
    myBuffer = buffer;
    myStart = startOffset;
    // Without a quote character there is no opening quote to consume, so scanning starts inside the content.
    myState = myQuoteChar == NO_QUOTE_CHAR ? AFTER_FIRST_QUOTE : initialState;
    myLastState = initialState;
    myBufferEnd = endOffset;
    myEnd = locateToken(myStart);
    mySeenEscapedSpacesOnly = true;
  }

  /**
   * @return the state that was in effect before the current token was located
   */
  @Override
  public int getState() {
    return myLastState;
  }

  /**
   * Classifies the current token. As a side effect updates the flag that tracks whether
   * only escaped spaces have been seen so far.
   *
   * @return {@code null} for an empty token, the original literal token type for a chunk that does not
   * start with a backslash, otherwise one of the {@link StringEscapesTokenTypes} constants
   */
  @Override
  public IElementType getTokenType() {
    if (myStart >= myEnd) return null;

    if (myBuffer.charAt(myStart) != '\\') {
      mySeenEscapedSpacesOnly = false;
      return myOriginalLiteralToken;
    }

    // A lone backslash with nothing after it inside the token.
    if (myStart + 1 >= myEnd) return StringEscapesTokenTypes.INVALID_CHARACTER_ESCAPE_TOKEN;

    char nextChar = myBuffer.charAt(myStart + 1);
    mySeenEscapedSpacesOnly &= nextChar == ' ';
    if (myCanEscapeEolOrFramingSpaces &&
        (nextChar == '\n' || nextChar == ' ' && (mySeenEscapedSpacesOnly || isTrailingSpace(myStart + 2)))) {
      return StringEscapesTokenTypes.VALID_STRING_ESCAPE_TOKEN;
    }

    if (nextChar == 'u') {
      return hasHexDigits(myStart + 2, 4)
             ? StringEscapesTokenTypes.VALID_STRING_ESCAPE_TOKEN
             : StringEscapesTokenTypes.INVALID_UNICODE_ESCAPE_TOKEN;
    }

    if (nextChar == 'x' && myAllowHex) {
      return hasHexDigits(myStart + 2, 2)
             ? StringEscapesTokenTypes.VALID_STRING_ESCAPE_TOKEN
             : StringEscapesTokenTypes.INVALID_UNICODE_ESCAPE_TOKEN;
    }

    switch (nextChar) {
      case '0':
      case '1':
      case '2':
      case '3':
      case '4':
      case '5':
      case '6':
      case '7':
        if (!myAllowOctal) return StringEscapesTokenTypes.INVALID_CHARACTER_ESCAPE_TOKEN;
        //noinspection fallthrough
      case 'n':
      case 'r':
      case 'b':
      case 't':
      case 'f':
      case '\'':
      case '\"':
      case '\\':
        return StringEscapesTokenTypes.VALID_STRING_ESCAPE_TOKEN;
    }

    if (myAdditionalValidEscapes != null && myAdditionalValidEscapes.indexOf(nextChar) != -1) {
      return StringEscapesTokenTypes.VALID_STRING_ESCAPE_TOKEN;
    }
    return StringEscapesTokenTypes.INVALID_CHARACTER_ESCAPE_TOKEN;
  }

  /**
   * @return true if each of the {@code count} characters starting at {@code from} lies inside
   * the current token and is a hex digit
   */
  private boolean hasHexDigits(int from, int count) {
    for (int i = from; i < from + count; i++) {
      if (i >= myEnd || !StringUtil.isHexDigit(myBuffer.charAt(i))) return false;
    }
    return true;
  }

  /**
   * @return true if all characters from {@code start} up to the end of the buffer
   * form a (possibly empty) run of escaped spaces
   */
  private boolean isTrailingSpace(final int start) {
    for (int i = start; i < myBufferEnd; i += 2) {
      final char c = myBuffer.charAt(i);
      if (c != '\\') return false;
      // A backslash that is the very last character has no space to escape.
      if (i == myBufferEnd - 1) return false;
      if (myBuffer.charAt(i + 1) != ' ') return false;
    }
    return true;
  }

  @Override
  public int getTokenStart() {
    return myStart;
  }

  @Override
  public int getTokenEnd() {
    return myEnd;
  }

  /**
   * Finds the end offset of the token starting at {@code start}, updating the lexer state on the way.
   *
   * @param start offset of the first character of the token
   * @return the offset just past the token; equal to {@code start} once the terminal state is reached
   */
  private int locateToken(int start) {
    if (start == myBufferEnd) {
      myState = AFTER_LAST_QUOTE;
    }
    if (myState == AFTER_LAST_QUOTE) return start;

    if (myBuffer.charAt(start) == '\\') {
      return locateEscapeEnd(start);
    }

    LOG.assertTrue(myState == AFTER_FIRST_QUOTE || myBuffer.charAt(start) == myQuoteChar, this);
    int i = start;
    while (i < myBufferEnd) {
      if (myBuffer.charAt(i) == '\\') {
        // The plain chunk ends where the next escape sequence begins.
        return i;
      }
      if (myState == AFTER_FIRST_QUOTE && myBuffer.charAt(i) == myQuoteChar) {
        if (i + 1 == myBufferEnd) myState = AFTER_LAST_QUOTE;
        return i + 1;
      }
      i++;
      // Whatever the first character was (including an opening quote), the content follows it.
      myState = AFTER_FIRST_QUOTE;
    }
    return i;
  }

  /**
   * Finds the end offset of the escape sequence whose backslash is located at {@code start}.
   */
  private int locateEscapeEnd(int start) {
    LOG.assertTrue(myState == AFTER_FIRST_QUOTE, this);
    int i = start + 1;
    if (i == myBufferEnd || myBuffer.charAt(i) == '\n' && !myCanEscapeEolOrFramingSpaces) {
      myState = AFTER_LAST_QUOTE;
      return i;
    }

    char escaped = myBuffer.charAt(i);
    if (myAllowOctal && isOctalDigit(escaped)) {
      i++;
      if (i < myBufferEnd && isOctalDigit(myBuffer.charAt(i))) {
        i++;
        // A third digit is consumed only when the first one is in '0'..'3'.
        if (i < myBufferEnd && escaped <= '3' && isOctalDigit(myBuffer.charAt(i))) {
          i++;
        }
      }
      return i;
    }

    if (myAllowHex && escaped == 'x') {
      // backslash + 'x' + up to two characters
      return skipEscapeBody(i + 1, start + 4);
    }

    if (escaped == 'u') {
      // backslash + 'u' + up to four characters
      return skipEscapeBody(i + 1, start + 6);
    }

    return i + 1;
  }

  /**
   * Advances from {@code from} towards {@code limit}, stopping early at the end of the buffer,
   * at a line feed or at the quote character. The characters skipped are not validated here.
   *
   * @return the offset at which the scan stopped
   */
  private int skipEscapeBody(int from, int limit) {
    int i = from;
    while (i < limit) {
      if (i == myBufferEnd || myBuffer.charAt(i) == '\n' || myBuffer.charAt(i) == myQuoteChar) {
        break;
      }
      i++;
    }
    return i;
  }

  private static boolean isOctalDigit(char c) {
    return c >= '0' && c <= '7';
  }

  @Override
  public void advance() {
    myLastState = myState;
    myStart = myEnd;
    myEnd = locateToken(myStart);
  }

  @NotNull
  @Override
  public CharSequence getBufferSequence() {
    return myBuffer;
  }

  @Override
  public int getBufferEnd() {
    return myBufferEnd;
  }

  /**
   * Returns the text of the current token for diagnostics, or {@code null} when there is no buffer yet
   * or the token range is not valid for the buffer.
   * <p>
   * The range check matters because this lexer is used as an assertion message from within
   * {@link #locateToken(int)}; when that happens during {@link #start}, the token end has not been
   * refreshed yet and may be smaller than the token start.
   */
  private CharSequence currentTokenTextForDiagnostics() {
    if (myBuffer == null) return null;
    if (myStart < 0 || myStart > myEnd || myEnd > myBuffer.length()) return null;
    return myBuffer.subSequence(myStart, myEnd);
  }

  @SuppressWarnings("HardCodedStringLiteral")
  @Override
  public String toString() {
    return "StringLiteralLexer {" +
           "myAllowHex=" + myAllowHex +
           ", myAllowOctal=" + myAllowOctal +
           ", mySeenEscapedSpacesOnly=" + mySeenEscapedSpacesOnly +
           ", myAdditionalValidEscapes='" + myAdditionalValidEscapes + '\'' +
           ", myCanEscapeEolOrFramingSpaces=" + myCanEscapeEolOrFramingSpaces +
           ", myOriginalLiteralToken=" + myOriginalLiteralToken +
           ", myQuoteChar=" + myQuoteChar +
           ", myBufferEnd=" + myBufferEnd +
           ", myLastState=" + myLastState +
           ", myState=" + myState +
           ", myEnd=" + myEnd +
           ", myStart=" + myStart +
           ", myToken=" + currentTokenTextForDiagnostics() +
           '}';
  }
}