summary | refs | log | tree | commit | diff | stats
path: root/scintilla/lexlib/StyleContext.h
diff options
context:
space:
mode:
Diffstat (limited to 'scintilla/lexlib/StyleContext.h')
-rw-r--r-- scintilla/lexlib/StyleContext.h 150
1 files changed, 69 insertions, 81 deletions
diff --git a/scintilla/lexlib/StyleContext.h b/scintilla/lexlib/StyleContext.h
index 2af34cd..abd69e9 100644
--- a/scintilla/lexlib/StyleContext.h
+++ b/scintilla/lexlib/StyleContext.h
@@ -1,5 +1,5 @@
// Scintilla source code edit control
-/** @file StyleContext.cxx
+/** @file StyleContext.h
** Lexer infrastructure.
**/
// Copyright 1998-2004 by Neil Hodgson <neilh@scintilla.org>
@@ -19,67 +19,36 @@ static inline int MakeLowerCase(int ch) {
return ch - 'A' + 'a';
}
-inline int UnicodeCodePoint(const unsigned char *us) {
- if (us[0] < 0xC2) {
- return us[0];
- } else if (us[0] < 0xE0) {
- return ((us[0] & 0x1F) << 6) + (us[1] & 0x3F);
- } else if (us[0] < 0xF0) {
- return ((us[0] & 0xF) << 12) + ((us[1] & 0x3F) << 6) + (us[2] & 0x3F);
- } else if (us[0] < 0xF5) {
- return ((us[0] & 0x7) << 18) + ((us[1] & 0x3F) << 12) + ((us[2] & 0x3F) << 6) + (us[3] & 0x3F);
- }
- return us[0];
-}
-
-inline int BytesInUnicodeCodePoint(int codePoint) {
- if (codePoint < 0x80)
- return 1;
- else if (codePoint < 0x800)
- return 2;
- else if (codePoint < 0x10000)
- return 3;
- else
- return 4;
-}
-
// All languages handled so far can treat all characters >= 0x80 as one class
// which just continues the current token or starts an identifier if in default.
// DBCS treated specially as the second character can be < 0x80 and hence
// syntactically significant. UTF-8 avoids this as all trail bytes are >= 0x80
class StyleContext {
LexAccessor &styler;
+ IDocumentWithLineEnd *multiByteAccess;
unsigned int endPos;
unsigned int lengthDocument;
+
+ // Used for optimizing GetRelativeCharacter
+ unsigned int posRelative;
+ unsigned int currentPosLastRelative;
+ int offsetRelative;
+
StyleContext &operator=(const StyleContext &);
- void GetNextChar(unsigned int pos) {
- chNext = static_cast<unsigned char>(styler.SafeGetCharAt(pos+1, 0));
- if (styler.Encoding() == encUnicode) {
- if (chNext >= 0x80) {
- unsigned char bytes[4] = { static_cast<unsigned char>(chNext), 0, 0, 0 };
- for (int trail=1; trail<3; trail++) {
- bytes[trail] = static_cast<unsigned char>(styler.SafeGetCharAt(pos+1+trail, 0));
- if (!((bytes[trail] >= 0x80) && (bytes[trail] < 0xc0))) {
- bytes[trail] = 0;
- break;
- }
- }
- chNext = UnicodeCodePoint(bytes);
- }
- } else if (styler.Encoding() == encDBCS) {
- if (styler.IsLeadByte(static_cast<char>(chNext))) {
- chNext = chNext << 8;
- chNext |= static_cast<unsigned char>(styler.SafeGetCharAt(pos+2, 0));
- }
+ void GetNextChar() { // Loads chNext/widthNext with the character that starts at currentPos+width
+ if (multiByteAccess) { // only set by the constructor when styler.Encoding() != enc8bit
+ chNext = multiByteAccess->GetCharacterAndWidth(currentPos+width, &widthNext); // the document reports the width through widthNext
+ } else {
+ chNext = static_cast<unsigned char>(styler.SafeGetCharAt(currentPos+width, 0)); // 0 is the default passed to SafeGetCharAt
+ widthNext = 1; // exactly one byte was read on this path
}
- // End of line?
- // Trigger on CR only (Mac style) or either on LF from CR+LF (Dos/Win)
- // or on LF alone (Unix). Avoid triggering two times on Dos/Win.
+ // End of line determined from line end position, allowing CR, LF,
+ // CRLF and Unicode line ends as set by document.
if (currentLine < lineDocEnd)
- atLineEnd = static_cast<int>(pos) >= (lineStartNext-1);
+ atLineEnd = static_cast<int>(currentPos) >= (lineStartNext-1); // true from the position just before the next line's start
else // Last line
- atLineEnd = static_cast<int>(pos) >= lineStartNext;
+ atLineEnd = static_cast<int>(currentPos) >= lineStartNext; // on the last line only lineStartNext itself counts
}
public:
@@ -92,12 +61,18 @@ public:
int state;
int chPrev;
int ch;
+ int width;
int chNext;
+ int widthNext;
StyleContext(unsigned int startPos, unsigned int length,
int initStyle, LexAccessor &styler_, char chMask=31) :
styler(styler_),
+ multiByteAccess(0),
endPos(startPos + length),
+ posRelative(0),
+ currentPosLastRelative(0x7FFFFFFF),
+ offsetRelative(0),
currentPos(startPos),
currentLine(-1),
lineStartNext(-1),
@@ -105,7 +80,12 @@ public:
state(initStyle & chMask), // Mask off all bits which aren't in the chMask.
chPrev(0),
ch(0),
- chNext(0) {
+ width(0),
+ chNext(0),
+ widthNext(1) {
+ if (styler.Encoding() != enc8bit) {
+ multiByteAccess = styler.MultiByteAccess();
+ }
styler.StartAt(startPos, chMask);
styler.StartSegment(startPos);
currentLine = styler.GetLine(startPos);
@@ -115,21 +95,14 @@ public:
endPos++;
lineDocEnd = styler.GetLine(lengthDocument);
atLineStart = static_cast<unsigned int>(styler.LineStart(currentLine)) == startPos;
- unsigned int pos = currentPos;
- ch = static_cast<unsigned char>(styler.SafeGetCharAt(pos, 0));
- if (styler.Encoding() == encUnicode) {
- // Get the current char
- GetNextChar(pos-1);
- ch = chNext;
- pos += BytesInUnicodeCodePoint(ch) - 1;
- } else if (styler.Encoding() == encDBCS) {
- if (styler.IsLeadByte(static_cast<char>(ch))) {
- pos++;
- ch = ch << 8;
- ch |= static_cast<unsigned char>(styler.SafeGetCharAt(pos, 0));
- }
- }
- GetNextChar(pos);
+
+ // Variable width is now 0 so GetNextChar gets the char at currentPos into chNext/widthNext
+ width = 0;
+ GetNextChar();
+ ch = chNext;
+ width = widthNext;
+
+ GetNextChar();
}
void Complete() {
styler.ColourTo(currentPos - ((currentPos > lengthDocument) ? 2 : 1), state);
@@ -146,23 +119,10 @@ public:
lineStartNext = styler.LineStart(currentLine+1);
}
chPrev = ch;
- if (styler.Encoding() == encUnicode) {
- currentPos += BytesInUnicodeCodePoint(ch);
- } else if (styler.Encoding() == encDBCS) {
- currentPos++;
- if (ch >= 0x100)
- currentPos++;
- } else {
- currentPos++;
- }
+ currentPos += width;
ch = chNext;
- if (styler.Encoding() == encUnicode) {
- GetNextChar(currentPos + BytesInUnicodeCodePoint(ch)-1);
- } else if (styler.Encoding() == encDBCS) {
- GetNextChar(currentPos + ((ch >= 0x100) ? 1 : 0));
- } else {
- GetNextChar(currentPos);
- }
+ width = widthNext;
+ GetNextChar();
} else {
atLineStart = false;
chPrev = ' ';
@@ -176,6 +136,12 @@ public:
Forward();
}
}
+ void ForwardBytes(int nb) { // Calls Forward() until currentPos has moved at least nb bytes, or until it can move no further
+ for (unsigned int forwardPos = currentPos + nb, posBefore = currentPos; forwardPos > currentPos; posBefore = currentPos) {
+ Forward();
+ if (currentPos == posBefore) break; // Forward() made no progress, so forwardPos is unreachable: stop instead of looping forever
+ }
+ }
void ChangeState(int state_) {
state = state_;
}
@@ -194,6 +160,28 @@ public:
int GetRelative(int n) {
return static_cast<unsigned char>(styler.SafeGetCharAt(currentPos+n, 0));
}
+ int GetRelativeCharacter(int n) { // Returns the character at offset n from the current one; n == 0 is the current character
+ if (n == 0)
+ return ch;
+ if (multiByteAccess) {
+ if ((currentPosLastRelative != currentPos) || // the cached lookup was made from a different currentPos
+ ((n > 0) && ((offsetRelative < 0) || (n < offsetRelative))) || // cached offset is on the other side or already past n
+ ((n < 0) && ((offsetRelative > 0) || (n > offsetRelative)))) {
+ posRelative = currentPos; // discard the cache and measure from currentPos again
+ offsetRelative = 0;
+ }
+ const int diffRelative = n - offsetRelative; // distance still to travel from the cached position
+ const int posNew = multiByteAccess->GetRelativePosition(posRelative, diffRelative);
+ const int chReturn = multiByteAccess->GetCharacterAndWidth(posNew, 0); // own name so it does not shadow member ch; width is not requested
+ posRelative = posNew; // remember where this lookup ended so the next call can continue from here
+ currentPosLastRelative = currentPos;
+ offsetRelative = n;
+ return chReturn;
+ } else {
+ // fast version for single byte encodings
+ return static_cast<unsigned char>(styler.SafeGetCharAt(currentPos + n, 0));
+ }
+ }
bool Match(char ch0) const {
return ch == static_cast<unsigned char>(ch0);
}