1 files changed, 69 insertions, 81 deletions
diff --git a/scintilla/lexlib/StyleContext.h b/scintilla/lexlib/StyleContext.h
index 2af34cd..abd69e9 100644
--- a/scintilla/lexlib/StyleContext.h
+++ b/scintilla/lexlib/StyleContext.h
@@ -1,5 +1,5 @@
 // Scintilla source code edit control
-/** @file StyleContext.cxx
+/** @file StyleContext.h
  ** Lexer infrastructure.
  **/
 // Copyright 1998-2004 by Neil Hodgson <neilh@scintilla.org>
@@ -19,67 +19,36 @@ static inline int MakeLowerCase(int ch) {
 		return ch - 'A' + 'a';
 }
 
-inline int UnicodeCodePoint(const unsigned char *us) {
-	if (us[0] < 0xC2) {
-		return us[0];
-	} else if (us[0] < 0xE0) {
-		return ((us[0] & 0x1F) << 6) + (us[1] & 0x3F);
-	} else if (us[0] < 0xF0) {
-		return ((us[0] & 0xF) << 12) + ((us[1] & 0x3F) << 6) + (us[2] & 0x3F);
-	} else if (us[0] < 0xF5) {
-		return ((us[0] & 0x7) << 18) + ((us[1] & 0x3F) << 12) + ((us[2] & 0x3F) << 6) + (us[3] & 0x3F);
-	}
-	return us[0];
-}
-
-inline int BytesInUnicodeCodePoint(int codePoint) {
-	if (codePoint < 0x80)
-		return 1;
-	else if (codePoint < 0x800)
-		return 2;
-	else if (codePoint < 0x10000)
-		return 3;
-	else
-		return 4;
-}
-
 // All languages handled so far can treat all characters >= 0x80 as one class
 // which just continues the current token or starts an identifier if in default.
 // DBCS treated specially as the second character can be < 0x80 and hence
 // syntactically significant. UTF-8 avoids this as all trail bytes are >= 0x80
 class StyleContext {
 	LexAccessor &styler;
+	IDocumentWithLineEnd *multiByteAccess;
 	unsigned int endPos;
 	unsigned int lengthDocument;
+	
+	// Used for optimizing GetRelativeCharacter
+	unsigned int posRelative;
+	unsigned int currentPosLastRelative;
+	int offsetRelative;
+
 	StyleContext &operator=(const StyleContext &);
 
-	void GetNextChar(unsigned int pos) {
-		chNext = static_cast<unsigned char>(styler.SafeGetCharAt(pos+1, 0));
-		if (styler.Encoding() == encUnicode) {
-			if (chNext >= 0x80) {
-				unsigned char bytes[4] = { static_cast<unsigned char>(chNext), 0, 0, 0 };
-				for (int trail=1; trail<3; trail++) {
-					bytes[trail] = static_cast<unsigned char>(styler.SafeGetCharAt(pos+1+trail, 0));
-					if (!((bytes[trail] >= 0x80) && (bytes[trail] < 0xc0))) {
-						bytes[trail] = 0;
-						break;
-					}
-				}
-				chNext = UnicodeCodePoint(bytes);
-			}
-		} else if (styler.Encoding() == encDBCS) {
-			if (styler.IsLeadByte(static_cast<char>(chNext))) {
-				chNext = chNext << 8;
-				chNext |= static_cast<unsigned char>(styler.SafeGetCharAt(pos+2, 0));
-			}
+	void GetNextChar() {
+		if (multiByteAccess) {
+			chNext = multiByteAccess->GetCharacterAndWidth(currentPos+width, &widthNext);
+		} else {
+			chNext = static_cast<unsigned char>(styler.SafeGetCharAt(currentPos+width, 0));
+			widthNext = 1;
 		}
-		// End of line?
-		// Trigger on CR only (Mac style) or either on LF from CR+LF (Dos/Win)
-		// or on LF alone (Unix). Avoid triggering two times on Dos/Win.
+		// End of line determined from line end position, allowing CR, LF, 
+		// CRLF and Unicode line ends as set by document.
 		if (currentLine < lineDocEnd)
-			atLineEnd = static_cast<int>(pos) >= (lineStartNext-1);
+			atLineEnd = static_cast<int>(currentPos) >= (lineStartNext-1);
 		else // Last line
-			atLineEnd = static_cast<int>(pos) >= lineStartNext;
+			atLineEnd = static_cast<int>(currentPos) >= lineStartNext;
 	}
 
 public:
@@ -92,12 +61,18 @@ public:
 	int state;
 	int chPrev;
 	int ch;
+	int width;
 	int chNext;
+	int widthNext;
 
 	StyleContext(unsigned int startPos, unsigned int length,
                         int initStyle, LexAccessor &styler_, char chMask=31) :
 		styler(styler_),
+		multiByteAccess(0),
 		endPos(startPos + length),
+		posRelative(0),
+		currentPosLastRelative(0x7FFFFFFF),
+		offsetRelative(0),
 		currentPos(startPos),
 		currentLine(-1),
 		lineStartNext(-1),
@@ -105,7 +80,12 @@ public:
 		state(initStyle & chMask), // Mask off all bits which aren't in the chMask.
 		chPrev(0),
 		ch(0),
-		chNext(0) {
+		width(0),
+		chNext(0),
+		widthNext(1) {
+		if (styler.Encoding() != enc8bit) {
+			multiByteAccess = styler.MultiByteAccess();
+		}
 		styler.StartAt(startPos, chMask);
 		styler.StartSegment(startPos);
 		currentLine = styler.GetLine(startPos);
@@ -115,21 +95,14 @@ public:
 			endPos++;
 		lineDocEnd = styler.GetLine(lengthDocument);
 		atLineStart = static_cast<unsigned int>(styler.LineStart(currentLine)) == startPos;
-		unsigned int pos = currentPos;
-		ch = static_cast<unsigned char>(styler.SafeGetCharAt(pos, 0));
-		if (styler.Encoding() == encUnicode) {
-			// Get the current char
-			GetNextChar(pos-1);
-			ch = chNext;
-			pos += BytesInUnicodeCodePoint(ch) - 1;
-		} else if (styler.Encoding() == encDBCS) {
-			if (styler.IsLeadByte(static_cast<char>(ch))) {
-				pos++;
-				ch = ch << 8;
-				ch |= static_cast<unsigned char>(styler.SafeGetCharAt(pos, 0));
-			}
-		}
-		GetNextChar(pos);
+
+		// Variable width is now 0 so GetNextChar gets the char at currentPos into chNext/widthNext
+		width = 0;
+		GetNextChar();
+		ch = chNext;
+		width = widthNext;
+
+		GetNextChar();
 	}
 	void Complete() {
 		styler.ColourTo(currentPos - ((currentPos > lengthDocument) ? 2 : 1), state);
@@ -146,23 +119,10 @@ public:
 				lineStartNext = styler.LineStart(currentLine+1);
 			}
 			chPrev = ch;
-			if (styler.Encoding() == encUnicode) {
-				currentPos += BytesInUnicodeCodePoint(ch);
-			} else if (styler.Encoding() == encDBCS) {
-				currentPos++;
-				if (ch >= 0x100)
-					currentPos++;
-			} else {
-				currentPos++;
-			}
+			currentPos += width;
 			ch = chNext;
-			if (styler.Encoding() == encUnicode) {
-				GetNextChar(currentPos + BytesInUnicodeCodePoint(ch)-1);
-			} else if (styler.Encoding() == encDBCS) {
-				GetNextChar(currentPos + ((ch >= 0x100) ? 1 : 0));
-			} else {
-				GetNextChar(currentPos);
-			}
+			width = widthNext;
+			GetNextChar();
 		} else {
 			atLineStart = false;
 			chPrev = ' ';
@@ -176,6 +136,12 @@ public:
 			Forward();
 		}
 	}
+	void ForwardBytes(int nb) {
+		size_t forwardPos = currentPos + nb;
+		while (forwardPos > currentPos) {
+			Forward();
+		}
+	}
 	void ChangeState(int state_) {
 		state = state_;
 	}
@@ -194,6 +160,28 @@ public:
 	int GetRelative(int n) {
 		return static_cast<unsigned char>(styler.SafeGetCharAt(currentPos+n, 0));
 	}
+	int GetRelativeCharacter(int n) {
+		if (n == 0)
+			return ch;
+		if (multiByteAccess) {
+			if ((currentPosLastRelative != currentPos) ||
+				((n > 0) && ((offsetRelative < 0) || (n < offsetRelative))) ||
+				((n < 0) && ((offsetRelative > 0) || (n > offsetRelative)))) {
+				posRelative = currentPos;
+				offsetRelative = 0;
+			}
+			int diffRelative = n - offsetRelative;
+			int posNew = multiByteAccess->GetRelativePosition(posRelative, diffRelative);
+			int ch = multiByteAccess->GetCharacterAndWidth(posNew, 0);
+			posRelative = posNew;
+			currentPosLastRelative = currentPos;
+			offsetRelative = n;
+			return ch;
+		} else {
+			// fast version for single byte encodings
+			return static_cast<unsigned char>(styler.SafeGetCharAt(currentPos + n, 0));
+		}
+	}
 	bool Match(char ch0) const {
 		return ch == static_cast<unsigned char>(ch0);
 	}