update scintilla (HG 9739512b19c4)

git-svn-id: https://notepad2-mod.googlecode.com/svn/trunk@741 28bd50df-7adb-d945-0439-6e466c6a13cc
author: XhmikosR <xhmikosr@users.sourceforge.net> 2012-05-27 15:08:02 +0000
committer: XhmikosR <xhmikosr@users.sourceforge.net> 2012-05-27 15:08:02 +0000
commit: 9e03d34a5411bd1736e67f7d6a5a4a7ec4b42345 (patch)
tree: 10c2f7519222c810563a80f3e02e99bcd79401ee
parent: 72f1da668f0176d3cbea477af14e3c0ab6cfa504 (diff)
download: notepad2-mod-9e03d34a5411bd1736e67f7d6a5a4a7ec4b42345.zip
notepad2-mod-9e03d34a5411bd1736e67f7d6a5a4a7ec4b42345.tar.gz
notepad2-mod-9e03d34a5411bd1736e67f7d6a5a4a7ec4b42345.tar.bz2
15 files changed, 335 insertions, 209 deletions
diff --git a/Readme-mod.txt b/Readme-mod.txt
index 71b09ee..d6ad800 100644
--- a/Readme-mod.txt
+++ b/Readme-mod.txt
@@ -31,6 +31,6 @@ Ctrl+Alt+F2         Expand selection to next match.
 Ctrl+Alt+Shift+F2   Expand selection to previous match.
 Ctrl+Shift+Enter    New line with toggled auto indent option.
 
-Notepad2-mod 4.2.25 has been created with Scintilla 3.1.0 HG 326600b303dc.
+Notepad2-mod 4.2.25 has been created with Scintilla 3.1.0 HG 9739512b19c4.
 You can use WDK 7.1, MSVC 2010 or Intel C++ Compiler XE 2011 SP1 Update 10
 to build Notepad2-mod.
diff --git a/scintilla/doc/ScintillaDoc.html b/scintilla/doc/ScintillaDoc.html
index e02da3c..307b664 100644
--- a/scintilla/doc/ScintillaDoc.html
+++ b/scintilla/doc/ScintillaDoc.html
@@ -387,6 +387,7 @@
      <a class="message" href="#SCI_APPENDTEXT">SCI_APPENDTEXT(int length, const char *s)</a><br />
      <a class="message" href="#SCI_INSERTTEXT">SCI_INSERTTEXT(int pos, const char *text)</a><br />
      <a class="message" href="#SCI_CLEARALL">SCI_CLEARALL</a><br />
+     <a class="message" href="#SCI_DELETERANGE">SCI_DELETERANGE(int pos, int deleteLength)</a><br />
      <a class="message" href="#SCI_CLEARDOCUMENTSTYLE">SCI_CLEARDOCUMENTSTYLE</a><br />
      <a class="message" href="#SCI_GETCHARAT">SCI_GETCHARAT(int position)</a><br />
      <a class="message" href="#SCI_GETSTYLEAT">SCI_GETSTYLEAT(int position)</a><br />
@@ -519,6 +520,9 @@
     <p><b id="SCI_CLEARALL">SCI_CLEARALL</b><br />
      Unless the document is read-only, this deletes all the text.</p>
 
+    <p><b id="SCI_DELETERANGE">SCI_DELETERANGE(int pos, int deleteLength)</b><br />
+     Deletes a range of text in the document.</p>
+
     <p><b id="SCI_CLEARDOCUMENTSTYLE">SCI_CLEARDOCUMENTSTYLE</b><br />
      When wanting to completely restyle the document, for example after choosing a lexer, the
     <code>SCI_CLEARDOCUMENTSTYLE</code> can be used to clear all styling information and reset the
@@ -4770,6 +4774,8 @@ struct Sci_RangeToFormat {
     <code><a class="message" href="#SCI_GETDIRECTFUNCTION">SCI_GETDIRECTFUNCTION</a><br />
      <a class="message" href="#SCI_GETDIRECTPOINTER">SCI_GETDIRECTPOINTER</a><br />
      <a class="message" href="#SCI_GETCHARACTERPOINTER">SCI_GETCHARACTERPOINTER</a><br />
+     <a class="message" href="#SCI_GETRANGEPOINTER">SCI_GETRANGEPOINTER(int position, int rangeLength)</a><br />
+     <a class="message" href="#SCI_GETGAPPOSITION">SCI_GETGAPPOSITION</a><br />
     </code>
 
     <p>On Windows, the message-passing scheme used to communicate between the container and
@@ -4815,8 +4821,13 @@ sptr_t CallScintilla(unsigned int iMessage, uptr_t wParam, sptr_t lParam){
     pass in the direct pointer associated with the target window.</p>
 
     <p><b id="SCI_GETCHARACTERPOINTER">SCI_GETCHARACTERPOINTER</b><br />
-     Move the gap within Scintilla so that the text of the document is stored consecutively
-     and ensure there is a NUL character after the text, then return a pointer to the first character.
+    <b id="SCI_GETRANGEPOINTER">SCI_GETRANGEPOINTER(int position, int rangeLength)</b><br />
+    <b id="SCI_GETGAPPOSITION">SCI_GETGAPPOSITION</b><br />
+     Grant temporary direct read-only access to the memory used by Scintilla to store
+     the document.</p>
+     <p><code>SCI_GETCHARACTERPOINTER</code> moves the gap within Scintilla so that the
+     text of the document is stored consecutively
+     and ensures there is a NUL character after the text, then returns a pointer to the first character.
      Applications may then pass this to a function that accepts a character pointer such as a regular
      expression search or a parser. The pointer should <em>not</em> be written to as that may desynchronize
      the internal state of Scintilla.</p>
@@ -4833,6 +4844,15 @@ sptr_t CallScintilla(unsigned int iMessage, uptr_t wParam, sptr_t lParam){
      each replacement then the operation will become O(n^2) rather than O(n). Instead, all
      matches should be found and remembered, then all the replacements performed.</p>
 
+     <p><code>SCI_GETRANGEPOINTER</code> provides direct access to just the
+     range requested. The gap is not moved unless it is within the requested range so this call
+     can be faster than <code>SCI_GETCHARACTERPOINTER</code>. 
+     This can be used by application code that is able to act on blocks of text or ranges of lines.</p>
+
+     <p><code>SCI_GETGAPPOSITION</code> returns the current gap position.
+     This is a hint that applications can use to avoid calling <code>SCI_GETRANGEPOINTER</code>
+     with a range that contains the gap, and so avoid the consequent cost of moving the gap.</p>
+
     <h2 id="MultipleViews">Multiple views</h2>
 
     <p>A Scintilla window and the document that it displays are separate entities. When you create
diff --git a/scintilla/include/Platform.h b/scintilla/include/Platform.h
index 46c3642..0f9fb00 100644
--- a/scintilla/include/Platform.h
+++ b/scintilla/include/Platform.h
@@ -20,6 +20,7 @@
 #define PLAT_MACOSX 0
 #define PLAT_WIN 0
 #define PLAT_WX  0
+#define PLAT_QT 0
 #define PLAT_FOX 0
 
 #if defined(FOX)
@@ -34,6 +35,10 @@
 #undef PLAT_GTK
 #define PLAT_GTK 1
 
+#elif defined(SCINTILLA_QT)
+#undef PLAT_QT
+#define PLAT_QT 1
+
 #if defined(__WIN32__) || defined(_MSC_VER)
 #undef PLAT_GTK_WIN32
 #define PLAT_GTK_WIN32 1
@@ -511,4 +516,10 @@ public:
 #pragma warning(disable: 4244 4309 4514 4710)
 #endif
 
+#if defined(__GNUC__) && defined(SCINTILLA_QT)
+#pragma GCC diagnostic ignored "-Wmissing-braces"
+#pragma GCC diagnostic ignored "-Wmissing-field-initializers"
+#pragma GCC diagnostic ignored "-Wchar-subscripts"
+#endif
+
 #endif
diff --git a/scintilla/include/Scintilla.h b/scintilla/include/Scintilla.h
index b9697e9..17a60c1 100644
--- a/scintilla/include/Scintilla.h
+++ b/scintilla/include/Scintilla.h
@@ -51,6 +51,7 @@ typedef sptr_t (*SciFnDirect)(sptr_t ptr, unsigned int iMessage, uptr_t wParam,
 #define SCI_ADDSTYLEDTEXT 2002
 #define SCI_INSERTTEXT 2003
 #define SCI_CLEARALL 2004
+#define SCI_DELETERANGE 2645
 #define SCI_CLEARDOCUMENTSTYLE 2005
 #define SCI_GETLENGTH 2006
 #define SCI_GETCHARAT 2007
@@ -728,6 +729,8 @@ typedef sptr_t (*SciFnDirect)(sptr_t ptr, unsigned int iMessage, uptr_t wParam,
 #define SCI_GETPOSITIONCACHE 2515
 #define SCI_COPYALLOWLINE 2519
 #define SCI_GETCHARACTERPOINTER 2520
+#define SCI_GETRANGEPOINTER 2643
+#define SCI_GETGAPPOSITION 2644
 #define SCI_SETKEYSUNICODE 2521
 #define SCI_GETKEYSUNICODE 2522
 #define SCI_INDICSETALPHA 2523
diff --git a/scintilla/include/Scintilla.iface b/scintilla/include/Scintilla.iface
index 7c51494..adc637c 100644
--- a/scintilla/include/Scintilla.iface
+++ b/scintilla/include/Scintilla.iface
@@ -101,6 +101,9 @@ fun void InsertText=2003(position pos, string text)
 # Delete all text in the document.
 fun void ClearAll=2004(,)
 
+# Delete a range of text in the document.
+fun void DeleteRange=2645(position pos, int deleteLength)
+
 # Set all style bytes to 0, remove all folding information.
 fun void ClearDocumentStyle=2005(,)
 
@@ -1924,6 +1927,15 @@ fun void CopyAllowLine=2519(,)
 # characters in the document.
 get int GetCharacterPointer=2520(,)
 
+# Return a read-only pointer to a range of characters in the document.
+# May move the gap so that the range is contiguous, but will only move up
+# to rangeLength bytes.
+get int GetRangePointer=2643(int position, int rangeLength)
+
+# Return a position which, to avoid performance costs, should not be within
+# the range of a call to GetRangePointer.
+get position GetGapPosition=2644(,)
+
 # Always interpret keyboard input as Unicode
 set void SetKeysUnicode=2521(bool keysUnicode,)
 
diff --git a/scintilla/lexers/LexHTML.cxx b/scintilla/lexers/LexHTML.cxx
index 7bda6cf..fd781e7 100644
--- a/scintilla/lexers/LexHTML.cxx
+++ b/scintilla/lexers/LexHTML.cxx
@@ -2134,7 +2134,8 @@ static void ColouriseHyperTextDoc(unsigned int startPos, int length, int initSty
 		break;
 	default:
 		StateToPrint = statePrintForState(state, inScriptType);
-		styler.ColourTo(lengthDoc - 1, StateToPrint);
+		if (static_cast<int>(styler.GetStartSegment()) < lengthDoc)
+			styler.ColourTo(lengthDoc - 1, StateToPrint);
 		break;
 	}
 
diff --git a/scintilla/src/CellBuffer.cxx b/scintilla/src/CellBuffer.cxx
index 9fb25c2..003710f 100644
--- a/scintilla/src/CellBuffer.cxx
+++ b/scintilla/src/CellBuffer.cxx
@@ -375,6 +375,14 @@ const char *CellBuffer::BufferPointer() {
 	return substance.BufferPointer();
 }
 
+const char *CellBuffer::RangePointer(int position, int rangeLength) {
+	return substance.RangePointer(position, rangeLength);
+}
+
+int CellBuffer::GapPosition() const {
+	return substance.GapPosition();
+}
+
 // The char* returned is to an allocation owned by the undo history
 const char *CellBuffer::InsertString(int position, const char *s, int insertLength, bool &startSequence) {
 	char *data = 0;
diff --git a/scintilla/src/CellBuffer.h b/scintilla/src/CellBuffer.h
index 37bc58c..29743af 100644
--- a/scintilla/src/CellBuffer.h
+++ b/scintilla/src/CellBuffer.h
@@ -157,6 +157,8 @@ public:
 	char StyleAt(int position) const;
 	void GetStyleRange(unsigned char *buffer, int position, int lengthRetrieve) const;
 	const char *BufferPointer();
+	const char *RangePointer(int position, int rangeLength);
+	int GapPosition() const;
 
 	int Length() const;
 	void Allocate(int newSize);
diff --git a/scintilla/src/Document.cxx b/scintilla/src/Document.cxx
index 20387f5..f5ac169 100644
--- a/scintilla/src/Document.cxx
+++ b/scintilla/src/Document.cxx
@@ -112,6 +112,8 @@ Document::Document() {
 	matchesValid = false;
 	regex = 0;
 
+	UTF8BytesOfLeadInitialise();
+
 	perLineData[ldMarkers] = new LineMarkers();
 	perLineData[ldLevels] = new LineLevels();
 	perLineData[ldState] = new LineState();
@@ -449,19 +451,13 @@ int Document::LenChar(int pos) {
 	} else if (IsCrLf(pos)) {
 		return 2;
 	} else if (SC_CP_UTF8 == dbcsCodePage) {
-		unsigned char ch = static_cast<unsigned char>(cb.CharAt(pos));
-		if (ch < 0x80)
-			return 1;
-		int len = 2;
-		if (ch >= (0x80 + 0x40 + 0x20 + 0x10))
-			len = 4;
-		else if (ch >= (0x80 + 0x40 + 0x20))
-			len = 3;
+		const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(pos));
+		const int widthCharBytes = UTF8BytesOfLead[leadByte];
 		int lengthDoc = Length();
-		if ((pos + len) > lengthDoc)
-			return lengthDoc -pos;
+		if ((pos + widthCharBytes) > lengthDoc)
+			return lengthDoc - pos;
 		else
-			return len;
+			return widthCharBytes;
 	} else if (dbcsCodePage) {
 		return IsDBCSLeadByte(cb.CharAt(pos)) ? 2 : 1;
 	} else {
@@ -469,51 +465,29 @@ int Document::LenChar(int pos) {
 	}
 }
 
-static inline bool IsTrailByte(int ch) {
-	return (ch >= 0x80) && (ch < 0xc0);
-}
-
-static int BytesFromLead(int leadByte) {
-	if (leadByte > 0xF4) {
-		// Characters longer than 4 bytes not possible in current UTF-8
-		return 0;
-	} else if (leadByte >= 0xF0) {
-		return 4;
-	} else if (leadByte >= 0xE0) {
-		return 3;
-	} else if (leadByte >= 0xC2) {
-		return 2;
-	}
-	return 0;
-}
-
 bool Document::InGoodUTF8(int pos, int &start, int &end) const {
-	int lead = pos;
-	while ((lead>0) && (pos-lead < 4) && IsTrailByte(static_cast<unsigned char>(cb.CharAt(lead-1))))
-		lead--;
-	start = 0;
-	if (lead > 0) {
-		start = lead-1;
-	}
-	int leadByte = static_cast<unsigned char>(cb.CharAt(start));
-	int bytes = BytesFromLead(leadByte);
-	if (bytes == 0) {
+	int trail = pos;
+	while ((trail>0) && (pos-trail < UTF8MaxBytes) && UTF8IsTrailByte(static_cast<unsigned char>(cb.CharAt(trail-1))))
+		trail--;
+	start = (trail > 0) ? trail-1 : trail;
+
+	const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(start));
+	const int widthCharBytes = UTF8BytesOfLead[leadByte];
+	if (widthCharBytes == 1) {
 		return false;
 	} else {
-		int trailBytes = bytes - 1;
-		int len = pos - lead + 1;
+		int trailBytes = widthCharBytes - 1;
+		int len = pos - start;
 		if (len > trailBytes)
 			// pos too far from lead
 			return false;
-		// Check that there are enough trails for this lead
-		int trail = pos + 1;
-		while ((trail-lead<trailBytes) && (trail < Length())) {
-			if (!IsTrailByte(static_cast<unsigned char>(cb.CharAt(trail)))) {
-				return false;
-			}
-			trail++;
-		}
-		end = start + bytes;
+		char charBytes[UTF8MaxBytes] = {static_cast<char>(leadByte),0,0,0};
+		for (int b=1; b<widthCharBytes && ((start+b) < Length()); b++)
+			charBytes[b] = cb.CharAt(static_cast<int>(start+b));
+		int utf8status = UTF8Classify(reinterpret_cast<const unsigned char *>(charBytes), widthCharBytes);
+		if (utf8status & UTF8MaskInvalid)
+			return false;
+		end = start + widthCharBytes;
 		return true;
 	}
 }
@@ -542,14 +516,18 @@ int Document::MovePositionOutsideChar(int pos, int moveDir, bool checkLineEnd) {
 	if (dbcsCodePage) {
 		if (SC_CP_UTF8 == dbcsCodePage) {
 			unsigned char ch = static_cast<unsigned char>(cb.CharAt(pos));
-			int startUTF = pos;
-			int endUTF = pos;
-			if (IsTrailByte(ch) && InGoodUTF8(pos, startUTF, endUTF)) {
-				// ch is a trail byte within a UTF-8 character
-				if (moveDir > 0)
-					pos = endUTF;
-				else
-					pos = startUTF;
+			// If ch is not a trail byte then pos is valid intercharacter position
+			if (UTF8IsTrailByte(ch)) {
+				int startUTF = pos;
+				int endUTF = pos;
+				if (InGoodUTF8(pos, startUTF, endUTF)) {
+					// ch is a trail byte within a UTF-8 character
+					if (moveDir > 0)
+						pos = endUTF;
+					else
+						pos = startUTF;
+				}
+				// Else invalid UTF-8 so return position of isolated trail byte
 			}
 		} else {
 			// Anchor DBCS calculations at start of line because start of line can
@@ -596,16 +574,37 @@ int Document::NextPosition(int pos, int moveDir) const {
 
 	if (dbcsCodePage) {
 		if (SC_CP_UTF8 == dbcsCodePage) {
-			pos += increment;
-			unsigned char ch = static_cast<unsigned char>(cb.CharAt(pos));
-			int startUTF = pos;
-			int endUTF = pos;
-			if (IsTrailByte(ch) && InGoodUTF8(pos, startUTF, endUTF)) {
-				// ch is a trail byte within a UTF-8 character
-				if (moveDir > 0)
-					pos = endUTF;
-				else
-					pos = startUTF;
+			if (increment == 1) {
+				// Simple forward movement case so can avoid some checks
+				const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(pos));
+				if (UTF8IsAscii(leadByte)) {
+					// Single byte character or invalid
+					pos++;
+				} else {
+					const int widthCharBytes = UTF8BytesOfLead[leadByte];
+					char charBytes[UTF8MaxBytes] = {static_cast<char>(leadByte),0,0,0};
+					for (int b=1; b<widthCharBytes; b++)
+						charBytes[b] = cb.CharAt(static_cast<int>(pos+b));
+					int utf8status = UTF8Classify(reinterpret_cast<const unsigned char *>(charBytes), widthCharBytes);
+					if (utf8status & UTF8MaskInvalid)
+						pos++;
+					else
+						pos += utf8status & UTF8MaskWidth;
+				}
+			} else {
+				// Examine byte before position
+				pos--;
+				unsigned char ch = static_cast<unsigned char>(cb.CharAt(pos));
+				// If ch is not a trail byte then pos is valid intercharacter position
+				if (UTF8IsTrailByte(ch)) {
+					// If ch is a trail byte in a valid UTF-8 character then return start of character
+					int startUTF = pos;
+					int endUTF = pos;
+					if (InGoodUTF8(pos, startUTF, endUTF)) {
+						pos = startUTF;
+					}
+					// Else invalid UTF-8 so return position of isolated trail byte
+				}
 			}
 		} else {
 			if (moveDir > 0) {
@@ -720,12 +719,7 @@ int Document::SafeSegment(const char *text, int length, int lengthSegment) {
 		lastEncodingAllowedBreak = j;
 
 		if (dbcsCodePage == SC_CP_UTF8) {
-			if (ch < 0x80) {
-				j++;
-			} else {
-				int bytes = BytesFromLead(ch);
-				j += bytes ? bytes : 1;
-			}
+			j += UTF8BytesOfLead[ch];
 		} else if (dbcsCodePage) {
 			j += IsDBCSLeadByte(ch) ? 2 : 1;
 		} else {
@@ -1255,7 +1249,7 @@ int Document::ParaDown(int pos) {
 }
 
 CharClassify::cc Document::WordCharClass(unsigned char ch) {
-	if ((SC_CP_UTF8 == dbcsCodePage) && (ch >= 0x80))
+	if ((SC_CP_UTF8 == dbcsCodePage) && (!UTF8IsAscii(ch)))
 		return CharClassify::ccWord;
 	return charClass.GetClass(ch);
 }
@@ -1382,19 +1376,6 @@ static inline char MakeLowerCase(char ch) {
 		return static_cast<char>(ch - 'A' + 'a');
 }
 
-size_t Document::ExtractChar(int pos, char *bytes) {
-	unsigned char ch = static_cast<unsigned char>(cb.CharAt(pos));
-	size_t widthChar = UTF8CharLength(ch);
-	bytes[0] = ch;
-	for (size_t i=1; i<widthChar; i++) {
-		bytes[i] = cb.CharAt(static_cast<int>(pos+i));
-		if (!IsTrailByte(static_cast<unsigned char>(bytes[i]))) { // Bad byte
-			widthChar = 1;
-		}
-	}
-	return widthChar;
-}
-
 CaseFolderTable::CaseFolderTable() {
 	for (size_t iChar=0; iChar<sizeof(mapping); iChar++) {
 		mapping[iChar] = static_cast<char>(iChar);
@@ -1470,49 +1451,61 @@ long Document::FindText(int minPos, int maxPos, const char *search,
 		}
 		if (caseSensitive) {
 			const int endSearch = (startPos <= endPos) ? endPos - lengthFind + 1 : endPos;
+			const char charStartSearch =  search[0];
 			while (forward ? (pos < endSearch) : (pos >= endSearch)) {
-				bool found = (pos + lengthFind) <= limitPos;
-				for (int indexSearch = 0; (indexSearch < lengthFind) && found; indexSearch++) {
-					found = CharAt(pos + indexSearch) == search[indexSearch];
-				}
-				if (found && MatchesWordOptions(word, wordStart, pos, lengthFind)) {
-					return pos;
+				if (CharAt(pos) == charStartSearch) {
+					bool found = (pos + lengthFind) <= limitPos;
+					for (int indexSearch = 1; (indexSearch < lengthFind) && found; indexSearch++) {
+						found = CharAt(pos + indexSearch) == search[indexSearch];
+					}
+					if (found && MatchesWordOptions(word, wordStart, pos, lengthFind)) {
+						return pos;
+					}
 				}
 				if (!NextCharacter(pos, increment))
 					break;
 			}
 		} else if (SC_CP_UTF8 == dbcsCodePage) {
-			const size_t maxBytesCharacter = 4;
 			const size_t maxFoldingExpansion = 4;
-			std::vector<char> searchThing(lengthFind * maxBytesCharacter * maxFoldingExpansion + 1);
+			std::vector<char> searchThing(lengthFind * UTF8MaxBytes * maxFoldingExpansion + 1);
 			const int lenSearch = static_cast<int>(
 				pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind));
+			char bytes[UTF8MaxBytes + 1];
+			char folded[UTF8MaxBytes * maxFoldingExpansion + 1];
 			while (forward ? (pos < endPos) : (pos >= endPos)) {
 				int widthFirstCharacter = 0;
-				int indexDocument = 0;
+				int posIndexDocument = pos;
 				int indexSearch = 0;
 				bool characterMatches = true;
-				while (characterMatches &&
-					((pos + indexDocument) < limitPos) &&
-					(indexSearch < lenSearch)) {
-					char bytes[maxBytesCharacter + 1];
-					bytes[maxBytesCharacter] = 0;
-					const int widthChar = static_cast<int>(ExtractChar(pos + indexDocument, bytes));
+				for (;;) {
+					const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(posIndexDocument));
+					bytes[0] = leadByte;
+					int widthChar = 1;
+					if (!UTF8IsAscii(leadByte)) {
+						const int widthCharBytes = UTF8BytesOfLead[leadByte];
+						for (int b=1; b<widthCharBytes; b++) {
+							bytes[b] = cb.CharAt(posIndexDocument+b);
+						}
+						widthChar = UTF8Classify(reinterpret_cast<const unsigned char *>(bytes), widthCharBytes) & UTF8MaskWidth;
+					}
 					if (!widthFirstCharacter)
 						widthFirstCharacter = widthChar;
-					if ((pos + indexDocument + widthChar) > limitPos)
+					if ((posIndexDocument + widthChar) > limitPos)
 						break;
-					char folded[maxBytesCharacter * maxFoldingExpansion + 1];
 					const int lenFlat = static_cast<int>(pcf->Fold(folded, sizeof(folded), bytes, widthChar));
 					folded[lenFlat] = 0;
 					// Does folded match the buffer
 					characterMatches = 0 == memcmp(folded, &searchThing[0] + indexSearch, lenFlat);
-					indexDocument += widthChar;
+					if (!characterMatches)
+						break;
+					posIndexDocument += widthChar;
 					indexSearch += lenFlat;
+					if (indexSearch >= lenSearch)
+						break;
 				}
 				if (characterMatches && (indexSearch == static_cast<int>(lenSearch))) {
-					if (MatchesWordOptions(word, wordStart, pos, indexDocument)) {
-						*length = indexDocument;
+					if (MatchesWordOptions(word, wordStart, pos, posIndexDocument - pos)) {
+						*length = posIndexDocument - pos;
 						return pos;
 					}
 				}
diff --git a/scintilla/src/Document.h b/scintilla/src/Document.h
index 64571e6..dcdafd4 100644
--- a/scintilla/src/Document.h
+++ b/scintilla/src/Document.h
@@ -298,6 +298,8 @@ public:
 	void SetSavePoint();
 	bool IsSavePoint() { return cb.IsSavePoint(); }
 	const char * SCI_METHOD BufferPointer() { return cb.BufferPointer(); }
+	const char *RangePointer(int position, int rangeLength) { return cb.RangePointer(position, rangeLength); }
+	int GapPosition() const { return cb.GapPosition(); }
 
 	int SCI_METHOD GetLineIndentation(int line);
 	void SetLineIndentation(int line, int indent);
@@ -352,7 +354,6 @@ public:
 	int NextWordEnd(int pos, int delta);
 	int SCI_METHOD Length() const { return cb.Length(); }
 	void Allocate(int newSize) { cb.Allocate(newSize); }
-	size_t ExtractChar(int pos, char *bytes);
 	bool MatchesWordOptions(bool word, bool wordStart, int pos, int length);
 	long FindText(int minPos, int maxPos, const char *search, bool caseSensitive, bool word,
 		bool wordStart, bool regExp, int flags, int *length, CaseFolder *pcf);
diff --git a/scintilla/src/Editor.cxx b/scintilla/src/Editor.cxx
index e6d71c2..4d3a7ff 100644
--- a/scintilla/src/Editor.cxx
+++ b/scintilla/src/Editor.cxx
@@ -36,6 +36,7 @@
 #include "CharClassify.h"
 #include "Decoration.h"
 #include "Document.h"
+#include "UniConversion.h"
 #include "Selection.h"
 #include "PositionCache.h"
 #include "Editor.h"
@@ -2051,92 +2052,18 @@ LineLayout *Editor::RetrieveLineLayout(int lineNumber) {
 	        LinesOnScreen() + 1, pdoc->LinesTotal());
 }
 
-static bool GoodTrailByte(int v) {
-	return (v >= 0x80) && (v < 0xc0);
-}
-
 bool BadUTF(const char *s, int len, int &trailBytes) {
 	// For the rules: http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
 	if (trailBytes) {
 		trailBytes--;
 		return false;
 	}
-	const unsigned char *us = reinterpret_cast<const unsigned char *>(s);
-	if (*us < 0x80) {
-		// Single bytes easy
-		return false;
-	} else if (*us > 0xF4) {
-		// Characters longer than 4 bytes not possible in current UTF-8
-		return true;
-	} else if (*us >= 0xF0) {
-		// 4 bytes
-		if (len < 4)
-			return true;
-		if (GoodTrailByte(us[1]) && GoodTrailByte(us[2]) && GoodTrailByte(us[3])) {
-			if (*us == 0xf4) {
-				// Check if encoding a value beyond the last Unicode character 10FFFF
-				if (us[1] > 0x8f) {
-					return true;
-				} else if (us[1] == 0x8f) {
-					if (us[2] > 0xbf) {
-						return true;
-					} else if (us[2] == 0xbf) {
-						if (us[3] > 0xbf) {
-							return true;
-						}
-					}
-				}
-			} else if ((*us == 0xf0) && ((us[1] & 0xf0) == 0x80)) {
-				// Overlong
-				return true;
-			}
-			trailBytes = 3;
-			return false;
-		} else {
-			return true;
-		}
-	} else if (*us >= 0xE0) {
-		// 3 bytes
-		if (len < 3)
-			return true;
-		if (GoodTrailByte(us[1]) && GoodTrailByte(us[2])) {
-			if ((*us == 0xe0) && ((us[1] & 0xe0) == 0x80)) {
-				// Overlong
-				return true;
-			}
-			if ((*us == 0xed) && ((us[1] & 0xe0) == 0xa0)) {
-				// Surrogate
-				return true;
-			}
-			if ((*us == 0xef) && (us[1] == 0xbf) && (us[2] == 0xbe)) {
-				// U+FFFE
-				return true;
-			}
-			if ((*us == 0xef) && (us[1] == 0xbf) && (us[2] == 0xbf)) {
-				// U+FFFF
-				return true;
-			}
-			trailBytes = 2;
-			return false;
-		} else {
-			return true;
-		}
-	} else if (*us >= 0xC2) {
-		// 2 bytes
-		if (len < 2)
-			return true;
-		if (GoodTrailByte(us[1])) {
-			trailBytes = 1;
-			return false;
-		} else {
-			return true;
-		}
-	} else if (*us >= 0xC0) {
-		// Overlong encoding
+	int utf8status = UTF8Classify(reinterpret_cast<const unsigned char *>(s), len);
+	if (utf8status & UTF8MaskInvalid) {
 		return true;
 	} else {
-		// Trail byte
-		return true;
+		trailBytes = (utf8status & UTF8MaskWidth) - 1;
+		return false;
 	}
 }
 
@@ -2160,11 +2087,7 @@ void Editor::LayoutLine(int line, Surface *surface, ViewStyle &vstyle, LineLayou
 	if (ll->validity == LineLayout::llCheckTextAndStyle) {
 		int lineLength = posLineEnd - posLineStart;
 		if (!vstyle.viewEOL) {
-			int cid = posLineEnd - 1;
-			while ((cid > posLineStart) && IsEOLChar(pdoc->CharAt(cid))) {
-				cid--;
-				lineLength--;
-			}
+			lineLength = pdoc->LineEnd(line) - posLineStart;
 		}
 		if (lineLength == ll->numCharsInLine) {
 			// See if chars, styles, indicators, are all the same
@@ -2221,10 +2144,7 @@ void Editor::LayoutLine(int line, Surface *surface, ViewStyle &vstyle, LineLayou
 		const int lineLength = posLineEnd - posLineStart;
 		pdoc->GetCharRange(ll->chars, posLineStart, lineLength);
 		pdoc->GetStyleRange(ll->styles, posLineStart, lineLength);
-		int numCharsBeforeEOL = lineLength;
-		while ((numCharsBeforeEOL > 0) && IsEOLChar(ll->chars[numCharsBeforeEOL-1])) {
-			numCharsBeforeEOL--;
-		}
+		int numCharsBeforeEOL = pdoc->LineEnd(line) - posLineStart;
 		const int numCharsInLine = (vstyle.viewEOL) ? lineLength : numCharsBeforeEOL;
 		for (int styleInLine = 0; styleInLine < numCharsInLine; styleInLine++) {
 			styleByte = ll->styles[styleInLine];
@@ -2428,7 +2348,7 @@ ColourDesired Editor::TextBackground(ViewStyle &vsDraw, bool overrideBackground,
 	} else {
 		if ((vsDraw.edgeState == EDGE_BACKGROUND) &&
 		        (i >= ll->edgeColumn) &&
-		        !IsEOLChar(ll->chars[i]))
+		        (i < ll->numCharsBeforeEOL))
 			return vsDraw.edgecolour;
 		if (inHotspot && vsDraw.hotspotBackgroundSet)
 			return vsDraw.hotspotBackground;
@@ -7577,6 +7497,10 @@ sptr_t Editor::WndProc(unsigned int iMessage, uptr_t wParam, sptr_t lParam) {
 		ClearAll();
 		return 0;
 
+	case SCI_DELETERANGE:
+		pdoc->DeleteChars(wParam, lParam);
+		return 0;
+
 	case SCI_CLEARDOCUMENTSTYLE:
 		ClearDocumentStyle();
 		return 0;
@@ -9007,6 +8931,12 @@ sptr_t Editor::WndProc(unsigned int iMessage, uptr_t wParam, sptr_t lParam) {
 	case SCI_GETCHARACTERPOINTER:
 		return reinterpret_cast<sptr_t>(pdoc->BufferPointer());
 
+	case SCI_GETRANGEPOINTER:
+		return reinterpret_cast<sptr_t>(pdoc->RangePointer(wParam, lParam));
+
+	case SCI_GETGAPPOSITION:
+		return pdoc->GapPosition();
+
 	case SCI_SETEXTRAASCENT:
 		vs.extraAscent = wParam;
 		InvalidateStyleRedraw();
diff --git a/scintilla/src/SplitVector.h b/scintilla/src/SplitVector.h
index 69ae3e7..c64358e 100644
--- a/scintilla/src/SplitVector.h
+++ b/scintilla/src/SplitVector.h
@@ -261,6 +261,24 @@ public:
 		body[lengthBody] = 0;
 		return body;
 	}
+
+	T *RangePointer(int position, int rangeLength) {
+		if (position < part1Length) {
+			if ((position + rangeLength) > part1Length) {
+				// Range overlaps gap, so move gap to start of range.
+				GapTo(position);
+				return body + position + gapLength;
+			} else {
+				return body + position ;
+			}
+		} else {
+			return body + position + gapLength;
+		}
+	}
+
+	int GapPosition() const {
+		return part1Length; 
+	}
 };
 
 #endif
diff --git a/scintilla/src/UniConversion.cxx b/scintilla/src/UniConversion.cxx
index e965c58..ed26c5c 100644
--- a/scintilla/src/UniConversion.cxx
+++ b/scintilla/src/UniConversion.cxx
@@ -129,3 +129,120 @@ unsigned int UTF16FromUTF8(const char *s, unsigned int len, wchar_t *tbuf, unsig
 	}
 	return ui;
 }
+
+int UTF8BytesOfLead[256];
+static bool initialisedBytesOfLead = false;
+
+static int BytesFromLead(int leadByte) {
+	if (leadByte < 0xC2) {
+		// Single byte or invalid
+		return 1;
+	} else if (leadByte < 0xE0) {
+		return 2;
+	} else if (leadByte < 0xF0) {
+		return 3;
+	} else if (leadByte < 0xF5) {
+		return 4;
+	} else {
+		// Characters longer than 4 bytes not possible in current UTF-8
+		return 1;
+	}
+}
+
+void UTF8BytesOfLeadInitialise() {
+	if (!initialisedBytesOfLead) {
+		for (int i=0;i<256;i++) {
+			UTF8BytesOfLead[i] = BytesFromLead(i);
+		}
+		initialisedBytesOfLead = true;
+	}
+}
+
+// Return both the width of the first character in the string and a status
+// saying whether it is valid or invalid.
+// Most invalid sequences return a width of 1 so are treated as isolated bytes but
+// the non-characters *FFFE, *FFFF and FDD0 .. FDEF return 3 or 4 as they can be
+// reasonably treated as code points in some circumstances. They will, however,
+// not have associated glyphs.
+int UTF8Classify(const unsigned char *us, int len) {
+	// For the rules: http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
+	if (*us < 0x80) {
+		// Single bytes easy
+		return 1;
+	} else if (*us > 0xf4) {
+		// Characters longer than 4 bytes not possible in current UTF-8
+		return UTF8MaskInvalid | 1;
+	} else if (*us >= 0xf0) {
+		// 4 bytes
+		if (len < 4)
+			return UTF8MaskInvalid | 1;
+		if (UTF8IsTrailByte(us[1]) && UTF8IsTrailByte(us[2]) && UTF8IsTrailByte(us[3])) {
+			if (((us[1] & 0xf) == 0xf) && (us[2] == 0xbf) && ((us[3] == 0xbe) || (us[3] == 0xbf))) {
+				// *FFFE or *FFFF non-character
+				return UTF8MaskInvalid | 4;
+			}
+			if (*us == 0xf4) {
+				// Check if encoding a value beyond the last Unicode character 10FFFF
+				if (us[1] > 0x8f) {
+					return UTF8MaskInvalid | 1;
+				} else if (us[1] == 0x8f) {
+					if (us[2] > 0xbf) {
+						return UTF8MaskInvalid | 1;
+					} else if (us[2] == 0xbf) {
+						if (us[3] > 0xbf) {
+							return UTF8MaskInvalid | 1;
+						}
+					}
+				}
+			} else if ((*us == 0xf0) && ((us[1] & 0xf0) == 0x80)) {
+				// Overlong
+				return UTF8MaskInvalid | 1;
+			}
+			return 4;
+		} else {
+			return UTF8MaskInvalid | 1;
+		}
+	} else if (*us >= 0xe0) {
+		// 3 bytes
+		if (len < 3)
+			return UTF8MaskInvalid | 1;
+		if (UTF8IsTrailByte(us[1]) && UTF8IsTrailByte(us[2])) {
+			if ((*us == 0xe0) && ((us[1] & 0xe0) == 0x80)) {
+				// Overlong
+				return UTF8MaskInvalid | 1;
+			}
+			if ((*us == 0xed) && ((us[1] & 0xe0) == 0xa0)) {
+				// Surrogate
+				return UTF8MaskInvalid | 1;
+			}
+			if ((*us == 0xef) && (us[1] == 0xbf) && (us[2] == 0xbe)) {
+				// U+FFFE non-character - 3 bytes long
+				return UTF8MaskInvalid | 3;
+			}
+			if ((*us == 0xef) && (us[1] == 0xbf) && (us[2] == 0xbf)) {
+				// U+FFFF non-character - 3 bytes long
+				return UTF8MaskInvalid | 3;
+			}
+			if ((*us == 0xef) && (us[1] == 0xb7) && (((us[2] & 0xf0) == 0x90) || ((us[2] & 0xf0) == 0xa0))) {
+				// U+FDD0 .. U+FDEF
+				return UTF8MaskInvalid | 3;
+			}
+			return 3;
+		} else {
+			return UTF8MaskInvalid | 1;
+		}
+	} else if (*us >= 0xc2) {
+		// 2 bytes
+		if (len < 2)
+			return UTF8MaskInvalid | 1;
+		if (UTF8IsTrailByte(us[1])) {
+			return 2;
+		} else {
+			return UTF8MaskInvalid | 1;
+		}
+	} else {
+		// 0xc0 .. 0xc1 is overlong encoding
+		// 0x80 .. 0xbf is trail byte
+		return UTF8MaskInvalid | 1;
+	}
+}
diff --git a/scintilla/src/UniConversion.h b/scintilla/src/UniConversion.h
index 222e55f..28b491c 100644
--- a/scintilla/src/UniConversion.h
+++ b/scintilla/src/UniConversion.h
@@ -5,9 +5,24 @@
 // Copyright 1998-2001 by Neil Hodgson <neilh@scintilla.org>
 // The License.txt file describes the conditions under which this software may be distributed.
 
+const int UTF8MaxBytes = 4;
+
 unsigned int UTF8Length(const wchar_t *uptr, unsigned int tlen);
 void UTF8FromUTF16(const wchar_t *uptr, unsigned int tlen, char *putf, unsigned int len);
 unsigned int UTF8CharLength(unsigned char ch);
 unsigned int UTF16Length(const char *s, unsigned int len);
 unsigned int UTF16FromUTF8(const char *s, unsigned int len, wchar_t *tbuf, unsigned int tlen);
 
+extern int UTF8BytesOfLead[256];
+void UTF8BytesOfLeadInitialise();
+
+inline bool UTF8IsTrailByte(int ch) {
+	return (ch >= 0x80) && (ch < 0xc0);
+}
+
+inline bool UTF8IsAscii(int ch) {
+	return ch < 0x80;
+}
+
+enum { UTF8MaskWidth=0x7, UTF8MaskInvalid=0x8 };
+int UTF8Classify(const unsigned char *us, int len);
diff --git a/scintilla/win32/PlatWin.cxx b/scintilla/win32/PlatWin.cxx
index a791001..483e981 100644
--- a/scintilla/win32/PlatWin.cxx
+++ b/scintilla/win32/PlatWin.cxx
@@ -1019,6 +1019,8 @@ void SurfaceGDI::MeasureWidths(Font &font_, const char *s, int len, XYPOSITION *
 			} else if (fit < lenBlock) {
 				// For some reason, such as an incomplete DBCS character
 				// Not all the positions are filled in so make them equal to end.
+				if (fit == 0)
+					poses.buffer[fit++] = 0;
 				for (int i = fit;i<lenBlock;i++)
 					poses.buffer[i] = poses.buffer[fit-1];
 			}
@@ -1574,7 +1576,6 @@ void SurfaceD2D::Copy(PRectangle rc, Point from, Surface &surfaceSource) {
 
 void SurfaceD2D::DrawTextCommon(PRectangle rc, Font &font_, XYPOSITION ybase, const char *s, int len, UINT) {
 	SetFont(font_);
-	RECT rcw = RectFromPRectangle(rc);
 
 	// Use Unicode calls
 	const TextWide tbuf(s, len, unicodeMode, codePage);
@@ -1587,13 +1588,7 @@ void SurfaceD2D::DrawTextCommon(PRectangle rc, Font &font_, XYPOSITION ybase, co
 		if (SUCCEEDED(hr)) {
 			D2D1_POINT_2F origin = {rc.left, ybase-yAscent};
 			pRenderTarget->DrawTextLayout(origin, pTextLayout, pBrush, D2D1_DRAW_TEXT_OPTIONS_NONE);
-		} else {
-			D2D1_RECT_F layoutRect = D2D1::RectF(
-				static_cast<FLOAT>(rcw.left) / dpiScaleX,
-				static_cast<FLOAT>(ybase-yAscent) / dpiScaleY,
-				static_cast<FLOAT>(rcw.right + 1) / dpiScaleX,
-				static_cast<FLOAT>(rcw.bottom) / dpiScaleY);
-			pRenderTarget->DrawText(tbuf.buffer, tbuf.tlen, pTextFormat, layoutRect, pBrush, D2D1_DRAW_TEXT_OPTIONS_NONE);
+			pTextLayout->Release();
 		}
 	}
 }
author	XhmikosR <xhmikosr@users.sourceforge.net>	2012-05-27 15:08:02 +0000
committer	XhmikosR <xhmikosr@users.sourceforge.net>	2012-05-27 15:08:02 +0000
commit	9e03d34a5411bd1736e67f7d6a5a4a7ec4b42345 (patch)
tree	10c2f7519222c810563a80f3e02e99bcd79401ee
parent	72f1da668f0176d3cbea477af14e3c0ab6cfa504 (diff)
download	notepad2-mod-9e03d34a5411bd1736e67f7d6a5a4a7ec4b42345.zip notepad2-mod-9e03d34a5411bd1736e67f7d6a5a4a7ec4b42345.tar.gz notepad2-mod-9e03d34a5411bd1736e67f7d6a5a4a7ec4b42345.tar.bz2