diff options
Diffstat (limited to 'scintilla/src/LexBash.cxx')
-rw-r--r-- | scintilla/src/LexBash.cxx | 521 |
1 files changed, 521 insertions, 0 deletions
diff --git a/scintilla/src/LexBash.cxx b/scintilla/src/LexBash.cxx new file mode 100644 index 0000000..ec3e04a --- /dev/null +++ b/scintilla/src/LexBash.cxx @@ -0,0 +1,521 @@ +// Scintilla source code edit control
+/** @file LexBash.cxx
+ ** Lexer for Bash.
+ **/
+// Copyright 2004-2008 by Neil Hodgson <neilh@scintilla.org>
+// Adapted from LexPerl by Kein-Hong Man 2004
+// The License.txt file describes the conditions under which this software may be distributed.
+
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <stdio.h>
+#include <stdarg.h>
+
+#include "Platform.h"
+
+#include "PropSet.h"
+#include "Accessor.h"
+#include "StyleContext.h"
+#include "KeyWords.h"
+#include "Scintilla.h"
+#include "SciLexer.h"
+#include "CharacterSet.h"
+
+#ifdef SCI_NAMESPACE
+using namespace Scintilla;
+#endif
+
+// Size in chars of the buffers that hold a here-doc delimiter: the Delimiter
+// array allocated by HereDocCls and the per-line comparison buffer used in
+// the SCE_SH_HERE_Q case of ColouriseBashDoc().
+#define HERE_DELIM_MAX 256
+
+// define this if you want 'invalid octals' to be marked as errors
+// usually, this is not a good idea, permissive lexing is better
+#undef PEDANTIC_OCTAL
+
+// Special values kept in the number-base variable of ColouriseBashDoc()
+// alongside real bases. All of them are greater than 64, the largest base
+// that getBashNumberBase() returns, so they cannot collide with a real base.
+// BASH_BASE_ERROR is also the "not a digit" result of translateBashDigit().
+#define BASH_BASE_ERROR 65
+#define BASH_BASE_DECIMAL 66
+#define BASH_BASE_HEX 67
+#ifdef PEDANTIC_OCTAL
+#define BASH_BASE_OCTAL 68
+#define BASH_BASE_OCTAL_ERROR 69
+#endif
+
+/**
+ * Convert a character to its digit value in Bash's base#number notation.
+ *
+ * '0'-'9' map to 0-9, 'a'-'z' to 10-35, 'A'-'Z' to 36-61, '@' to 62 and
+ * '_' to 63. Any other character yields BASH_BASE_ERROR.
+ */
+static inline int translateBashDigit(int ch) {
+    if (ch >= '0' && ch <= '9')
+        return ch - '0';
+    if (ch >= 'a' && ch <= 'z')
+        return 10 + (ch - 'a');
+    if (ch >= 'A' && ch <= 'Z')
+        return 36 + (ch - 'A');
+    switch (ch) {
+    case '@':
+        return 62;
+    case '_':
+        return 63;
+    default:
+        return BASH_BASE_ERROR;
+    }
+}
+
+/**
+ * Parse the base prefix of a Bash "base#number" literal.
+ *
+ * Every character of s is folded in as a decimal digit (no validation is
+ * done here). Returns the resulting base, or BASH_BASE_ERROR when the text
+ * is longer than two characters or the value exceeds 64.
+ */
+static inline int getBashNumberBase(char *s) {
+    int value = 0;
+    int count = 0;
+    for (; *s != '\0'; ++s, ++count) {
+        value = value * 10 + (*s - '0');
+    }
+    const bool acceptable = (count <= 2) && (value <= 64);
+    return acceptable ? value : BASH_BASE_ERROR;
+}
+
+/**
+ * Return the closing delimiter that pairs with an opening bracket character.
+ * Characters that are not one of ( [ { < are returned unchanged, so a quote
+ * character is its own closer.
+ */
+static int opposite(int ch) {
+    switch (ch) {
+    case '(':
+        return ')';
+    case '[':
+        return ']';
+    case '{':
+        return '}';
+    case '<':
+        return '>';
+    default:
+        return ch;
+    }
+}
+
+/**
+ * Assign Bash lexical styles to the text range that begins at startPos and
+ * spans length characters.
+ *
+ * @param startPos     first position to style; it may be moved backwards below
+ *                     so that a here-doc, string, number, identifier or comment
+ *                     is lexed again from its beginning
+ * @param length       number of characters to style, counted from the original
+ *                     startPos (the end position is fixed before backtracking)
+ * @param initStyle    style in effect at startPos
+ * @param keywordlists keywordlists[0] is the keyword list; a word that is not
+ *                     in it and does not start with '-' is restyled as
+ *                     SCE_SH_IDENTIFIER
+ * @param styler       accessor used to read existing styles and, through a
+ *                     StyleContext, to read the text and write the new styles
+ */
+static void ColouriseBashDoc(unsigned int startPos, int length, int initStyle,
+ WordList *keywordlists[], Accessor &styler) {
+
+ WordList &keywords = *keywordlists[0];
+
+ // Character classes consulted by the state machine below.
+ CharacterSet setWordStart(CharacterSet::setAlpha, "_");
+ // note that [+-] are often parts of identifiers in shell scripts
+ CharacterSet setWord(CharacterSet::setAlphaNum, "._+-");
+ CharacterSet setBashOperator(CharacterSet::setNone, "^&\\%()-+=|{}[]:;>,*/<?!.~@");
+ CharacterSet setSingleCharOp(CharacterSet::setNone, "rwxoRWXOezsfdlpSbctugkTBMACahGLNn");
+ CharacterSet setParam(CharacterSet::setAlphaNum, "$_");
+ // setHereDoc: characters that may start an unquoted here-doc delimiter;
+ // setHereDoc2: characters collected as part of the delimiter.
+ CharacterSet setHereDoc(CharacterSet::setAlpha, "_\\-+!");
+ CharacterSet setHereDoc2(CharacterSet::setAlphaNum, "_-+!");
+ // Characters that, directly after '<<', make it a shift operator instead.
+ CharacterSet setLeftShift(CharacterSet::setDigits, "=$");
+
+ class HereDocCls { // Class to manage HERE document elements
+ public:
+ int State; // 0: '<<' encountered
+ // 1: collect the delimiter
+ // 2: here doc text (lines after the delimiter)
+ int Quote; // the char after '<<'
+ bool Quoted; // true if Quote is '\'' or '"'
+ bool Indent; // indented delimiter (for <<-)
+ int DelimiterLength; // strlen(Delimiter)
+ char *Delimiter; // the Delimiter, buffer of HERE_DELIM_MAX chars
+ HereDocCls() {
+ State = 0;
+ Quote = 0;
+ Quoted = false;
+ Indent = 0;
+ DelimiterLength = 0;
+ Delimiter = new char[HERE_DELIM_MAX];
+ Delimiter[0] = '\0';
+ }
+ // Append one character and keep the buffer NUL-terminated.
+ // There is no bounds check here; the SCE_SH_HERE_DELIM case below
+ // stops collecting once DelimiterLength reaches HERE_DELIM_MAX - 1.
+ void Append(int ch) {
+ Delimiter[DelimiterLength++] = static_cast<char>(ch);
+ Delimiter[DelimiterLength] = '\0';
+ }
+ ~HereDocCls() {
+ delete []Delimiter;
+ }
+ };
+ HereDocCls HereDoc;
+
+ class QuoteCls { // Class to manage quote pairs (simplified vs LexPerl)
+ public:
+ int Count; // nesting depth of the currently open delimiter
+ int Up, Down; // opening delimiter and its matching closer
+ QuoteCls() {
+ Count = 0;
+ Up = '\0';
+ Down = '\0';
+ }
+ void Open(int u) {
+ Count++;
+ Up = u;
+ Down = opposite(Up);
+ }
+ void Start(int u) {
+ Count = 0;
+ Open(u);
+ }
+ };
+ QuoteCls Quote;
+
+ // numBase holds BASH_BASE_DECIMAL, BASH_BASE_HEX or BASH_BASE_ERROR, or,
+ // after a valid "DD#" prefix, the base returned by getBashNumberBase().
+ int numBase = 0;
+ int digit;
+ // Fixed before any backtracking so the end of the range does not move.
+ unsigned int endPos = startPos + length;
+
+ // Backtrack to beginning of style if required...
+ // If in a long distance lexical state, backtrack to find quote characters
+ if (initStyle == SCE_SH_HERE_Q) {
+ while ((startPos > 1) && (styler.StyleAt(startPos) != SCE_SH_HERE_DELIM)) {
+ startPos--;
+ }
+ // Restart at the beginning of the line that holds the here-doc delimiter,
+ // using the style of the character just before that line.
+ startPos = styler.LineStart(styler.GetLine(startPos));
+ initStyle = styler.StyleAt(startPos - 1);
+ }
+ // Bash strings can be multi-line with embedded newlines, so backtrack.
+ // Bash numbers have additional state during lexing, so backtrack too.
+ if (initStyle == SCE_SH_STRING
+ || initStyle == SCE_SH_BACKTICKS
+ || initStyle == SCE_SH_CHARACTER
+ || initStyle == SCE_SH_NUMBER
+ || initStyle == SCE_SH_IDENTIFIER
+ || initStyle == SCE_SH_COMMENTLINE) {
+ while ((startPos > 1) && (styler.StyleAt(startPos - 1) == initStyle)) {
+ startPos--;
+ }
+ // The whole token is lexed again, so begin in the default state.
+ initStyle = SCE_SH_DEFAULT;
+ }
+
+ StyleContext sc(startPos, endPos - startPos, initStyle, styler);
+
+ for (; sc.More(); sc.Forward()) {
+
+ // Determine if the current state should terminate.
+ switch (sc.state) {
+ case SCE_SH_OPERATOR:
+ // Operators are styled one character at a time.
+ sc.SetState(SCE_SH_DEFAULT);
+ break;
+ case SCE_SH_WORD:
+ // "." never used in Bash variable names but used in file names
+ if (!setWord.Contains(sc.ch)) {
+ char s[1000];
+ sc.GetCurrent(s, sizeof(s));
+ if (s[0] != '-' && // for file operators
+ !keywords.InList(s)) {
+ sc.ChangeState(SCE_SH_IDENTIFIER);
+ }
+ sc.SetState(SCE_SH_DEFAULT);
+ }
+ break;
+ case SCE_SH_IDENTIFIER:
+ if (sc.chPrev == '\\') { // for escaped chars
+ sc.ForwardSetState(SCE_SH_DEFAULT);
+ } else if (!setWord.Contains(sc.ch)) {
+ sc.SetState(SCE_SH_DEFAULT);
+ }
+ break;
+ case SCE_SH_NUMBER:
+ // Each 'break' inside the branches below keeps sc.ch as part of
+ // the number; reaching the code after them ends the number.
+ digit = translateBashDigit(sc.ch);
+ if (numBase == BASH_BASE_DECIMAL) {
+ if (sc.ch == '#') {
+ // The digits seen so far are the base of a DD#DDDD number.
+ char s[10];
+ sc.GetCurrent(s, sizeof(s));
+ numBase = getBashNumberBase(s);
+ if (numBase != BASH_BASE_ERROR)
+ break;
+ } else if (IsADigit(sc.ch))
+ break;
+ } else if (numBase == BASH_BASE_HEX) {
+ if (IsADigit(sc.ch, 16))
+ break;
+#ifdef PEDANTIC_OCTAL
+ } else if (numBase == BASH_BASE_OCTAL ||
+ numBase == BASH_BASE_OCTAL_ERROR) {
+ if (digit <= 7)
+ break;
+ if (digit <= 9) {
+ numBase = BASH_BASE_OCTAL_ERROR;
+ break;
+ }
+#endif
+ } else if (numBase == BASH_BASE_ERROR) {
+ // Already an error: keep consuming decimal digits so the
+ // whole run is marked as the error.
+ if (digit <= 9)
+ break;
+ } else { // DD#DDDD number style handling
+ if (digit != BASH_BASE_ERROR) {
+ if (numBase <= 36) {
+ // case-insensitive if base<=36
+ if (digit >= 36) digit -= 26;
+ }
+ if (digit < numBase)
+ break;
+ if (digit <= 9) {
+ numBase = BASH_BASE_ERROR;
+ break;
+ }
+ }
+ }
+ // fallthrough when number is at an end or error
+ if (numBase == BASH_BASE_ERROR
+#ifdef PEDANTIC_OCTAL
+ || numBase == BASH_BASE_OCTAL_ERROR
+#endif
+ ) {
+ sc.ChangeState(SCE_SH_ERROR);
+ }
+ sc.SetState(SCE_SH_DEFAULT);
+ break;
+ case SCE_SH_COMMENTLINE:
+ // A backslash just before the line end keeps the comment style going.
+ if (sc.atLineEnd && sc.chPrev != '\\') {
+ sc.SetState(SCE_SH_DEFAULT);
+ }
+ break;
+ case SCE_SH_HERE_DELIM:
+ // From Bash info:
+ // ---------------
+ // Specifier format is: <<[-]WORD
+ // Optional '-' is for removal of leading tabs from here-doc.
+ // Whitespace acceptable after <<[-] operator
+ //
+ if (HereDoc.State == 0) { // '<<' encountered
+ // Decide what follows by looking at the next character.
+ HereDoc.Quote = sc.chNext;
+ HereDoc.Quoted = false;
+ HereDoc.DelimiterLength = 0;
+ HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0';
+ if (sc.chNext == '\'' || sc.chNext == '\"') { // a quoted here-doc delimiter (' or ")
+ sc.Forward();
+ HereDoc.Quoted = true;
+ HereDoc.State = 1;
+ } else if (!HereDoc.Indent && sc.chNext == '-') { // <<- indent case
+ HereDoc.Indent = true;
+ } else if (setHereDoc.Contains(sc.chNext)) {
+ // an unquoted here-doc delimiter, no special handling
+ // TODO check what exactly bash considers part of the delim
+ HereDoc.State = 1;
+ } else if (sc.chNext == '<') { // HERE string <<<
+ sc.Forward();
+ sc.ForwardSetState(SCE_SH_DEFAULT);
+ } else if (IsASpace(sc.chNext)) {
+ // eat whitespace
+ } else if (setLeftShift.Contains(sc.chNext)) {
+ // left shift << or <<= operator cases
+ sc.ChangeState(SCE_SH_OPERATOR);
+ sc.ForwardSetState(SCE_SH_DEFAULT);
+ } else {
+ // a symbol terminates; deprecated zero-length delimiter
+ HereDoc.State = 1;
+ }
+ } else if (HereDoc.State == 1) { // collect the delimiter
+ if (setHereDoc2.Contains(sc.ch) || sc.chPrev == '\\') {
+ HereDoc.Append(sc.ch);
+ } else if (HereDoc.Quoted && sc.ch == HereDoc.Quote) { // closing quote => end of delimiter
+ sc.ForwardSetState(SCE_SH_DEFAULT);
+ } else if (sc.ch == '\\') {
+ // skip escape prefix
+ } else {
+ sc.SetState(SCE_SH_DEFAULT);
+ }
+ // Guard for the fixed-size Delimiter buffer filled by Append().
+ if (HereDoc.DelimiterLength >= HERE_DELIM_MAX - 1) { // force blowup
+ sc.SetState(SCE_SH_ERROR);
+ HereDoc.State = 0;
+ }
+ }
+ break;
+ case SCE_SH_HERE_Q:
+ // HereDoc.State == 2
+ if (sc.atLineStart) {
+ // Start a new segment at each line so that GetCurrent() below
+ // returns only this line's text.
+ sc.SetState(SCE_SH_HERE_Q);
+ int prefixws = 0;
+ while (IsASpace(sc.ch) && !sc.atLineEnd) { // whitespace prefix
+ sc.Forward();
+ prefixws++;
+ }
+ // Restart the segment after the whitespace prefix so the prefix
+ // is not part of the text compared with the delimiter.
+ if (prefixws > 0)
+ sc.SetState(SCE_SH_HERE_Q);
+ while (!sc.atLineEnd) {
+ sc.Forward();
+ }
+ char s[HERE_DELIM_MAX];
+ sc.GetCurrent(s, sizeof(s));
+ if (sc.LengthCurrent() == 0)
+ break;
+ // Drop a trailing carriage return before comparing.
+ if (s[strlen(s) - 1] == '\r')
+ s[strlen(s) - 1] = '\0';
+ if (strcmp(HereDoc.Delimiter, s) == 0) {
+ // The terminator may be indented only for the <<- form and
+ // must be unindented otherwise.
+ if ((prefixws > 0 && HereDoc.Indent) || // indentation rule
+ (prefixws == 0 && !HereDoc.Indent)) {
+ sc.SetState(SCE_SH_DEFAULT);
+ break;
+ }
+ }
+ }
+ break;
+ case SCE_SH_SCALAR: // variable names
+ if (!setParam.Contains(sc.ch)) {
+ if (sc.LengthCurrent() == 1) {
+ // Special variable: $(, $_ etc.
+ sc.ForwardSetState(SCE_SH_DEFAULT);
+ } else {
+ sc.SetState(SCE_SH_DEFAULT);
+ }
+ }
+ break;
+ case SCE_SH_STRING: // delimited styles
+ case SCE_SH_CHARACTER:
+ case SCE_SH_BACKTICKS:
+ case SCE_SH_PARAM:
+ // A backslash skips the next character; otherwise track nesting of
+ // the opening/closing delimiter and leave the style when the count
+ // returns to zero.
+ if (sc.ch == '\\' && Quote.Up != '\\') {
+ sc.Forward();
+ } else if (sc.ch == Quote.Down) {
+ Quote.Count--;
+ if (Quote.Count == 0) {
+ sc.ForwardSetState(SCE_SH_DEFAULT);
+ }
+ } else if (sc.ch == Quote.Up) {
+ Quote.Count++;
+ }
+ break;
+ }
+
+ // Must check end of HereDoc state 1 before default state is handled
+ if (HereDoc.State == 1 && sc.atLineEnd) {
+ // Begin of here-doc (the line after the here-doc delimiter):
+ // Lexically, the here-doc starts from the next line after the <<, but the
+ // first line of here-doc seems to follow the style of the last EOL sequence
+ HereDoc.State = 2;
+ if (HereDoc.Quoted) {
+ if (sc.state == SCE_SH_HERE_DELIM) {
+ // Missing quote at end of string! We are stricter than bash.
+ // Colour here-doc anyway while marking this bit as an error.
+ sc.ChangeState(SCE_SH_ERROR);
+ }
+ // HereDoc.Quote is '\'' or '"' here (the only cases that set Quoted)
+ }
+ sc.SetState(SCE_SH_HERE_Q);
+ }
+
+ // Determine if a new state should be entered.
+ if (sc.state == SCE_SH_DEFAULT) {
+ if (sc.ch == '\\') { // escaped character
+ sc.SetState(SCE_SH_IDENTIFIER);
+ } else if (IsADigit(sc.ch)) {
+ sc.SetState(SCE_SH_NUMBER);
+ numBase = BASH_BASE_DECIMAL;
+ if (sc.ch == '0') { // hex,octal
+ if (sc.chNext == 'x' || sc.chNext == 'X') {
+ numBase = BASH_BASE_HEX;
+ sc.Forward();
+ } else if (IsADigit(sc.chNext)) {
+#ifdef PEDANTIC_OCTAL
+ numBase = BASH_BASE_OCTAL;
+#else
+ // permissive: a leading 0 followed by a digit uses the hex rules
+ numBase = BASH_BASE_HEX;
+#endif
+ }
+ }
+ } else if (setWordStart.Contains(sc.ch)) {
+ sc.SetState(SCE_SH_WORD);
+ } else if (sc.ch == '#') {
+ sc.SetState(SCE_SH_COMMENTLINE);
+ } else if (sc.ch == '\"') {
+ sc.SetState(SCE_SH_STRING);
+ Quote.Start(sc.ch);
+ } else if (sc.ch == '\'') {
+ sc.SetState(SCE_SH_CHARACTER);
+ Quote.Start(sc.ch);
+ } else if (sc.ch == '`') {
+ sc.SetState(SCE_SH_BACKTICKS);
+ Quote.Start(sc.ch);
+ } else if (sc.ch == '$') {
+ // Start as a scalar, then refine the style from the character
+ // that follows the '$'.
+ sc.SetState(SCE_SH_SCALAR);
+ sc.Forward();
+ if (sc.ch == '{') {
+ sc.ChangeState(SCE_SH_PARAM);
+ } else if (sc.ch == '\'') {
+ sc.ChangeState(SCE_SH_CHARACTER);
+ } else if (sc.ch == '"') {
+ sc.ChangeState(SCE_SH_STRING);
+ } else if (sc.ch == '(' || sc.ch == '`') {
+ sc.ChangeState(SCE_SH_BACKTICKS);
+ if (sc.chNext == '(') { // $(( is lexed as operator
+ sc.ChangeState(SCE_SH_OPERATOR);
+ }
+ } else {
+ continue; // scalar has no delimiter pair
+ }
+ // fallthrough, open delim for $[{'"(`]
+ Quote.Start(sc.ch);
+ } else if (sc.Match('<', '<')) {
+ sc.SetState(SCE_SH_HERE_DELIM);
+ HereDoc.State = 0;
+ HereDoc.Indent = false;
+ } else if (sc.ch == '-' && // one-char file test operators
+ setSingleCharOp.Contains(sc.chNext) &&
+ !setWord.Contains(sc.GetRelative(2)) &&
+ IsASpace(sc.chPrev)) {
+ sc.SetState(SCE_SH_WORD);
+ sc.Forward();
+ } else if (setBashOperator.Contains(sc.ch)) {
+ sc.SetState(SCE_SH_OPERATOR);
+ }
+ }
+ }
+ sc.Complete();
+}
+
+/**
+ * Report whether the first character on the given line that is neither a
+ * space nor a tab is '#'.
+ *
+ * The scan covers positions from the start of the line up to, but not
+ * including, the position just before the start of the next line. A line
+ * with nothing but spaces and tabs in that span is not a comment line.
+ */
+static bool IsCommentLine(int line, Accessor &styler) {
+    const int first = styler.LineStart(line);
+    const int limit = styler.LineStart(line + 1) - 1;
+    for (int p = first; p < limit; ++p) {
+        const char c = styler[p];
+        if (c == ' ' || c == '\t')
+            continue;   // still in the leading blank run
+        return c == '#';
+    }
+    return false;
+}
+
+/**
+ * Compute fold levels for the lines touched by the range that starts at
+ * startPos and spans length characters.
+ *
+ * '{' and '}' characters styled as SCE_SH_OPERATOR raise and lower the level.
+ * When the "fold.comment" property is non-zero, the first line of a run of
+ * comment lines raises the level and the last one lowers it. When
+ * "fold.compact" is non-zero (its default here is 1), lines without visible
+ * characters get SC_FOLDLEVELWHITEFLAG. The initial-style and word-list
+ * parameters are unused.
+ */
+static void FoldBashDoc(unsigned int startPos, int length, int, WordList *[],
+                        Accessor &styler) {
+    const bool foldComment = styler.GetPropertyInt("fold.comment") != 0;
+    const bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0;
+    const unsigned int endPos = startPos + length;
+
+    int line = styler.GetLine(startPos);
+    int levelAtLineStart = styler.LevelAt(line) & SC_FOLDLEVELNUMBERMASK;
+    int level = levelAtLineStart;
+    int visible = 0;
+
+    // One character / style of look-ahead is carried across iterations.
+    char lookaheadCh = styler[startPos];
+    int lookaheadStyle = styler.StyleAt(startPos);
+
+    for (unsigned int pos = startPos; pos < endPos; ++pos) {
+        const char ch = lookaheadCh;
+        lookaheadCh = styler.SafeGetCharAt(pos + 1);
+        const int style = lookaheadStyle;
+        lookaheadStyle = styler.StyleAt(pos + 1);
+
+        // A lone CR, or any LF, ends the line (CR of a CRLF pair does not).
+        const bool lineEnds = (ch == '\n') || (ch == '\r' && lookaheadCh != '\n');
+
+        // Comment folding
+        if (foldComment && lineEnds && IsCommentLine(line, styler)) {
+            const bool prevIsComment = IsCommentLine(line - 1, styler);
+            const bool nextIsComment = IsCommentLine(line + 1, styler);
+            if (!prevIsComment && nextIsComment) {
+                level++;        // first line of a comment run
+            } else if (prevIsComment && !nextIsComment) {
+                level--;        // last line of a comment run
+            }
+        }
+
+        if (style == SCE_SH_OPERATOR) {
+            switch (ch) {
+            case '{':
+                level++;
+                break;
+            case '}':
+                level--;
+                break;
+            default:
+                break;
+            }
+        }
+
+        if (lineEnds) {
+            int lev = levelAtLineStart;
+            if (foldCompact && visible == 0)
+                lev |= SC_FOLDLEVELWHITEFLAG;
+            if (visible > 0 && level > levelAtLineStart)
+                lev |= SC_FOLDLEVELHEADERFLAG;
+            if (styler.LevelAt(line) != lev) {
+                styler.SetLevel(line, lev);
+            }
+            line++;
+            levelAtLineStart = level;
+            visible = 0;
+        }
+
+        if (!isspacechar(ch))
+            visible++;
+    }
+
+    // Fill in the real level of the next line, keeping the current flags as they will be filled in later
+    const int keptFlags = styler.LevelAt(line) & ~SC_FOLDLEVELNUMBERMASK;
+    styler.SetLevel(line, levelAtLineStart | keptFlags);
+}
+
+// Descriptions of the word lists used by this lexer; the list is terminated by 0.
+// Presumably the single "Keywords" entry describes keywordlists[0], the only
+// list ColouriseBashDoc() reads -- confirm against the LexerModule interface.
+static const char * const bashWordListDesc[] = {
+ "Keywords",
+ 0
+};
+
+// Lexer module object for Bash: constructed from SCLEX_BASH, the colouriser,
+// the name "bash", the folder and the word list descriptions.
+LexerModule lmBash(SCLEX_BASH, ColouriseBashDoc, "bash", FoldBashDoc, bashWordListDesc);
|