dparse.lexer

Types 19

alias IdType = TokenIdType!(operators, dynamicTokens, keywords)

Token ID type for the D lexer.

alias str = tokenStringRepresentation!(IdType, operators, dynamicTokens, keywords)

Function used for converting an IdType to a string.

Examples

IdType c = tok!"case";
assert (str(c) == "case");

The token type in the D lexer

Same as Token, but doesn't contain child TriviaTokens

enum WhitespaceBehavior : ubyte

Configure whitespace handling

include = 0b0000_0000
skip = 0b0000_0001

Configure string lexing behavior

Fields
StringBehavior compiler — Do not include quote characters, process escape sequences
StringBehavior includeQuoteChars — Opening quotes, closing quotes, and string suffixes are included in the string token
StringBehavior notEscaped — String escape sequences are not replaced
StringBehavior source — Not modified at all. Useful for formatters or highlighters
ubyte behavior
enum CommentBehavior : bool
intern = true
noIntern = false

Lexer configuration struct

Fields
string fileName
StringBehavior stringBehavior
WhitespaceBehavior whitespaceBehavior
CommentBehavior commentBehavior
alias BasicTypes = AliasSeq!(tok!"int", tok!"bool", tok!"byte", tok!"cdouble", tok!"cent", tok!"cfloat", tok!"char", tok!"creal", tok!"dchar", tok!"double", tok!"float", tok!"idouble", tok!"ifloat", tok!"ireal", tok!"long", tok!"real", tok!"short", tok!"ubyte", tok!"ucent", tok!"uint", tok!"ulong", tok!"ushort", tok!"void", tok!"wchar")

Basic type token types.

alias NumberLiterals = AliasSeq!(tok!"doubleLiteral", tok!"floatLiteral", tok!"idoubleLiteral", tok!"ifloatLiteral", tok!"intLiteral", tok!"longLiteral", tok!"realLiteral", tok!"irealLiteral", tok!"uintLiteral", tok!"ulongLiteral")

Number literal token types.

alias IntegerLiterals = AliasSeq!(tok!"intLiteral", tok!"longLiteral", tok!"uintLiteral", tok!"ulongLiteral")

Integer literal token types.

alias Operators = AliasSeq!(tok!",", tok!".", tok!"..", tok!"...", tok!"/", tok!"/=", tok!"!", tok!"!<", tok!"!<=", tok!"!<>", tok!"!<>=", tok!"!=", tok!"!>", tok!"!>=", tok!"$", tok!"%", tok!"%=", tok!"&", tok!"&&", tok!"&=", tok!"(", tok!")", tok!"*", tok!"*=", tok!"+", tok!"++", tok!"+=", tok!"-", tok!"--", tok!"-=", tok!":", tok!";", tok!"<", tok!"<<", tok!"<<=", tok!"<=", tok!"<>", tok!"<>=", tok!"=", tok!"==", tok!"=>", tok!">", tok!">=", tok!">>", tok!">>=", tok!">>>", tok!">>>=", tok!"?", tok!"@", tok!"[", tok!"]", tok!"^", tok!"^=", tok!"^^", tok!"^^=", tok!"{", tok!"|", tok!"|=", tok!"||", tok!"}", tok!"~", tok!"~=")

Operator token types.

alias Keywords = AliasSeq!(tok!"abstract", tok!"alias", tok!"align", tok!"asm", tok!"assert", tok!"auto", tok!"break", tok!"case", tok!"cast", tok!"catch", tok!"class", tok!"const", tok!"continue", tok!"debug", tok!"default", tok!"delegate", tok!"delete", tok!"deprecated", tok!"do", tok!"else", tok!"enum", tok!"export", tok!"extern", tok!"false", tok!"final", tok!"finally", tok!"for", tok!"foreach", tok!"foreach_reverse", tok!"function", tok!"goto", tok!"if", tok!"immutable", tok!"import", tok!"in", tok!"inout", tok!"interface", tok!"invariant", tok!"is", tok!"lazy", tok!"macro", tok!"mixin", tok!"module", tok!"new", tok!"nothrow", tok!"null", tok!"out", tok!"override", tok!"package", tok!"pragma", tok!"private", tok!"protected", tok!"public", tok!"pure", tok!"ref", tok!"return", tok!"scope", tok!"shared", tok!"static", tok!"struct", tok!"super", tok!"switch", tok!"synchronized", tok!"template", tok!"this", tok!"throw", tok!"true", tok!"try", tok!"typedef", tok!"typeid", tok!"typeof", tok!"union", tok!"unittest", tok!"version", tok!"while", tok!"with", tok!"__DATE__", tok!"__EOF__", tok!"__FILE__", tok!"__FILE_FULL_PATH__", tok!"__FUNCTION__", tok!"__gshared", tok!"__LINE__", tok!"__MODULE__", tok!"__parameters", tok!"__PRETTY_FUNCTION__", tok!"__TIME__", tok!"__TIMESTAMP__", tok!"__traits", tok!"__vector", tok!"__VENDOR__", tok!"__VERSION__")

Keyword token types.

alias StringLiterals = AliasSeq!(tok!"dstringLiteral", tok!"stringLiteral", tok!"wstringLiteral")

String literal token types

alias Protections = AliasSeq!(tok!"export", tok!"package", tok!"private", tok!"public", tok!"protected")

Protection token types.

alias SpecialTokens = AliasSeq!(tok!"__DATE__", tok!"__TIME__", tok!"__TIMESTAMP__", tok!"__VENDOR__", tok!"__VERSION__", tok!"__FILE__", tok!"__FILE_FULL_PATH__", tok!"__LINE__", tok!"__MODULE__", tok!"__FUNCTION__", tok!"__PRETTY_FUNCTION__")
alias Literals = AliasSeq!(StringLiterals, NumberLiterals, tok!"characterLiteral", SpecialTokens, tok!"true", tok!"false", tok!"null", tok!"$")
struct DLexer

The D lexer struct.

Fields
tokenStart
Message[] _messages
StringCache * cache
bool haveSSE42
IstringState[] istringStack
Methods
void popFront()() pure nothrow @safe
private void _popFrontNoIstring() pure nothrow @safe
const(Message[]) messages() const @property — Returns: An array of all of the warnings and errors generated so far during lexing. It may make sense to only check this when `empty` returns `true`.
void lexWhitespace(ref Token token) @trusted
void lexNumber(ref Token token)
void lexHex(ref Token token)
void lexHex(ref Token token, size_t mark, size_t line, size_t column, size_t index) @trusted
void lexBinary(ref Token token)
void lexBinary(ref Token token, size_t mark, size_t line, size_t column, size_t index) @trusted
void lexDecimal(ref Token token)
void lexDecimal(ref Token token, size_t mark, size_t line, size_t column, size_t index) @trusted
void lexIntSuffix(ref IdType type) pure nothrow @safe
void lexFloatSuffix(ref IdType type) pure nothrow @safe
void lexExponent(ref IdType type) pure nothrow @safe
void lexScriptLine(ref Token token)
void lexSlashStarComment(ref Token token) @trusted
void lexSlashSlashComment(ref Token token) @trusted
void lexSlashPlusComment(ref Token token) @trusted
void lexStringLiteral(ref Token token) @trusted
void lexWysiwygString(ref Token token) @trusted
private ubyte lexStringSuffix() pure nothrow @safe
private ubyte lexStringSuffix(ref IdType type) pure nothrow @safe
void lexDelimitedString(ref Token token)
void lexNormalDelimitedString(ref Token token, size_t mark, size_t line, size_t column, size_t index, ubyte open, ubyte close)
void lexHeredocString(ref Token token, size_t mark, size_t line, size_t column, size_t index)
void lexTokenString(ref Token token)
void lexHexString(ref Token token)
void lexCharacterLiteral(ref Token token)
void lexIdentifier(ref Token token, const bool silent = false) @trusted
void lexDot(ref Token token)
void lexLongNewline(ref Token token) @nogc
bool isNewline() @nogc
bool isSeparating(size_t offset) @nogc
void error(ref Token token, string message)
void error(string message)
void warning(string message)
Constructors
this(R range, const LexerConfig config, StringCache * cache, bool haveSSE42 = sse42()) — Params: range = the bytes that compose the source code that will be lexed. config = the lexer configuration to use. cache = the string interning cache for de-duplicating identifiers and other token...
Nested Templates
Message — Lexer error/warning message.
IstringState

The string cache is used for string interning.

It will only store a single copy of any string that it is asked to hold. Interned strings can be compared for equality by comparing their .ptr field.

Default and postblit constructors are disabled. When a StringCache goes out of scope, the memory held by it is freed.

See_also: http://en.wikipedia.org/wiki/String_interning

Fields
defaultBucketCount — The default bucket count for the string cache.
BLOCK_SIZE
BIG_STRING
Node *[] buckets
Block * rootBlock
Methods
string intern(const(ubyte)[] str) @safe — Caches a string.
string intern(string str) @trusted — ditto
string _intern(const(ubyte)[] bytes) @trusted
Node * find(const(ubyte)[] bytes, uint hash) @trusted
uint hashBytes(const(ubyte)[] data) pure nothrow @trusted @nogc
ubyte[] allocate(size_t numBytes) pure nothrow @trusted @nogc
Constructors
this(size_t bucketCount) — Params: bucketCount = the initial number of buckets. Must be a power of two
Destructors
Nested Templates
Node
Block

Functions 17

fn bool isBasicType(IdType type) nothrow pure @safe @nogc — Returns: true if the given ID is for a basic type.
fn bool isNumberLiteral(IdType type) nothrow pure @safe @nogc — Returns: true if the given ID type is for a number literal.
fn bool isIntegerLiteral(IdType type) nothrow pure @safe @nogc — Returns: true if the given ID type is for an integer literal.
fn bool isOperator(IdType type) nothrow pure @safe @nogc — Returns: true if the given ID type is for an operator.
fn bool isKeyword(IdType type) pure nothrow @safe @nogc — Returns: true if the given ID type is for a keyword.
fn bool isStringLiteral(IdType type) pure nothrow @safe @nogc — Returns: true if the given ID type is for a string literal.
fn bool isProtection(IdType type) pure nothrow @safe @nogc — Returns: true if the given ID type is for a protection attribute.
fn bool isSpecialToken(IdType type) pure nothrow @safe @nogc
fn bool isLiteral(IdType type) pure nothrow @safe @nogc
fn Token[] getTokensForParser(R)(R sourceCode, LexerConfig config, StringCache * cache) if (is(Unqual!(ElementEncodingType!R) : ubyte) && isDynamicArray!R) — Returns: an array of tokens lexed from the given source code to the output range. All whitespace, comment and specialTokenSequence tokens (trivia) are attached to the token nearest to them.
fn auto byToken(R)(R range) if (is(Unqual!(ElementEncodingType!R) : ubyte) && isDynamicArray!R) — Creates a token range from the given source code. Creates a default lexer configuration and a GC-managed string cache.
fn auto byToken(R)(R range, StringCache * cache) if (is(Unqual!(ElementEncodingType!R) : ubyte) && isDynamicArray!R) — Creates a token range from the given source code. Uses the given string cache.
fn auto byToken(R)(R range, const LexerConfig config, StringCache * cache) if (is(Unqual!(ElementEncodingType!R) : ubyte) && isDynamicArray!R) — Creates a token range from the given source code. Uses the provided lexer configuration and string cache.
fn size_t optimalBucketCount(size_t size) — Helper function used to avoid too many allocations while lexing.
private fn void * calloc(size_t, size_t) nothrow pure @nogc @trusted
private fn void * malloc(size_t) nothrow pure @nogc @trusted
private fn void free(void *) nothrow pure @nogc @trusted

Variables 7

private var operators = [ ",", ".", "..", "...", "/", "/=", "!", "!<", "!<=", "!<>", "!<>=", "!=", "!>", "!>=", "$", "%", "%=", "&", "&&", "&=", "(", ")", "*", "*=", "+", "++", "+=", "-", "--", "-=", ":", ";", "<", "<<", "<<=", "<=", "<>", "<>=", "=", "==", "=>", ">", ">=", ">>", ">>=", ">>>", ">>>=", "?", "@", "[", "]", "^", "^=", "^^", "^^=", "{", "|", "|=", "||", "}", "~", "~=" ]

Operators

private var keywords = [ "abstract", "alias", "align", "asm", "assert", "auto", "bool", "break", "byte", "case", "cast", "catch", "cdouble", "cent", "cfloat", "char", "class", "const", "continue", "creal", "dchar", "debug", "default", "delegate", "delete", "deprecated", "do", "double", "else", "enum", "export", "extern", "false", "final", "finally", "float", "for", "foreach", "foreach_reverse", "function", "goto", "idouble", "if", "ifloat", "immutable", "import", "in", "inout", "int", "interface", "invariant", "ireal", "is", "lazy", "long", "macro", "mixin", "module", "new", "nothrow", "null", "out", "override", "package", "pragma", "private", "protected", "public", "pure", "real", "ref", "return", "scope", "shared", "short", "static", "struct", "super", "switch", "synchronized", "template", "this", "throw", "true", "try", "typedef", "typeid", "typeof", "ubyte", "ucent", "uint", "ulong", "union", "unittest", "ushort", "version", "void", "wchar", "while", "with", "__DATE__", "__EOF__", "__FILE__", "__FILE_FULL_PATH__", "__FUNCTION__", "__gshared", "__LINE__", "__MODULE__", "__parameters", "__PRETTY_FUNCTION__", "__TIME__", "__TIMESTAMP__", "__traits", "__vector", "__VENDOR__", "__VERSION__" ]

Keywords

private var dynamicTokens = [ "specialTokenSequence", "comment", "identifier", "scriptLine", "whitespace", "doubleLiteral", "floatLiteral", "idoubleLiteral", "ifloatLiteral", "intLiteral", "longLiteral", "realLiteral", "irealLiteral", "uintLiteral", "ulongLiteral", "characterLiteral", "dstringLiteral", "stringLiteral", "wstringLiteral", "istringLiteralStart", "istringLiteralText", "istringLiteralEnd" ]

Other tokens

private var pseudoTokenHandlers = [ "\"", "lexStringLiteral", "`", "lexWysiwygString", "//", "lexSlashSlashComment", "/*", "lexSlashStarComment", "/+", "lexSlashPlusComment", ".", "lexDot", "'", "lexCharacterLiteral", "0", "lexNumber", "1", "lexDecimal", "2", "lexDecimal", "3", "lexDecimal", "4", "lexDecimal", "5", "lexDecimal", "6", "lexDecimal", "7", "lexDecimal", "8", "lexDecimal", "9", "lexDecimal", "i\"", "lexInterpolatedString", "i`", "lexInterpolatedString", "iq{", "lexInterpolatedString", "q\"", "lexDelimitedString", "q{", "lexTokenString", "r\"", "lexWysiwygString", "x\"", "lexHexString", " ", "lexWhitespace", "\t", "lexWhitespace", "\r", "lexWhitespace", "\n", "lexWhitespace", "\v", "lexWhitespace", "\f", "lexWhitespace", "\u2028", "lexLongNewline", "\u2029", "lexLongNewline", "#!", "lexScriptLine", "#line", "lexSpecialTokenSequence" ]
private var extraFields = "import dparse.lexer:TokenTriviaFields,TriviaToken; mixin TokenTriviaFields;"
private var extraFieldsBare = q{ import dparse.lexer : Token; this(Token token) pure nothrow @safe @nogc { this(token.type, token.text, token.line, token.column, token.index); } int opCmp(size_t i) const pure nothrow @safe @nogc { if (index < i) return -1; if (index > i) return 1; return 0; } int opCmp(ref const typeof(this) other) const pure nothrow @safe @nogc { return opCmp(other.index); } string toString() const @safe pure { import std.array : appender; auto sink = appender!string; toString(sink); return sink.data; } void toString(R)(auto ref R sink) const { import std.conv : to; import dparse.lexer : str; sink.put(`trivia!"`); sink.put(str(type)); sink.put(`"(`); sink.put("text: "); sink.put([text].to!string[1 .. $ - 1]); // escape hack sink.put(", index: "); sink.put(index.to!string); sink.put(", line: "); sink.put(line.to!string); sink.put(", column: "); sink.put(column.to!string); sink.put(")"); } }
private var stringBehaviorNotWorking = "Automatic string parsing is not " ~ "supported and was previously not working. To unescape strings use the " ~ "`dparse.strings : unescapeString` function on the token texts instead."

Templates 1

tmpl tok(string token)

Template used to refer to D token types.

See the operators, keywords, and dynamicTokens enums for values that can be passed to this template. Example:

import dparse.lexer;
IdType t = tok!"floatLiteral";