dparse.lexer

Types 19

alias IdType = TokenIdType!(operators, dynamicTokens, keywords)

Token ID type for the D lexer.

alias str = tokenStringRepresentation!(IdType, operators, dynamicTokens, keywords)

Function used for converting an IdType to a string.

Examples

IdType c = tok!"case";
assert (str(c) == "case");

The token type in the D lexer

Same as Token, but doesn't contain child TriviaTokens

enum WhitespaceBehavior : ubyte

Configure whitespace handling

include = 0b0000_0000
skip = 0b0000_0001

Configure string lexing behavior

Fields
StringBehavior compiler — Do not include quote characters, process escape sequences
StringBehavior includeQuoteChars — Opening quotes, closing quotes, and string suffixes are included in the string token
StringBehavior notEscaped — String escape sequences are not replaced
StringBehavior source — Not modified at all. Useful for formatters or highlighters
ubyte behavior
enum CommentBehavior : bool
intern = true
noIntern = false

Lexer configuration struct

Fields
string fileName
StringBehavior stringBehavior
WhitespaceBehavior whitespaceBehavior
CommentBehavior commentBehavior
alias BasicTypes = AliasSeq!(tok!"int", tok!"bool", tok!"byte", tok!"cdouble", tok!"cent", tok!"cfloat", tok!"char", tok!"creal", tok!"dchar", tok!"double", tok!"float", tok!"idouble", tok!"ifloat", tok!"ireal", tok!"long", tok!"real", tok!"short", tok!"ubyte", tok!"ucent", tok!"uint", tok!"ulong", tok!"ushort", tok!"void", tok!"wchar")

Basic type token types.

alias NumberLiterals = AliasSeq!(tok!"doubleLiteral", tok!"floatLiteral", tok!"idoubleLiteral", tok!"ifloatLiteral", tok!"intLiteral", tok!"longLiteral", tok!"realLiteral", tok!"irealLiteral", tok!"uintLiteral", tok!"ulongLiteral")

Number literal token types.

alias IntegerLiterals = AliasSeq!(tok!"intLiteral", tok!"longLiteral", tok!"uintLiteral", tok!"ulongLiteral")

Integer literal token types.

alias Operators = AliasSeq!(tok!",", tok!".", tok!"..", tok!"...", tok!"/", tok!"/=", tok!"!", tok!"!<", tok!"!<=", tok!"!<>", tok!"!<>=", tok!"!=", tok!"!>", tok!"!>=", tok!"$", tok!"%", tok!"%=", tok!"&", tok!"&&", tok!"&=", tok!"(", tok!")", tok!"*", tok!"*=", tok!"+", tok!"++", tok!"+=", tok!"-", tok!"--", tok!"-=", tok!":", tok!";", tok!"<", tok!"<<", tok!"<<=", tok!"<=", tok!"<>", tok!"<>=", tok!"=", tok!"==", tok!"=>", tok!">", tok!">=", tok!">>", tok!">>=", tok!">>>", tok!">>>=", tok!"?", tok!"@", tok!"[", tok!"]", tok!"^", tok!"^=", tok!"^^", tok!"^^=", tok!"{", tok!"|", tok!"|=", tok!"||", tok!"}", tok!"~", tok!"~=")

Operator token types.

alias Keywords = AliasSeq!(tok!"abstract", tok!"alias", tok!"align", tok!"asm", tok!"assert", tok!"auto", tok!"break", tok!"case", tok!"cast", tok!"catch", tok!"class", tok!"const", tok!"continue", tok!"debug", tok!"default", tok!"delegate", tok!"delete", tok!"deprecated", tok!"do", tok!"else", tok!"enum", tok!"export", tok!"extern", tok!"false", tok!"final", tok!"finally", tok!"for", tok!"foreach", tok!"foreach_reverse", tok!"function", tok!"goto", tok!"if", tok!"immutable", tok!"import", tok!"in", tok!"inout", tok!"interface", tok!"invariant", tok!"is", tok!"lazy", tok!"macro", tok!"mixin", tok!"module", tok!"new", tok!"nothrow", tok!"null", tok!"out", tok!"override", tok!"package", tok!"pragma", tok!"private", tok!"protected", tok!"public", tok!"pure", tok!"ref", tok!"return", tok!"scope", tok!"shared", tok!"static", tok!"struct", tok!"super", tok!"switch", tok!"synchronized", tok!"template", tok!"this", tok!"throw", tok!"true", tok!"try", tok!"typedef", tok!"typeid", tok!"typeof", tok!"union", tok!"unittest", tok!"version", tok!"while", tok!"with", tok!"__DATE__", tok!"__EOF__", tok!"__FILE__", tok!"__FILE_FULL_PATH__", tok!"__FUNCTION__", tok!"__gshared", tok!"__LINE__", tok!"__MODULE__", tok!"__parameters", tok!"__PRETTY_FUNCTION__", tok!"__TIME__", tok!"__TIMESTAMP__", tok!"__traits", tok!"__vector", tok!"__VENDOR__", tok!"__VERSION__")

Keyword token types.

alias StringLiterals = AliasSeq!(tok!"dstringLiteral", tok!"stringLiteral", tok!"wstringLiteral")

String literal token types

alias Protections = AliasSeq!(tok!"export", tok!"package", tok!"private", tok!"public", tok!"protected")

Protection token types.

alias SpecialTokens = AliasSeq!(tok!"__DATE__", tok!"__TIME__", tok!"__TIMESTAMP__", tok!"__VENDOR__", tok!"__VERSION__", tok!"__FILE__", tok!"__FILE_FULL_PATH__", tok!"__LINE__", tok!"__MODULE__", tok!"__FUNCTION__", tok!"__PRETTY_FUNCTION__")
alias Literals = AliasSeq!(StringLiterals, NumberLiterals, tok!"characterLiteral", SpecialTokens, tok!"true", tok!"false", tok!"null", tok!"$")
struct DLexer

The D lexer struct.

Fields
tokenStart
Message[] _messages
StringCache * cache
bool haveSSE42
IstringState[] istringStack
Methods
void popFront()() pure nothrow @safe
private void _popFrontNoIstring() pure nothrow @safe
const(Message[]) messages() const @property — Returns: An array of all of the warnings and errors generated so far during lexing. It may make sense to only check this when `empty` returns `true`.
void lexWhitespace(ref Token token) @trusted
void lexNumber(ref Token token)
void lexHex(ref Token token)
void lexHex(ref Token token, size_t mark, size_t line, size_t column, size_t index) @trusted
void lexBinary(ref Token token)
void lexBinary(ref Token token, size_t mark, size_t line, size_t column, size_t index) @trusted
void lexDecimal(ref Token token)
void lexDecimal(ref Token token, size_t mark, size_t line, size_t column, size_t index) @trusted
void lexIntSuffix(ref IdType type) pure nothrow @safe
void lexFloatSuffix(ref IdType type) pure nothrow @safe
void lexExponent(ref IdType type) pure nothrow @safe
void lexScriptLine(ref Token token)
void lexSlashStarComment(ref Token token) @trusted
void lexSlashSlashComment(ref Token token) @trusted
void lexSlashPlusComment(ref Token token) @trusted
void lexStringLiteral(ref Token token) @trusted
void lexWysiwygString(ref Token token) @trusted
private ubyte lexStringSuffix() pure nothrow @safe
private ubyte lexStringSuffix(ref IdType type) pure nothrow @safe
void lexDelimitedString(ref Token token)
void lexNormalDelimitedString(ref Token token, size_t mark, size_t line, size_t column, size_t index, ubyte open, ubyte close)
void lexHeredocString(ref Token token, size_t mark, size_t line, size_t column, size_t index)
void lexTokenString(ref Token token)
void lexHexString(ref Token token)
void lexCharacterLiteral(ref Token token)
void lexIdentifier(ref Token token, const bool silent = false) @trusted
void lexDot(ref Token token)
void lexLongNewline(ref Token token) @nogc
bool isNewline() @nogc
bool isSeparating(size_t offset) @nogc
void error(ref Token token, string message)
void error(string message)
void warning(string message)
Constructors
this(R range, const LexerConfig config, StringCache * cache, bool haveSSE42 = sse42()) — Params: range = the bytes that compose the source code that will be lexed. config = the lexer configuration to use. cache = the string interning cache for de-duplicating identifiers and other token...
Nested Templates
Message — Lexer error/warning message.
IstringState

The string cache is used for string interning.

It will only store a single copy of any string that it is asked to hold. Interned strings can be compared for equality by comparing their .ptr field.

Default and postblit constructors are disabled. When a StringCache goes out of scope, the memory held by it is freed.

See_also: http://en.wikipedia.org/wiki/String_interning

Fields
defaultBucketCount — The default bucket count for the string cache.
BLOCK_SIZE
BIG_STRING
Node *[] buckets
Block * rootBlock
Methods
string intern(const(ubyte)[] str) @safe — Caches a string.
string intern(string str) @trusted — ditto
string _intern(const(ubyte)[] bytes) @trusted
Node * find(const(ubyte)[] bytes, uint hash) @trusted
uint hashBytes(const(ubyte)[] data) pure nothrow @trusted @nogc
ubyte[] allocate(size_t numBytes) pure nothrow @trusted @nogc
Constructors
this(size_t bucketCount) — Params: bucketCount = the initial number of buckets. Must be a power of two
Destructors
Nested Templates
Node
Block

Functions 17

fn bool isBasicType(IdType type) nothrow pure @safe @nogc — Returns: true if the given ID is for a basic type.
fn bool isNumberLiteral(IdType type) nothrow pure @safe @nogc — Returns: true if the given ID type is for a number literal.
fn bool isIntegerLiteral(IdType type) nothrow pure @safe @nogc — Returns: true if the given ID type is for an integer literal.
fn bool isOperator(IdType type) nothrow pure @safe @nogc — Returns: true if the given ID type is for an operator.
fn bool isKeyword(IdType type) pure nothrow @safe @nogc — Returns: true if the given ID type is for a keyword.
fn bool isStringLiteral(IdType type) pure nothrow @safe @nogc — Returns: true if the given ID type is for a string literal.
fn bool isProtection(IdType type) pure nothrow @safe @nogc — Returns: true if the given ID type is for a protection attribute.
fn bool isSpecialToken(IdType type) pure nothrow @safe @nogc
fn bool isLiteral(IdType type) pure nothrow @safe @nogc
fn Token[] getTokensForParser(R)(R sourceCode, LexerConfig config, StringCache * cache) if (is(Unqual!(ElementEncodingType!R) : ubyte) && isDynamicArray!R) — Returns: an array of tokens lexed from the given source code to the output range. All whitespace, comment and specialTokenSequence tokens (trivia) are attached to the token nearest to them.
fn auto byToken(R)(R range) if (is(Unqual!(ElementEncodingType!R) : ubyte) && isDynamicArray!R) — Creates a token range from the given source code. Creates a default lexer configuration and a GC-managed string cache.
fn auto byToken(R)(R range, StringCache * cache) if (is(Unqual!(ElementEncodingType!R) : ubyte) && isDynamicArray!R) — Creates a token range from the given source code. Uses the given string cache.
fn auto byToken(R)(R range, const LexerConfig config, StringCache * cache) if (is(Unqual!(ElementEncodingType!R) : ubyte) && isDynamicArray!R) — Creates a token range from the given source code. Uses the provided lexer configuration and string cache.
fn size_t optimalBucketCount(size_t size) — Helper function used to avoid too many allocations while lexing.
private fn void * calloc(size_t, size_t) nothrow pure @nogc @trusted
private fn void * malloc(size_t) nothrow pure @nogc @trusted
private fn void free(void *) nothrow pure @nogc @trusted

Variables 7

private var operators = [ ",", ".", "..", "...", "/", "/=", "!", "!<", "!<=", "!<>", "!<>=", "!=", "!>", "!>=", "$", "%", "%=", "&", "&&", "&=", "(", ")", "*", "*=", "+", "++", "+=", "-", "--", "-=", ":", ";", "<", "<<", "<<=", "<=", "<>", "<>=", "=", "==", "=>", ">", ">=", ">>", ">>=", ">>>", ">>>=", "?", "@", "[", "]", "^", "^=", "^^", "^^=", "{", "|", "|=", "||", "}", "~", "~=" ]

Operators

private var keywords = [ "abstract", "alias", "align", "asm", "assert", "auto", "bool", "break", "byte", "case", "cast", "catch", "cdouble", "cent", "cfloat", "char", "class", "const", "continue", "creal", "dchar", "debug", "default", "delegate", "delete", "deprecated", "do", "double", "else", "enum", "export", "extern", "false", "final", "finally", "float", "for", "foreach", "foreach_reverse", "function", "goto", "idouble", "if", "ifloat", "immutable", "import", "in", "inout", "int", "interface", "invariant", "ireal", "is", "lazy", "long", "macro", "mixin", "module", "new", "nothrow", "null", "out", "override", "package", "pragma", "private", "protected", "public", "pure", "real", "ref", "return", "scope", "shared", "short", "static", "struct", "super", "switch", "synchronized", "template", "this", "throw", "true", "try", "typedef", "typeid", "typeof", "ubyte", "ucent", "uint", "ulong", "union", "unittest", "ushort", "version", "void", "wchar", "while", "with", "__DATE__", "__EOF__", "__FILE__", "__FILE_FULL_PATH__", "__FUNCTION__", "__gshared", "__LINE__", "__MODULE__", "__parameters", "__PRETTY_FUNCTION__", "__TIME__", "__TIMESTAMP__", "__traits", "__vector", "__VENDOR__", "__VERSION__" ]

Keywords

private var dynamicTokens = [ "specialTokenSequence", "comment", "identifier", "scriptLine", "whitespace", "doubleLiteral", "floatLiteral", "idoubleLiteral", "ifloatLiteral", "intLiteral", "longLiteral", "realLiteral", "irealLiteral", "uintLiteral", "ulongLiteral", "characterLiteral", "dstringLiteral", "stringLiteral", "wstringLiteral", "istringLiteralStart", "istringLiteralText", "istringLiteralEnd" ]

Other tokens

private var pseudoTokenHandlers = [ "\"", "lexStringLiteral", "`", "lexWysiwygString", "//", "lexSlashSlashComment", "/*", "lexSlashStarComment", "/+", "lexSlashPlusComment", ".", "lexDot", "'", "lexCharacterLiteral", "0", "lexNumber", "1", "lexDecimal", "2", "lexDecimal", "3", "lexDecimal", "4", "lexDecimal", "5", "lexDecimal", "6", "lexDecimal", "7", "lexDecimal", "8", "lexDecimal", "9", "lexDecimal", "i\"", "lexInterpolatedString", "i`", "lexInterpolatedString", "iq{", "lexInterpolatedString", "q\"", "lexDelimitedString", "q{", "lexTokenString", "r\"", "lexWysiwygString", "x\"", "lexHexString", " ", "lexWhitespace", "\t", "lexWhitespace", "\r", "lexWhitespace", "\n", "lexWhitespace", "\v", "lexWhitespace", "\f", "lexWhitespace", "\u2028", "lexLongNewline", "\u2029", "lexLongNewline", "#!", "lexScriptLine", "#line", "lexSpecialTokenSequence" ]
private var extraFields = "import dparse.lexer:TokenTriviaFields,TriviaToken; mixin TokenTriviaFields;"
private var extraFieldsBare = q{ import dparse.lexer : Token; this(Token token) pure nothrow @safe @nogc { this(token.type, token.text, token.line, token.column, token.index); } int opCmp(size_t i) const pure nothrow @safe @nogc { if (index < i) return -1; if (index > i) return 1; return 0; } int opCmp(ref const typeof(this) other) const pure nothrow @safe @nogc { return opCmp(other.index); } string toString() const @safe pure { import std.array : appender; auto sink = appender!string; toString(sink); return sink.data; } void toString(R)(auto ref R sink) const { import std.conv : to; import dparse.lexer : str; sink.put(`trivia!"`); sink.put(str(type)); sink.put(`"(`); sink.put("text: "); sink.put([text].to!string[1 .. $ - 1]); // escape hack sink.put(", index: "); sink.put(index.to!string); sink.put(", line: "); sink.put(line.to!string); sink.put(", column: "); sink.put(column.to!string); sink.put(")"); } }
private var stringBehaviorNotWorking = "Automatic string parsing is not " ~ "supported and was previously not working. To unescape strings use the " ~ "`dparse.strings : unescapeString` function on the token texts instead."

Templates 1

tmpl tok(string token)

Template used to refer to D token types.

See the operators, keywords, and dynamicTokens enums for values that can be passed to this template. Example:

import dparse.lexer;
IdType t = tok!"floatLiteral";