core.internal.utf

Encode and decode UTF-8, UTF-16 and UTF-32 strings.

For Win32 systems, the C wchar_t type is UTF-16 and corresponds to the D wchar type. For Posix systems, the C wchar_t type is UTF-32 and corresponds to the D dchar type.

UTF character support is restricted to (\u0000 <= character <= \U0010FFFF).

See Also

Types 1

aliaswptr = const(wchar) *

Functions 31

fnvoid onUnicodeError( string msg, size_t idx, string file = __FILE__, size_t line = __LINE__ ) @safe pure;
fnbool isValidDchar(dchar c) @safe @nogc pure nothrow — Tests whether c is a valid UTF-32 character.
fnuint stride(const scope char[] s, size_t i) @safe @nogc pure nothrow — stride() returns the length of a UTF-8 sequence starting at index i in string s. Returns: The number of bytes in the UTF-8 sequence, or 0xFF, meaning s[i] is not the start of a UTF-8 sequence.
fnuint stride(const scope wchar[] s, size_t i) @safe @nogc pure nothrowstride() returns the length of a UTF-16 sequence starting at index i in string s.
fnuint stride(const scope dchar[] s, size_t i) @safe @nogc pure nothrowstride() returns the length of a UTF-32 sequence starting at index i in string s. Returns: The return value will always be 1.
fnsize_t toUCSindex(const scope char[] s, size_t i) @safe pureGiven an index i into an array of characters s[], and assuming that index i is at the start of a UTF character, determine the number of UCS characters up to that index i.
fnsize_t toUCSindex(const scope wchar[] s, size_t i) @safe pureditto
fnsize_t toUCSindex(const scope dchar[] s, size_t i) @safe @nogc pure nothrowditto
fnsize_t toUTFindex(const scope char[] s, size_t n) @safe pureGiven a UCS index n into an array of characters s[], return the UTF index.
fnsize_t toUTFindex(const scope wchar[] s, size_t n) @safe @nogc pure nothrowditto
fnsize_t toUTFindex(const scope dchar[] s, size_t n) @safe @nogc pure nothrowditto
fndchar decode(const scope char[] s, ref size_t idx) @safe pureDecodes and returns character starting at s[idx]. idx is advanced past the decoded character. If the character is not well formed, a UtfException is thrown and idx remains unchanged.
fndchar decode(const scope wchar[] s, ref size_t idx) @safe pureditto
fndchar decode(const scope dchar[] s, ref size_t idx) @safe pureditto
fnvoid encode(ref char[] s, dchar c) @safe pure nothrowEncodes character c and appends it to array s[].
fnvoid encode(ref wchar[] s, dchar c) @safe pure nothrowditto
fnvoid encode(ref dchar[] s, dchar c) @safe pure nothrowditto
fnubyte codeLength(C)(dchar c) @safe pure nothrow @nogc — Returns the code length of c in the encoding that uses C as its code unit. The length is returned as a count of C elements, not in bytes.
fnbool isValidString(S)(const scope S s) @safe pure nothrow — Checks whether the string s is well formed. S can be an array of char, wchar, or dchar. Returns false if s is not well formed. Use to check all untrusted input for correctness.
fnchar[] toUTF8(return scope char[] buf, dchar c) @safe pure nothrow @nogc
fnstring toUTF8(return scope string s) @safe pure nothrowEncodes string s into UTF-8 and returns the encoded string.
fnstring toUTF8(const scope wchar[] s) @trusted pureditto
fnstring toUTF8(const scope dchar[] s) @trusted pureditto
fnwchar[] toUTF16(return scope wchar[] buf, dchar c) @safe pure nothrow @nogc
fnwstring toUTF16(const scope char[] s) @trusted pureEncodes string s into UTF-16 and returns the encoded string. toUTF16z() is suitable for calling the 'W' functions in the Win32 API that take an LPWSTR or LPCWSTR argument.
fnwptr toUTF16z(const scope char[] s) @safe pureditto
fnwstring toUTF16(return scope wstring s) @safe pure nothrowditto
fnwstring toUTF16(const scope dchar[] s) @trusted pure nothrowditto
fndstring toUTF32(const scope char[] s) @trusted pureEncodes string s into UTF-32 and returns the encoded string.
fndstring toUTF32(const scope wchar[] s) @trusted pureditto
fndstring toUTF32(return scope dstring s) @safe pure nothrowditto

Variables 1

var[ cast(ubyte) 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 0xFF, 0xFF, ] UTF8stride