From 532230254b244fb7a0c2ce5abd95381dadd3e406 Mon Sep 17 00:00:00 2001 From: Kindi Date: Sun, 27 Aug 2023 16:48:00 +0800 Subject: [PATCH] add documentation --- components/lua/utf8.cpp | 3 +- files/lua_api/utf8.doclua | 77 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 79 insertions(+), 1 deletion(-) create mode 100644 files/lua_api/utf8.doclua diff --git a/components/lua/utf8.cpp b/components/lua/utf8.cpp index 6a80505411..69160fdd53 100644 --- a/components/lua/utf8.cpp +++ b/components/lua/utf8.cpp @@ -108,7 +108,8 @@ namespace LuaUtf8 return result; }; - utf8["codes"] = [pos_byte = std::vector{ 1 }](const std::string_view& s) { + utf8["codes"] = [](const std::string_view& s) { + std::vector pos_byte{ 1 }; return sol::as_function([s, pos_byte]() mutable -> sol::optional> { if (pos_byte.back() <= static_cast(s.size())) { diff --git a/files/lua_api/utf8.doclua b/files/lua_api/utf8.doclua new file mode 100644 index 0000000000..6c054f4401 --- /dev/null +++ b/files/lua_api/utf8.doclua @@ -0,0 +1,77 @@ +------------------------------------------------------------------------------- +-- UTF-8 Support. +-- This library provides basic support for UTF-8 encoding. +-- It provides all its functions inside the table utf8. +-- This library does not provide any support for Unicode other than the handling of the encoding. +-- Any operation that needs the meaning of a character, such as character classification, is outside its scope. +-- +-- Unless stated otherwise, all functions that expect a byte position as a parameter assume that +-- the given position is either the start of a byte sequence or one plus the length of the subject string. +-- As in the string library, negative indices count from the end of the string. +-- @module utf8 + +------------------------------------------------------------------------------- +-- Receives zero or more integers, converts each one to its +-- corresponding UTF-8 byte sequence, and returns a string with the concatenation +-- of all these sequences. +-- @function [parent=#utf8] char +-- @param ... zero or more integers. +-- @return #string + +------------------------------------------------------------------------------- +-- The pattern which matches exactly one UTF-8 byte sequence, assuming that +-- the subject is a valid UTF-8 string. +-- @function [parent=#utf8] charpattern +-- @return #string + +------------------------------------------------------------------------------- +-- Returns values so that the construction +-- +-- for p, c in utf8.codes(s) do body end +-- +-- will iterate over all characters in string s, with p being the position (in bytes) +-- and c the code point of each character. +-- It raises an error if it meets any invalid byte sequence. +-- @function [parent=#utf8] codes +-- @param #string s string to handle. + +------------------------------------------------------------------------------- +-- Returns the codepoints (as integers) from all characters in s that start +-- between byte position i and j (both included). The default for i is 1 and for j is i. +-- It raises an error if it meets any invalid byte sequence. +-- @function [parent=#utf8] codepoint +-- @param #string s string to handle +-- @param #number i the initial position (default value is 1) +-- @param #number j the final position (default value is i) +-- @return #number the codepoints of each character in s + +------------------------------------------------------------------------------- +-- Returns the number of UTF-8 characters in string s that start +-- between positions i and j (both inclusive). +-- The default for i is 1 and for j is -1. +-- If it finds any invalid byte sequence, +-- returns a false value plus the position of the first invalid byte. +-- @function [parent=#utf8] len +-- @param #string s string to handle +-- @param #number i the initial position (default value is 1) +-- @param #number j the final position (default value is -1) +-- @return #number the number of utf8 characters in s + +------------------------------------------------------------------------------- +-- Returns the position (in bytes) where the encoding of the n-th character of s +-- (counting from position i) starts. A negative n gets characters before position i. +-- The default for i is 1 when n is non-negative and #s + 1 otherwise, +-- so that utf8.offset(s, -n) gets the offset of the n-th character from the end of the string. +-- If the specified character is neither in the subject nor right after its end, the function returns nil. +-- +-- As a special case, when n is 0 the function returns the +-- start of the encoding of the character that contains the i-th byte of s. +-- +-- This function assumes that s is a valid UTF-8 string. +-- @function [parent=#utf8] offset +-- @param #string s string to handle +-- @param #number n the n-th character +-- @param #number i the initial position (default value is 1 if n is is non-negative and #s + 1 otherwise) +-- @return #number + +return nil