mirror of
				https://github.com/OpenMW/openmw.git
				synced 2025-10-25 23:26:37 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			77 lines
		
	
	
	
		
			3.8 KiB
		
	
	
	
		
			Text
		
	
	
	
	
	
			
		
		
	
	
			77 lines
		
	
	
	
		
			3.8 KiB
		
	
	
	
		
			Text
		
	
	
	
	
	
| -------------------------------------------------------------------------------
 | |
| -- UTF-8 Support. 
 | |
| -- This library provides basic support for UTF-8 encoding.
 | |
| -- It provides all its functions inside the table utf8.
 | |
| -- This library does not provide any support for Unicode other than the handling of the encoding.
 | |
| -- Any operation that needs the meaning of a character, such as character classification, is outside its scope.
 | |
| --
 | |
| -- Unless stated otherwise, all functions that expect a byte position as a parameter assume that 
 | |
| -- the given position is either the start of a byte sequence or one plus the length of the subject string.
 | |
| -- As in the string library, negative indices count from the end of the string.
 | |
| -- @module utf8
 | |
| 
 | |
| -------------------------------------------------------------------------------
 | |
| -- Receives zero or more integers, converts each one to its
 | |
| -- corresponding UTF-8 byte sequence, and returns a string with the concatenation
 | |
| -- of all these sequences.
 | |
| -- @function [parent=#utf8] char
 | |
| -- @param ... zero or more integers.
 | |
| -- @return #string
 | |
| 
 | |
| -------------------------------------------------------------------------------
 | |
| -- The pattern (a string, not a function) which matches exactly one UTF-8 byte sequence, assuming that
 | |
| -- the subject is a valid UTF-8 string.
 | |
| -- @function [parent=#utf8] charpattern
 | |
| -- @return #string
 | |
| 
 | |
| -------------------------------------------------------------------------------
 | |
| -- Returns values so that the construction
 | |
| --
 | |
| --     for p, c in utf8.codes(s) do body end
 | |
| --
 | |
| -- will iterate over all characters in string s, with p being the position (in bytes)
 | |
| -- and c the code point of each character.
 | |
| -- It raises an error if it meets any invalid byte sequence.
 | |
| -- @function [parent=#utf8] codes
 | |
| -- @param #string s string to handle.
 | |
| 
 | |
| -------------------------------------------------------------------------------
 | |
| -- Returns the codepoints (as integers) from all characters in s that start
 | |
| -- between byte position i and j (both included). The default for i is 1 and for j is i.
 | |
| -- It raises an error if it meets any invalid byte sequence.
 | |
| -- @function [parent=#utf8] codepoint
 | |
| -- @param #string s string to handle
 | |
| -- @param #number i the initial position (default value is 1)
 | |
| -- @param #number j the final position (default value is i)
 | |
| -- @return #number the code point of each character that starts between i and j (one return value per character)
 | |
| 
 | |
| -------------------------------------------------------------------------------
 | |
| -- Returns the number of UTF-8 characters in string s that start
 | |
| -- between positions i and j (both inclusive).
 | |
| -- The default for i is 1 and for j is -1.
 | |
| -- If it finds any invalid byte sequence,
 | |
| -- returns a false value plus the position of the first invalid byte.
 | |
| -- @function [parent=#utf8] len
 | |
| -- @param #string s string to handle
 | |
| -- @param #number i the initial position (default value is 1)
 | |
| -- @param #number j the final position (default value is -1)
 | |
| -- @return #number the number of utf8 characters in s
 | |
| 
 | |
| -------------------------------------------------------------------------------
 | |
| -- Returns the position (in bytes) where the encoding of the n-th character of s
 | |
| -- (counting from position i) starts. A negative n gets characters before position i.
 | |
| -- The default for i is 1 when n is non-negative and #s + 1 otherwise,
 | |
| -- so that utf8.offset(s, -n) gets the offset of the n-th character from the end of the string.
 | |
| -- If the specified character is neither in the subject nor right after its end, the function returns nil.
 | |
| --
 | |
| -- As a special case, when n is 0 the function returns the
 | |
| -- start of the encoding of the character that contains the i-th byte of s.
 | |
| --
 | |
| -- This function assumes that s is a valid UTF-8 string.
 | |
| -- @function [parent=#utf8] offset
 | |
| -- @param #string s string to handle
 | |
| -- @param #number n the n-th character
 | |
| -- @param #number i the initial position (default value is 1 if n is non-negative and #s + 1 otherwise)
 | |
| -- @return #number
 | |
| 
 | |
| return nil
 |