Skolson5903
01/19/2022, 12:42 AMephemient
01/19/2022, 12:54 AMephemient
// 01/19/2022, 1:33 AM
/**
 * Transcodes this array of UTF-16 code units into UTF-8 bytes.
 *
 * A single leading byte-order mark (U+FEFF) is dropped. Code units are used exactly as stored;
 * no byte swapping is performed here, so the caller must already have assembled them in the
 * correct endianness.
 *
 * @return a new array holding the UTF-8 encoding of the input (empty for empty input)
 * @throws CharacterCodingException if a high surrogate is not immediately followed by a low
 *   surrogate, or if a low surrogate appears without a preceding high surrogate
 */
@OptIn(ExperimentalUnsignedTypes::class)
fun UShortArray.utf16ToUtf8(): UByteArray {
    var i = if (this.firstOrNull() == 0xFEFF.toUShort()) 1 else 0 // skip BOM (U+FEFF)
    // Worst case is 3 bytes per code unit: a lone BMP unit needs at most 3 bytes,
    // and a surrogate pair needs 4 bytes for 2 units.
    val bytes = UByteArray((this.size - i) * 3)
    var j = 0
    while (i < this.size) {
        val codepoint = when (val unit = this[i++].toInt()) {
            in Char.MIN_HIGH_SURROGATE.code..Char.MAX_HIGH_SURROGATE.code -> {
                if (i !in this.indices) throw CharacterCodingException() // unpaired high surrogate
                val next = this[i++].toInt()
                if (next !in Char.MIN_LOW_SURROGATE.code..Char.MAX_LOW_SURROGATE.code) {
                    throw CharacterCodingException() // unpaired high surrogate
                }
                // Each surrogate carries 10 payload bits; the combined 20-bit value is an
                // offset from the first supplementary code point, U+10000.
                0x10000 + (((unit and 0x3FF) shl 10) or (next and 0x3FF))
            }
            in Char.MIN_LOW_SURROGATE.code..Char.MAX_LOW_SURROGATE.code -> {
                throw CharacterCodingException() // unpaired low surrogate
            }
            else -> unit
        }
        when (codepoint) {
            in 0x00..0x7F -> bytes[j++] = codepoint.toUByte()
            in 0x80..0x07FF -> {
                bytes[j++] = (0xC0 or (codepoint shr 6)).toUByte()
                bytes[j++] = (0x80 or (codepoint and 0x3F)).toUByte()
            }
            in 0x0800..0xFFFF -> {
                bytes[j++] = (0xE0 or (codepoint shr 12)).toUByte()
                bytes[j++] = (0x80 or ((codepoint shr 6) and 0x3F)).toUByte()
                bytes[j++] = (0x80 or (codepoint and 0x3F)).toUByte()
            }
            in 0x10000..0x10FFFF -> {
                bytes[j++] = (0xF0 or ((codepoint and 0x1C0000) shr 18)).toUByte()
                bytes[j++] = (0x80 or ((codepoint shr 12) and 0x3F)).toUByte()
                bytes[j++] = (0x80 or ((codepoint shr 6) and 0x3F)).toUByte()
                bytes[j++] = (0x80 or (codepoint and 0x3F)).toUByte()
            }
            // Not reachable: every path above yields a value in 0..0x10FFFF.
            else -> throw IllegalStateException()
        }
    }
    // Trim the over-allocated buffer down to the bytes actually written.
    return bytes.sliceArray(0 until j)
}
// napperley
01/19/2022, 2:45 AMtoKStringFromUtf8 function to convert a UTF-16 byte array to a Kotlin String.ephemient
01/19/2022, 2:56 AMtoKStringFromUtf16 is closer to what OP wantsnapperley
01/19/2022, 3:00 AMtoKStringFromUtf16 function looks like the one but doesn't exist anymore.ephemient
01/19/2022, 3:08 AMSkolson5903
01/19/2022, 5:18 PMSkolson5903
01/19/2022, 7:08 PMfun CPointer<ShortVar>.toKStringFromUtf16(): String which would work if I can copy my COpaquePointer to a CPointer<ShortVar> and add a null terminator. That would strain my wimpy native knowledge, but even if I figured that out it doesn't handle the big endian vs little endian issue (independent of Platform.isLittleEndian). I'm gonna try iterating the bytearray first and handle endianness myself, see how it goes.napperley
01/20/2022, 1:49 AMnapperley
01/20/2022, 1:50 AMSkolson5903
01/20/2022, 1:53 AMSkolson5903
01/20/2022, 1:55 AMnapperley
01/20/2022, 1:56 AMephemient
01/20/2022, 1:57 AMnapperley
01/20/2022, 1:57 AMSkolson5903
01/20/2022, 1:57 AMephemient
01/20/2022, 1:58 AMnapperley
01/20/2022, 1:59 AMSkolson5903
01/20/2022, 1:59 AMephemient
01/20/2022, 2:00 AMnapperley
01/20/2022, 2:00 AMephemient
01/20/2022, 2:01 AMephemient
01/20/2022, 2:02 AMephemient
01/20/2022, 2:04 AMephemient
01/20/2022, 2:06 AMephemient
01/20/2022, 4:03 AM