Skolson5903
01/19/2022, 12:42 AMephemient
01/19/2022, 12:54 AMephemient
// 01/19/2022, 1:33 AM
/**
 * Transcodes this array of UTF-16 code units into UTF-8 bytes.
 *
 * A leading byte-order mark (U+FEFF) is skipped and not written to the output.
 * Code units are interpreted exactly as stored: no byte swapping is performed,
 * so the caller must already have the units in the right order.
 *
 * @return a new array holding the UTF-8 encoding of the decoded code points.
 * @throws CharacterCodingException if a high surrogate is not immediately followed by a
 * low surrogate, or if a low surrogate appears without a preceding high surrogate.
 */
fun UShortArray.utf16ToUtf8(): UByteArray {
    // The byte-order mark is U+FEFF.
    var i = if (this.firstOrNull() == 0xFEFF.toUShort()) 1 else 0
    // Upper bound on output size: one BMP unit encodes to at most 3 bytes,
    // and a surrogate pair (2 units) encodes to 4 bytes, which is below 2 * 3.
    val bytes = UByteArray((this.size - i) * 3)
    var j = 0
    while (i < this.size) {
        val codepoint = when (val unit = this[i++].toInt()) {
            in Char.MIN_HIGH_SURROGATE.code..Char.MAX_HIGH_SURROGATE.code -> {
                if (i !in this.indices) throw CharacterCodingException() // unpaired high surrogate
                val next = this[i++].toInt()
                if (next !in Char.MIN_LOW_SURROGATE.code..Char.MAX_LOW_SURROGATE.code) {
                    throw CharacterCodingException() // unpaired high surrogate
                }
                // Each surrogate carries 10 payload bits; the combined 20-bit value is an
                // offset from the first supplementary code point (U+10000). The result is
                // therefore always within U+10000..U+10FFFF and needs no further range check.
                0x10000 + (((unit and 0x3FF) shl 10) or (next and 0x3FF))
            }
            in Char.MIN_LOW_SURROGATE.code..Char.MAX_LOW_SURROGATE.code -> {
                throw CharacterCodingException() // unpaired low surrogate
            }
            else -> unit
        }
        when (codepoint) {
            // 1 byte: 0xxxxxxx
            in 0x00..0x7F -> bytes[j++] = codepoint.toUByte()
            // 2 bytes: 110xxxxx 10xxxxxx
            in 0x80..0x07FF -> {
                bytes[j++] = (0xC0 or ((codepoint shr 6) and 0x1F)).toUByte()
                bytes[j++] = (0x80 or (codepoint and 0x3F)).toUByte()
            }
            // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
            in 0x0800..0xFFFF -> {
                bytes[j++] = (0xE0 or ((codepoint shr 12) and 0x0F)).toUByte()
                bytes[j++] = (0x80 or ((codepoint shr 6) and 0x3F)).toUByte()
                bytes[j++] = (0x80 or (codepoint and 0x3F)).toUByte()
            }
            // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
            in 0x10000..0x10FFFF -> {
                bytes[j++] = (0xF0 or ((codepoint shr 18) and 0x07)).toUByte()
                bytes[j++] = (0x80 or ((codepoint shr 12) and 0x3F)).toUByte()
                bytes[j++] = (0x80 or ((codepoint shr 6) and 0x3F)).toUByte()
                bytes[j++] = (0x80 or (codepoint and 0x3F)).toUByte()
            }
            // Not reachable: every decoded value falls in one of the ranges above.
            else -> throw IllegalStateException()
        }
    }
    // Trim the over-allocated buffer to the bytes actually written.
    return bytes.sliceArray(0 until j)
}
napperley
01/19/2022, 2:45 AMtoKStringFromUtf8
function to convert a UTF-16 byte array to a Kotlin String.ephemient
01/19/2022, 2:56 AMtoKStringFromUtf16
is closer to what OP wantsnapperley
01/19/2022, 3:00 AMtoKStringFromUtf16
function looks like the one but doesn't exist anymore.ephemient
01/19/2022, 3:08 AMSkolson5903
01/19/2022, 5:18 PMSkolson5903
01/19/2022, 7:08 PMfun CPointer<ShortVar>.toKStringFromUtf16(): String
which would work if I can copy my COpaquePointer to a CPointer<ShortVar> and add a null terminator. That would strain my wimpy native knowledge, but even if I figured that out it doesn't handle the big endian vs little endian issue (independent of Platform.isLittleEndian).
I'm gonna try iterating the bytearray first and handle endianness myself, see how it goes.napperley
01/20/2022, 1:49 AMnapperley
01/20/2022, 1:50 AMSkolson5903
01/20/2022, 1:53 AMSkolson5903
01/20/2022, 1:55 AMnapperley
01/20/2022, 1:56 AMephemient
01/20/2022, 1:57 AMnapperley
01/20/2022, 1:57 AMSkolson5903
01/20/2022, 1:57 AMephemient
01/20/2022, 1:58 AMnapperley
01/20/2022, 1:59 AMSkolson5903
01/20/2022, 1:59 AMephemient
01/20/2022, 2:00 AMnapperley
01/20/2022, 2:00 AMephemient
01/20/2022, 2:01 AMephemient
01/20/2022, 2:02 AMephemient
01/20/2022, 2:04 AMephemient
01/20/2022, 2:06 AMephemient
01/20/2022, 4:03 AM