Skip to content

Commit

Permalink
recognize null terminated strings
Browse files Browse the repository at this point in the history
  • Loading branch information
rec0de committed Dec 10, 2024
1 parent dfd7a65 commit bb09d0c
Showing 1 changed file with 15 additions and 4 deletions.
19 changes: 15 additions & 4 deletions src/commonMain/kotlin/decoders/BasicDecoders.kt
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,10 @@ object Utf8Decoder : ByteWitchDecoder {
override fun decodesAsValid(data: ByteArray) = confidence(data) > 0.6

override fun confidence(data: ByteArray): Double {
val effectiveData = stripNullTerminator(data)

try {
val score = looksLikeUtf8String(data)
val score = looksLikeUtf8String(effectiveData)
//Logger.log(data.decodeToString())
//Logger.log(score)
return score
Expand All @@ -25,7 +27,7 @@ object Utf8Decoder : ByteWitchDecoder {
}

override fun decode(data: ByteArray, sourceOffset: Int, inlineDisplay: Boolean): ByteWitchResult {
return BWString(data.decodeToString(), Pair(sourceOffset, sourceOffset+data.size))
return BWString(stripNullTerminator(data).decodeToString(), Pair(sourceOffset, sourceOffset+data.size))
}

override fun tryhardDecode(data: ByteArray): ByteWitchResult? {
Expand All @@ -34,14 +36,23 @@ object Utf8Decoder : ByteWitchDecoder {
else
null
}

/**
 * Returns a copy of [data] with every trailing `0x00` byte removed.
 *
 * Terminators of any length are accepted: one zero byte, several, or an
 * input made up of nothing but zeros (which yields an empty array). Zero
 * bytes that are followed by a non-zero byte are left untouched.
 *
 * Stripping is done byte by byte, so a zero byte that forms the tail of the
 * final multi-byte code unit is removed along with the terminator.
 *
 * @param data raw bytes that may end in null padding
 * @return a new array holding [data] up to and including its last non-zero byte
 */
fun stripNullTerminator(data: ByteArray): ByteArray {
    // -1 when the input is empty or all zeros, which makes the copy below empty.
    val lastNonZero = data.indexOfLast { it != 0.toByte() }
    return data.copyOfRange(0, lastNonZero + 1)
}
}

object Utf16Decoder : ByteWitchDecoder {
override val name = "utf16"

override fun confidence(data: ByteArray): Double {
try {
val string = data.decodeAsUTF16BE()
val string = Utf8Decoder.stripNullTerminator(data).decodeAsUTF16BE()
return looksLikeUtf8String(string.encodeToByteArray())
} catch (e: Exception) {
return 0.0
Expand All @@ -51,7 +62,7 @@ object Utf16Decoder : ByteWitchDecoder {
override fun decodesAsValid(data: ByteArray) = confidence(data) > 0.6

override fun decode(data: ByteArray, sourceOffset: Int, inlineDisplay: Boolean): ByteWitchResult {
return BWString(data.decodeAsUTF16BE(), Pair(sourceOffset, sourceOffset+data.size))
return BWString(Utf8Decoder.stripNullTerminator(data).decodeAsUTF16BE(), Pair(sourceOffset, sourceOffset+data.size))
}

override fun tryhardDecode(data: ByteArray): ByteWitchResult? {
Expand Down

0 comments on commit bb09d0c

Please sign in to comment.