Skip to content

Commit

Permalink
Use XML encoding when present while parsing the feed
Browse files Browse the repository at this point in the history
  • Loading branch information
msasikanth committed Feb 22, 2025
1 parent 2c8b715 commit 555b685
Showing 1 changed file with 24 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import io.ktor.http.set
import io.ktor.utils.io.ByteReadChannel
import io.ktor.utils.io.core.readBytes
import korlibs.io.lang.Charset
import korlibs.io.lang.Charsets
import kotlin.coroutines.CoroutineContext
import kotlin.coroutines.EmptyCoroutineContext
import kotlinx.coroutines.runBlocking
Expand Down Expand Up @@ -124,6 +125,7 @@ private fun ByteReadChannel.toCharIterator(

// Size argument passed to readRemaining() each time the character buffer is refilled.
private val DEFAULT_BUFFER_SIZE = 1024L

// Charset used for decoding once it has been resolved from the first chunk read
// (either the encoding found by the XML-declaration regex or the caller's fallback charset);
// stays null until that first resolution happens.
private var encodingCharset: Charset? = null
// Position of the next character to hand out from currentBuffer; reset to 0 on every refill.
private var currentIndex = 0
// Decoded text of the most recently read chunk.
private var currentBuffer = String()

Expand All @@ -132,13 +134,34 @@ private fun ByteReadChannel.toCharIterator(
if (this@toCharIterator.isClosedForRead) return false

val packet = runBlocking(context) { this@toCharIterator.readRemaining(DEFAULT_BUFFER_SIZE) }
currentBuffer = buildString { charset.decode(this, packet.readBytes()) }
val bytes = packet.readBytes()
val encodingRegex = """<?xml.*encoding=["']([^"']+)["'].*?>""".toRegex()
if (encodingCharset == null) {
val encodingContent = buildString { Charsets.UTF8.decode(this, bytes) }
encodingCharset = findEncodingCharset(encodingRegex, encodingContent, charset)
}

currentBuffer = buildString { (encodingCharset ?: charset).decode(this, bytes) }

packet.release()
currentIndex = 0
return currentBuffer.isNotEmpty()
}

/**
 * Picks the charset to decode with, based on an encoding name found in [encodingContent].
 *
 * The first match of [encodingRegex] is located and its capture group 1 is treated as the
 * encoding name, which is then looked up with [Charset.forName].
 *
 * @param encodingRegex pattern whose first capture group holds the encoding name.
 * @param encodingContent text to search for the encoding declaration.
 * @param fallbackCharset charset returned when no match is found or the lookup fails.
 * @return the charset resolved from the matched name, otherwise [fallbackCharset].
 */
private fun findEncodingCharset(
  encodingRegex: Regex,
  encodingContent: String,
  fallbackCharset: Charset,
): Charset {
  // No match at all: nothing to look up, use the fallback.
  val declaredName = encodingRegex.find(encodingContent)?.groupValues?.get(1)
    ?: return fallbackCharset

  // A lookup failure is deliberately treated as "no usable declaration".
  val resolved =
    try {
      Charset.forName(declaredName)
    } catch (e: Exception) {
      null
    }

  return resolved ?: fallbackCharset
}

override fun nextChar(): Char {
if (!hasNext()) throw NoSuchElementException()
return currentBuffer[currentIndex++]
Expand Down

0 comments on commit 555b685

Please sign in to comment.