diff --git a/core/network/src/commonMain/kotlin/dev/sasikanth/rss/reader/core/network/parser/FeedParser.kt b/core/network/src/commonMain/kotlin/dev/sasikanth/rss/reader/core/network/parser/FeedParser.kt index c25057ec5..943fea497 100644 --- a/core/network/src/commonMain/kotlin/dev/sasikanth/rss/reader/core/network/parser/FeedParser.kt +++ b/core/network/src/commonMain/kotlin/dev/sasikanth/rss/reader/core/network/parser/FeedParser.kt @@ -23,6 +23,7 @@ import io.ktor.http.set import io.ktor.utils.io.ByteReadChannel import io.ktor.utils.io.core.readBytes import korlibs.io.lang.Charset +import korlibs.io.lang.Charsets import kotlin.coroutines.CoroutineContext import kotlin.coroutines.EmptyCoroutineContext import kotlinx.coroutines.runBlocking @@ -124,6 +125,7 @@ private fun ByteReadChannel.toCharIterator( private val DEFAULT_BUFFER_SIZE = 1024L + private var encodingCharset: Charset? = null private var currentIndex = 0 private var currentBuffer = String() @@ -132,13 +134,34 @@ private fun ByteReadChannel.toCharIterator( if (this@toCharIterator.isClosedForRead) return false val packet = runBlocking(context) { this@toCharIterator.readRemaining(DEFAULT_BUFFER_SIZE) } - currentBuffer = buildString { charset.decode(this, packet.readBytes()) } + val bytes = packet.readBytes() + val encodingRegex = """""".toRegex() + if (encodingCharset == null) { + val encodingContent = buildString { Charsets.UTF8.decode(this, bytes) } + encodingCharset = findEncodingCharset(encodingRegex, encodingContent, charset) + } + + currentBuffer = buildString { (encodingCharset ?: charset).decode(this, bytes) } packet.release() currentIndex = 0 return currentBuffer.isNotEmpty() } + private fun findEncodingCharset( + encodingRegex: Regex, + encodingContent: String, + fallbackCharset: Charset, + ) = + (encodingRegex.find(encodingContent)?.groupValues?.get(1)?.let { encoding -> + try { + Charset.forName(encoding) + } catch (e: Exception) { + null + } + } + ?: fallbackCharset) + override fun nextChar(): Char { if (!hasNext()) throw NoSuchElementException() return currentBuffer[currentIndex++]