diff --git a/core/core.h b/core/core.h
index da0b8ce..276929a 100644
--- a/core/core.h
+++ b/core/core.h
@@ -36,7 +36,9 @@ typedef uint64_t uint64;
 
 #ifndef NSF_NOTEXT
 	// define NSF_NOTEXT=1 to strip all unnecessary text from the build
-	// this disables ini parsing and key looking, error messages and keys will become blank strings
+	// this disables ini parsing and key lookup
+	// error messages and keys will become blank strings
+	// shift-jis decoding will be disabled
 	// last_error_code can still be used to disambiguate errors
 	#define NSF_NOTEXT 0
 #endif
diff --git a/core/core.vcxproj b/core/core.vcxproj
index 8580432..1c0bdee 100644
--- a/core/core.vcxproj
+++ b/core/core.vcxproj
@@ -188,12 +188,14 @@
 Create
+
+
diff --git a/core/core.vcxproj.filters b/core/core.vcxproj.filters
index d78ed7a..6a5a54e 100644
--- a/core/core.vcxproj.filters
+++ b/core/core.vcxproj.filters
@@ -26,6 +26,9 @@
 Source Files
+
+ Source Files
+
@@ -40,6 +43,9 @@
 Header Files
+
+ Header Files
+
diff --git a/core/nsf.cpp b/core/nsf.cpp
index 61e7577..a9b47fc 100644
--- a/core/nsf.cpp
+++ b/core/nsf.cpp
@@ -167,13 +167,23 @@ inline static uint8 nsfe_nsf_shared_bit(const NSFCore* core, uint32 nsfe_fcc, ui
 	return false;
 }
 
+// from shift_jis.cpp
+bool valid_utf8(const uint8* s); // true if s is valid UTF-8
+void sjis_to_utf8(const uint8* sjis, uint16 unmapped, uint8* output, uint32 output_len);
+
+// Returns data as UTF-8 text, decoding it from Shift-JIS when the NSF_SET_SHIFT_JIS setting calls for it.
+// A converted result lives in core->temp_text and is overwritten by the next conversion.
 inline const char* legacy_string(const NSFCore* core, const uint8* data)
 {
-	NSF_UNUSED(core);
-	return reinterpret_cast<const char*>(data);
-	// TODO detect impossibility of shift-jis (ASCII only?) and return direct reinterpret
-	//if (core->setting[SHIFT_JIS] == NSF_LK_ENABLE_AUTO_AUTO) // detect Shift-JIS, copy to temp_text
-	//if (core->setting[SHIFT_JIS] == NSF_LK_ENABLE_ON) // force Shift-JIS, copy to temp_text
+	sint32 sjis = core->setting[NSF_SET_SHIFT_JIS];
+	if ((sjis == NSF_LK_ENCODING_AUTO && !valid_utf8(data)) || // automatic setting assumes Shift-JIS if not UTF-8
+		(sjis == NSF_LK_ENCODING_SJIS)) // force Shift-JIS
+	{
+		sjis_to_utf8(data,'.',reinterpret_cast<uint8*>(core->temp_text),NSFCore::TEMP_TEXT_SIZE);
+		return core->temp_text;
+	}
+	//else: sjis == NSF_LK_ENCODING_UTF8
+	return reinterpret_cast<const char*>(data); // already UTF-8
 }
 
 // check NSF type, NSFx = NSF/NSF2/NSFe
diff --git a/core/shift_jis.cpp b/core/shift_jis.cpp
new file mode 100644
index 0000000..e424b05
--- /dev/null
+++ b/core/shift_jis.cpp
@@ -0,0 +1,133 @@
+// shift_jis.cpp
+// Detection and conversion of Shift-JIS (CP932) to UTF-8
+
+#include "core.h"
+
+#if !NSF_NOTEXT
+
+// Returns true if the NUL-terminated string s is structurally valid UTF-8:
+// every lead byte is followed by the right number of 10xxxxxx continuation bytes.
+// A string that ends in the middle of a multibyte sequence is rejected.
+// (Overlong encodings and surrogates are not checked.)
+bool valid_utf8(const uint8* s) // true if s is valid UTF-8
+{
+	unsigned int multibyte = 0;
+	while (*s)
+	{
+		uint8 c = *s;
+		++s;
+		if (multibyte)
+		{
+			if ((c & 0xC0) != 0x80) return false; // multibyte continuations always have 10xxxxxx
+			--multibyte;
+		}
+		else
+		{
+			if ((c & 0x80) != 0x00) // high bit marks start of multibyte
+			{
+				if      ((c & 0xE0) == 0xC0) multibyte = 1; // 110xxxxx
+				else if ((c & 0xF0) == 0xE0) multibyte = 2; // 1110xxxx
+				else if ((c & 0xF8) == 0xF0) multibyte = 3; // 11110xxx
+				else return false;
+			}
+		}
+	}
+	return multibyte == 0; // a sequence cut short by the terminator is not valid UTF-8
+}
+
+#include "shift_jis_table.h"
+// CP932_00_00 - Single byte mappings 00-FF
+// CP932_81_9F - Double byte group 8100-9FFF
+// CP932_E0_EE - Double byte group E000-EEFF
+// CP932_FA_FC - Double byte group FA00-FCFF
+
+// Converts NUL-terminated Shift-JIS (CP932) text to NUL-terminated UTF-8.
+//   sjis       - input bytes
+//   unmapped   - code point substituted when a double-byte code has no mapping
+//   output     - destination buffer
+//   output_len - size of output in bytes, including the terminator
+// Output that does not fit is truncated at a character boundary.
+void sjis_to_utf8(const uint8* sjis, uint16 unmapped, uint8* output, uint32 output_len)
+{
+	if (output_len == 0) return; // no room even for the terminator
+	--output_len; // reserve the last byte so the terminator write below stays in bounds
+	uint32 db = 0;
+	uint32 pos = 0;
+	while (*sjis && pos < output_len)
+	{
+		uint16 unicode = 0;
+		uint8 c = *sjis;
+		++sjis;
+		if (db == 0) // first byte
+		{
+			if ((c >= 0x81 && c <= 0x9F) ||
+				(c >= 0xE0 && c <= 0xFC))
+			{
+				db = c << 8; // begin double byte
+			}
+			else // single byte
+			{
+				unicode = CP932_00_00[c];
+			}
+		}
+		else // second byte of double-byte
+		{
+			db |= c;
+			unicode = 0xFFFF; // unmapped
+			if      (db >= 0x8100 && db <= 0x9FFF) unicode = CP932_81_9F[db-0x8100];
+			else if (db >= 0xE000 && db <= 0xEEFF) unicode = CP932_E0_EE[db-0xE000];
+			else if (db >= 0xFA00 && db <= 0xFCFF) unicode = CP932_FA_FC[db-0xFA00];
+			db = 0;
+		}
+		// emit unicode
+		if (unicode)
+		{
+			if (unicode == 0xFFFF) unicode = unmapped;
+			if (unicode < 0x80)
+			{
+				output[pos] = uint8(unicode); ++pos;
+			}
+			else if (unicode < 0x800)
+			{
+				if ((pos+1) >= output_len) break;
+				output[pos+0] = 0xC0 | ((unicode >>  6) & 0x1F);
+				output[pos+1] = 0x80 | ((unicode >>  0) & 0x3F);
+				pos += 2;
+			}
+			else //if (unicode < 0x10000)
+			{
+				if ((pos+2) >= output_len) break;
+				output[pos+0] = 0xE0 | ((unicode >> 12) & 0x0F);
+				output[pos+1] = 0x80 | ((unicode >>  6) & 0x3F);
+				output[pos+2] = 0x80 | ((unicode >>  0) & 0x3F);
+				pos += 3;
+			}
+			// there are no 4-byte mappings in this set
+		}
+	}
+	output[pos] = 0;
+}
+
+#else // (NSF_NOTEXT)
+
+// NOTEXT won't convert, just assume UTF-8
+bool valid_utf8(const uint8* s)
+{
+	(void)s;
+	return true;
+}
+
+// NOTEXT don't convert, just copy
+void sjis_to_utf8(const uint8* sjis, uint16 unmapped, uint8* output, uint32 output_len)
+{
+	(void)unmapped;
+	for (uint32 i=0; i