diff --git a/core/core.h b/core/core.h
index da0b8ce..276929a 100644
--- a/core/core.h
+++ b/core/core.h
@@ -36,7 +36,9 @@ typedef uint64_t uint64;
#ifndef NSF_NOTEXT
// define NSF_NOTEXT=1 to strip all unnecessary text from the build
- // this disables ini parsing and key looking, error messages and keys will become blank strings
+ // this disables ini parsing and key lookup
+ // error messages and keys will become blank strings
+ // shift-jis decoding will be disabled (legacy strings are passed through unconverted and assumed to be UTF-8)
// last_error_code can still be used to disambiguate errors
#define NSF_NOTEXT 0
#endif
diff --git a/core/core.vcxproj b/core/core.vcxproj
index 8580432..1c0bdee 100644
--- a/core/core.vcxproj
+++ b/core/core.vcxproj
@@ -188,12 +188,14 @@
Create
+
+
diff --git a/core/core.vcxproj.filters b/core/core.vcxproj.filters
index d78ed7a..6a5a54e 100644
--- a/core/core.vcxproj.filters
+++ b/core/core.vcxproj.filters
@@ -26,6 +26,9 @@
Source Files
+
+ Source Files
+
@@ -40,6 +43,9 @@
Header Files
+
+ Header Files
+
diff --git a/core/nsf.cpp b/core/nsf.cpp
index 61e7577..a9b47fc 100644
--- a/core/nsf.cpp
+++ b/core/nsf.cpp
@@ -167,13 +167,21 @@ inline static uint8 nsfe_nsf_shared_bit(const NSFCore* core, uint32 nsfe_fcc, ui
return false;
}
+// from shift_jis.cpp
+bool valid_utf8(const uint8* s); // true if s is valid UTF-8
+void sjis_to_utf8(const uint8* sjis, uint16 unmapped, uint8* output, uint32 output_len); // writes NUL-terminated UTF-8 into output
+// legacy_string: returns data as UTF-8 text, converting from Shift-JIS into core->temp_text when the NSF_SET_SHIFT_JIS setting calls for it
inline const char* legacy_string(const NSFCore* core, const uint8* data)
{
- NSF_UNUSED(core);
- return reinterpret_cast<const char*>(data);
- // TODO detect impossibility of shift-jis (ASCII only?) and return direct reinterpret
- //if (core->setting[SHIFT_JIS] == NSF_LK_ENABLE_AUTO_AUTO) // detect Shift-JIS, copy to temp_text
- //if (core->setting[SHIFT_JIS] == NSF_LK_ENABLE_ON) // force Shift-JIS, copy to temp_text
+ sint32 sjis = core->setting[NSF_SET_SHIFT_JIS];
+ if ((sjis == NSF_LK_ENCODING_AUTO && !valid_utf8(data)) || // automatic setting assumes Shift-JIS if not UTF-8
+ (sjis == NSF_LK_ENCODING_SJIS)) // force Shift-JIS
+ {
+ sjis_to_utf8(data,'.',reinterpret_cast<uint8*>(core->temp_text),NSFCore::TEMP_TEXT_SIZE); // unmapped characters become '.'
+ return core->temp_text; // shared scratch buffer: the result is only valid until temp_text is next overwritten
+ }
+ //else: sjis == NSF_LK_ENCODING_UTF8, or NSF_LK_ENCODING_AUTO with data that is valid UTF-8
+ return reinterpret_cast<const char*>(data); // already UTF-8, returned in place without copying
}
// check NSF type, NSFx = NSF/NSF2/NSFe
diff --git a/core/shift_jis.cpp b/core/shift_jis.cpp
new file mode 100644
index 0000000..e424b05
--- /dev/null
+++ b/core/shift_jis.cpp
@@ -0,0 +1,121 @@
+// shift_jis.cpp
+// Detection and conversion of Shift-JIS (CP932) to UTF-8
+
+#include "core.h"
+
+#if !NSF_NOTEXT
+
+bool valid_utf8(const uint8* s) // true if NUL-terminated s is structurally valid UTF-8 (lead/continuation bytes only; overlong 3/4-byte forms and surrogates are not checked)
+{
+	unsigned int multibyte = 0; // continuation bytes still expected for the current sequence
+	while (*s)
+	{
+		uint8 c = *s;
+		++s;
+		if (multibyte)
+		{
+			if ((c & 0xC0) != 0x80) return false; // multibyte continuations always have 10xxxxxx
+			--multibyte;
+		}
+		else
+		{
+			if ((c & 0x80) != 0x00) // high bit marks start of multibyte
+			{
+				if      ((c & 0xE0) == 0xC0 && c >= 0xC2) multibyte = 1; // 110xxxxx (C0/C1 would be overlong, never valid)
+				else if ((c & 0xF0) == 0xE0)              multibyte = 2; // 1110xxxx
+				else if ((c & 0xF8) == 0xF0 && c <= 0xF4) multibyte = 3; // 11110xxx (F5-F7 would exceed U+10FFFF, never valid)
+				else return false; // stray continuation byte or a byte that cannot start a sequence
+			}
+		}
+	}
+	return multibyte == 0; // a string that ends in the middle of a sequence is truncated, so it is not valid UTF-8
+}
+
+#include "shift_jis_table.h"
+// CP932_00_00 - Single byte mappings 00-FF
+// CP932_81_9F - Double byte group 8100-9FFF
+// CP932_E0_EE - Double byte group E000-EEFF
+// CP932_FA_FC - Double byte group FA00-FCFF
+
+void sjis_to_utf8(const uint8* sjis, uint16 unmapped, uint8* output, uint32 output_len) // NUL-terminated CP932 in, NUL-terminated UTF-8 out; never writes more than output_len bytes
+{
+	uint32 db = 0; // pending double-byte code: lead byte in bits 8-15 once seen, 0 when no lead byte is pending
+	uint32 pos = 0; // next write index in output
+	while (*sjis && (pos+1) < output_len) // always keep one byte in reserve for the terminator
+	{
+		uint16 unicode = 0; // code point to emit this iteration, 0 = nothing to emit
+		uint8 c = *sjis;
+		++sjis;
+		if (db == 0) // first byte
+		{
+			if ((c >= 0x81 && c <= 0x9F) ||
+			    (c >= 0xE0 && c <= 0xFC))
+			{
+				db = c << 8; // begin double byte
+			}
+			else // single byte
+			{
+				unicode = CP932_00_00[c];
+			}
+		}
+		else // second byte of double-byte
+		{
+			db |= c;
+			unicode = 0xFFFF; // unmapped
+			if      (db >= 0x8100 && db <= 0x9FFF) unicode = CP932_81_9F[db-0x8100];
+			else if (db >= 0xE000 && db <= 0xEEFF) unicode = CP932_E0_EE[db-0xE000];
+			else if (db >= 0xFA00 && db <= 0xFCFF) unicode = CP932_FA_FC[db-0xFA00];
+			db = 0;
+		}
+		// emit unicode
+		if (unicode)
+		{
+			if (unicode == 0xFFFF) unicode = unmapped; // 0xFFFF marks an unmapped code: substitute the caller's replacement
+			if (unicode < 0x80)
+			{
+				output[pos] = uint8(unicode); ++pos; // room for 1 byte + terminator is guaranteed by the loop condition
+			}
+			else if (unicode < 0x800)
+			{
+				if ((pos+2) >= output_len) break; // need 2 bytes + terminator
+				output[pos+0] = 0xC0 | ((unicode >>  6) & 0x1F);
+				output[pos+1] = 0x80 | ((unicode >>  0) & 0x3F);
+				pos += 2;
+			}
+			else //if (unicode < 0x10000)
+			{
+				if ((pos+3) >= output_len) break; // need 3 bytes + terminator
+				output[pos+0] = 0xE0 | ((unicode >> 12) & 0x0F);
+				output[pos+1] = 0x80 | ((unicode >>  6) & 0x3F);
+				output[pos+2] = 0x80 | ((unicode >>  0) & 0x3F);
+				pos += 3;
+			}
+			// there are no 4-byte mappings in this set
+		}
+	}
+	if (pos < output_len) output[pos] = 0; // terminate; skipped only when output_len is 0, where nothing may be written
+}
+
+#else // (NSF_NOTEXT)
+
+// With NSF_NOTEXT there is no Shift-JIS detection: every string is reported as valid UTF-8
+bool valid_utf8(const uint8* s)
+{
+	static_cast<void>(s); // parameter is deliberately unused in this build
+	return true;
+}
+
+// NOTEXT don't convert, just copy
+void sjis_to_utf8(const uint8* sjis, uint16 unmapped, uint8* output, uint32 output_len)
+{
+ (void)unmapped;
+ for (uint32 i=0; i