Skip to content

Commit

Permalink
shift-jis decoding
Browse files Browse the repository at this point in the history
  • Loading branch information
bbbradsmith committed Apr 27, 2024
1 parent 992ae4c commit ca12567
Show file tree
Hide file tree
Showing 6 changed files with 975 additions and 6 deletions.
4 changes: 3 additions & 1 deletion core/core.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,9 @@ typedef uint64_t uint64;

#ifndef NSF_NOTEXT
// define NSF_NOTEXT=1 to strip all unnecessary text from the build:
//   - ini parsing and key lookup are disabled
//   - error messages and keys will become blank strings
//   - shift-jis decoding will be disabled
// last_error_code can still be used to disambiguate errors
// defaults to 0 (text enabled) unless the build defines it first
#define NSF_NOTEXT 0
#endif
Expand Down
2 changes: 2 additions & 0 deletions core/core.vcxproj
Original file line number Diff line number Diff line change
Expand Up @@ -188,12 +188,14 @@
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Create</PrecompiledHeader>
</ClCompile>
<ClCompile Include="nsf.cpp" />
<ClCompile Include="shift_jis.cpp" />
</ItemGroup>
<ItemGroup>
<ClInclude Include="..\include\nsfplaycore.h" />
<ClInclude Include="..\include\nsfplayenums.h" />
<ClInclude Include="core.h" />
<ClInclude Include="enums_data.h" />
<ClInclude Include="shift_jis_table.h" />
</ItemGroup>
<ItemGroup>
<Text Include="..\enums\english.txt" />
Expand Down
6 changes: 6 additions & 0 deletions core/core.vcxproj.filters
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@
<ClCompile Include="nsf.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="shift_jis.cpp">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="..\include\nsfplaycore.h">
Expand All @@ -40,6 +43,9 @@
<ClInclude Include="enums_data.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="shift_jis_table.h">
<Filter>Header Files</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<Text Include="..\enums\english.txt">
Expand Down
18 changes: 13 additions & 5 deletions core/nsf.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -167,13 +167,21 @@ inline static uint8 nsfe_nsf_shared_bit(const NSFCore* core, uint32 nsfe_fcc, ui
return false;
}

// from shift_jis.cpp
bool valid_utf8(const uint8* s); // true if s is valid UTF-8
void sjis_to_utf8(const uint8* sjis, uint16 unmapped, uint8* output, uint32 output_len);

inline const char* legacy_string(const NSFCore* core, const uint8* data)
{
NSF_UNUSED(core);
return reinterpret_cast<const char*>(data);
// TODO detect impossibility of shift-jis (ASCII only?) and return direct reinterpret
//if (core->setting[SHIFT_JIS] == NSF_LK_ENABLE_AUTO_AUTO) // detect Shift-JIS, copy to temp_text
//if (core->setting[SHIFT_JIS] == NSF_LK_ENABLE_ON) // force Shift-JIS, copy to temp_text
sint32 sjis = core->setting[NSF_SET_SHIFT_JIS];
if ((sjis == NSF_LK_ENCODING_AUTO && !valid_utf8(data)) || // automatic setting assumes Shift-JIS if not UTF-8
(sjis == NSF_LK_ENCODING_SJIS)) // force Shift-JIS
{
sjis_to_utf8(data,'.',reinterpret_cast<uint8*>(core->temp_text),NSFCore::TEMP_TEXT_SIZE);
return core->temp_text;
}
//else: sjis == NSF_LK_ENCODING_UTF8
return reinterpret_cast<const char*>(data); // already UTF-8
}

// check NSF type, NSFx = NSF/NSF2/NSFe
Expand Down
121 changes: 121 additions & 0 deletions core/shift_jis.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
// shift_jis.cpp
// Detection and conversion of Shift-JIS (CP932) to UTF-8

#include "core.h"

#if !NSF_NOTEXT

// Returns true if the null-terminated string s is well-formed UTF-8.
// Rejects:
//   - stray continuation bytes and invalid lead bytes
//   - sequences cut short by the end of the string
//   - overlong encodings, UTF-16 surrogates (U+D800-U+DFFF), values above U+10FFFF
// An empty string is valid.
bool valid_utf8(const uint8* s) // true if s is valid UTF-8
{
	unsigned int pending = 0; // continuation bytes still expected for the current sequence
	uint32 codepoint = 0;     // value accumulated for the current sequence
	uint32 smallest = 0;      // lowest value the current sequence length may legally encode
	for (; *s; ++s)
	{
		const uint8 c = *s;
		if (pending)
		{
			if ((c & 0xC0) != 0x80) return false; // multibyte continuations always have 10xxxxxx
			codepoint = (codepoint << 6) | uint32(c & 0x3F);
			--pending;
			if (pending == 0)
			{
				if (codepoint < smallest) return false; // overlong encoding
				if (codepoint >= 0xD800 && codepoint <= 0xDFFF) return false; // surrogate
				if (codepoint > 0x10FFFF) return false; // beyond the Unicode range
			}
		}
		else if ((c & 0x80) != 0x00) // high bit marks start of multibyte
		{
			if ((c & 0xE0) == 0xC0) // 110xxxxx
			{
				pending = 1; smallest = 0x80; codepoint = uint32(c & 0x1F);
			}
			else if ((c & 0xF0) == 0xE0) // 1110xxxx
			{
				pending = 2; smallest = 0x800; codepoint = uint32(c & 0x0F);
			}
			else if ((c & 0xF8) == 0xF0) // 11110xxx
			{
				pending = 3; smallest = 0x10000; codepoint = uint32(c & 0x07);
			}
			else return false; // lone continuation byte, or 11111xxx
		}
	}
	// a string that ends in the middle of a multibyte sequence is not valid
	return pending == 0;
}

#include "shift_jis_table.h"
// CP932_00_00 - Single byte mappings 00-FF
// CP932_81_9F - Double byte group 8100-9FFF
// CP932_E0_EE - Double byte group E000-EEFF
// CP932_FA_FC - Double byte group FA00-FCFF

// Converts the null-terminated Shift-JIS (CP932) string sjis to UTF-8.
//   sjis       - input string
//   unmapped   - code point substituted for characters marked 0xFFFF (unmapped)
//   output     - destination buffer, always null-terminated if output_len > 0
//   output_len - size of output in bytes, including the terminator
// Output is truncated at a whole character if it does not fit.
// Table entries of 0 emit nothing.
// A lead byte at the very end of the input (no trail byte) emits nothing.
void sjis_to_utf8(const uint8* sjis, uint16 unmapped, uint8* output, uint32 output_len)
{
	if (output_len == 0) return; // no room even for the terminator
	const uint32 limit = output_len - 1; // last byte is reserved for the terminator
	uint32 db = 0; // pending double-byte code (lead byte in the high 8 bits), 0 if none
	uint32 pos = 0; // next write position in output
	while (*sjis && pos < limit)
	{
		uint16 unicode = 0;
		const uint8 c = *sjis;
		++sjis;
		if (db == 0) // first byte
		{
			if ((c >= 0x81 && c <= 0x9F) ||
			    (c >= 0xE0 && c <= 0xFC))
			{
				db = uint32(c) << 8; // begin double byte
			}
			else // single byte
			{
				unicode = CP932_00_00[c];
			}
		}
		else // second byte of double-byte
		{
			db |= c;
			unicode = 0xFFFF; // unmapped
			if      (db >= 0x8100 && db <= 0x9FFF) unicode = CP932_81_9F[db-0x8100];
			else if (db >= 0xE000 && db <= 0xEEFF) unicode = CP932_E0_EE[db-0xE000];
			else if (db >= 0xFA00 && db <= 0xFCFF) unicode = CP932_FA_FC[db-0xFA00];
			db = 0;
		}
		// emit unicode
		if (unicode)
		{
			if (unicode == 0xFFFF) unicode = unmapped;
			if (unicode < 0x80)
			{
				// loop condition already guarantees pos < limit
				output[pos] = uint8(unicode); ++pos;
			}
			else if (unicode < 0x800)
			{
				if ((pos+2) > limit) break; // would not leave room for the terminator
				output[pos+0] = uint8(0xC0 | ((unicode >> 6) & 0x1F));
				output[pos+1] = uint8(0x80 | ((unicode >> 0) & 0x3F));
				pos += 2;
			}
			else //if (unicode < 0x10000)
			{
				if ((pos+3) > limit) break; // would not leave room for the terminator
				output[pos+0] = uint8(0xE0 | ((unicode >> 12) & 0x0F));
				output[pos+1] = uint8(0x80 | ((unicode >>  6) & 0x3F));
				output[pos+2] = uint8(0x80 | ((unicode >>  0) & 0x3F));
				pos += 3;
			}
			// there are no 4-byte mappings in this set
		}
	}
	output[pos] = 0; // pos <= limit, so this is always inside the buffer
}

#else // (NSF_NOTEXT)

// NOTEXT build: no encoding detection is performed,
// every string is reported as valid UTF-8 so that it is never converted
bool valid_utf8(const uint8* s)
{
	static_cast<void>(s); // parameter intentionally unused
	return true;
}

// NOTEXT build: no conversion, the input bytes are copied unchanged.
//   sjis       - null-terminated input string
//   unmapped   - ignored
//   output     - destination buffer, always null-terminated if output_len > 0
//   output_len - size of output in bytes, including the terminator
// The copy is truncated to output_len-1 bytes. Nothing is written if output_len is 0.
void sjis_to_utf8(const uint8* sjis, uint16 unmapped, uint8* output, uint32 output_len)
{
	(void)unmapped;
	if (output_len == 0) return; // output_len-1 would wrap around and write far out of bounds
	const uint32 last = output_len - 1; // index reserved for the terminator
	uint32 i = 0;
	for (; i < last && sjis[i] != 0; ++i)
	{
		output[i] = sjis[i];
	}
	output[i] = 0;
}

#endif
Loading

0 comments on commit ca12567

Please sign in to comment.