-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path01_preprocess_data.py
50 lines (37 loc) · 1.39 KB
/
01_preprocess_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
# -*- coding: utf-8 -*-
"""01_preprocess_data.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/github/zhihanyang2022/super_mario_as_a_string/blob/master/01_preprocess_data.ipynb
"""
import os
import json
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm_notebook
dir_1 = "data_raw/super_mario_bros"
dir_2 = "data_raw/super_mario_bros_2_japan"


def _list_txt_files(directory):
    """Return the paths of the ``.txt`` files directly inside *directory*.

    Names are sorted because ``os.listdir`` returns entries in arbitrary
    order; sorting keeps the order of the levels reproducible across runs
    and machines.
    """
    return [
        os.path.join(directory, fn)
        for fn in sorted(os.listdir(directory))
        # Suffix match, so a file literally named "txt" is not picked up.
        if fn.endswith('.txt')
    ]


# Paths of all level annotation files: those from dir_1 first, then dir_2.
annot_txts = _list_txt_files(dir_1) + _list_txt_files(dir_2)
def load_txt(txt):
    """Read the text file at path ``txt`` and return its lines as a list.

    Every returned string keeps its trailing newline when the file had one.
    """
    with open(txt, 'r') as handle:
        contents = list(handle)
    return contents
# Write all levels into one text file, one level column per output line.
with open('data_preprocessed/mario.txt', 'w+') as txt_f:
    last_index = len(annot_txts) - 1
    for level_idx, level_path in enumerate(annot_txts):
        # One list of characters per row of the level file, with trailing
        # whitespace (including the newline) removed.
        rows = [list(raw.rstrip()) for raw in load_txt(level_path)]
        # Columns of levels with fewer than 16 rows are padded at the front
        # with '-' so that they come out 16 characters long.
        padding = '-' * (16 - len(rows))
        # Transposing turns the rows of the level into its columns.
        for column in np.array(rows).T:
            txt_f.write(padding + "".join(column))
            txt_f.write("\n")
        # Separate each level with the ")" character; the last separator
        # is written without a trailing newline.
        txt_f.write(")" if level_idx == last_index else ")\n")