# setup.py
# Converts question.json into the JSON Lines file question.jsonl.
import json
def _build_records(data):
    """Build the JSON Lines records for a single question.

    Args:
        data: One question object loaded from question.json. It must
            provide "question", "type" and "id"; single-choice ("单选")
            and multiple-choice ("多选") questions must also provide the
            option texts under "A", "B", "C" and "D".

    Returns:
        A list of dicts ready to be serialized: one record for a
        single-choice question, four records (one per option) for a
        multiple-choice question, and one record without choice fields
        for any other type.

    Raises:
        KeyError: If a key required for the question's type is missing.
    """
    question_text = data["question"]
    question_type = data["type"]

    if question_type == "单选":
        choices = [data[key] for key in ("A", "B", "C", "D")]
        return [
            {
                "id": data["id"],
                "type": question_type,
                "inputs_pretokenized": (
                    f"{question_text}\nA. {choices[0]}\nB. {choices[1]}"
                    f"\nC. {choices[2]}\nD. {choices[3]}"
                ),
                "choices_pretokenized": [" A", " B", " C", " D"],
                # The label is a constant 0; it is not read from the input.
                "label": 0,
                "targets_pretokenized": [""],
            }
        ]

    if question_type == "多选":
        choices = [data[key] for key in ("A", "B", "C", "D")]
        # Each option becomes its own record: the option text is listed as
        # choice A and the remaining three choices are a fixed filler string.
        return [
            {
                "id": data["id"],
                "type": question_type,
                "inputs_pretokenized": (
                    f"{question_text}\nA. {choice}\nB. 不存在正确答案"
                    "\nC. 不存在正确答案\nD. 不存在正确答案"
                ),
                "choices_pretokenized": [" A", " B", " C", " D"],
                "label": [0, 1],
                "targets_pretokenized": [""],
            }
            for choice in choices
        ]

    # Any other question type: emit the question text only, with no
    # choice or label fields.
    return [
        {
            "id": data["id"],
            "type": question_type,
            "inputs_pretokenized": f"{question_text}",
            "targets_pretokenized": [""],
        }
    ]


# Load all questions; they are stored as a list in the input file.
with open('question.json', 'r', encoding='utf8') as file:
    data_list = json.load(file)

# Write the data to a JSON Lines file, one JSON object per line.
with open('question.jsonl', 'w', encoding='utf8') as file:
    for data in data_list:
        for record in _build_records(data):
            # ensure_ascii=False keeps the Chinese text readable in the output.
            json.dump(record, file, ensure_ascii=False)
            file.write('\n')