# run_game.py
# Forked from aurelienDelageInsaLyon/IAT-projet.
import os
from sys import argv
from time import time
from controller import AgentInterface
from controller.qagent import QAgent
from controller.random_agent import RandomAgent
from epsilon_profile import EpsilonProfile
from game.SpaceInvaders import SpaceInvaders
def test(game: SpaceInvaders, agent: AgentInterface, nepisodes: int, same=True, display=False):
    """Play ``nepisodes`` greedy episodes and return the cumulated reward.

    Every episode starts from ``game.reset()`` and runs until ``game.step``
    reports a terminal state. Actions are always chosen with
    ``agent.select_greedy_action``.

    Args:
        game: Environment exposing ``reset()`` and ``step(action)``, where
            ``step`` returns a ``(next_state, reward, terminal)`` triple.
        agent: Controller exposing ``select_greedy_action(state)``.
        nepisodes: Number of episodes to play.
        same: Kept for backward compatibility. Both branches of the original
            conditional called ``game.reset()``, so it never had an effect.
        display: Kept for backward compatibility; not used by this function.

    Returns:
        The sum of the rewards collected over all episodes (a total, not an
        average). ``0.`` when ``nepisodes`` is 0.
    """
    sum_rewards = 0.
    for _ in range(nepisodes):
        # Reset exactly once per episode: the previous code reset the
        # environment twice in a row and discarded the first state.
        state = game.reset()
        terminal = False
        while not terminal:
            action = agent.select_greedy_action(state)
            state, reward, terminal = game.step(action)
            sum_rewards += reward
    return sum_rewards
def _print_hyperparameters(sampling, n_episodes, max_steps, gamma, alpha, eps_profile):
    """Print the hyper-parameters of the current run, one per line."""
    print(" sampling: ", sampling)
    print(" n_episodes: ", n_episodes)
    print(" max_steps: ", max_steps)
    print(" gamma: ", gamma)
    print(" alpha: ", alpha)
    print(" eps_profile (initial, final, dec_episode, dec_step): ",
          eps_profile.initial, eps_profile.final, eps_profile.dec_episode,
          eps_profile.dec_step)


if __name__ == '__main__':
    # Script entry point: evaluate a random player, train a QAgent, save its
    # Q-values to <fileName>, then evaluate the trained agent greedily.

    # Exactly nine positional arguments are expected (argv[0] is the script
    # name). Anything else prints the usage text instead of failing later
    # with an IndexError while reading argv.
    if len(argv) != 10:
        print(
            '\n\nUsage: python3 run_game.py <n_episodes> <max_steps> <final_episode> <gamma> <alpha> <eps_begin> '
            '<eps_end> <sampling> <fileName>\n')
        raise SystemExit(1)

    n_episodes = int(argv[1])
    max_steps = int(argv[2])
    final_episode = int(argv[3])  # number of evaluation episodes (before and after training)
    gamma = float(argv[4])
    alpha = float(argv[5])
    eps_profile = EpsilonProfile(float(argv[6]), float(argv[7]))
    sampling = int(argv[8])
    fileName = argv[9]

    print("############################ Current config ################################")
    _print_hyperparameters(sampling, n_episodes, max_steps, gamma, alpha, eps_profile)
    print("############################################################################")

    game = SpaceInvaders(sampling, display=False)

    # Baseline: score of a random player over `final_episode` episodes.
    controller = RandomAgent(game.na)
    random_score = 0
    for _ in range(final_episode):
        # Start every episode from a fresh state; without this only the first
        # episode is actually played while the score is still divided by
        # `final_episode` below.
        state = game.reset()
        is_done = False
        while not is_done:
            action = controller.select_action(state)
            state, reward, is_done = game.step(action)
            random_score += reward
    print('Joueur random avant apprentissage - score moyen : {}'.format(random_score / final_episode))

    agent = QAgent(game, eps_profile, gamma, alpha, sampling, fileName)

    print("\n\n=> Beginning of the learning phase")
    startTime = time()
    agent.learn(game, n_episodes, max_steps)
    endTime = time()
    agent.saveQToFile(fileName)

    print("\n######################## Learning phase complete #########################")
    _print_hyperparameters(sampling, n_episodes, max_steps, gamma, alpha, eps_profile)
    print("Durée de l'apprentissage : " + str(endTime - startTime))
    print("############################################################################")

    print("\n\n=> Beginning of test phase")
    rewards = test(game, agent, final_episode, display=False)
    print("################# Test phase complete ##################")
    print("Score moyen après entrainement : {}".format(rewards / final_episode))
    print("#########################################################")