-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathQuadcopter.py
274 lines (210 loc) · 11.3 KB
/
Quadcopter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
import gym
from gym.spaces import Box
import numpy as np
import random
from numpy import tanh, pi
from ray.rllib.env.env_context import EnvContext
from MathUtils import calc_rot_matrix, calc_rod_inertia, euler_rot_matrix, random_by_interval, random_by_percentage
class Quadcopter(gym.Env):
def __init__(self, env_config: EnvContext):
# current environmental configuration, can change due to curriculum learning
self.env_config = env_config
# time step [s]
self.dt = self.env_config["dt"]
# gravitational acceleration in world coordinate system [m/s^2]
self.grav_acc = self.env_config["grav_acc"]
# standard mass for randomization [kg]
self.standard_mass = self.env_config["standard_mass"]
# standard arm length for randomization [m]
self.standard_arm_length = self.env_config["standard_arm_length"]
# standard settling time for randomization [s]
self.standard_settling_time = self.env_config["standard_settling_time"]
# standard thrust to weight factor for randomization [kg/N]
self.standard_thrust_to_weight = self.env_config["standard_thrust_to_weight"]
# standard thrust to torque factor for randomization [s^(-2)]
self.standard_thrust_to_torque = self.env_config["standard_thrust_to_torque"]
# end/goal position of agent in world coordinate system [m]
self.goal_pos = self.env_config["goal_pos"]
# end/goal linear velocity of agent in world coordinate system [m/s]
self.goal_lin_vel = self.env_config["goal_lin_vel"]
# end/goal rotation matrix from body to world coordinate system
self.goal_ang = self.env_config["goal_ang"]
# end/goal angular velocity of agent in the body coordinate system [rad/s]
self.goal_ang_vel = self.env_config["goal_ang_vel"]
# non negative scalar weights for reward/cost function
self.weights = self.env_config["weights"]
# level of complexity for curriculum training
self.level = None
# mass of agent [kg]
self.mass = None
# distance from quadcopter center to propeller center[m]
self.arm_length = None
# matrix of inertia [kg*m^2]
self.inertia = None
# motor 2% settling time [s]
self.settling_time = None
# thrust to weight ratio [kg/N]
self.thrust_to_weight = None
# torque to thrust coefficient [s^(-2)]
self.thrust_to_torque = None
# current position of the agent [m]
self.cur_pos = None
# current linear velocity of the agent in the world coordinate system [m/s]
self.cur_lin_vel = None
# current rotation matrix from body to world coordinate system
self.cur_ang = None
# current angular velocity of the agent in the body coordinate system [rad/s]
self.cur_ang_vel = None
# max thrust of single motor [N]
self.max_thrust = None
# current simulated motor lag []
self.cur_motor_lag = None
# space of possible actions
self.action_space = Box(low=-float('inf'), high=float('inf'), shape=(4,))
# space of possible observations
self.observation_space = Box(low=-float('inf'), high=float('inf'), shape=(18,))
# range of possible reward
self.reward_range = (-float('inf'), float('inf'))
# set the seed, is only used for the final (reach goal) reward.
self.seed(1)
#
self.reset()
def calc_state(self):
"""
Calculate the state of the quadcopter.
The state consists of the positional error, linear velocity error, rotation matrix and
the angular velocity error.
:return: Current state.
"""
# positional error of agent in world coordinate system [m]
pos_error = self.cur_pos - self.goal_pos
# linear velocity error of agent in world coordinate system [m/s]
lin_vel_error = self.cur_lin_vel - self.goal_lin_vel
# calculate rotation matrix by converting angles to rotation matrix []
rot_matrix = calc_rot_matrix(self.cur_ang[0], self.cur_ang[1], self.cur_ang[2])
# angular velocity error of agent in body coordinate system [rad/s]
ang_vel_error = self.cur_ang_vel - self.goal_ang_vel
return np.concatenate([pos_error, lin_vel_error, rot_matrix[0], rot_matrix[1], rot_matrix[2], ang_vel_error])
def calc_lin_acc(self, thrust):
"""
Calculate the linear acceleration vector by x_'' = g_ + RF_ / m
with g_ (gravitational acceleration vector in world coordinate system),
R (rotation matrix from body to world), F_ (total thrust force in body coordinate system) and m (mass).
:param thrust: Array of thrust of every single rotor.
:return: Linear acceleration vector in world coordinate system.
"""
# total thrust caused by all 4 motors in body coordinate system [N]
total_thrust = np.array([0, 0, np.sum(thrust)])
return self.grav_acc + np.dot(calc_rot_matrix(self.cur_ang[0], self.cur_ang[1], self.cur_ang[2]),
total_thrust) / self.mass
def calc_ang_acc(self, thrust):
"""
Calculate angular acceleration vector by w_' = I^(-1) * (tau_ - w_ x (I * w_)) with I (inertia tensor),
w_ (angular velocity in body coordinate system), tau (total torque in body coordinate system).
:param thrust: Array of thrust of every single rotor.
:return: Angular acceleration vector in body coordinate system.
"""
# distance between axis of rotation and force of thrust
dist = np.sqrt(2) / 2 * self.arm_length
# thruster torque caused by thruster forces in body coordinate system [Nm]
thruster_torque = np.array([np.dot(np.array([-1, -1, 1, 1]), thrust) * dist,
np.dot(np.array([-1, 1, 1, -1]), thrust) * dist,
0])
# thruster created around the z-axis caused by rotors different rotations in body coordinate system [Nm]
rotation_torque = np.array([0, 0, self.thrust_to_torque * np.dot(np.array([-1, 1, -1, 1]), thrust)])
# add up torques
total_torque = thruster_torque + rotation_torque
return np.dot(np.linalg.inv(self.inertia),
(total_torque - np.cross(self.cur_ang_vel, np.dot(self.inertia, self.cur_ang_vel))))
def calc_inertia(self):
"""
Calculate inertia matrix of quadcopter. The quadcopter is modeled as two rods, crossing at an angle of pi/2.
:return: Inertia matrix of quadcopter.
"""
xx = 2 * calc_rod_inertia(self.mass, 2 * self.arm_length, pi/4)
yy = 2 * calc_rod_inertia(self.mass, 2 * self.arm_length, pi/4)
zz = 2 * calc_rod_inertia(self.mass, 2 * self.arm_length, pi/2)
return np.array([xx, yy, zz]) * np.eye(3)
def calc_motor_lag(self, action):
"""
Calculates a simulated motor lag.
:param action: Normalized action for each motor.
:return: Simulated motor lag.
"""
# normalized motor thrust input
norm_thrust = 0.5 * (action + 1)
# normalized angular velocity
norm_ang_vel = np.sqrt(norm_thrust)
# motor lag simulated with discrete-time first-order low-pass filter
return self.cur_motor_lag + 4 * self.dt / self.settling_time * (norm_ang_vel - self.cur_motor_lag)
def step(self, action):
# squash action to (-1, 1)
action = tanh(action)
self.cur_motor_lag = self.calc_motor_lag(action)
# final motor thrust [N]
thrust = self.max_thrust * np.square(self.cur_motor_lag)
# calculate current linear acceleration [m/s^2]
lin_acc = self.calc_lin_acc(thrust)
# calculate next linear velocity by using first order explicit euler method for integration [m/s]
self.cur_lin_vel = self.cur_lin_vel + lin_acc * self.dt
# calculate next position by using first order explicit euler method for integration [m]
self.cur_pos = self.cur_pos + self.cur_lin_vel * self.dt
# calculate current angular acceleration [rad/s^2]
ang_acc = self.calc_ang_acc(thrust)
# calculate next angular velocity by using first order explicit euler method for integration [rad/s]
self.cur_ang_vel = self.cur_ang_vel + ang_acc * self.dt
# compute rate of euler angles (angles_' = E^(-1)(angles_) * w_)
euler_ang_rates = np.dot(np.linalg.inv(euler_rot_matrix(self.cur_ang[0], self.cur_ang[1])), self.cur_ang_vel)
# compute new angles by make an euler integration step [rad]
self.cur_ang = self.cur_ang + euler_ang_rates * self.dt
# compute state
state = self.calc_state()
rot_matrix = np.reshape(state[6:15], (3, 3))
# rotation angle along the axis of rotation of identity matrix to current rotation matrix [rad]
rot_ang = np.arccos((np.trace(rot_matrix) - 1) / 2)
# cost function depending on state and action
cost = (self.weights[0] * np.linalg.norm([state[0], state[1], state[2]]) +
self.weights[1] * np.linalg.norm([state[3], state[4], state[5]]) +
self.weights[2] * rot_ang +
self.weights[3] * np.linalg.norm([state[15], state[16], state[17]]) +
self.weights[4] * np.linalg.norm(action)
) * self.dt
# cost is negative reward
reward = -cost
done = False
return state, reward, done, {}
def reset(self):
# level of complexity for curriculum training
self.level = self.env_config["level"]
# mass of agent [kg]
self.mass = random_by_percentage(self.standard_mass, self.env_config["percentage"])
# distance from quadcopter center to propeller center[m]
self.arm_length = random_by_percentage(self.standard_arm_length, self.env_config["percentage"])
#
self.inertia = self.calc_inertia()
# motor 2% settling time [s]
self.settling_time = random_by_percentage(self.standard_settling_time, self.env_config["percentage"])
# thrust to weight ratio []
self.thrust_to_weight = random_by_percentage(self.standard_thrust_to_weight, self.env_config["percentage"])
# torque to thrust coefficient []
self.thrust_to_torque = random_by_percentage(self.standard_thrust_to_torque, self.env_config["percentage"])
self.max_thrust = 1 / 4 * self.mass * np.abs(self.grav_acc[2]) * self.thrust_to_weight
# reset current motor lag []
self.cur_motor_lag = np.zeros(4)
# reset current position of the agent [m]
self.cur_pos = random_by_interval(self.env_config["max_pos_err"], 3)
# reset current linear velocity of the agent in the world coordinate system [m/s]
self.cur_lin_vel = random_by_interval(self.env_config["max_lin_vel"], 3)
# reset current angles [rad]
self.cur_ang = random_by_interval(self.env_config["max_ang"], 3)
# reset current angular velocity of the agent in the body coordinate system [rad/s]
self.cur_ang_vel = random_by_interval(self.env_config["max_ang_vel"], 3)
state = self.calc_state()
return state
def seed(self, seed=None):
random.seed(seed)
def get_task(self):
return self.env_config
def set_task(self, task):
self.env_config = task
self.reset()