Skip to content

Commit da57404

Browse files
sunil19mEC2 Default User
andauthored
Support for simapp release March 2, 2020 (#1088)
* Support for simapp release March 2, 2020 * Fix multiple rollout with best model selection Co-authored-by: EC2 Default User <[email protected]>
1 parent cb2a421 commit da57404

File tree

75 files changed

+4550
-1778
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

75 files changed

+4550
-1778
lines changed

reinforcement_learning/rl_deepracer_robomaker_coach_gazebo/deepracer_rl.ipynb

Lines changed: 321 additions & 65 deletions
Large diffs are not rendered by default.

reinforcement_learning/rl_deepracer_robomaker_coach_gazebo/src/markov/agent_ctrl/agent_ctrl_interface.py

Lines changed: 38 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -13,22 +13,54 @@ def action_space(self):
1313

1414
@abc.abstractmethod
1515
def reset_agent(self):
16-
'''Reset the agent to a desired start postion
16+
'''reset agent to the reset position
17+
18+
Raises:
19+
NotImplementedError: agent control must be able to reset agent
1720
'''
1821
raise NotImplementedError('Agent control must be able to reset agent')
1922

2023
@abc.abstractmethod
2124
def send_action(self, action):
2225
'''Send the desired action to the agent
23-
action - Integer with the desired action to take
26+
27+
Args:
28+
action: Interger with the desired action to take
29+
30+
Raises:
31+
NotImplementedError: agent control must be able to send action
2432
'''
2533
raise NotImplementedError('Agent control must be able to send action')
2634

2735
@abc.abstractmethod
28-
def judge_action(self, action):
29-
'''Returns the reward and done flag after and agent takes the action prescribed by a given
30-
policy
31-
action - Integer with the desired action to take
36+
def update_agent(self, action):
37+
'''Update the agent status after action is taken
38+
39+
Args:
40+
action: Interger with the desired action to take
41+
42+
Returns:
43+
dict: dictionary contains the agent info after desired action is taken
44+
with key as agent's name and value as agent's info
45+
46+
Raises:
47+
NotImplementedError: agent control must be able to update agent
48+
'''
49+
raise NotImplementedError('Agent control must be able to update agent')
50+
51+
@abc.abstractmethod
52+
def judge_action(self, agents_info_map):
53+
'''Returns the reward, done flag, step metrics after action is taken
54+
55+
Args:
56+
agents_info_map: dictionary contains all agents info map with key as
57+
each agent's name and value as each agent's info
58+
59+
Returns:
60+
tuple (int, bool, dict): reward, done flag, and step metrics tuple
61+
62+
Raises:
63+
NotImplementedError: Agent control must be able to judge action
3264
'''
3365
raise NotImplementedError('Agent control must be able to judge action')
3466

@@ -39,8 +71,3 @@ def finish_episode(self):
3971
'''
4072
raise NotImplementedError('Agent control must be able to properly handle the end of \
4173
an episode')
42-
43-
@abc.abstractmethod
44-
def clear_data(self):
45-
'''Clears the agent data'''
46-
raise NotImplementedError('Agent control must be able to clear data')

reinforcement_learning/rl_deepracer_robomaker_coach_gazebo/src/markov/agent_ctrl/bot_cars_agent_ctrl.py

Lines changed: 165 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -13,18 +13,21 @@
1313
from geometry_msgs.msg import Pose
1414
from rosgraph_msgs.msg import Clock
1515

16-
from markov.track_geom.constants import SET_MODEL_STATE, SPAWN_URDF_MODEL, ObstacleDimensions
16+
from markov.agent_ctrl.constants import ConfigParams, BOT_CAR_Z
17+
from markov.track_geom.constants import SET_MODEL_STATE, SPAWN_SDF_MODEL, ObstacleDimensions
1718
from markov.track_geom.track_data import TrackData, TrackLine
1819
from markov.track_geom.utils import euler_to_quaternion
1920
from markov.agent_ctrl.agent_ctrl_interface import AgentCtrlInterface
2021
from markov.rospy_wrappers import ServiceProxyWrapper
2122
from markov import utils
23+
from markov.reset.constants import AgentPhase, AgentInfo
24+
from markov.deepracer_exceptions import GenericRolloutException
2225

2326
from scipy.interpolate import splprep, spalde
2427
from shapely.geometry import Point
2528
from shapely.geometry.polygon import LineString
2629

27-
SPLINE_DEGREE = 2
30+
SPLINE_DEGREE = 3
2831

2932
class BotCarsCtrl(AgentCtrlInterface):
3033
def __init__(self):
@@ -40,16 +43,21 @@ def __init__(self):
4043
self.lower_lane_change_time = float(rospy.get_param("LOWER_LANE_CHANGE_TIME", 3.0))
4144
self.upper_lane_change_time = float(rospy.get_param("UPPER_LANE_CHANGE_TIME", 5.0))
4245
self.lane_change_distance = float(rospy.get_param("LANE_CHANGE_DISTANCE", 1.0))
46+
self.penalty_seconds = float(rospy.get_param("PENALTY_SECONDS", 2.0))
4347
self.lane_change_duration = self.lane_change_distance/self.bot_car_speed
4448
self.bot_car_names = ["bot_car_{}".format(i) for i in range(self.num_bot_cars)]
49+
self.bot_car_poses = []
50+
self.bot_car_progresses = {}
51+
self.bot_car_phase = AgentPhase.RUN.value
4552
self.bot_car_dimensions = ObstacleDimensions.BOT_CAR_DIMENSION
53+
self.bot_car_crash_count = 0
54+
self.pause_end_time = 0.0
4655

4756
# Wait for ros services
4857
rospy.wait_for_service(SET_MODEL_STATE)
49-
rospy.wait_for_service(SPAWN_URDF_MODEL)
58+
rospy.wait_for_service(SPAWN_SDF_MODEL)
5059
self.set_model_state = ServiceProxyWrapper(SET_MODEL_STATE, SetModelState)
51-
self.spawn_urdf_model = ServiceProxyWrapper(SPAWN_URDF_MODEL, SpawnModel)
52-
self.bot_car_urdf = rospy.get_param('robot_description_bot')
60+
self.spawn_sdf_model = ServiceProxyWrapper(SPAWN_SDF_MODEL, SpawnModel)
5361

5462
# Build splines for inner/outer lanes
5563
self.inner_lane = self._build_lane(self.track_data._inner_lane_)
@@ -63,8 +71,31 @@ def __init__(self):
6371
rospy.Subscriber('/clock', Clock, self._update_sim_time)
6472

6573
def _build_lane(self, lane):
74+
'''Take in the track lane and return a track lane dictionary
75+
76+
Args:
77+
lane (TrackLine): TrackLine instance
78+
79+
Returns:
80+
dict: Dictionary contains input track lane, track lane point distance,
81+
prepared track lane spline.
82+
'''
6683
center_line = self.track_data._center_line_
6784
lane_dists = [center_line.project(Point(c)) for c in lane.coords]
85+
# projecting inner/outer lane into center line cannot
86+
# guarantee monotonic increase along starting and ending position
87+
# if wrap around along start (more than half of track length),
88+
# subtract track length
89+
for i in range(len(lane_dists)):
90+
if lane_dists[i] < 0.5 * center_line.length:
91+
break
92+
lane_dists[i] -= center_line.length
93+
# if wrap around along finish (less than half of track length),
94+
# add track length
95+
for i in range(len(lane_dists) - 1, 0, -1):
96+
if lane_dists[i] > 0.5 * center_line.length:
97+
break
98+
lane_dists[i] += center_line.length
6899
u,ui = np.unique(lane_dists, return_index=True)
69100
x = np.array(lane.coords.xy)[:,ui]
70101
if u[0] > 0.0:
@@ -85,30 +116,60 @@ def _build_lane(self, lane):
85116
"spline": lane_spline}
86117

87118
def _reset_sim_time(self):
119+
'''reset simulation start time
120+
'''
88121
sim_time = rospy.get_rostime()
89122
self.start_sim_time = self.current_sim_time = sim_time.secs + 1.e-9*sim_time.nsecs
90123

91124
def _update_sim_time(self, sim_time):
125+
'''Gazebo clock call back to update current time
126+
127+
Args:
128+
sim_time (Clock): gazebo clock
129+
'''
92130
self.current_sim_time = sim_time.clock.secs + 1.e-9*sim_time.clock.nsecs
93131

94132
def _get_dist_from_sim_time(self, initial_dist, sim_time):
95-
seconds_elapsed = sim_time - self.start_sim_time
133+
'''Get the bot car travel distance since the simulation start time
134+
135+
Args:
136+
initial_dist (float): bot car initial distance
137+
sim_time (float): current simulation time
138+
139+
Returns:
140+
float: current bot car distance
141+
'''
142+
seconds_elapsed = sim_time - self.start_sim_time - self.bot_car_crash_count * \
143+
self.penalty_seconds
96144
bot_car_traveled_dist = seconds_elapsed * self.bot_car_speed
97145
bot_car_center_dist = (initial_dist + bot_car_traveled_dist) \
98146
% self.track_data._center_line_.length
99147
return bot_car_center_dist
100148

101149
def _eval_spline(self, initial_dist, sim_time, spline):
150+
'''Use spline to generate point
151+
152+
Args:
153+
initial_dist (float): bot car initial distance
154+
sim_time (float): current simulation time
155+
spline (splprep): B-spline representation of an N-dimensional curve.
156+
https://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.interpolate.splprep.html
157+
158+
Returns:
159+
spalde: Evaluate all derivatives of a B-spline.
160+
https://docs.scipy.org/doc/scipy-0.18.1/reference/generated/scipy.interpolate.spalde.html
161+
'''
102162
center_line = self.track_data._center_line_
103163
dist = self._get_dist_from_sim_time(initial_dist, sim_time)
104164
min_dist = spline[0][SPLINE_DEGREE]
105165
max_dist = spline[0][-SPLINE_DEGREE-1]
106166
if dist < min_dist: dist += center_line.length
107167
if dist > max_dist: dist -= center_line.length
108-
return spalde(dist, spline)
168+
return spalde(max(min_dist, min(dist, max_dist)), spline)
109169

110170
def _compute_bot_car_initial_states(self):
111-
171+
'''Compute the bot car initial distance and spline
172+
'''
112173
# Start with equally spaced
113174
bot_car_start_dist = self.min_bot_car_dist
114175
bot_car_end_dist = self.track_data._center_line_.length - 1.0
@@ -151,14 +212,15 @@ def _compute_bot_car_initial_states(self):
151212
self.bot_car_splines = [lane["spline"] for lane in self.bot_cars_lanes]
152213

153214
def _compute_bot_car_lane_changes(self):
215+
'''Compute the bot car lane change splines and update bot car lane change end times
216+
'''
154217
center_line = self.track_data._center_line_
155218

156219
bot_car_splines = []
157220
for i_bot_car, lane_change_end_time in enumerate(self.bot_cars_lane_change_end_times):
158221

159222
# See if the last lane change has finished
160223
if self.current_sim_time >= lane_change_end_time:
161-
162224
# Swap lanes
163225
if lane_change_end_time > 0.0:
164226
self.bot_cars_lanes[i_bot_car], self.bot_cars_opposite_lanes[i_bot_car] = \
@@ -207,8 +269,8 @@ def _compute_bot_car_lane_changes(self):
207269
num_start_coords -= 1
208270
num_end_coords -= 1
209271
start_index_0 = (current_prev_index - 3) % num_start_coords
210-
start_index_1 = start_prev_index
211-
end_index_0 = end_next_index
272+
start_index_1 = start_prev_index % num_start_coords
273+
end_index_0 = end_next_index % num_end_coords
212274
end_index_1 = (end_next_index + 3) % num_end_coords
213275

214276
# Grab waypoint indices for these intervals (some corner cases here...)
@@ -254,7 +316,11 @@ def _compute_bot_car_lane_changes(self):
254316
self.bot_car_splines[i_bot_car] = bot_car_spline
255317

256318
def _compute_bot_car_poses(self):
319+
'''Compute bot car poses
257320
321+
Returns:
322+
list: list of bot car Pose instance
323+
'''
258324
# Evaluate the splines
259325
with self.lock:
260326
bot_cars_spline_derivs = \
@@ -273,7 +339,7 @@ def _compute_bot_car_poses(self):
273339
bot_car_pose = Pose()
274340
bot_car_pose.position.x = bot_car_x
275341
bot_car_pose.position.y = bot_car_y
276-
bot_car_pose.position.z = 0.0
342+
bot_car_pose.position.z = BOT_CAR_Z
277343
bot_car_pose.orientation.x = bot_car_orientation[0]
278344
bot_car_pose.orientation.y = bot_car_orientation[1]
279345
bot_car_pose.orientation.z = bot_car_orientation[2]
@@ -283,16 +349,27 @@ def _compute_bot_car_poses(self):
283349
return bot_car_poses
284350

285351
def _spawn_bot_cars(self):
352+
'''Spawn the bot cars and initialize track data bot car objects
353+
'''
286354
self._compute_bot_car_initial_states()
287-
bot_car_poses = self._compute_bot_car_poses()
288-
for bot_car_name, bot_car_pose in zip(self.bot_car_names, bot_car_poses):
289-
self.spawn_urdf_model(bot_car_name, self.bot_car_urdf, '/{}'.format(bot_car_name),
290-
bot_car_pose, '')
355+
self.bot_car_poses = self._compute_bot_car_poses()
356+
357+
rospack = rospkg.RosPack()
358+
deepracer_path = rospack.get_path("deepracer_simulation_environment")
359+
bot_car_sdf_path = os.path.join(deepracer_path, "models", "bot_car", "model.sdf")
360+
with open(bot_car_sdf_path, "r") as fp:
361+
bot_car_sdf = fp.read()
362+
363+
for bot_car_name, bot_car_pose in zip(self.bot_car_names, self.bot_car_poses):
364+
self.spawn_sdf_model(bot_car_name, bot_car_sdf, '/{}'.format(bot_car_name),
365+
bot_car_pose, '')
291366
self.track_data.initialize_object(bot_car_name, bot_car_pose, self.bot_car_dimensions)
292367

293368
def _update_bot_cars(self):
294-
bot_car_poses = self._compute_bot_car_poses()
295-
for bot_car_name, bot_car_pose in zip(self.bot_car_names, bot_car_poses):
369+
'''Update bot car objects locations
370+
'''
371+
self.bot_car_poses = self._compute_bot_car_poses()
372+
for bot_car_name, bot_car_pose in zip(self.bot_car_names, self.bot_car_poses):
296373
bot_car_state = ModelState()
297374
bot_car_state.model_name = bot_car_name
298375
bot_car_state.pose = bot_car_pose
@@ -310,19 +387,82 @@ def action_space(self):
310387
return None
311388

312389
def reset_agent(self):
390+
'''reset bot car when a new episode start by resetting simulation time and
391+
initial position.
392+
'''
393+
self.bot_car_crash_count = 0
313394
self._reset_sim_time()
314395
self._compute_bot_car_initial_states()
315396
self._update_bot_cars()
316397

317398
def send_action(self, action):
318-
self._compute_bot_car_lane_changes()
319-
self._update_bot_cars()
320-
321-
def judge_action(self, action):
399+
'''Send bot car action to Gazebo for rendering
400+
401+
Args:
402+
action (int): index of action
403+
'''
404+
if self.bot_car_phase == AgentPhase.RUN.value:
405+
self._compute_bot_car_lane_changes()
406+
self._update_bot_cars()
407+
408+
def update_agent(self, action):
409+
'''Update bot car status after action is taken
410+
411+
Args:
412+
action (int): index of action
413+
414+
Returns:
415+
dict: dictionary of bot car info after action is taken
416+
'''
417+
for bot_car_name, bot_car_pose in zip(self.bot_car_names, self.bot_car_poses):
418+
bot_car_progress = self.track_data.get_norm_dist(
419+
Point(bot_car_pose.position.x, bot_car_pose.position.y)) * 100.0
420+
self.bot_car_progresses.update(
421+
{bot_car_name:{AgentInfo.CURRENT_PROGRESS.value:bot_car_progress}})
422+
return self.bot_car_progresses
423+
424+
def judge_action(self, agents_info_map):
425+
'''Judge action to see whether reset is needed
426+
427+
Args:
428+
agents_info_map: Dictionary contains all agents info with agent name as the key
429+
and info as the value
430+
431+
Returns:
432+
tuple: None, None, None
433+
434+
Raises:
435+
GenericRolloutException: bot car pahse is not defined
436+
'''
437+
if self.bot_car_phase == AgentPhase.RUN.value:
438+
self.pause_end_time = self.current_sim_time
439+
for agent_name, _ in agents_info_map.items():
440+
# check racecar crash with a bot_car
441+
crashed_object_name = agents_info_map[agent_name]\
442+
[AgentInfo.CRASHED_OBJECT_NAME.value] \
443+
if AgentInfo.CRASHED_OBJECT_NAME.value in agents_info_map[agent_name] else None
444+
if 'racecar' in agent_name and \
445+
crashed_object_name and 'bot_car' in crashed_object_name:
446+
racecar_progress = agents_info_map[agent_name]\
447+
[AgentInfo.CURRENT_PROGRESS.value]
448+
bot_car_progress = agents_info_map[crashed_object_name]\
449+
[AgentInfo.CURRENT_PROGRESS.value]
450+
# transition to AgentPhase.PAUSE.value
451+
if racecar_progress > bot_car_progress:
452+
self.bot_cars_lane_change_end_times = \
453+
[t + self.penalty_seconds for t in self.bot_cars_lane_change_end_times]
454+
self.bot_car_crash_count += 1
455+
self.pause_end_time += self.penalty_seconds
456+
self.bot_car_phase = AgentPhase.PAUSE.value
457+
break
458+
elif self.bot_car_phase == AgentPhase.PAUSE.value:
459+
# transition to AgentPhase.RUN.value
460+
if self.current_sim_time > self.pause_end_time:
461+
self.bot_car_phase = AgentPhase.RUN.value
462+
else:
463+
raise GenericRolloutException('bot car phase {} is not defined'.\
464+
format(self.bot_car_phase))
322465
return None, None, None
323466

324467
def finish_episode(self):
325468
pass
326-
327-
def clear_data(self):
328-
pass

0 commit comments

Comments
 (0)