-
Notifications
You must be signed in to change notification settings - Fork 2
/
mppi.m
113 lines (83 loc) · 4.31 KB
/
mppi.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
% Author: Akash Patel (apatel435)
function [sample_u_traj, rep_traj_cost] = mppi(func_control_update_converged, ...
func_comp_weights, func_term_cost, func_run_cost, func_g, func_F, ...
func_state_transform, func_filter_du, num_samples, learning_rate, ...
init_state, init_ctrl_seq, ctrl_noise_covar, time_horizon, ...
per_ctrl_based_ctrl_noise, real_traj_cost, print_mppi, save_sampling, ...
sampling_filename)
% time stuff
num_timesteps = size(init_ctrl_seq, 2);
dt = time_horizon / num_timesteps;
% sample state stuff
sample_init_state = func_state_transform(init_state);
sample_state_dim = size(sample_init_state,1);
% state trajectories
real_x_traj = zeros(sample_state_dim, num_timesteps + 1);
real_x_traj(:,1) = sample_init_state;
x_traj = zeros(sample_state_dim, num_samples, num_timesteps + 1);
x_traj(:,:,1) = repmat(sample_init_state,[1, num_samples]);
% control stuff
control_dim = size(init_ctrl_seq, 1);
du = realmax('double') * ones(control_dim, num_timesteps);
% control sequence
sample_u_traj = init_ctrl_seq;
% sampled control trajectories
v_traj = zeros(control_dim, num_samples, num_timesteps);
% Begin mppi
iteration = 1;
while(func_control_update_converged(du, iteration) == false)
last_sample_u_traj = sample_u_traj;
% Noise generation
flat_distribution = randn(control_dim, num_samples * num_timesteps);
ctrl_noise_flat = ctrl_noise_covar * flat_distribution;
ctrl_noise = reshape(ctrl_noise_flat, [control_dim, num_samples, num_timesteps]);
% Compute sampled control trajectories
ctrl_based_ctrl_noise_samples = round(per_ctrl_based_ctrl_noise * num_samples);
if (ctrl_based_ctrl_noise_samples == 0)
v_traj = ctrl_noise;
elseif (ctrl_based_ctrl_noise_samples == num_samples)
v_traj = repmat(reshape(sample_u_traj, [control_dim, 1, num_timesteps]), [1, num_samples, 1]) + ctrl_noise;
else
v_traj(:,1:ctrl_based_ctrl_noise_samples,:) = repmat(reshape(sample_u_traj, [control_dim, 1, num_timesteps]), [1, ctrl_based_ctrl_noise_samples, 1]) + ctrl_noise(:,1:ctrl_based_ctrl_noise_samples,:);
v_traj(:,ctrl_based_ctrl_noise_samples+1:end,:) = ctrl_noise(:,ctrl_based_ctrl_noise_samples+1:end,:);
end
traj_cost = zeros(1, num_samples);
for timestep_num = 1:num_timesteps
% Forward propagation
x_traj(:,:,timestep_num+1) = func_F(x_traj(:,:,timestep_num),func_g(v_traj(:,:,timestep_num)),dt);
traj_cost = traj_cost + func_run_cost(x_traj(:,:,timestep_num)) + learning_rate * sample_u_traj(:,timestep_num)' * inv(ctrl_noise_covar) * (sample_u_traj(:,timestep_num) - v_traj(:,:,timestep_num));
if(print_mppi)
fprintf("TN: %d, IN: %d, DU: %d\n", timestep_num, iteration, mean(sum(abs(du),1)));
end
end
traj_cost = traj_cost + func_term_cost(x_traj(:,:,timestep_num+1));
if(save_sampling)
save("-append", [sampling_filename '_v_traj.dat'],'v_traj');
save("-append", [sampling_filename '_x_traj.dat'],'x_traj');
save("-append", [sampling_filename '_traj_cost.dat'], 'traj_cost');
end
% Weight and du calculation
w = func_comp_weights(traj_cost);
du = reshape(sum(repmat(w, [control_dim, 1, num_timesteps]) .* ctrl_noise,2), [control_dim, num_timesteps]);
% Filter the output from forward propagation
du = func_filter_du(du);
sample_u_traj = sample_u_traj + du;
iteration = iteration + 1;
end
if (real_traj_cost == true)
% Loop through the dynamics again to recalcuate traj_cost
rep_traj_cost = 0;
for timestep_num = 1:num_timesteps
% Forward propagation
real_x_traj(:,timestep_num+1) = func_F(real_x_traj(:,timestep_num),func_g(sample_u_traj(:,timestep_num)),dt);
rep_traj_cost = rep_traj_cost + func_run_cost(real_x_traj(:,timestep_num)) + learning_rate * sample_u_traj(:,timestep_num)' * inv(ctrl_noise_covar) * (last_sample_u_traj(:,timestep_num) - sample_u_traj(:,timestep_num));
end
rep_traj_cost = rep_traj_cost + func_term_cost(real_x_traj(:,timestep_num+1));
else
% normalize weights, in case they are not normalized
normalized_w = w / sum(w);
% Compute the representative trajectory cost of what actually happens
% another way to think about this is weighted average of sample trajectory costs
rep_traj_cost = sum(normalized_w .* traj_cost);
end
end