Qlearn.R
q.learn <- function(R, N, alpha, gamma, final_state) {
  # initialize the Q matrix with zeros, same shape as the reward matrix
  Q <- matrix(rep(0, length(R)), nrow = nrow(R))
  # run N training episodes
  for (i in 1:N) {
    # start each episode from a random state
    cs <- sample(1:nrow(R), 1)
    while (TRUE) {
      # find the valid actions (non-negative reward) from the current state
      next.states <- which(R[cs, ] > -1)
      # if there is exactly one valid action, take it; otherwise pick one at random
      if (length(next.states) == 1)
        ns <- next.states
      else
        ns <- sample(next.states, 1)
      # Q-learning update: blend the old estimate with the observed reward
      # plus the discounted best Q value reachable from the next state
      Q[cs, ns] <- (1 - alpha) * Q[cs, ns] +
        alpha * (R[cs, ns] + gamma * max(Q[ns, which(R[ns, ] > -1)]))
      # end the episode once the terminal state is reached
      if (ns == final_state) break
      cs <- ns
    }
  }
  # normalize the Q values to a 0-100 scale
  return(100 * Q / max(Q))
}
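
# Illustrative sketch, not part of the original script: once training has
# finished, the greedy policy can be read off the Q matrix by taking the
# highest-valued action in each state. The helper name get.policy is made
# up here for demonstration.
get.policy <- function(Q) {
  # for each state (row), return the column index of the largest Q value
  apply(Q, 1, which.max)
}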
# initialize the example problem and run the learner
main.function <- function() {
  alpha <- 0.5       # learning rate
  gamma <- 0.8       # discount factor
  tgt.state <- 6     # terminal (goal) state
  iter <- 100        # number of training episodes
  # reward matrix, filled column-wise (each line below is one column of R):
  # -1 marks an invalid move, 0 a valid move, 100 a move into the goal state
  R <- matrix(c(-1,  -1, -1, -1,   0,  -1,
                -1,  -1, -1,  0,  -1,   0,
                -1,  -1, -1,  0,  -1,  -1,
                -1,   0,  0, -1,   0,  -1,
                 0,  -1, -1,  0,  -1,   0,
                -1, 100, -1, -1, 100, 100), nrow = 6)
  q.learn(R, iter, alpha, gamma, tgt.state)
}
main.function()
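
# Usage sketch (illustrative, building on the hypothetical get.policy helper
# above): capture the normalized Q matrix and read off the greedy action for
# each state. With this reward matrix, the learned route from state 1 should
# pass through state 5 into the goal state 6.
Q <- main.function()
print(get.policy(Q))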