要解决的问题:用 Q-Learning 让机器人从任意起始状态(0–6)出发,最终到达目标状态 6
视频课程地址:
强化学习算法系列教程及代码实现-Q-Learning_哔哩哔哩_bilibili
相应代码:
# Tabular Q-learning on a 7-state graph: learn how to reach state 6 from
# any starting state, then replay one greedy walk using the learned table.
import random

import numpy as np

# REWARDS[s, a] is the reward for moving from state s to state a.
# A negative entry marks a transition that is not allowed; 100 marks a
# transition that enters the goal state.
REWARDS = np.array([
    [-1, -1, -1, 0, -1, -1, -1],
    [-1, -1, 0, -1, -1, -1, -1],
    [-1, 0, -1, 0, -1, 0, -1],
    [0, -1, 0, -1, 0, -1, -1],
    [-1, -1, -1, 0, -1, 0, 100],
    [-1, -1, 0, -1, 0, -1, 100],
    [-1, -1, -1, -1, 0, 0, 100],
])
GOAL_STATE = 6
GAMMA = 0.8        # discount factor
EPISODES = 1000    # number of training episodes
MAX_STEPS = 20     # greedy walk gives up once more than this many moves were made


def valid_actions(rewards: np.ndarray, state: int) -> list:
    """Return the actions (next states) allowed from *state*.

    An action is allowed when its reward entry is non-negative.
    """
    return [int(a) for a in np.flatnonzero(rewards[state] >= 0)]


def train(
    rewards: np.ndarray = REWARDS,
    goal: int = GOAL_STATE,
    gamma: float = GAMMA,
    episodes: int = EPISODES,
) -> np.ndarray:
    """Learn a Q-table by random exploration.

    Each episode starts in a uniformly random state and takes uniformly
    random allowed moves until *goal* is reached, applying
    ``Q[s, a] = R[s, a] + gamma * max(Q[a])`` after every move.

    Returns:
        A float array with the same shape as *rewards*.
    """
    rewards = np.asarray(rewards)
    n_states = rewards.shape[0]
    q = np.zeros(rewards.shape)
    for _ in range(episodes):
        state = random.randrange(n_states)
        while state != goal:
            next_state = random.choice(valid_actions(rewards, state))
            q[state, next_state] = (
                rewards[state, next_state] + gamma * q[next_state].max()
            )
            state = next_state
    return q


def greedy_path(
    q: np.ndarray,
    rewards: np.ndarray,
    start: int,
    goal: int = GOAL_STATE,
    max_steps: int = MAX_STEPS,
) -> tuple:
    """Walk from *start* by always taking a best-valued allowed action.

    Ties are broken uniformly at random. Only allowed actions are
    considered, so a row of equal Q-values (e.g. an untrained state) can
    never send the walk along a forbidden transition.

    Returns:
        ``(path, reached)`` where *path* lists the states entered in order
        (excluding *start*) and *reached* tells whether *goal* was hit
        before more than *max_steps* moves had been made.
    """
    rewards = np.asarray(rewards)
    path = []
    state = start
    while state != goal:
        if len(path) > max_steps:
            return path, False
        actions = valid_actions(rewards, state)
        best = max(q[state, a] for a in actions)
        state = random.choice([a for a in actions if q[state, a] == best])
        path.append(state)
    return path, True


def main() -> None:
    """Train, print the Q-table, then print one greedy walk."""
    q = train()
    print(q)

    start = random.randrange(REWARDS.shape[0])
    print("机器人处于{}".format(start))
    path, reached = greedy_path(q, REWARDS, start)
    for next_state in path:
        print('机器人 goes to {} 。'.format(next_state))
    if not reached:
        print("fail")


if __name__ == "__main__":
    main()