forked from tensorforce/tensorforce
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathquickstart.py
More file actions
108 lines (96 loc) · 3.13 KB
/
quickstart.py
File metadata and controls
108 lines (96 loc) · 3.13 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
# Copyright 2017 reinforce.io. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
import numpy as np
from tensorforce.agents import PPOAgent
from tensorforce.execution import Runner
from tensorforce.contrib.openai_gym import OpenAIGym
# Wrap the Gym 'CartPole-v0' task in TensorForce's OpenAIGym adapter, with the
# visualize option switched off.
environment = OpenAIGym(
    'CartPole-v0',
    visualize=False
)
# Policy network, described as an ordered list of layer specifications.
#
# Note on discrete observation spaces: when the Gym environment exposes a
# discrete observation space, an "embedding" layer should be placed first in
# this list. Such environments can usually be recognised by one of:
#   - class ...Env(discrete.DiscreteEnv):
#   - self.observation_space = spaces.Discrete(...)
# Example head layer for that case (left disabled here):
#   dict(type='embedding', indices=100, size=32),
network_spec = [
    {'type': 'dense', 'size': 32},
    {'type': 'dense', 'size': 32},
]
# Build the PPO agent from the environment's state/action specs and the
# network defined above. The "# <Name>" markers keep the original grouping of
# the keyword arguments (presumably the TensorForce class that consumes each
# group -- confirm against the library).
agent = PPOAgent(
    states=environment.states,
    actions=environment.actions,
    network=network_spec,
    # Agent
    states_preprocessing=None,
    actions_exploration=None,
    reward_preprocessing=None,
    # MemoryModel
    update_mode=dict(
        unit='episodes',
        # 20 episodes per update
        batch_size=20,
        # Every 20 episodes
        frequency=20
    ),
    memory=dict(
        type='latest',
        include_next_states=False,
        capacity=5000
    ),
    # DistributionModel
    distributions=None,
    entropy_regularization=0.01,
    # PGModel
    baseline_mode='states',
    # Baseline: an MLP with two layers of 32 units, fitted by running the
    # wrapped Adam optimizer for num_steps=5 steps.
    baseline=dict(
        type='mlp',
        sizes=[32, 32]
    ),
    baseline_optimizer=dict(
        type='multi_step',
        optimizer=dict(
            type='adam',
            learning_rate=1e-3
        ),
        num_steps=5
    ),
    gae_lambda=0.97,
    # PGLRModel
    likelihood_ratio_clipping=0.2,
    # PPOAgent
    step_optimizer=dict(
        type='adam',
        learning_rate=1e-3
    ),
    subsampling_fraction=0.2,
    optimization_steps=25
)

# Create the runner that drives the agent/environment interaction loop
runner = Runner(agent=agent, environment=environment)
# Callback function printing episode statistics
def episode_finished(r):
    """Print a one-line summary of the episode that just ended.

    Args:
        r: Object exposing ``episode``, ``episode_timestep`` and
            ``episode_rewards``. The last entry of ``episode_rewards`` is
            reported as the reward of the finished episode.

    Returns:
        Always ``True``. NOTE(review): presumably a true value tells the
        caller of this callback to keep running -- confirm against the
        Runner documentation.
    """
    print("Finished episode {ep} after {ts} timesteps (reward: {reward})".format(
        ep=r.episode,
        ts=r.episode_timestep,
        reward=r.episode_rewards[-1]
    ))
    return True
# Start learning. The runner is closed in a ``finally`` clause so that it is
# still shut down when training raises or is interrupted (e.g. Ctrl-C).
try:
    runner.run(episodes=3000, max_episode_timesteps=200, episode_finished=episode_finished)
finally:
    runner.close()

# Print statistics: total episode count and the mean reward over (at most) the
# last 100 recorded episodes.
print("Learning finished. Total episodes: {ep}. Average reward of last 100 episodes: {ar}.".format(
    ep=runner.episode,
    ar=np.mean(runner.episode_rewards[-100:])
))