# Copyright 2017 reinforce.io. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import logging
import sys
import unittest

import numpy as np
from six.moves import xrange

from tensorforce.agents import PPOAgent
from tensorforce.execution import Runner
from tensorforce.contrib.openai_gym import OpenAIGym

logging.getLogger('tensorflow').disabled = True


class TestQuickstartExample(unittest.TestCase):

    def test_example(self):
        sys.stdout.write('\nQuickstart:\n')
        sys.stdout.flush()

        passed = 0
        for _ in xrange(3):
            # Create an OpenAI Gym environment
            environment = OpenAIGym('CartPole-v0')

            # Network specification for the model
            network = [
                dict(type='dense', size=32),
                dict(type='dense', size=32)
            ]

            # Create the agent
            agent = PPOAgent(
                states=environment.states,
                actions=environment.actions,
                network=network,
                # Model
                states_preprocessing=None,
                actions_exploration=None,
                reward_preprocessing=None,
                # MemoryModel
                update_mode=dict(
                    unit='episodes',
                    # 10 episodes per update
                    batch_size=10,
                    # Every 10 episodes
                    frequency=10
                ),
                memory=dict(
                    type='latest',
                    include_next_states=False,
                    capacity=5000
                ),
                discount=0.99,
                # DistributionModel
                distributions=None,
                entropy_regularization=0.01,
                # PGModel
                baseline_mode='states',
                baseline=dict(
                    type='mlp',
                    sizes=[32, 32]
                ),
                baseline_optimizer=dict(
                    type='multi_step',
                    optimizer=dict(
                        type='adam',
                        learning_rate=1e-3
                    ),
                    num_steps=5
                ),
                gae_lambda=None,
                # PGLRModel
                likelihood_ratio_clipping=0.2,
                # PPOAgent
                step_optimizer=dict(
                    type='adam',
                    learning_rate=1e-3
                ),
                subsampling_fraction=0.1,
                optimization_steps=50
            )

            # Initialize the runner
            runner = Runner(agent=agent, environment=environment)

            # Callback invoked after each finished episode; returning False
            # stops the runner early
            def episode_finished(r):
                # Keep running until at least 100 episodes have finished and
                # the mean reward over the last 50 episodes reaches 50.0,
                # which indicates that learning took off
                mean_reward = np.mean(r.episode_rewards[-50:])
                return r.episode < 100 or mean_reward < 50.0

            # Start the runner
            runner.run(episodes=2000, max_episode_timesteps=200, episode_finished=episode_finished)
            runner.close()

            sys.stdout.write('episodes: {}\n'.format(runner.episode))
            sys.stdout.flush()

            # This run passed if the episode_finished callback returned False,
            # i.e. the runner stopped before hitting the 2000-episode limit
            if runner.episode < 2000:
                passed += 1

        sys.stdout.write('==> passed: {}\n'.format(passed))
        sys.stdout.flush()
        self.assertTrue(passed >= 2)
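

# Entry point added here as a minimal sketch, not part of the original test
# module: it allows running this file directly via
# `python test_quickstart_example.py`, as an alternative to a test runner
# such as `python -m unittest` or pytest.
if __name__ == '__main__':
    unittest.main()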