gomoku_rl
Contents:
gomoku_rl package
gomoku_rl
Index
Index
_ | A | B | C | D | E | F | G | H | I | L | M | N | P | R | S | T | U | V | W
_
__call__() (gomoku_rl.policy.base.Policy method)
__init__() (gomoku_rl.collector.BlackPlayCollector method)
(gomoku_rl.collector.SelfPlayCollector method)
(gomoku_rl.collector.VersusPlayCollector method)
(gomoku_rl.collector.WhitePlayCollector method)
(gomoku_rl.core.Gomoku method)
(gomoku_rl.env.GomokuEnv method)
(gomoku_rl.policy.base.Policy method)
A
ActorNet (class in gomoku_rl.utils.module)
add() (gomoku_rl.utils.psro.Population method)
add_current_policy() (gomoku_rl.utils.psro.PSROPolicyWrapper method)
add_prefix() (in module gomoku_rl.utils.misc)
addPlayer() (gomoku_rl.utils.elo.Elo method)
annotate_heatmap() (in module gomoku_rl.utils.visual)
AntiDiagonalFlip (class in gomoku_rl.utils.augment)
assert_layer_transition() (in module gomoku_rl.utils.test)
assert_observation() (in module gomoku_rl.utils.test)
assert_tensor_1d_all() (in module gomoku_rl.utils.test)
assert_transition() (in module gomoku_rl.utils.test)
augment_transition() (in module gomoku_rl.utils.augment)
B
batch_size (gomoku_rl.env.GomokuEnv property)
black (gomoku_rl.utils.test.Type attribute)
black_vs_white (gomoku_rl.utils.psro.PayoffType attribute)
BlackPlayCollector (class in gomoku_rl.collector)
board_size (gomoku_rl.env.GomokuEnv property)
both (gomoku_rl.utils.psro.PayoffType attribute)
C
calculate_jpc() (in module gomoku_rl.utils.psro)
Collector (class in gomoku_rl.collector)
compute_done() (in module gomoku_rl.core)
compute_elo_ratings() (in module gomoku_rl.utils.elo)
compute_expected_score() (in module gomoku_rl.utils.elo)
converged() (gomoku_rl.utils.psro.ConvergedIndicator method)
ConvergedIndicator (class in gomoku_rl.utils.psro)
count_parameters() (in module gomoku_rl.utils.module)
D
device (gomoku_rl.env.GomokuEnv property)
DiagonalFlip (class in gomoku_rl.utils.augment)
dict_flatten() (in module gomoku_rl.utils.wandb)
DQN (class in gomoku_rl.policy.dqn)
E
Elo (class in gomoku_rl.utils.elo)
eval() (gomoku_rl.policy.base.Policy method)
(gomoku_rl.policy.dqn.DQN method)
(gomoku_rl.policy.ppo.PPO method)
(gomoku_rl.utils.psro.PSROPolicyWrapper method)
eval_win_rate() (in module gomoku_rl.utils.eval)
expected_score() (gomoku_rl.utils.elo.Elo method)
F
fixed_behavioural_strategy() (gomoku_rl.utils.psro.Population method)
forward() (gomoku_rl.utils.module.ActorNet method)
(gomoku_rl.utils.module.MyDuelingCnnDQNet method)
(gomoku_rl.utils.module.PolicyHead method)
(gomoku_rl.utils.module.ResidualBlock method)
(gomoku_rl.utils.module.ResidualTower method)
(gomoku_rl.utils.module.ValueHead method)
(gomoku_rl.utils.module.ValueNet method)
G
get_action_mask() (gomoku_rl.core.Gomoku method)
get_augmented_transition() (in module gomoku_rl.utils.augment)
get_encoded_board() (gomoku_rl.core.Gomoku method)
get_kwargs() (in module gomoku_rl.utils.misc)
get_log_func() (in module gomoku_rl.utils.log)
get_meta_solver() (in module gomoku_rl.utils.psro)
get_new_payoffs() (in module gomoku_rl.utils.psro)
get_new_payoffs_sp() (in module gomoku_rl.utils.psro)
get_optimizer() (in module gomoku_rl.policy.common)
get_payoff_matrix() (in module gomoku_rl.utils.eval)
get_policy() (in module gomoku_rl.policy)
get_pretrained_policy() (in module gomoku_rl.policy)
get_replay_buffer() (in module gomoku_rl.policy.dqn)
Gomoku (class in gomoku_rl.core)
gomoku_rl
module
gomoku_rl.collector
module
gomoku_rl.core
module
gomoku_rl.env
module
gomoku_rl.policy
module
gomoku_rl.policy.base
module
gomoku_rl.policy.common
module
gomoku_rl.policy.dqn
module
gomoku_rl.policy.ppo
module
gomoku_rl.runner
module
gomoku_rl.runner.base
module
gomoku_rl.runner.independent_rl_runner
module
gomoku_rl.runner.psro_runner
module
gomoku_rl.utils
module
gomoku_rl.utils.augment
module
gomoku_rl.utils.elo
module
gomoku_rl.utils.eval
module
gomoku_rl.utils.log
module
gomoku_rl.utils.misc
module
gomoku_rl.utils.module
module
gomoku_rl.utils.policy
module
gomoku_rl.utils.psro
module
gomoku_rl.utils.sampler
module
gomoku_rl.utils.test
module
gomoku_rl.utils.visual
module
gomoku_rl.utils.wandb
module
GomokuEnv (class in gomoku_rl.env)
H
heatmap() (in module gomoku_rl.utils.visual)
HorizontalFlip (class in gomoku_rl.utils.augment)
I
Identity (class in gomoku_rl.utils.augment)
IndependentRLRunner (class in gomoku_rl.runner.independent_rl_runner)
IndependentRLSPRunner (class in gomoku_rl.runner.independent_rl_runner)
init_params() (in module gomoku_rl.utils.module)
init_payoffs_sp() (in module gomoku_rl.utils.psro)
init_wandb() (in module gomoku_rl.utils.wandb)
inverse_map_board() (gomoku_rl.utils.augment.AntiDiagonalFlip method)
(gomoku_rl.utils.augment.DiagonalFlip method)
(gomoku_rl.utils.augment.HorizontalFlip method)
(gomoku_rl.utils.augment.Identity method)
(gomoku_rl.utils.augment.Rotation method)
(gomoku_rl.utils.augment.Transform method)
(gomoku_rl.utils.augment.VerticalFlip method)
inverse_map_index() (gomoku_rl.utils.augment.AntiDiagonalFlip method)
(gomoku_rl.utils.augment.DiagonalFlip method)
(gomoku_rl.utils.augment.HorizontalFlip method)
(gomoku_rl.utils.augment.Identity method)
(gomoku_rl.utils.augment.Rotation method)
(gomoku_rl.utils.augment.Transform method)
(gomoku_rl.utils.augment.VerticalFlip method)
is_valid() (gomoku_rl.core.Gomoku method)
L
learn() (gomoku_rl.policy.base.Policy method)
(gomoku_rl.policy.dqn.DQN method)
(gomoku_rl.policy.ppo.PPO method)
load_state_dict() (gomoku_rl.policy.base.Policy method)
(gomoku_rl.policy.dqn.DQN method)
(gomoku_rl.policy.ppo.PPO method)
M
make_behavioural_strategy() (gomoku_rl.utils.psro.Population method)
make_critic() (in module gomoku_rl.policy.common)
make_dataset_naive() (in module gomoku_rl.policy.common)
make_dqn_actor() (in module gomoku_rl.policy.common)
make_egreedy_actor() (in module gomoku_rl.policy.common)
make_ppo_ac() (in module gomoku_rl.policy.common)
make_ppo_actor() (in module gomoku_rl.policy.common)
make_transition() (in module gomoku_rl.collector)
map_board() (gomoku_rl.utils.augment.AntiDiagonalFlip method)
(gomoku_rl.utils.augment.DiagonalFlip method)
(gomoku_rl.utils.augment.HorizontalFlip method)
(gomoku_rl.utils.augment.Identity method)
(gomoku_rl.utils.augment.Rotation method)
(gomoku_rl.utils.augment.Transform method)
(gomoku_rl.utils.augment.VerticalFlip method)
map_index() (gomoku_rl.utils.augment.AntiDiagonalFlip method)
(gomoku_rl.utils.augment.DiagonalFlip method)
(gomoku_rl.utils.augment.HorizontalFlip method)
(gomoku_rl.utils.augment.Identity method)
(gomoku_rl.utils.augment.Rotation method)
(gomoku_rl.utils.augment.Transform method)
(gomoku_rl.utils.augment.VerticalFlip method)
Mean (class in gomoku_rl.utils.log)
mixed (gomoku_rl.utils.test.Type attribute)
module
gomoku_rl
gomoku_rl.collector
gomoku_rl.core
gomoku_rl.env
gomoku_rl.policy
gomoku_rl.policy.base
gomoku_rl.policy.common
gomoku_rl.policy.dqn
gomoku_rl.policy.ppo
gomoku_rl.runner
gomoku_rl.runner.base
gomoku_rl.runner.independent_rl_runner
gomoku_rl.runner.psro_runner
gomoku_rl.utils
gomoku_rl.utils.augment
gomoku_rl.utils.elo
gomoku_rl.utils.eval
gomoku_rl.utils.log
gomoku_rl.utils.misc
gomoku_rl.utils.module
gomoku_rl.utils.policy
gomoku_rl.utils.psro
gomoku_rl.utils.sampler
gomoku_rl.utils.test
gomoku_rl.utils.visual
gomoku_rl.utils.wandb
MyDuelingCnnDQNet (class in gomoku_rl.utils.module)
N
no_nan_in_tensordict() (in module gomoku_rl.utils.test)
num_envs (gomoku_rl.env.GomokuEnv property)
P
payoff_headmap() (in module gomoku_rl.utils.visual)
PayoffType (class in gomoku_rl.utils.psro)
Policy (class in gomoku_rl.policy.base)
PolicyHead (class in gomoku_rl.utils.module)
Population (class in gomoku_rl.utils.psro)
PPO (class in gomoku_rl.policy.ppo)
print_payoffs() (in module gomoku_rl.utils.psro)
PSROPolicyWrapper (class in gomoku_rl.utils.psro)
PSRORunner (class in gomoku_rl.runner.psro_runner)
PSROSPRunner (class in gomoku_rl.runner.psro_runner)
R
ran_out (gomoku_rl.utils.sampler.SequentialSampler property)
REGISTRY (gomoku_rl.policy.base.Policy attribute)
reset() (gomoku_rl.collector.BlackPlayCollector method)
(gomoku_rl.collector.Collector method)
(gomoku_rl.collector.SelfPlayCollector method)
(gomoku_rl.collector.VersusPlayCollector method)
(gomoku_rl.collector.WhitePlayCollector method)
(gomoku_rl.core.Gomoku method)
(gomoku_rl.env.GomokuEnv method)
(gomoku_rl.utils.psro.ConvergedIndicator method)
ResidualBlock (class in gomoku_rl.utils.module)
ResidualTower (class in gomoku_rl.utils.module)
rollout() (gomoku_rl.collector.BlackPlayCollector method)
(gomoku_rl.collector.Collector method)
(gomoku_rl.collector.SelfPlayCollector method)
(gomoku_rl.collector.VersusPlayCollector method)
(gomoku_rl.collector.WhitePlayCollector method)
Rotation (class in gomoku_rl.utils.augment)
round() (in module gomoku_rl.collector)
run() (gomoku_rl.runner.base.Runner method)
(gomoku_rl.runner.base.SPRunner method)
Runner (class in gomoku_rl.runner.base)
S
sample() (gomoku_rl.utils.psro.Population method)
(gomoku_rl.utils.psro.PSROPolicyWrapper method)
(gomoku_rl.utils.sampler.SequentialSampler method)
self_play_step() (in module gomoku_rl.collector)
SelfPlayCollector (class in gomoku_rl.collector)
SequentialSampler (class in gomoku_rl.utils.sampler)
set_meta_policy() (gomoku_rl.utils.psro.PSROPolicyWrapper method)
set_oracle_mode() (gomoku_rl.utils.psro.PSROPolicyWrapper method)
set_post_step() (gomoku_rl.env.GomokuEnv method)
set_seed() (in module gomoku_rl.utils.misc)
solve_last_n() (in module gomoku_rl.utils.psro)
solve_nash() (in module gomoku_rl.utils.psro)
solve_uniform() (in module gomoku_rl.utils.psro)
SPRunner (class in gomoku_rl.runner.base)
state_dict() (gomoku_rl.policy.base.Policy method)
(gomoku_rl.policy.dqn.DQN method)
(gomoku_rl.policy.ppo.PPO method)
step() (gomoku_rl.core.Gomoku method)
(gomoku_rl.env.GomokuEnv method)
step_and_maybe_reset() (gomoku_rl.env.GomokuEnv method)
T
to() (gomoku_rl.core.Gomoku method)
train() (gomoku_rl.policy.base.Policy method)
(gomoku_rl.policy.dqn.DQN method)
(gomoku_rl.policy.ppo.PPO method)
(gomoku_rl.utils.psro.PSROPolicyWrapper method)
Transform (class in gomoku_rl.utils.augment)
Type (class in gomoku_rl.utils.test)
U
uniform_policy() (in module gomoku_rl.utils.policy)
update() (gomoku_rl.utils.elo.Elo method)
(gomoku_rl.utils.log.Mean method)
(gomoku_rl.utils.psro.ConvergedIndicator method)
V
value (gomoku_rl.utils.log.Mean property)
ValueHead (class in gomoku_rl.utils.module)
ValueNet (class in gomoku_rl.utils.module)
VersusPlayCollector (class in gomoku_rl.collector)
VerticalFlip (class in gomoku_rl.utils.augment)
W
white (gomoku_rl.utils.test.Type attribute)
WhitePlayCollector (class in gomoku_rl.collector)