gomoku_rl.utils.psro module

class gomoku_rl.utils.psro.ConvergedIndicator(max_size: int = 15, mean_threshold: float = 0.99, std_threshold: float = 0.005, min_iter_steps: int = 40, max_iter_steps: int = 300)[source]

Bases: object

converged() bool[source]
reset()[source]
update(value: float)[source]
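
A minimal usage sketch, assuming (from the parameter names alone) that the indicator keeps a sliding window of up to max_size recent scores and reports convergence once their mean and standard deviation cross mean_threshold and std_threshold within the min_iter_steps/max_iter_steps bounds; evaluate_policy below is a hypothetical evaluation helper, not part of this module:

    from gomoku_rl.utils.psro import ConvergedIndicator

    indicator = ConvergedIndicator(max_size=15, mean_threshold=0.99, std_threshold=0.005)
    for step in range(1000):
        win_rate = evaluate_policy()  # hypothetical: returns a float in [0, 1]
        indicator.update(win_rate)
        if indicator.converged():
            break
    indicator.reset()  # clear the window before the next PSRO iteration
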
class gomoku_rl.utils.psro.PSROPolicyWrapper(policy: Policy, population: Population)[source]

Bases: object

add_current_policy()[source]
eval()[source]
sample()[source]
set_meta_policy(meta_policy: ndarray)[source]
set_oracle_mode(value: bool = True)[source]
train()[source]
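
A hedged sketch of one PSRO iteration driven through the wrapper, based only on the method names above; policy, the inner training loop, meta_solver, and payoffs are assumed context, not part of this module's documented API:

    from gomoku_rl.utils.psro import PSROPolicyWrapper, Population

    population = Population(dir="checkpoints/black", device="cuda")
    player = PSROPolicyWrapper(policy, population)

    player.set_oracle_mode(True)  # learner acts as the best-response oracle
    player.train()
    # ... inner loop: train `policy` against opponents drawn from the population ...

    player.add_current_policy()            # snapshot the learner into the population
    meta_policy, _ = meta_solver(payoffs)  # e.g. obtained via get_meta_solver("nash")
    player.set_meta_policy(meta_policy)

    player.set_oracle_mode(False)          # switch to playing the population mixture
    player.eval()
    player.sample()  # presumably re-draws a policy according to the meta-policy
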
class gomoku_rl.utils.psro.PayoffType(value)[source]

Bases: Enum

Enumeration of self-play payoff modes; used as the type argument of init_payoffs_sp and get_new_payoffs_sp.

black_vs_white = 1
both = 2
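
The two members plausibly control whether each pairing of population members is evaluated in one seating only (black_vs_white) or in both seatings (both); a sketch of how the mode threads into init_payoffs_sp:

    from gomoku_rl.utils.psro import PayoffType, init_payoffs_sp

    # Evaluate every pair of population members as Black and as White.
    payoffs = init_payoffs_sp(env, population, type=PayoffType.both)
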
class gomoku_rl.utils.psro.Population(dir: str, initial_policy: Callable[[TensorDict], TensorDict] | list[Callable[[TensorDict], TensorDict]] = <function uniform_policy>, device: torch.device | str | int | None = 'cuda')[source]

Bases: object

add(policy: Policy)[source]
fixed_behavioural_strategy(index: int)[source]
make_behavioural_strategy(index: int) Callable[[TensorDict], TensorDict][source]

Warning: strategies returned by make_behavioural_strategy share the internal _module_backup, so after s1 = population.make_behavioural_strategy(0) and s2 = population.make_behavioural_strategy(1), s1 and s2 end up being the same strategy.

sample(meta_policy: ndarray | None = None)[source]
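
A usage sketch illustrating the warning above; policy_a, policy_b, and meta_policy are assumed context, and the independence of fixed_behavioural_strategy snapshots is an inference from its name, not a documented guarantee:

    from gomoku_rl.utils.psro import Population

    population = Population(dir="checkpoints/psro", device="cuda")
    population.add(policy_a)  # register trained policies
    population.add(policy_b)

    # Presumably safe: fixed, independent snapshots of entries 0 and 1.
    s0 = population.fixed_behavioural_strategy(0)
    s1 = population.fixed_behavioural_strategy(1)

    # Unsafe: these callables share _module_backup, so t0 and t1
    # collapse into the same strategy (see the warning above).
    t0 = population.make_behavioural_strategy(0)
    t1 = population.make_behavioural_strategy(1)

    population.sample(meta_policy)  # load an entry drawn according to meta_policy
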
gomoku_rl.utils.psro.calculate_jpc(payoffs: ndarray)[source]
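
Presumably the joint policy correlation metric of Lanctot et al. (2017): for a square cross-play return matrix, let D be the mean diagonal entry (co-trained pairs) and O the mean off-diagonal entry; the relative performance drop is R = (D - O) / D. A self-contained sketch under that assumption:

    import numpy as np

    def jpc_sketch(payoffs: np.ndarray) -> float:
        # Mean return of co-trained pairs (diagonal) vs. mixed pairs (off-diagonal).
        n = payoffs.shape[0]
        d = np.trace(payoffs) / n
        o = (payoffs.sum() - np.trace(payoffs)) / (n * (n - 1))
        return (d - o) / d
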
gomoku_rl.utils.psro.get_meta_solver(name: str) Callable[[ndarray], tuple[ndarray, ndarray]][source]
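
The name argument presumably selects one of the solve_* meta-solvers below (accepted strings such as "nash" or "uniform" are an assumption); the returned callable maps a payoff matrix to one meta-policy per player:

    from gomoku_rl.utils.psro import get_meta_solver

    meta_solver = get_meta_solver("nash")                # name string is an assumption
    meta_policy_0, meta_policy_1 = meta_solver(payoffs)  # payoffs: np.ndarray
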
gomoku_rl.utils.psro.get_new_payoffs(env, population_0: Population, population_1: Population, old_payoffs: ndarray | None)[source]
gomoku_rl.utils.psro.get_new_payoffs_sp(env, population: Population, old_payoffs: ndarray | None, type: PayoffType = PayoffType.both)[source]
gomoku_rl.utils.psro.init_payoffs_sp(env, population: Population, type: PayoffType)[source]
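
The old_payoffs parameter suggests incremental bookkeeping: pass the previous matrix after adding a policy so that, presumably, only the new row and column are evaluated. A sketch covering both the self-play and the two-population variants:

    from gomoku_rl.utils.psro import (
        PayoffType, get_new_payoffs, get_new_payoffs_sp, init_payoffs_sp,
    )

    # Self-play: one population occupies both seats.
    payoffs = init_payoffs_sp(env, population, type=PayoffType.both)
    # ... add a new policy to `population`, then extend the matrix ...
    payoffs = get_new_payoffs_sp(env, population, old_payoffs=payoffs)

    # Two populations (Black vs. White); None presumably computes from scratch.
    payoffs_bw = get_new_payoffs(env, population_0, population_1, old_payoffs=None)
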
gomoku_rl.utils.psro.print_payoffs(payoffs: ndarray)[source]
gomoku_rl.utils.psro.solve_last_n(payoffs: ndarray, n: int) tuple[ndarray, ndarray][source]
gomoku_rl.utils.psro.solve_nash(payoffs: ndarray) tuple[ndarray, ndarray][source]
gomoku_rl.utils.psro.solve_uniform(payoffs: ndarray) tuple[ndarray, ndarray][source]
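
All three share the solver signature expected by get_meta_solver. As an anchor, solve_uniform plausibly ignores the payoff values and returns uniform mixtures, while solve_last_n presumably restricts the support to the n most recent policies; a shape check under those assumptions:

    import numpy as np
    from gomoku_rl.utils.psro import print_payoffs, solve_uniform

    payoffs = np.zeros((4, 4))
    p0, p1 = solve_uniform(payoffs)  # expected: two length-4 vectors summing to 1
    print_payoffs(payoffs)
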