-
Notifications
You must be signed in to change notification settings - Fork 44
/
Copy pathhuman_play.py
94 lines (76 loc) · 3.1 KB
/
human_play.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
# -*- coding: utf-8 -*-
"""
Created on Sat Dec 8 13:51:53 2018
@author: initial-h
"""
from __future__ import print_function
from game_board import Board, Game
from mcts_pure import MCTSPlayer as MCTS_pure
from mcts_alphaZero import MCTSPlayer
from policy_value_net_tensorlayer import PolicyValueNet
import time
from os import path
import os
from collections import defaultdict
class Human(object):
"""
human player
"""
def __init__(self):
self.player = None
def set_player_ind(self, p):
self.player = p
def get_action(self, board,is_selfplay=False,print_probs_value=0):
# no use params in the func : is_selfplay,print_probs_value
# just to stay the same with AI's API
try:
location = input("Your move: ")
if isinstance(location, str): # for python3
location = [int(n, 10) for n in location.split(",")]
move = board.location_to_move(location)
except Exception as e:
move = -1
if move == -1 or move not in board.availables:
print("invalid move")
move,_ = self.get_action(board)
return move,None
def __str__(self):
return "Human {}".format(self.player)
def run(start_player=0,is_shown=1):
# run a gomoku game with AI
# you can set
# human vs AI or AI vs AI
n = 5
width, height = 11, 11
model_file = 'model_11_11_5/best_policy.model'
p = os.getcwd()
model_file = path.join(p,model_file)
board = Board(width=width, height=height, n_in_row=n)
game = Game(board)
mcts_player = MCTS_pure(5,400)
best_policy = PolicyValueNet(board_width=width,board_height=height,block=19,init_model=model_file,cuda=True)
# alpha_zero vs alpha_zero
# best_policy.save_numpy(best_policy.network_all_params)
# best_policy.load_numpy(best_policy.network_oppo_all_params)
alpha_zero_player = MCTSPlayer(policy_value_function=best_policy.policy_value_fn_random,
action_fc=best_policy.action_fc_test,
evaluation_fc=best_policy.evaluation_fc2_test,
c_puct=5,
n_playout=400,
is_selfplay=False)
# alpha_zero_player_oppo = MCTSPlayer(policy_value_function=best_policy.policy_value_fn_random,
# action_fc=best_policy.action_fc_test_oppo,
# evaluation_fc=best_policy.evaluation_fc2_test_oppo,
# c_puct=5,
# n_playout=400,
# is_selfplay=False)
# human player, input your move in the format: 2,3
# set start_player=0 for human first
# play in termianl without GUI
# human = Human()
# win = game.start_play(human, alpha_zero_player, start_player=start_player, is_shown=is_shown,print_prob=True)
# return win
# play in GUI
game.start_play_with_UI(alpha_zero_player)
if __name__ == '__main__':
run(start_player=0,is_shown=True)