Browse Source

Tweak: minor clean-up; allow the player to resign when the evaluation of the best move in the final selection falls below (for Black) or rises above (for White) a fixed threshold

master
Guillaume Grenet 5 years ago
parent
commit
fce494b9fc
9 changed files with 107 additions and 61 deletions
  1. 22
    21
      lib/board.ex
  2. 1
    0
      lib/board/state.ex
  3. 2
    0
      lib/helpers.ex
  4. 58
    35
      lib/player/mc_rave.ex
  5. 2
    1
      lib/player/mc_rave/state.ex
  6. 2
    1
      mix.exs
  7. 2
    1
      mix.lock
  8. 17
    1
      test/player/mc_rave_test.exs
  9. 1
    1
      test/profile/play_move.exs

+ 22
- 21
lib/board.ex View File

@@ -58,7 +58,7 @@ defmodule WeiqiDMC.Board do

def set_handicap(state, handicap) do
handicap_coordinates = Helpers.handicap_coordinates(state.size, handicap) |> Enum.map(&Helpers.coordinate_string_to_tuple(&1))
compute_moves state, handicap_coordinates, :black
compute_moves %{state | handicap: handicap}, handicap_coordinates, :black
end

def valid_move?(state, coordinate) do
@@ -103,12 +103,12 @@ defmodule WeiqiDMC.Board do

def pre_compute_valid_move(state, coordinate, return_pre_computed) do
color = state.next_player
opposite_color = Helpers.opposite_color(color)
coordinate_set = Set.put(HashSet.new, coordinate)
surroundings = surroundings coordinate, state.size
empty = surroundings |> Enum.filter(fn (surrounding) -> State.board_value(state, surrounding) == :empty end)

other_player = surroundings |> Enum.filter_map(&State.board_has_value?(state, &1, Helpers.opposite_color(color)),
&group_containing(&1, Helpers.opposite_color(color), state.groups))
other_player = surroundings |> Enum.filter_map(&State.board_has_value?(state, &1, opposite_color),
&group_containing(&1, opposite_color, state.groups))

if length(empty) == 0 do
capturing = Enum.filter(other_player, fn({_, _, liberties}) ->
@@ -199,20 +199,21 @@ defmodule WeiqiDMC.Board do
end

def process_move(state, coordinate, color, move_liberties) do
#Remove the move in the list of liberties for opposite color groups
groups = state.groups |>
Enum.map(fn ({group_color, coordinates, liberties}) ->
if group_color == Helpers.opposite_color(color) do
{group_color, coordinates, Set.delete(liberties, coordinate)}
else
{group_color, coordinates, liberties}
end
end)
opposite_color = Helpers.opposite_color(color)

#For same group color, find all the groups that have this move as liberty
#and put them in a list to be merged.
to_merge = groups |> Enum.filter(fn ({group_color, _, liberties}) ->
group_color == color and Set.member?(liberties, coordinate)
{groups, to_merge} = Enum.partition(state.groups, fn {group_color, _, liberties} ->
#For same group color, find all the groups that have this move as liberty
#and put them in a list to be merged.
!(group_color == color and Set.member?(liberties, coordinate))
end)

groups = Enum.map(groups, fn {group_color, coordinates, liberties} ->
#Remove the move in the list of liberties for opposite color groups
if group_color == opposite_color do
{group_color, coordinates, Set.delete(liberties, coordinate)}
else
{group_color, coordinates, liberties}
end
end)

if !Enum.empty?(to_merge) do
@@ -223,9 +224,9 @@ defmodule WeiqiDMC.Board do

merged = {color, Set.put(coordinates, coordinate), Set.delete(Enum.into(move_liberties, liberties), coordinate)}

groups = (groups -- to_merge) ++ [merged]
groups = [merged|groups]
else
groups = groups ++ [{color, Set.put(HashSet.new, coordinate), Enum.into(move_liberties, HashSet.new)}]
groups = [{color, Set.put(HashSet.new, coordinate), Enum.into(move_liberties, HashSet.new)}|groups]
end

%{ State.update_board(state, coordinate, color) | groups: groups}
@@ -245,14 +246,14 @@ defmodule WeiqiDMC.Board do
end

def process_capture_group(state, [capture|rest]) do
surroundings = Enum.into surroundings(capture, state.size), HashSet.new
surroundings = surroundings(capture, state.size)

groups = state.groups |> Enum.filter_map(fn {_, coordinates, _} ->
#Remove all the groups containing the removed stone
!Set.member?(coordinates, capture)
end, fn {color, coordinates, liberties} ->
#Add liberties to the surrounding groups
if Set.size(Set.intersection(coordinates, surroundings)) > 0 do
if Enum.any?(surroundings, fn surrounding -> Set.member?(coordinates, surrounding) end) do
{color, coordinates, Set.put(liberties, capture)}
else
{color, coordinates, liberties}

+ 1
- 0
lib/board/state.ex View File

@@ -13,6 +13,7 @@ defmodule WeiqiDMC.Board.State do
coordinate_ko: nil,
last_move: nil,
consecutive_pass: false,
handicap: 0,
board: nil

def to_list(state) do

+ 2
- 0
lib/helpers.ex View File

@@ -40,7 +40,9 @@ defmodule WeiqiDMC.Helpers do
{row, column+1}
end

def coordinate_tuple_to_string()
def coordinate_tuple_to_string(nil) do "none" end
def coordinate_tuple_to_string(:resign) do "resign" end
def coordinate_tuple_to_string(:pass) do "pass" end
def coordinate_tuple_to_string({row, column}) do
"#{String.at("ABCDEFGHJKLMNOPQRSTUVWXYZ", column-1)}#{row}"

+ 58
- 35
lib/player/mc_rave.ex View File

@@ -2,45 +2,56 @@ defmodule WeiqiDMC.Player.MCRave do
alias WeiqiDMC.Board
alias WeiqiDMC.Board.State

@constant_bias 1
@constant_bias 0.1
@heuristic_confidence 5

def board_hash(board) do
#Copied from State.to_list ...
#TODO: any other idea?
board
|> Dict.to_list
|> Enum.map(fn ({row, column_dict}) ->
column_dict
|> Dict.to_list
|> Enum.map(fn ({column, value}) ->
{row, column, value}
end)
end)
end

# Key identifying a position in the MC-RAVE lookup tables.
# A bare atom is returned untouched (useful for testing); any other state is
# keyed by its `board` field.
def state_hash(state) when is_atom(state), do: state
def state_hash(state), do: state.board

def generate_move(state, think_time_ms) do
def generate_move(state, think_time_ms, show_stats \\ false) do
:random.seed(:os.timestamp)

{mc_rave_state, _} = mc_rave state, think_time_ms*1000,
%WeiqiDMC.Player.MCRave.State{}, 0
mc_rave_state = mc_rave state, think_time_ms*1000, %WeiqiDMC.Player.MCRave.State{}

if show_stats do
show_stats(state, mc_rave_state)
end

select_move state, mc_rave_state
select_move state, mc_rave_state, true
end

def mc_rave(_, remaining_time, mc_rave_state, stats) when remaining_time < 0 do
{mc_rave_state, stats}
# Prints a debugging report for the position held in `state`:
#   * the board, rendered with State.to_string/1;
#   * one line per {hash, move} key of `mc_rave_state.q` whose hash equals the
#     hash of `state`, showing its N, Q and eval/3 figures;
#   * the simulation counter and the move picked by select_move/3 with
#     resignation allowed.
def show_stats(state, mc_rave_state) do
  IO.puts "\nMove generation"
  IO.puts "---------------"

  IO.puts State.to_string(state)

  position_hash = state_hash state

  mc_rave_state.q
  |> Dict.keys
  |> Enum.each(fn
    {hash, move} when hash == position_hash ->
      key = {hash, move}
      visits = Dict.get(mc_rave_state.n, key, 0)
      value = Dict.get(mc_rave_state.q, key, 0)
      score = eval(hash, move, mc_rave_state)
      IO.puts "#{WeiqiDMC.Helpers.coordinate_tuple_to_string(move)} -> N=#{visits}, Q=#{value}, Eval=#{score}"
    {_, _} ->
      # Entry belongs to another position: nothing to print.
      nil
  end)

  chosen = select_move state, mc_rave_state, true
  IO.puts "Total simulation: #{mc_rave_state.simulations}"
  IO.puts "Selected moved: #{WeiqiDMC.Helpers.coordinate_tuple_to_string(chosen)} \n\n"
end

def mc_rave(state, remaining_time, mc_rave_state, stats) do
# Terminating clause of the search loop: once the remaining time has gone
# negative, the accumulated MC-RAVE state is returned unchanged.
def mc_rave(_state, remaining_time, mc_rave_state) when remaining_time < 0, do: mc_rave_state

def mc_rave(state, remaining_time, mc_rave_state) do
{elapsed, mc_rave_state} = :timer.tc &simulate/2, [state, mc_rave_state]
mc_rave state, remaining_time - elapsed, mc_rave_state, stats + 1
mc_rave state, remaining_time - elapsed, mc_rave_state
end

def simulate(state, mc_rave_state) do
@@ -49,7 +60,10 @@ defmodule WeiqiDMC.Player.MCRave do
backup mc_rave_state, known_states, known_actions, missing_actions, outcome
end

def backup(mc_rave_state, [], _, _, _) do mc_rave_state end
# Terminating clause of the backup pass: with no known states left to update,
# count the simulation as completed and return the updated MC-RAVE state.
def backup(mc_rave_state, [], _, _, _) do
  completed = mc_rave_state.simulations + 1
  %{mc_rave_state | simulations: completed}
end

def backup(mc_rave_state, [known_state|known_states], [known_action|known_actions], missing_actions, outcome) do
state_hash = state_hash(known_state)
key = {state_hash, known_action}
@@ -65,8 +79,6 @@ defmodule WeiqiDMC.Player.MCRave do
backup mc_rave_state, known_states, known_actions, missing_actions, outcome
end

#TODO: what does this do? Once you know, find a better name than the names
# from the algorithm.
def backup_tilde(mc_rave_state, _, _, index, _) when index <= 0 do mc_rave_state end
def backup_tilde(mc_rave_state, state_hash, all_actions, index, outcome) do
u = length(all_actions) - index
@@ -92,10 +104,10 @@ defmodule WeiqiDMC.Player.MCRave do
state_hash = state_hash state
if !tree_member?(mc_rave_state.tree, state_hash) do
parent_hash = states |> List.first |> state_hash
new_action = default_policy(state)
{new_action, _} = default_policy(state)
{Enum.reverse([state|states]), Enum.reverse([new_action|actions]), new_node(state, parent_hash, mc_rave_state)}
else
new_action = select_move(state, mc_rave_state)
new_action = select_move(state, mc_rave_state, false)
{:ok, new_state} = Board.compute_move(state, new_action, state.next_player)
sim_tree [new_state|[state|states]], [new_action|actions], mc_rave_state
end
@@ -104,19 +116,18 @@ defmodule WeiqiDMC.Player.MCRave do

def sim_default(from_state, moves) do
if game_over?(from_state) do
# IO.puts State.to_string(from_state)
{moves, outcome?(from_state)}
else
new_action = default_policy(from_state)
{:ok, new_state} = case new_action do
:pass -> Board.compute_move(from_state, :pass)
{:pass, _} -> Board.compute_move(from_state, :pass)
{coordinate, precomputed} -> Board.compute_valid_move(from_state, coordinate, precomputed)
end
sim_default new_state, moves ++ [new_action]
end
end

def select_move(state, mc_rave_state) do
def select_move(state, mc_rave_state, allow_resign) do
legal_moves = legal_moves state
state_hash = state_hash state

@@ -125,9 +136,19 @@ defmodule WeiqiDMC.Player.MCRave do
else
state_hash = state_hash state
if state.next_player == :black do
Enum.max_by(legal_moves, fn(move) -> eval(state_hash, move, mc_rave_state) end)
move = Enum.max_by(legal_moves, fn(move) -> eval(state_hash, move, mc_rave_state) end)
if allow_resign and eval(state_hash, move, mc_rave_state) < 0.3 do
:resign
else
move
end
else
Enum.min_by(legal_moves, fn(move) -> eval(state_hash, move, mc_rave_state) end)
move = Enum.min_by(legal_moves, fn(move) -> eval(state_hash, move, mc_rave_state) end)
if allow_resign and eval(state_hash, move, mc_rave_state) > 0.7 do
:resign
else
move
end
end
end
end
@@ -136,7 +157,7 @@ defmodule WeiqiDMC.Player.MCRave do
default_policy state, State.empty_coordinates(state)
end

def default_policy(_, []) do :pass end
def default_policy(_, []) do {:pass, nil} end
def default_policy(state, candidates) do
move = Enum.at candidates, :random.uniform(length(candidates)) - 1
if !ruin_perfectly_good_eye?(state, move) do
@@ -228,7 +249,9 @@ defmodule WeiqiDMC.Player.MCRave do
end

def black_wins?(state) do
count_stones(state, :black) > count_stones(state, :white) + state.komi
black_points = count_stones(state, :black)
white_points = count_stones(state, :white) + state.komi
black_points - white_points > 0
end

def count_stones(state, color) do

+ 2
- 1
lib/player/mc_rave/state.ex View File

@@ -3,5 +3,6 @@ defmodule WeiqiDMC.Player.MCRave.State do
q: HashDict.new,
n_tilde: HashDict.new,
q_tilde: HashDict.new,
tree: nil
tree: nil,
simulations: 0
end

+ 2
- 1
mix.exs View File

@@ -27,6 +27,7 @@ defmodule WeiqiDMC.Mixfile do
#
# Type `mix help deps` for more examples and options
defp deps do
[{:exprof, "~> 0.2"}]
[{:exprof, "~> 0.2"},
{:poolboy, github: "devinus/poolboy", tag: "1.5.1"}]
end
end

+ 2
- 1
mix.lock View File

@@ -1,2 +1,3 @@
%{"exprintf": {:hex, :exprintf, "0.1.6"},
"exprof": {:hex, :exprof, "0.2.0"}}
"exprof": {:hex, :exprof, "0.2.0"},
"poolboy": {:git, "https://github.com/devinus/poolboy.git", "3bb48a893ff5598f7c73731ac17545206d259fac", [tag: "1.5.1"]}}

+ 17
- 1
test/player/mc_rave_test.exs View File

@@ -26,7 +26,7 @@ defmodule WeiqiDMC.Player.McRaveTest do

{move, _} = Player.default_policy(Board.force_next_player(state, :black))
assert move == {9,1}
assert Player.default_policy(Board.force_next_player(state, :white)) == :pass
assert Player.default_policy(Board.force_next_player(state, :white)) == {:pass, nil}
end

test "#tree_insert will insert node when providing parent" do
@@ -250,4 +250,20 @@ defmodule WeiqiDMC.Player.McRaveTest do
assert Player.outcome?(state) == 1
assert Player.generate_move(Board.force_next_player(state, :white), 100) == {9,2}
end

# test "will pick the actual best move", %{state: state} do
# {:ok, state} = Board.set_handicap state, 6

# state = play_moves state, ["E7"], "White"
# state = play_moves state, ["A8"], "Black"
# state = play_moves state, ["F6"], "White"
# state = play_moves state, ["J3"], "Black"
# state = play_moves state, ["G6"], "White"
# state = play_moves state, ["D3"], "Black"
# state = play_moves state, ["H5"], "White"
# state = play_moves state, ["A6"], "Black"
# state = play_moves state, ["G4"], "White"

# assert Player.generate_move(state, 1000) == {1,3}
# end
end

+ 1
- 1
test/profile/play_move.exs View File

@@ -10,7 +10,7 @@ defmodule WeiqiDMC.Profile.PlayMove do
:random.seed(:os.timestamp)

#Slow... like SUPER SLOW
:fprof.apply(&MCRave.sim_default/2, [state, []])
:fprof.apply(&MCRave.generate_move/2, [state, 2000])
:fprof.profile
:fprof.analyse [dest: 'profile.out']


Loading…
Cancel
Save