Build your own Translator module

How to build the module

To build the module, you’ll need to inherit from the AbstractTranslator class described in the Translator module documentation and implement all of its methods.

How to test if the module was implemented correctly

Verify that you can connect the Game module to the Translator module and call all Translator methods without the Translator crashing.
Your Translator module should also follow these rules:
  1. You should convert all moves to a list of indices. The model will return one of these indices, which the Translator should then convert back to a Game move

  2. You should avoid passing any non-numeric values to the RL module

  3. The only exception to the rule above is the get_state function, which may pass an enum or string value

Translator module example (Freecell)

Module structure

Among many files, our core functionality was split into the following files:
  • constants.py - contains all the constants used in the Freecell Translator

  • freecell_translator.py - contains the implementation of Freecell Translator

  • functions.py - contains all functions used in Freecell Translator

freecell_translator.py

This code is only meant to illustrate what the translator could look like. For the full implementation, look at our repository. Let’s take a look at the FreecellTranslator class:

from ..abstract_translator.abstract_translator import AbstractTranslator
from .functions import *

class FreecellTranslator(AbstractTranslator):
    """Translator between the Freecell Game module and the RL module.

    Game moves are exposed as integer indices (see ``all_moves``) and the
    board is exposed as a flat list, so only numeric values reach the model.
    """

    def __init__(self, game=None):
        """Build the move lookup tables and the model configuration.

        Args:
            game: Game instance forwarded to ``AbstractTranslator.__init__``.
        """
        super().__init__(game)
        # Move vector -> index, and the inverse lookup index -> move vector
        self.all_moves = self._get_all_moves_dict()
        self.all_moves_rev = {index: vector for vector, index in self.all_moves.items()}

        observation_count = np.prod(SIZE_BOARD) + np.prod(SIZE_FREE_CELL) + np.prod(SIZE_HEAP)
        self.config_model = {
            "n_observations": observation_count,
            "n_actions": len(self.all_moves),
        }

        # Destination of the most recent move; read later by get_reward
        self.dst_card = None

    def make_move(self, move):
        """Translate a move index into a game move and play it.

        Args:
            move: Index of the move, as found in ``all_moves_rev``.
        """
        count_vector, source_vector, dest_vector = self.all_moves_rev[move]
        tableau, cells, _ = self.game.get_board()

        source = get_source_card(tableau, cells, count_vector, source_vector)
        # Remember the destination so get_reward can inspect it afterwards
        self.dst_card = get_dest_card(tableau, dest_vector)

        self.game.make_move((source, self.dst_card))

    def get_moves(self):
        """Return the indices of all moves the game currently offers."""
        tableau, cells, _ = self.game.get_board()

        indices = []
        for source, destination in self.game.get_moves():
            count_vector, source_vector = get_source_card_vector(tableau, cells, source)
            dest_vector = get_dest_card_vector(tableau, destination)
            indices.append(self.all_moves[(count_vector, source_vector, dest_vector)])

        return indices

    def get_board(self):
        """Return the board, free cells and heap as one flat list.

        Each part is converted by its ``convert_*_to_ar_ohe`` helper and the
        results are concatenated, because the model takes one-dimensional input.
        """
        tableau, cells, heap = self.game.get_board()

        parts = (
            convert_board_to_ar_ohe(tableau),
            convert_fc_to_ar_ohe(cells),
            convert_heap_to_ar_ohe(heap),
        )
        flattened = np.concatenate(parts)
        return flattened.tolist()

    def get_state(self):
        """Return the game state exactly as the game reports it."""
        return self.game.get_state()

    def start_game(self):
        """Start a game on the wrapped game object."""
        self.game.start_game()

    def get_reward(self):
        """Return the reward for the current situation.

        Returns:
            5 when the game is won, -5 when it is lost, 1 when the stored
            destination of the last move equals ``CARD_LOCATIONS.HEAP.value``,
            and 0 otherwise.
        """
        state_value = self.game.get_state().value
        if state_value == State.WON.value:
            return 5
        if state_value == State.LOST.value:
            return -5
        return 1 if self.dst_card == CARD_LOCATIONS.HEAP.value else 0

    def get_config_model(self):
        """Return the dict holding ``n_observations`` and ``n_actions``."""
        return self.config_model

    def _get_all_moves_dict(self):
        """Enumerate every single-card move vector and give it an index.

        Returns:
            Dict mapping ``(no_cards, src, dst)`` to a consecutive index,
            in iteration order of ``CARDS_SOURCE`` then ``CARDS_DEST``.
        """
        # Source/destination pairs that do not stay within the same column
        candidate_pairs = (
            (src, dst)
            for src, src_v in CARDS_SOURCE.items()
            for dst, dst_v in CARDS_DEST.items()
            if not self._is_the_same_col(src_v, dst_v)
        )
        return {
            (REV_NUMBER_OF_CARDS[1], src, dst): index
            for index, (src, dst) in enumerate(candidate_pairs)
        }

    def _is_the_same_col(self, src, dst):
        """Tell whether source and destination count as the same place.

        True when both are equal, or when both have the free-cell location
        as their first element.
        """
        if src == dst:
            return True

        free_cell = CARD_LOCATIONS.FREE_CELL.value
        return bool(src[0] == free_cell and dst[0] == free_cell)

Translator module example (2048)

Module structure

Among many files, our core functionality was split into the following files:
  • constants.py - contains all the constants used in the 2048 Translator

  • translator2048.py - contains the implementation of 2048 Translator

translator2048.py

This code is only meant to illustrate what the translator could look like. For the full implementation, look at our repository. Let’s take a look at the Translator2048 class:

import math

from .constants import *
from ..abstract_translator.AbstractTranslator import AbstractTranslator

class Translator2048(AbstractTranslator):
    """Translator between the 2048 Game module and the RL module.

    Moves are exchanged with the model as integer positions in
    ``move_indexes``; the board is exchanged as a flat list of values looked
    up in ``FIELDS_VALUES``.
    """

    def __init__(self, game=None):
        """Store the game and fix the ordering of the move indices.

        Args:
            game: Game instance forwarded to ``AbstractTranslator.__init__``.
        """
        super().__init__(game)
        # A member's position in this list is the move index the model uses
        self.move_indexes = list(MOVES)

    def make_move(self, move_index):
        """Play the move identified by ``move_index``.

        Args:
            move_index: Position of the move in ``move_indexes``.

        Returns:
            Always ``True``.
        """
        move_vector = self.move_indexes[move_index].value[1]
        # Look the member up again by its vector (second element of its value)
        matching_move = next(move for move in MOVES if move.value[1] == move_vector)
        move = matching_move.value[0]
        self.game.make_move((move,))
        return True

    def get_moves(self):
        """Return the indices of all moves the game currently offers."""
        all_moves = self.game.get_moves()
        return [self.move_indexes.index(get_enum_member(move)) for move in all_moves]

    def get_board(self):
        """Return the board as a flat, row-by-row list of ``FIELDS_VALUES`` entries."""
        board = self.game.get_board()
        return [FIELDS_VALUES[field.value] for row in board for field in row]

    def get_state(self):
        """Return the game state exactly as the game reports it."""
        return self.game.get_state()

    def start_game(self):
        """Start a game on the wrapped game object."""
        self.game.start_game()

    def get_reward(self):
        """Return the reward for the current board.

        Returns:
            10 when the game is won, -10 when it is lost, otherwise a value
            between 0 and 10 derived from the tile sum, the number of empty
            tiles and the monotonicity/smoothness terms.
        """
        state = self.game.get_state()
        if state.value == State.WON.value:
            return 10
        if state.value == State.LOST.value:
            return -10

        # Read the board once; tiles whose value is None are empty
        tile_values = [tile.value for row in self.game.get_board() for tile in row]
        merge_reward = sum(value for value in tile_values if value is not None)
        empty_penalty = -0.1 * sum(1 for value in tile_values if value is None)

        monotonic_reward = self.__calculate_monotonic_reward()
        smoothness_reward = self.__calculate_smoothness_reward()

        total_reward = merge_reward + empty_penalty + monotonic_reward + smoothness_reward
        # The smoothness term is non-positive, so the total can drop to -1 or
        # below, where math.log would raise ValueError. Totals below zero were
        # already clamped to a reward of 0, so flooring at 0 keeps that result.
        normalized_reward = math.log(max(total_reward, 0) + 1) / 2
        # Scale to be between 0 and 10
        return min(10, max(0, normalized_reward))

    def get_config_model(self):
        """Return the model configuration.

        This example provides none, so the result is ``None``.
        """
        return None

    @staticmethod
    def _sum_adjacent_differences(board):
        """Sum ``abs(a - b)`` over neighbouring non-empty tiles in rows and columns."""
        total = 0
        for line in list(board) + list(zip(*board)):
            values = [tile.value for tile in line]
            for previous, current in zip(values, values[1:]):
                # Pairs that include an empty tile do not contribute
                if previous is not None and current is not None:
                    total += abs(current - previous)
        return total

    def __calculate_smoothness_reward(self):
        """Return the negated sum of differences between neighbouring tiles."""
        return -self._sum_adjacent_differences(self.game.get_board())

    def __calculate_monotonic_reward(self):
        """Return the sum of differences between neighbouring tiles.

        The row term previously evaluated ``value or (0 - previous)`` because
        of operator precedence; it now matches the column term.
        """
        # NOTE(review): this is the exact negation of the smoothness term, so
        # the two cancel in get_reward. Confirm whether a real monotonicity
        # measure was intended here.
        return self._sum_adjacent_differences(self.game.get_board())