From dd2601fc39844ca8c7058bba37419fc917cb7109 Mon Sep 17 00:00:00 2001 From: Krzysztof Rudnicki Date: Sun, 21 May 2023 19:01:43 +0200 Subject: [PATCH] feat: initial commit, --- .gitignore | 162 +++++++++++++++++++++++++++++++++++++++++++++++ .pylintrc | 3 + main.py | 161 +++++++++++++++++++++++++++++++++++++++------- requirements.yml | 1 + 4 files changed, 305 insertions(+), 22 deletions(-) create mode 100644 .gitignore create mode 100644 .pylintrc diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..c0b6fe20 --- /dev/null +++ b/.gitignore @@ -0,0 +1,162 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
+# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. 
"""
Code used to solve MountainCar-v0 gymnasium problem using Q-Learning algorithm
"""
from datetime import datetime

import gymnasium as gym
import numpy as np


def initialize_environment():
    """
    Initialize environment and video recording

    Creates the MountainCar-v0 environment in 'rgb_array' render mode and
    wraps it in gymnasium's RecordVideo wrapper writing into the 'vid' folder.

    Returns the wrapped environment; the caller is responsible for calling
    ``close()`` on it.
    """
    # Initialize environment
    env = gym.make('MountainCar-v0', render_mode='rgb_array')

    # Prefix used for the video file name. It must not contain ':' (illegal
    # in Windows file names) and includes the date so that two runs started
    # at the same clock time on different days do not share a prefix.
    time_string = datetime.now().strftime("%Y%m%d-%H%M%S")

    # Save video of the episode whose index equals 1.
    # NOTE(review): RecordVideo episode ids are presumably zero-based, making
    # this the second episode run on the wrapper - confirm against the
    # installed gymnasium version.
    env = gym.wrappers.RecordVideo(
        env,
        video_folder='vid',
        episode_trigger=lambda x: x == 1,
        disable_logger=False,
        name_prefix=time_string)
    return env
def initialize_q_table(env, n_bins=None):
    """
    Initialize "empty" (all-zero) Q-table

    env: environment exposing a discrete ``action_space`` with ``.n`` actions
    n_bins: optional number of buckets per observation dimension, e.g.
        ``(18, 14)``. When omitted the table has no state axes and its shape
        is ``(n_actions,)``, exactly as before.

    Returns a float array of shape ``(*n_bins, n_actions)``.
    """
    n_actions = env.action_space.n  # Number of possible actions, should be 3
    # 0 accelerate left
    # 1 dont accelerate
    # 2 accelerate to the right
    state_shape = () if n_bins is None else tuple(n_bins)
    return np.zeros(state_shape + (n_actions,))


def initialize_hyperparameters():
    """
    Initialize hyperparameters used by algorithm

    Returns a dict with the learning rate, discount factor, exploration
    probability (epsilon), number of training episodes and the number of
    buckets used to discretize each observation dimension.
    """
    return {
        "learning_rate": 0.1,
        "discount_factor": 0.99,
        "epsilon": 0.2,
        # NOTE(review): a single training episode is far too few to learn a
        # policy. It is left unchanged because the recorder set up in
        # initialize_environment triggers on episode index 1, so raising this
        # value also changes which episode ends up in the video.
        "max_episodes": 1,
        # Buckets for (position, velocity) of the observation.
        "n_bins": (18, 14),
    }


def _discretize(env, observation, q_table):
    """
    Map a continuous observation to the index of its Q-table row

    The number of buckets per dimension is read from the leading axes of
    ``q_table``; a table without state axes yields the empty index ``()``.
    Assumes ``env.observation_space`` has finite ``low``/``high`` bounds.
    """
    bins = q_table.shape[:-1]
    if not bins:
        return ()
    low = env.observation_space.low
    high = env.observation_space.high
    ratios = (np.asarray(observation) - low) / (high - low)
    # An observation sitting exactly on the upper bound would land one past
    # the last bucket, hence the clip.
    indices = np.clip((ratios * np.array(bins)).astype(int),
                      0, np.array(bins) - 1)
    return tuple(int(i) for i in indices)


def choose_action(hyperparameters, env, q_table, state_index=()):
    """
    Choose one of the possible actions (epsilon-greedy)

    state_index: tuple selecting the row of ``q_table`` for the current
        state; it needs one entry per state axis of the table. The default
        ``()`` is only meaningful for a table without state axes.

    With probability ``hyperparameters["epsilon"]`` a random action is
    sampled from ``env.action_space``; otherwise the action with the highest
    Q-value in the selected row is returned.
    """
    # epsilon-greedy exploration-exploitation tradeoff
    if np.random.uniform(0, 1) < hyperparameters["epsilon"]:
        return env.action_space.sample()  # Choose a random action
    # Choose the action with the highest Q-value
    return int(np.argmax(q_table[tuple(state_index)]))


def update_q_table(q_table, action, hyperparameters, reward,
                   state_index=(), next_state_index=None, terminated=False):
    """
    Update q_table with newest reward

    Applies Q(s, a) <- (1 - lr) * Q(s, a) + lr * (reward + gamma * max Q(s'))
    in place and returns the same table.

    state_index / next_state_index: rows of the table for the state the
        action was taken in and the state it led to. ``next_state_index``
        defaults to ``state_index``, which for a table without state axes
        reproduces the previous single-row update.
    terminated: when true the future value is taken as 0, because no further
        reward can follow a terminal state.
    """
    if next_state_index is None:
        next_state_index = state_index
    learning_rate = hyperparameters["learning_rate"]
    discount_factor = hyperparameters["discount_factor"]

    # Read the bootstrap value before writing, so the old estimate is used.
    future_value = 0.0 if terminated else np.max(
        q_table[tuple(next_state_index)])
    cell = tuple(state_index) + (action,)
    q_table[cell] = (1 - learning_rate) * q_table[cell] + \
        learning_rate * (reward + discount_factor * future_value)
    return q_table


def _q_learning_step(hyperparameters, env, q_table, state_index):
    """
    Take one epsilon-greedy step and learn from the observed transition

    Returns ``(next_state_index, reward, done)``.
    """
    action = choose_action(hyperparameters, env, q_table, state_index)
    # Take the action and observe the next state
    observation, reward, terminated, truncated, _ = env.step(action)
    next_state_index = _discretize(env, observation, q_table)
    update_q_table(q_table, action, hyperparameters, reward,
                   state_index, next_state_index, terminated)
    return next_state_index, reward, terminated or truncated


def movement(hyperparameters, env, q_table, total_reward=0, state_index=()):
    """
    Choose action and observe consequences

    Performs a single learning step and adds its reward to ``total_reward``.
    The follow-up state is not part of the return value, so this function
    suits tables without state axes; ``episode_step`` tracks the state
    itself.

    Returns ``(hyperparameters, env, q_table, done, total_reward)``.
    """
    _, reward, done = _q_learning_step(
        hyperparameters, env, q_table, state_index)
    return hyperparameters, env, q_table, done, total_reward + reward


def episode_step(env, hyperparameters, q_table, episode_rewards):
    """
    Actions done with every episode

    Resets the environment, runs learning steps until the episode is
    terminated or truncated and appends the accumulated reward to
    ``episode_rewards``.

    Returns ``(env, hyperparameters, q_table, episode_rewards)``.
    """
    observation, _ = env.reset()  # Reset the environment to an initial state
    state_index = _discretize(env, observation, q_table)
    total_reward = 0  # Accumulate rewards for the episode
    done = False  # Boolean to indicate episode completion

    while not done:
        state_index, reward, done = _q_learning_step(
            hyperparameters, env, q_table, state_index)
        total_reward += reward

    episode_rewards.append(total_reward)
    return env, hyperparameters, q_table, episode_rewards


def training_loop(hyperparameters, env, q_table):
    """
    Actual training for MountainCar

    Runs ``hyperparameters["max_episodes"]`` training episodes, updating
    ``q_table`` in place.

    Returns ``(env, q_table)``.
    """
    # Per-episode rewards are collected but are not part of the return value.
    episode_rewards = []

    for _ in range(hyperparameters["max_episodes"]):
        episode_step(env, hyperparameters, q_table, episode_rewards)

    return env, q_table


def inference(env, q_table):
    """
    Inference using the updated Q-table

    Runs one episode always taking the action with the highest Q-value for
    the current (discretized) state, without updating the table.

    Returns the total reward collected during the episode.
    """
    observation, _ = env.reset()
    total_reward = 0
    done = False

    while not done:
        # Choose the action with the highest Q-value
        state_index = _discretize(env, observation, q_table)
        action = int(np.argmax(q_table[state_index]))
        # Take the action and observe the next state
        observation, reward, terminated, truncated, _ = env.step(action)
        total_reward += reward
        done = terminated or truncated

    return total_reward


if __name__ == '__main__':
    ENV = initialize_environment()
    try:
        HYPERPARAMETERS = initialize_hyperparameters()
        Q_TABLE = initialize_q_table(ENV, HYPERPARAMETERS["n_bins"])
        ENV, Q_TABLE = training_loop(HYPERPARAMETERS, ENV, Q_TABLE)
        inference(ENV, Q_TABLE)
    finally:
        # Close even when training fails so the recorder is released.
        ENV.close()
+9,4 @@ dependencies: - numpy - python=3.9 - pygame + - opencv-python