mirror of
https://github.com/kuhyx/WUT_Computer_Science.git
synced 2026-07-04 15:23:11 +02:00
feat: added lab7
This commit is contained in:
commit
3a140237cf
5
.gitignore
vendored
Normal file
5
.gitignore
vendored
Normal file
@ -0,0 +1,5 @@
|
||||
*.swp
|
||||
.DS_Store
|
||||
bin/configlet
|
||||
bin/configlet.exe
|
||||
temp.pl
|
||||
4
.vscode/extensions.json
vendored
4
.vscode/extensions.json
vendored
@ -1,5 +1,6 @@
|
||||
{
|
||||
"recommendations": [
|
||||
<<<<<<< HEAD
|
||||
"ms-python.python",
|
||||
"ms-python.pylint",
|
||||
"mikoz.black-py",
|
||||
@ -7,5 +8,8 @@
|
||||
"kisstkondoros.vscode-gutter-preview",
|
||||
"streetsidesoftware.code-spell-checker",
|
||||
"wesbos.theme-cobalt2"
|
||||
=======
|
||||
"rebornix.prolog"
|
||||
>>>>>>> lab7/main
|
||||
]
|
||||
}
|
||||
179
lab6/main.py
Normal file
179
lab6/main.py
Normal file
@ -0,0 +1,179 @@
|
||||
"""
|
||||
Code used to solve MountainCar-v0 gymnasium problem using Q-Learning algorithm
|
||||
"""
|
||||
from datetime import datetime
|
||||
import gymnasium as gym
|
||||
import numpy as np
|
||||
|
||||
# Helper function to discretize the state
|
||||
|
||||
|
||||
def discretize_state(state, env, first_time):
    """
    Map a continuous observation onto integer Q-table indices.

    The observation is shifted by ``env.observation_space.low`` so the
    smallest value lands on index 0, scaled by 10 (first component) and
    100 (second component), and rounded to the nearest integer.

    Args:
        state: Either a bare observation array, or the tuple returned by
            ``env.reset()`` whose first element is the observation.
        env: Environment providing ``observation_space.low``.
        first_time: True when ``state`` comes straight from ``env.reset()``.
            Kept for backward compatibility; a tuple is also unwrapped
            when the flag is False instead of raising.

    Returns:
        Integer NumPy array with one index per observation component.
    """
    # A reset result wraps the observation in a tuple; a step result does not.
    if first_time or isinstance(state, tuple):
        observation = state[0]
    else:
        observation = state
    offset_from_low = observation - env.observation_space.low
    scaled = offset_from_low * np.array([10, 100])
    return np.round(scaled, 0).astype(int)
|
||||
|
||||
|
||||
def initialize_environment(hyperparameters):
    """
    Create the MountainCar-v0 environment wrapped in a video recorder.

    ``hyperparameters`` is accepted but not read by this function.
    Recordings are written to the ``vid`` folder and prefixed with the
    current wall-clock time formatted as HH:MM:SS.
    """
    def should_record(episode_id):
        # Record only even-numbered values above 600 of whatever counter
        # RecordVideo passes to its episode trigger.
        return episode_id > 600 and episode_id % 2 == 0

    base_env = gym.make('MountainCar-v0', render_mode='rgb_array')
    clock_prefix = datetime.now().strftime("%H:%M:%S")
    return gym.wrappers.RecordVideo(
        base_env,
        video_folder='vid',
        disable_logger=True,
        name_prefix=clock_prefix,
        episode_trigger=should_record,
    )
|
||||
|
||||
|
||||
def initialize_q_table(env):
    """
    Build an all-zero Q-table sized to the discretized observation space.

    The observation range (``high - low``) is scaled by 10 for the first
    component and 100 for the second, rounded, and incremented by one so
    that both end points get their own bin. The last axis has one entry
    per action (``env.action_space.n``); for MountainCar these are
    0 = accelerate left, 1 = don't accelerate, 2 = accelerate right.
    """
    observation_span = env.observation_space.high - env.observation_space.low
    bin_counts = np.round(observation_span * np.array([10, 100]), 0).astype(int) + 1
    first_axis, second_axis = bin_counts[0], bin_counts[1]
    return np.zeros((first_axis, second_axis, env.action_space.n))
|
||||
|
||||
|
||||
def initialize_hyperparameters():
    """
    Return a fresh dictionary with the settings used by the algorithm
    """
    return dict(
        learning_rate=0.1,
        discount_factor=0.99,
        epsilon=0.2,
        max_episodes=1000,
        max_steps=500,
        min_max_car_position=[-1.2, 0.6],
        min_max_car_velocity=[-0.07, 0.07],
        goal_x=0.5,
        truncation=200,
    )
|
||||
|
||||
|
||||
def choose_action(hyperparameters, env, q_table, discretized_state):
    """
    Pick the next action with an epsilon-greedy policy

    With probability ``hyperparameters["epsilon"]`` a random action is
    sampled from ``env.action_space``; otherwise the action with the
    highest Q-value for ``discretized_state`` is returned.
    """
    explore = np.random.uniform(0, 1) < hyperparameters["epsilon"]
    if explore:
        return env.action_space.sample()
    # Exploit: best known action for the current state cell.
    state_row = q_table[discretized_state[0], discretized_state[1]]
    return np.argmax(state_row)
|
||||
|
||||
|
||||
def update_q_table(q_table, action, hyperparameters, reward):
    """
    Blend the stored value for ``action`` with the newest reward

    The entry ``q_table[action]`` is replaced by a weighted average of its
    old value and ``reward + discount_factor * max(q_table)``, weighted by
    the learning rate. The table is modified in place and also returned.
    """
    # NOTE(review): this indexes the table by action only and takes the
    # maximum over the whole table, which looks like it targets a 1-D
    # table -- confirm before using it with a state-indexed Q-table.
    alpha = hyperparameters["learning_rate"]
    gamma = hyperparameters["discount_factor"]
    old_value = q_table[action]
    best_value = np.max(q_table)
    q_table[action] = (1 - alpha) * old_value + alpha * (reward + gamma * best_value)
    return q_table
|
||||
|
||||
|
||||
def movement(hyperparameters, env, q_table, discretized_state, total_reward=0, episode_number=0):
    """
    Choose an action, apply it, and update the Q-table from the outcome

    Args:
        hyperparameters: Dict providing "learning_rate" and "discount_factor"
            (and whatever ``choose_action`` reads).
        env: Environment that is stepped once.
        q_table: Q-table indexed as [state index 0, state index 1, action];
            updated in place.
        discretized_state: Indices of the state the action is taken from.
        total_reward: Reward accumulated so far in this episode.
        episode_number: Only used in the message printed on termination.

    Returns:
        Tuple ``(hyperparameters, env, q_table, done, discretized_state,
        total_reward)`` where ``discretized_state`` is the next state's
        indices and ``done`` is True when the step terminated or truncated
        the episode.
    """
    action = choose_action(hyperparameters, env, q_table, discretized_state)
    # Take the action and observe the next state
    next_state, reward, terminated, truncated, _ = env.step(action)
    discretized_next_state = discretize_state(next_state, env, False)

    learning_rate = hyperparameters["learning_rate"]
    discount_factor = hyperparameters["discount_factor"]
    current_index = (discretized_state[0], discretized_state[1], action)

    if terminated:
        # Nothing follows a terminal state, so the target is the reward
        # alone; bootstrapping here would leak values from non-terminal
        # states that share the same discretized cell.
        td_target = reward
    else:
        best_next_value = np.max(
            q_table[discretized_next_state[0], discretized_next_state[1]])
        td_target = reward + discount_factor * best_next_value
    q_table[current_index] += learning_rate * (td_target - q_table[current_index])

    total_reward += reward
    done = terminated or truncated
    if terminated:
        print("Destination reached on episode: ", episode_number)
    return hyperparameters, env, q_table, done, discretized_next_state, total_reward
|
||||
|
||||
|
||||
def episode_step(env, hyperparameters, q_table, episode_rewards, episode_number):
    """
    Run one episode and record its accumulated reward

    The environment is reset, then ``movement`` is called up to
    ``hyperparameters["max_steps"]`` times, stopping early once it reports
    the episode as done. The episode's total reward is appended to
    ``episode_rewards``.
    """
    reset_result = env.reset()  # Start from a fresh initial state
    discretized_state = discretize_state(reset_result, env, True)
    total_reward = 0  # Reward collected during this episode

    for _ in range(hyperparameters["max_steps"]):
        step_outcome = movement(
            hyperparameters, env, q_table, discretized_state, total_reward, episode_number)
        hyperparameters, env, q_table, finished, discretized_state, total_reward = step_outcome
        if finished:
            break

    episode_rewards.append(total_reward)
    return env, hyperparameters, q_table, episode_rewards
|
||||
|
||||
|
||||
def training_loop(hyperparameters, env, q_table):
    """
    Train on MountainCar for ``hyperparameters["max_episodes"]`` episodes

    Returns the environment and the Q-table after the last episode.
    """
    rewards_log = []  # Total reward of every finished episode

    for episode_index in range(hyperparameters["max_episodes"]):
        episode_result = episode_step(
            env, hyperparameters, q_table, rewards_log, episode_index)
        env, hyperparameters, q_table, rewards_log = episode_result

    return env, q_table
|
||||
|
||||
|
||||
def inference(env, q_table):
    """
    Run one greedy episode using the trained Q-table

    At every step the current observation is discretized and the action
    with the highest Q-value for that state is taken, until the episode
    terminates or is truncated.

    Args:
        env: Environment to run; it is reset before the episode starts.
        q_table: Q-table indexed as [state index 0, state index 1, action].
    """
    # reset() returns a tuple whose first element is the observation.
    observation, _ = env.reset()
    done = False

    while not done:
        state_index = discretize_state(observation, env, False)
        # argmax over the whole table would give a flattened index, not an
        # action, so select the row for the current state first.
        action = np.argmax(q_table[state_index[0], state_index[1]])
        # Take the action and observe the next state
        observation, _, terminated, truncated, _ = env.step(action)
        done = terminated or truncated
|
||||
|
||||
|
||||
if __name__ == '__main__':
    HYPERPARAMETERS = initialize_hyperparameters()
    ENV = initialize_environment(HYPERPARAMETERS)
    try:
        Q_TABLE = initialize_q_table(ENV)
        ENV, Q_TABLE = training_loop(HYPERPARAMETERS, ENV, Q_TABLE)
        inference(ENV, Q_TABLE)
    finally:
        # Close the environment even when training or inference raises.
        ENV.close()
|
||||
0
poetry.lock → lab6/poetry.lock
generated
0
poetry.lock → lab6/poetry.lock
generated
40
lab7/code/main.pl
Normal file
40
lab7/code/main.pl
Normal file
@ -0,0 +1,40 @@
|
||||
% month_days(Month, DaysInMonth, DaysBeforeMonth)
% Facts for the year 2023: for each two-character month atom, the number
% of days in that month and the number of days that pass in the year
% before the month begins.
% Prolog works by stating facts and rules; a query is answered by finding
% values that make them true, rather than by calling a function that
% computes a result.
month_days('01', 31,   0).
month_days('02', 28,  31).
month_days('03', 31,  59).
month_days('04', 30,  90).
month_days('05', 31, 120).
month_days('06', 30, 151).
month_days('07', 31, 181).
month_days('08', 31, 212).
month_days('09', 30, 243).
month_days('10', 31, 273).
month_days('11', 30, 304).
month_days('12', 31, 334).
|
||||
|
||||
% day_of_year(Date, DayOfYear) converts a date to its day-of-year number,
% which is later used to calculate the interval.
% The characters of Date are split into a day part and a month part; the
% month part must match a month_days/3 fact. The goal fails unless the day
% part is an integer between 1 and the number of days in that month.
day_of_year(Date, DayOfYear) :-
    atom_chars(Date, Chars),
    % Try every split; only one whose tail is a known month can succeed.
    append(DayChars, MonthChars, Chars),
    atom_chars(Day, DayChars),
    atom_chars(Month, MonthChars),
    month_days(Month, DaysInMonth, MonthDays),
    atom_number(Day, DayNumber),
    % atom_number/2 also accepts floats such as '1.5'; a day must be whole.
    integer(DayNumber),
    DayNumber > 0,
    DayNumber =< DaysInMonth,
    DayOfYear is MonthDays + DayNumber.
|
||||
|
||||
% interval(Date1, Date2) prints the number of days between Date1 and Date2.
% Dates are expected in the format ddmm, where 'd' stands for day and 'm'
% stands for month. A single-digit day or month is expected to have a
% leading zero (0505 for the 5th of May, 1105 for the 11th of May,
% 0511 for the 5th of November).
% The result is written followed by a newline to match the required output
% as closely as possible. If either date is rejected, 'Invalid input.' is
% written and the goal fails.
interval(Date1, Date2) :-
    (   day_gap(Date1, Date2, Gap)
    ->  write(Gap), nl
    ;   write('Invalid input.'), nl,
        fail
    ).

% day_gap(Date1, Date2, Gap): Gap is the absolute difference between the
% day-of-year numbers of the two dates.
day_gap(Date1, Date2, Gap) :-
    day_of_year(Date1, First),
    day_of_year(Date2, Second),
    Gap is abs(Second - First).
|
||||
BIN
lab7/labMaterials/EARIN Lab 7.pdf
Normal file
BIN
lab7/labMaterials/EARIN Lab 7.pdf
Normal file
Binary file not shown.
BIN
lab7/report/EARIN_LAB_7_KLISZKO_RUDNICKI.pdf
Normal file
BIN
lab7/report/EARIN_LAB_7_KLISZKO_RUDNICKI.pdf
Normal file
Binary file not shown.
183
lab7/report/EARIN_LAB_7_KLISZKO_RUDNICKI.tex
Normal file
183
lab7/report/EARIN_LAB_7_KLISZKO_RUDNICKI.tex
Normal file
@ -0,0 +1,183 @@
|
||||
\documentclass[12pt]{article}
|
||||
\usepackage{listings}
|
||||
\title{EARIN Lab 7 Report}
|
||||
\author{Krzysztof Rudnicki, 307585 \and Jakub Kliszko, 303866}
|
||||
\date{\today}
|
||||
|
||||
\lstset{
|
||||
frameround=fttt,
|
||||
language=Prolog,
|
||||
breaklines=true,
|
||||
keywordstyle=\bfseries,
|
||||
basicstyle=\ttfamily
|
||||
}
|
||||
|
||||
\begin{document}
|
||||
\maketitle
|
||||
\section{Exercise Variant 2}
|
||||
Our task was to write a Prolog program which returns the number of days between two dates \\
|
||||
Assumptions:
|
||||
\begin{itemize}
|
||||
\item Year is 2023
|
||||
\item Number of days is $\leq$ 365
|
||||
\end{itemize}
|
||||
|
||||
Exemplary use:
|
||||
\begin{lstlisting}
|
||||
?- interval("2205", "0506")
|
||||
14
|
||||
|
||||
?- interval ("0102", "1102")
|
||||
10
|
||||
\end{lstlisting}
|
||||
An additional assumption we made, based on the exemplary use, is that we always receive the date in "ddmm" format: first 'd' (day) and then 'm' (month) \\
|
||||
If either the day or the month is a single digit, we put '0' in front of the digit so that the string is 4 characters wide \\
|
||||
Examples: \\
|
||||
1st of January is represented by "0101" \\
|
||||
2nd of November is represented by "0211" \\
|
||||
15th of January is represented by "1501" \\
|
||||
Another assumption we made is that interval will be always positive, order of dates does not matter so those queries:
|
||||
\begin{lstlisting}
|
||||
?- interval("2205", "0506")
|
||||
14
|
||||
|
||||
?- interval ("0506", "2205")
|
||||
14
|
||||
\end{lstlisting}
|
||||
Will give the exact same results. \\
|
||||
Last assumption is that we do not count first date as whole day so:
|
||||
\begin{lstlisting}
|
||||
?- interval("0101", "0101")
|
||||
0
|
||||
|
||||
?- interval("0101", "3112")
|
||||
364
|
||||
\end{lstlisting}
|
||||
|
||||
\section{Program}
|
||||
Our program successfully performs its task and returns correct number of days based on our assumptions. \\
|
||||
Program fails and prints out a message "Invalid input" if:
|
||||
\begin{itemize}
|
||||
\item The number of days in the month is too big (for example \lstinline{interval("3205", "0506")})
|
||||
\item Month does not exist (for example \lstinline{interval("0113", "0506")})
|
||||
\item Input does not make sense (for example \lstinline{interval("xxyy", "0506")})
|
||||
\item Day of month is smaller or equal to "00" (for example \lstinline{interval("0011", "0506")})
|
||||
\end{itemize}
|
||||
|
||||
Program does NOT return error and tries to return correct output (thanks to Prolog magic) for example when given incomplete input like:
|
||||
\begin{lstlisting}
|
||||
?- interval("106", "0506").
|
||||
4
|
||||
\end{lstlisting}
|
||||
|
||||
\subsection{Prolog magic}
|
||||
Prolog is based on the idea of logic programming. It does not have the concept of functions, it rather operates on predicates and goals. A predicate (a fact or a rule) defines a state of the world and a goal tells Prolog to make that state of the world come true, if possible (in other case it fails).
|
||||
|
||||
For example, we can query our program with such command and it will return \emph{true}, because it is a valid state:
|
||||
\begin{lstlisting}
|
||||
?- month_days('09', 30, 243).
|
||||
true
|
||||
\end{lstlisting}
|
||||
The following query will fail, because the predicate is not valid for these values:
|
||||
\begin{lstlisting}
|
||||
?- month_days('10', 28, 123).
|
||||
false
|
||||
\end{lstlisting}
|
||||
We can, however, provide Prolog with unbound variables. It will try to find a possible solution and bind them to values. This is called unification:
|
||||
\begin{lstlisting}
|
||||
?- month_days('09', DaysInMonth, Days).
|
||||
Days = 243,
|
||||
DaysInMonth = 30
|
||||
\end{lstlisting}
|
||||
If there are multiple possible valid solutions, Prolog will remember them and backtrack to them in case it fails later. The user can also ask for another solution, if one is available, by pressing \emph{;}, for instance:
|
||||
\begin{lstlisting}
|
||||
?- month_days(N, 30, X).
|
||||
N = '04',
|
||||
X = 90 ;
|
||||
N = '06',
|
||||
X = 151 ;
|
||||
N = '09',
|
||||
X = 243 ;
|
||||
N = '11',
|
||||
X = 304
|
||||
\end{lstlisting}
|
||||
|
||||
\subsection{Modules}
|
||||
The program consists of three main modules \\
|
||||
\begin{enumerate}
|
||||
%\item \lstinline{month_days} which specifies what month string corresponds to what number of days and number of days it takes to reach this month
|
||||
%\item \lstinline{day_of_year} which coverts date to number of days in the year used for later calculations, it also checks for erroneous input
|
||||
%\item \lstinline{interval} which actually calculates the interval between two dates and returns it
|
||||
\item \lstinline{month_days} which defines facts about the months -- how many days each month consists of and how many days it takes to reach this month
|
||||
\item \lstinline{day_of_year} which tells how many days have passed since the beginning of the year to a given date
|
||||
\item \lstinline{interval} which actually calculates the interval between two dates and writes it
|
||||
\end{enumerate}
|
||||
|
||||
\subsection{Tested examples}
|
||||
\paragraph{Negative}
|
||||
\begin{lstlisting}
|
||||
% Wrong number of days in month
|
||||
?- interval("3205", "0506")
|
||||
Invalid input.
|
||||
false.
|
||||
|
||||
% non existing month
|
||||
?- interval("0113", "0506").
|
||||
Invalid input.
|
||||
false.
|
||||
|
||||
% erroneous input
|
||||
?- interval("xxyy", "0506").
|
||||
Invalid input.
|
||||
false.
|
||||
|
||||
% Number of days equal or smaller than 00
|
||||
?- interval("0011", "0506").
|
||||
Invalid input.
|
||||
false.
|
||||
|
||||
?- interval("-1011", "0506").
|
||||
Invalid input.
|
||||
false.
|
||||
\end{lstlisting}
|
||||
|
||||
\paragraph{Positive}
|
||||
\begin{lstlisting}
|
||||
% Examples from project requirements
|
||||
?- interval("2205", "0506").
|
||||
14
|
||||
true.
|
||||
|
||||
?- interval("0102", "1102").
|
||||
10
|
||||
true.
|
||||
|
||||
% Edge cases, first day of year and last day of year
|
||||
?- interval("0101", "3112").
|
||||
364
|
||||
true.
|
||||
|
||||
% Edge case, the same date in both inputs
|
||||
?- interval("0101", "0101").
|
||||
0
|
||||
true.
|
||||
|
||||
% Edge case, input without first digit of day
|
||||
?- interval("2205", "506").
|
||||
14
|
||||
true.
|
||||
\end{lstlisting}
|
||||
|
||||
\subsection{Differences}
|
||||
In exemplary use there is no \emph{dot} '.' after query which was necessary for some interpreters (swipl) to actually run the query.\\
|
||||
Also in exemplary use there is no 'true' statement after number of days which Prolog by default displays after successful queries; we decided to not forcefully change it as it would contradict regular Prolog use philosophy.
|
||||
|
||||
\section{Challenges}
|
||||
The biggest challenge was understanding Prolog logic programming and its difference from functional programming paradigm.\\
|
||||
The next challenge was finding the right built-in predicates for handling the characters of the date; we decided on \lstinline{atom_chars} and \lstinline{atom_number}.\\
|
||||
Last difficult part was writing the output exactly as in examples using \break\lstinline{write(Interval)} and \lstinline{nl}.
|
||||
|
||||
|
||||
|
||||
|
||||
\end{document}
|
||||
Loading…
Reference in New Issue
Block a user