diff --git a/Programming/PORR/.gitattributes b/Programming/PORR/.gitattributes new file mode 100644 index 00000000..e42fe68c --- /dev/null +++ b/Programming/PORR/.gitattributes @@ -0,0 +1,3 @@ +code/spd_5000.npz filter=lfs diff=lfs merge=lfs -text +code/poli3_1.npz filter=lfs diff=lfs merge=lfs -text +code/poli3_2.npz filter=lfs diff=lfs merge=lfs -text diff --git a/Programming/PORR/.github/workflows/check-large-files.yml b/Programming/PORR/.github/workflows/check-large-files.yml new file mode 100644 index 00000000..73f6cdb7 --- /dev/null +++ b/Programming/PORR/.github/workflows/check-large-files.yml @@ -0,0 +1,21 @@ +name: Check for Large Files + +on: push + +jobs: + check-large-files: + runs-on: ubuntu-latest + steps: + - name: Checkout Code + uses: actions/checkout@v3 + + - name: Check File Sizes + run: | + max_size=$((2 * 1024 * 1024 * 1024)) + for file in $(git ls-tree -r HEAD --name-only); do + size=$(stat --printf="%s" "$file") + if [ "$size" -gt "$max_size" ]; then + echo "Error: $file exceeds 2GB." + exit 1 + fi + done diff --git a/Programming/PORR/.gitignore b/Programming/PORR/.gitignore new file mode 100644 index 00000000..0624ab8c --- /dev/null +++ b/Programming/PORR/.gitignore @@ -0,0 +1,409 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally + + +## Core latex/pdflatex auxiliary files: +*.aux +*.lof +*.log +*.lot +*.fls +*.out +*.toc +*.fmt +*.fot +*.cb +*.cb2 +.*.lb + +## Intermediate documents: +*.dvi +*.xdv +*-converted-to.* +# these rules might exclude image files for figures etc. +# *.ps +# *.eps +# *.pdf + +## Generated if empty string is given at "Please type another file name for output:" +.pdf + +## Bibliography auxiliary files (bibtex/biblatex/biber): +*.bbl +*.bcf +*.blg +*-blx.aux +*-blx.bib +*.run.xml + +## Build tool auxiliary files: +*.fdb_latexmk +*.synctex +*.synctex(busy) +*.synctex.gz +*.synctex.gz(busy) +*.pdfsync +*.rubbercache +rubber.cache + +## Build tool directories for auxiliary files +# latexrun +latex.out/ + +## Auxiliary and intermediate files from other packages: +# algorithms +*.alg +*.loa + +# achemso +acs-*.bib + +# amsthm +*.thm + +# beamer +*.nav +*.pre +*.snm +*.vrb + +# changes +*.soc + +# comment +*.cut + +# cprotect +*.cpt + +# elsarticle (documentclass of Elsevier journals) +*.spl + +# endnotes +*.ent + +# fixme +*.lox + +# feynmf/feynmp +*.mf +*.mp +*.t[1-9] +*.t[1-9][0-9] +*.tfm + +#(r)(e)ledmac/(r)(e)ledpar +*.end +*.?end +*.[1-9] +*.[1-9][0-9] +*.[1-9][0-9][0-9] +*.[1-9]R +*.[1-9][0-9]R +*.[1-9][0-9][0-9]R +*.eledsec[1-9] +*.eledsec[1-9]R +*.eledsec[1-9][0-9] +*.eledsec[1-9][0-9]R +*.eledsec[1-9][0-9][0-9] +*.eledsec[1-9][0-9][0-9]R + +# glossaries +*.acn +*.acr +*.glg +*.glo +*.gls +*.glsdefs +*.lzo +*.lzs +*.slg +*.slo +*.sls + +# uncomment this for glossaries-extra (will ignore makeindex's style files!) +# *.ist + +# gnuplot +*.gnuplot +*.table + +# gnuplottex +*-gnuplottex-* + +# gregoriotex +*.gaux +*.glog +*.gtex + +# htlatex +*.4ct +*.4tc +*.idv +*.lg +*.trc +*.xref + +# hypdoc +*.hd + +# hyperref +*.brf + +# knitr +*-concordance.tex +# TODO Uncomment the next line if you use knitr and want to ignore its generated tikz files +# *.tikz +*-tikzDictionary + +# listings +*.lol + +# luatexja-ruby +*.ltjruby + +# makeidx +*.idx +*.ilg +*.ind + +# minitoc +*.maf +*.mlf +*.mlt +*.mtc[0-9]* +*.slf[0-9]* +*.slt[0-9]* +*.stc[0-9]* + +# minted +_minted* +*.pyg + +# morewrites +*.mw + +# newpax +*.newpax + +# nomencl +*.nlg +*.nlo +*.nls + +# pax +*.pax + +# pdfpcnotes +*.pdfpc + +# sagetex +*.sagetex.sage +*.sagetex.py +*.sagetex.scmd + +# scrwfile +*.wrt + +# svg +svg-inkscape/ + +# sympy +*.sout +*.sympy +sympy-plots-for-*.tex/ + +# pdfcomment +*.upa +*.upb + +# pythontex +*.pytxcode +pythontex-files-*/ + +# tcolorbox +*.listing + +# thmtools +*.loe + +# TikZ & PGF +*.dpth +*.md5 +*.auxlock + +# titletoc +*.ptc + +# todonotes +*.tdo + +# vhistory +*.hst +*.ver + +# easy-todo +*.lod + +# xcolor +*.xcp + +# xmpincl +*.xmpi + +# xindy +*.xdy + +# xypic precompiled matrices and outlines +*.xyc +*.xyd + +# endfloat +*.ttt +*.fff + +# Latexian +TSWLatexianTemp* + +## Editors: +# WinEdt +*.bak +*.sav + +# Texpad +.texpadtmp + +# LyX +*.lyx~ + +# Kile +*.backup + +# gummi +.*.swp + +# KBibTeX +*~[0-9]* + +# TeXnicCenter +*.tps + +# auto folder when using emacs and auctex +./auto/* +*.el + +# expex forward references with \gathertags +*-tags.tex + +# standalone packages +*.sta + +# Makeindex log files +*.lpz + +# xwatermark package +*.xwm + +# REVTeX puts footnotes in the bibliography by default, unless the nofootinbib +# option is specified. Footnotes are the stored in a file with suffix Notes.bib. +# Uncomment the next line to have this generated file ignored. +#*Notes.bib +spd_10000.npz +nemeth12.npz +poli3.npz \ No newline at end of file diff --git a/Programming/PORR/.vscode/extensions.json b/Programming/PORR/.vscode/extensions.json new file mode 100644 index 00000000..ee0aa0aa --- /dev/null +++ b/Programming/PORR/.vscode/extensions.json @@ -0,0 +1,14 @@ +{ + "recommendations": [ + "github.vscode-github-actions", + "yzhang.markdown-all-in-one", + "ms-python.autopep8", + "nwgh.bandit", + "ms-python.black-formatter", + "usernamehw.errorlens", + "ms-python.debugpy", + "ms-python.vscode-pylance", + "james-yu.latex-workshop", + "github.copilot-chat" + ] +} \ No newline at end of file diff --git a/Programming/PORR/README.md b/Programming/PORR/README.md new file mode 100644 index 00000000..a8a0487e --- /dev/null +++ b/Programming/PORR/README.md @@ -0,0 +1,23 @@ +cd code + +pyenv init + +COPY AND PASTE PYENV CONFIGURATION TO YOUR SHELL CONFIG + +source ~/.zshrc (or your shell) + +pyenv install 3.11 + +pyenv global 3.11 + +Ensure that python DID change its version + +python --version + +python -m venv ./venv + +source ./venv/bin/activate + +pip install -r requirements.txt + +python main.py diff --git a/Programming/PORR/code/README.md b/Programming/PORR/code/README.md new file mode 100644 index 00000000..0da000d9 --- /dev/null +++ b/Programming/PORR/code/README.md @@ -0,0 +1,25 @@ +### Download matrices +## poli3.npz matrice +https://wutwaw-my.sharepoint.com/:u:/g/personal/01156786_pw_edu_pl/EZQv_ttJO1lAgiV8HOutZQQBmGGjXm5fhLmkOcvEPHqeDg?e=XygcyO +## nemeth12.npz matrice +https://wutwaw-my.sharepoint.com/:u:/g/personal/01156786_pw_edu_pl/EeCGZePTxDdOhV91lJw6viQBhOXtxTlWNoj85lyZCoA26w?e=AcOVcK +## spd_10000.npz matrice +https://wutwaw-my.sharepoint.com/:u:/g/personal/01156786_pw_edu_pl/EYTVCameBPFIk61AyG9-dwYBgdpt2PuyMyU9Unbdtu3Xww?e=pk7PLx + +Those link will expire on 24-02-2024 + +Put the matrice in code folder + +### Create environment + +`pyenv virtualenv porr` + +### Activate environment + +`pyenv activate porr` + +### Install requirements + +`pip install -r requirements.txt` + + diff --git a/Programming/PORR/code/linear_algebra_utils.py b/Programming/PORR/code/linear_algebra_utils.py new file mode 100644 index 00000000..5448545b --- /dev/null +++ b/Programming/PORR/code/linear_algebra_utils.py @@ -0,0 +1,422 @@ +import cmath +import math +import itertools +import operator +import multiprocessing +from multiprocessing import Pool +from abc import ABC, abstractmethod +from concurrent.futures import ThreadPoolExecutor +from functools import partial +from time_measurement import time_measurement, time_accumulator +import numpy as np +import dask.array as da + +class LinearAlgebraUtils: + @staticmethod + @abstractmethod + def dot_product(v1, v2): + pass + + @staticmethod + @abstractmethod + def matrix_vector_multiply(A, x): + pass + + @staticmethod + @abstractmethod + def vector_norm(v): + pass + + @staticmethod + @abstractmethod + def vector_scalar_divide(x, scalar): + pass + + @staticmethod + @abstractmethod + def matrix_scalar_multiply(A, w): + pass + + @staticmethod + @abstractmethod + def vector_vector_subtraction(v1, v2): + pass + + @staticmethod + @abstractmethod + def vector_vector_addition(v1, v2): + pass + + @staticmethod + @abstractmethod + def scalar_vector_multiply(omega, vector): + pass + + @staticmethod + @abstractmethod + def matrix_norm(A): + pass + + @staticmethod + @abstractmethod + def matrix_matrix_subtraction(A, B): + pass + + +class SequentialLinearAlgebraUtils: + @staticmethod + def dot_product(v1, v2): + return sum(x*y for x, y in zip(v1, v2)) + + @staticmethod + def matrix_vector_multiply(A, x): + return [SequentialLinearAlgebraUtils.dot_product(row, x) for row in A] + + @staticmethod + def vector_norm(v): + x_values = (x*x for x in v) + x_values_sum = sum(x_values) + return cmath.sqrt(x_values_sum).real + + @staticmethod + def vector_scalar_divide(x, scalar): + return [xi / scalar for xi in x] + + @staticmethod + def matrix_scalar_multiply(A, w): + return A * w + + @staticmethod + def vector_vector_subtraction(v1, v2): + return [x-y for x, y in zip(v1, v2)] + + @staticmethod + def vector_vector_addition(v1, v2): + return [x+y for x, y in zip(v1, v2)] + + @staticmethod + def scalar_vector_multiply(omega, vector): + return [omega * x for x in vector] + + @staticmethod + def matrix_norm(A): + return math.sqrt(sum(sum(element ** 2 for element in row) for row in A)) + + @staticmethod + def matrix_matrix_subtraction(A, B): + return [[A[i][j] - B[i][j] for j in range(len(A[0]))] for i in range(len(A))] + + +class ThreadsLinearAlgebraUtils: + NUM_THREADS = multiprocessing.cpu_count() + + @staticmethod + def get_chunk_size(data): + num_elements = len(data) + num_threads = min(ThreadsLinearAlgebraUtils.NUM_THREADS, num_elements) + chunk_size = num_elements // num_threads + remainder = num_elements % num_threads + return chunk_size, num_threads, remainder + + @staticmethod + def divide_vectors_to_chunks(v1, v2): + chunk_size, num_threads, remainder = ThreadsLinearAlgebraUtils.get_chunk_size(v1) + + chunks = [] + start = 0 + for i in range(num_threads): + end = start + chunk_size + (1 if i < remainder else 0) + chunks.append((v1[start:end], v2[start:end])) + start = end + + return chunks + + @staticmethod + def divide_vector_or_matrix_to_chunks(v): + chunk_size, num_threads, remainder = ThreadsLinearAlgebraUtils.get_chunk_size(v) + + chunks = [] + start = 0 + for i in range(num_threads): + end = start + chunk_size + (1 if i < remainder else 0) + chunks.append(v[start:end]) + start = end + + return chunks + + @staticmethod + @time_measurement(time_accumulator) + def matrix_vector_multiply(A, x): + chunks = ThreadsLinearAlgebraUtils.divide_vector_or_matrix_to_chunks(A) + with ThreadPoolExecutor(max_workers=ThreadsLinearAlgebraUtils.NUM_THREADS) as executor: + func = partial(SequentialLinearAlgebraUtils.matrix_vector_multiply, x=x) + results = executor.map(func, chunks) + return [item for sublist in results for item in sublist] + + @staticmethod + @time_measurement(time_accumulator) + def vector_norm(v): + chunks = ThreadsLinearAlgebraUtils.divide_vector_or_matrix_to_chunks(v) + + def partial_norm(chunk): + return sum(x * x for x in chunk) + + with ThreadPoolExecutor(max_workers=ThreadsLinearAlgebraUtils.NUM_THREADS) as executor: + results = executor.map(partial_norm, chunks) + total_sum = sum(results) + return total_sum**0.5 + + @staticmethod + @time_measurement(time_accumulator) + def vector_vector_subtraction(v1, v2): + chunks = ThreadsLinearAlgebraUtils.divide_vectors_to_chunks(v1, v2) + with ThreadPoolExecutor(max_workers=ThreadsLinearAlgebraUtils.NUM_THREADS) as executor: + results = executor.map(lambda pair: SequentialLinearAlgebraUtils.vector_vector_subtraction(*pair), chunks) + return [item for sublist in results for item in sublist] + + + @staticmethod + @time_measurement(time_accumulator) + def vector_vector_addition(v1, v2): + chunks = ThreadsLinearAlgebraUtils.divide_vectors_to_chunks(v1, v2) + with ThreadPoolExecutor(max_workers=ThreadsLinearAlgebraUtils.NUM_THREADS) as executor: + results = executor.map(lambda pair: SequentialLinearAlgebraUtils.vector_vector_addition(*pair), chunks) + return [item for sublist in results for item in sublist] + + @staticmethod + @time_measurement(time_accumulator) + def scalar_vector_multiply(omega, vector): + chunks = ThreadsLinearAlgebraUtils.divide_vector_or_matrix_to_chunks(vector) + with ThreadPoolExecutor(max_workers=ThreadsLinearAlgebraUtils.NUM_THREADS) as executor: + results = executor.map(lambda chunk: SequentialLinearAlgebraUtils.scalar_vector_multiply(omega, chunk), chunks) + + return [item for sublist in results for item in sublist] + + +@time_measurement(time_accumulator) +def process_row(params): + A, k, i = params + factor = A[i][k] / A[k][k] + return [A[i][j] - factor * A[k][j] for j in range(len(A[0]))] + +@time_measurement(time_accumulator) +def divide_by_scalar(pair): + xi, scalar = pair + return xi / scalar + +@time_measurement(time_accumulator) +def multiply_by_scalar(pair): + element, scalar = pair + return element * scalar + + +class ProcessLinearAlgebraUtils: + @staticmethod + @time_measurement(time_accumulator) + def dot_product(v1, v2): + with Pool() as pool: + result = pool.starmap(ProcessLinearAlgebraUtils.multiply_elements, zip(v1, v2)) + return sum(result) + + @staticmethod + @time_measurement(time_accumulator) + def multiply_elements(x, y): + return x * y + + @staticmethod + @time_measurement(time_accumulator) + def matrix_vector_multiply_row(params): + row, vector = params + return SequentialLinearAlgebraUtils.dot_product(row, vector) + + @staticmethod + @time_measurement(time_accumulator) + def matrix_vector_multiply(A, x): + with Pool() as pool: + result = pool.map(ProcessLinearAlgebraUtils.matrix_vector_multiply_row, [(row, x) for row in A]) + return list(result) + + @staticmethod + @time_measurement(time_accumulator) + def vector_norm(v): + with Pool() as pool: + squared = pool.map(ProcessLinearAlgebraUtils.square, v) + return math.sqrt(sum(squared)) + + @staticmethod + @time_measurement(time_accumulator) + def square(x): + return x * x + + @staticmethod + @time_measurement(time_accumulator) + def vector_scalar_divide(x, scalar): + with Pool() as pool: + result = pool.map(divide_by_scalar, [(xi, scalar) for xi in x]) + return list(result) + + @staticmethod + @time_measurement(time_accumulator) + def divide_vector_by_scalar(x, scalar): + with Pool() as pool: + result = pool.map(ProcessLinearAlgebraUtils.vector_scalar_divide, [(xi, scalar) for xi in x]) + return list(result) + + @staticmethod + @time_measurement(time_accumulator) + def matrix_scalar_multiply_row(params): + row, w = params + return [w * element for element in row] + + @staticmethod + @time_measurement(time_accumulator) + def matrix_scalar_multiply(A, w): + with Pool() as pool: + result = pool.map(ProcessLinearAlgebraUtils.matrix_scalar_multiply_row, [(row, w) for row in A]) + return result + + @staticmethod + @time_measurement(time_accumulator) + def vector_vector_operation(params): + v1, v2, op = params + return op(v1, v2) + + @staticmethod + @time_measurement(time_accumulator) + def vector_vector_subtraction(v1, v2): + with Pool() as pool: + result = pool.map(ProcessLinearAlgebraUtils.vector_vector_operation, zip(v1, v2, itertools.repeat(operator.sub))) + return list(result) + + @staticmethod + @time_measurement(time_accumulator) + def vector_vector_addition(v1, v2): + with Pool() as pool: + result = pool.map(ProcessLinearAlgebraUtils.vector_vector_operation, zip(v1, v2, itertools.repeat(operator.add))) + return list(result) + + @staticmethod + @time_measurement(time_accumulator) + def scalar_vector_multiply(omega, vector): + with Pool() as pool: + result = pool.map(multiply_by_scalar, [(element, omega) for element in vector]) + return list(result) + + @staticmethod + @time_measurement(time_accumulator) + def sum_of_squares(row): + return sum(x ** 2 for x in row) + + @staticmethod + @time_measurement(time_accumulator) + def matrix_norm(A): + with Pool() as pool: + row_sums = pool.map(ProcessLinearAlgebraUtils.sum_of_squares, A) + return math.sqrt(sum(row_sums)) + + @staticmethod + @time_measurement(time_accumulator) + def subtract_rows(row_from_A, row_from_B): + return [a - b for a, b in zip(row_from_A, row_from_B)] + + @staticmethod + @time_measurement(time_accumulator) + def matrix_matrix_subtraction(A, B): + with Pool() as pool: + result = pool.starmap(ProcessLinearAlgebraUtils.subtract_rows, zip(A, B)) + return result + +class DistributedArraysLinearAlgebraUtils(ABC): + @staticmethod + @time_measurement(time_accumulator) + def dot_product(v1, v2): + dv1 = da.from_array(v1, chunks='auto') + dv2 = da.from_array(v2, chunks='auto') + return da.dot(dv1, dv2).compute() + + @staticmethod + @time_measurement(time_accumulator) + def matrix_vector_multiply(A, x): + dA = da.from_array(A, chunks='auto') + dx = da.from_array(x, chunks='auto') + return da.dot(dA, dx).compute().tolist() + + @staticmethod + @time_measurement(time_accumulator) + def vector_norm(v): + dv = da.from_array(v, chunks='auto') + return da.linalg.norm(dv).compute() + + @staticmethod + @time_measurement(time_accumulator) + def vector_scalar_divide(x, scalar): + dx = da.from_array(x, chunks='auto') + return (dx / scalar).compute().tolist() + + @staticmethod + @time_measurement(time_accumulator) + def matrix_scalar_multiply(A, w): + dA = da.from_array(A, chunks='auto') + return (dA * w).compute().tolist() + + @staticmethod + @time_measurement(time_accumulator) + def vector_vector_subtraction(v1, v2): + dv1 = da.from_array(v1, chunks='auto') + dv2 = da.from_array(v2, chunks='auto') + return (dv1 - dv2).compute().tolist() + + @staticmethod + @time_measurement(time_accumulator) + def vector_vector_addition(v1, v2): + dv1 = da.from_array(v1, chunks='auto') + dv2 = da.from_array(v2, chunks='auto') + return (dv1 + dv2).compute().tolist() + + @staticmethod + @time_measurement(time_accumulator) + def scalar_vector_multiply(omega, vector): + dvector = da.from_array(vector, chunks='auto') + return (omega * dvector).compute().tolist() + + @staticmethod + @time_measurement(time_accumulator) + def matrix_norm(A): + dA = da.from_array(A, chunks='auto') + return da.linalg.norm(dA).compute() + + @staticmethod + @time_measurement(time_accumulator) + def matrix_matrix_subtraction(A, B): + dA = da.from_array(A, chunks='auto') + dB = da.from_array(B, chunks='auto') + return (dA - dB).compute().tolist() + + @staticmethod + @time_measurement(time_accumulator) + def gaussian_elimination(A, b): + try: + dA = da.from_array(A, chunks='auto') + db = da.from_array(b, chunks='auto') + Ab = da.hstack([dA, db[:, None]]) + Ab = Ab.persist() + + def elimination_step(Ab, k): + n = Ab.shape[0] + max_index = da.argmax(da.abs(Ab[k:, k])) + k + Ab[[k, max_index]] = Ab[[max_index, k]] + Ab = Ab.persist() + factor = Ab[k + 1:, k] / Ab[k, k] + Ab[k + 1:] -= factor[:, None] * Ab[k] + return Ab + + for k in range(A.shape[0]): + Ab = elimination_step(Ab, k) + + x = da.zeros(A.shape[0]) + for i in range(A.shape[0] - 1, -1, -1): + x[i] = (Ab[i, -1] - da.dot(Ab[i, i + 1:-1], x[i + 1:])) / Ab[i, i] + return x.compute().tolist() + except Exception as e: + print(f"Error during Gaussian elimination: {e}") + return None diff --git a/Programming/PORR/code/main.py b/Programming/PORR/code/main.py new file mode 100644 index 00000000..b468d54c --- /dev/null +++ b/Programming/PORR/code/main.py @@ -0,0 +1,5 @@ +import pytest + +if __name__ == "__main__": + # Run pytest and exit with the appropriate status code + pytest.main(["-v", "tests.py"]) diff --git a/Programming/PORR/code/matrix_generator.py b/Programming/PORR/code/matrix_generator.py new file mode 100644 index 00000000..024403a3 --- /dev/null +++ b/Programming/PORR/code/matrix_generator.py @@ -0,0 +1,86 @@ +import numpy as np +import scipy.io +import os + +class MatrixGenerator: + @staticmethod + def generate_spd_matrix(n: int) -> np.ndarray: + A = np.random.rand(n, n) + spd_matrix = np.dot(A, A.T) + n * MatrixGenerator.generate_identity_matrix(n) # Adding n*I ensures positive definiteness + return spd_matrix + + @staticmethod + def generate_identity_matrix(size): + return np.eye(size) + + @staticmethod + def generate_alternating_vector(size): + return np.tile([1, 2], int(np.ceil(size / 2)))[:size] + + @staticmethod + def get_matrix_from_file(file_path, problem): + mat_contents = scipy.io.loadmat(file_path) + problem_record = mat_contents['Problem'][0][0] + A = problem_record[problem] + if scipy.sparse.issparse(A): + A_dense = A.todense() + else: + A_dense = A + return np.array(A_dense) + + @staticmethod + def generate_matrix_and_vector(type, size=None): + if type == 'spd': + if size is None: + raise ValueError("Size must be provided for SPD matrix generation.") + try: + matrix, vector, lambda_min, lambda_max = MatrixGenerator.load_from_file("spd_"+str(size)+".npz") + except FileNotFoundError as e: + matrix = MatrixGenerator.generate_spd_matrix(size) + vector = np.random.uniform(-1, 1, size) + lambda_min, lambda_max = MatrixGenerator.calculate_eigenvalues(matrix) + MatrixGenerator.save_to_file(matrix, vector, lambda_min, lambda_max, "spd_"+str(size)+".npz") + elif type == 'nemeth12': + try: + matrix, vector, lambda_min, lambda_max = MatrixGenerator.load_from_file("nemeth12.npz") + except FileNotFoundError as e: + matrix = -1 * MatrixGenerator.get_matrix_from_file("nemeth12.mat", 1) + size = matrix.shape[0] + vector = MatrixGenerator.generate_alternating_vector(size) + lambda_min, lambda_max = MatrixGenerator.calculate_eigenvalues(matrix) + MatrixGenerator.save_to_file(matrix, vector, lambda_min, lambda_max, "nemeth12.npz") + elif type == 'poli3': + try: + matrix, vector, lambda_min, lambda_max = MatrixGenerator.load_from_file("poli3.npz") + except FileNotFoundError as e: + matrix = MatrixGenerator.get_matrix_from_file("poli3.mat", 2) + size = matrix.shape[0] + vector = MatrixGenerator.generate_alternating_vector(size) + lambda_min, lambda_max = MatrixGenerator.calculate_eigenvalues(matrix) + MatrixGenerator.save_to_file(matrix, vector, lambda_min, lambda_max, "poli3.npz") + else: + raise ValueError("Invalid type specified. Choose 'spd', 'nemeth12', or 'poli3'.") + + return matrix, vector, lambda_min, lambda_max + + @staticmethod + def calculate_eigenvalues(A): + eigenvalues = np.linalg.eigvals(A) + lambda_min = np.min(eigenvalues) + lambda_max = np.max(eigenvalues) + return lambda_min, lambda_max + + @staticmethod + def save_to_file(matrix, vector, lambda_min, lambda_max, file_path): + np.savez(file_path, matrix=matrix, vector=vector, lambda_min=lambda_min, lambda_max=lambda_max) + + def load_from_file(file_path): + if not os.path.exists(file_path): + raise FileNotFoundError(f"The file {file_path} does not exist.") + data = np.load(file_path) + matrix = data['matrix'] + vector = data['vector'] + lambda_min = data['lambda_min'] + lambda_max = data['lambda_max'] + return matrix, vector, lambda_min, lambda_max + diff --git a/Programming/PORR/code/nemeth12.mat b/Programming/PORR/code/nemeth12.mat new file mode 100644 index 00000000..59b31b2a Binary files /dev/null and b/Programming/PORR/code/nemeth12.mat differ diff --git a/Programming/PORR/code/poli3.mat b/Programming/PORR/code/poli3.mat new file mode 100644 index 00000000..24e4abc2 Binary files /dev/null and b/Programming/PORR/code/poli3.mat differ diff --git a/Programming/PORR/code/processing_type.py b/Programming/PORR/code/processing_type.py new file mode 100644 index 00000000..8a5e622b --- /dev/null +++ b/Programming/PORR/code/processing_type.py @@ -0,0 +1,7 @@ +from enum import Enum, auto + +class ProcessingType(Enum): + SEQUENTIAL = auto() + THREADS = auto() + PROCESSES = auto() + DISTRIBUTED_ARRAYS = auto() diff --git a/Programming/PORR/code/requirements.txt b/Programming/PORR/code/requirements.txt new file mode 100644 index 00000000..72bf1c03 --- /dev/null +++ b/Programming/PORR/code/requirements.txt @@ -0,0 +1,4 @@ +scipy +pytest +dask +numba \ No newline at end of file diff --git a/Programming/PORR/code/richardson_method.py b/Programming/PORR/code/richardson_method.py new file mode 100644 index 00000000..53053514 --- /dev/null +++ b/Programming/PORR/code/richardson_method.py @@ -0,0 +1,84 @@ +import linear_algebra_utils as linAlg +from matrix_generator import MatrixGenerator +from processing_type import ProcessingType +from time_measurement import time_measurement, time_accumulator, tests_time +import time +import gc +import numpy as np + +class RichardsonMethod: + @time_measurement(time_accumulator) + def __init__(self, method: ProcessingType, A, b, lambda_min, lambda_max, max_iterations, size: int, x0=None, tol=1e-5): + self.LinAlg = self.assign_LinAlgType(method) + self.A = A + self.b = b + self.x0 = x0 if x0 is not None else [0.0] * len(b) + self.max_iterations = max_iterations + self.tol = tol + # self.I = MatrixGenerator.generate_identity_matrix(size) + self.lambda_min = lambda_min + self.lambda_max = lambda_max + if self.lambda_min < 0: + raise ValueError("Matrix A is not positive semi-definite.") + self.omega = RichardsonMethod.calculate_omega(self.lambda_min, self.lambda_max) + + @staticmethod + def calculate_omega(lambda_min, lambda_max): + return 2 / (lambda_min + lambda_max) + + @staticmethod + def convergence_norm(LinAlgType, A, omega, I) -> bool: + wA = LinAlgType.matrix_scalar_multiply(A, omega) + IMinuswA = LinAlgType.matrix_matrix_subtraction(I, wA) + norm = LinAlgType.matrix_norm(IMinuswA) + return norm + + @staticmethod + def assign_LinAlgType(method): + methods = { + ProcessingType.SEQUENTIAL: linAlg.SequentialLinearAlgebraUtils, + ProcessingType.THREADS: linAlg.ThreadsLinearAlgebraUtils, + ProcessingType.PROCESSES: linAlg.ProcessLinearAlgebraUtils, + ProcessingType.DISTRIBUTED_ARRAYS: linAlg.DistributedArraysLinearAlgebraUtils + } + + try: + return methods[method] + except KeyError: + raise ValueError("Unknown method, please use 'SEQUENTIAL', 'THREADS' or 'PROCESSES'.") + + def solve(self): + gc.disable() + time_accumulator.total_time = 0 + start = time.perf_counter() + x = self.x0[:] + # if RichardsonMethod.convergence_norm(self.LinAlg, self.A, self.omega, self.I) >= 1: + # return RichardsonMethod.convergence_norm(self.LinAlg, self.A, self.omega, self.I), "Richardson method for those values will NOT converge", + + for iteration in range(self.max_iterations): + Ax = self.LinAlg.matrix_vector_multiply(self.A, x) + residual = self.LinAlg.vector_vector_subtraction(self.b, Ax) + x = self.LinAlg.vector_vector_addition(x, self.LinAlg.scalar_vector_multiply(self.omega, residual)) + if (linAlg.SequentialLinearAlgebraUtils.vector_norm(residual) < self.tol): + break + + end = time.perf_counter() + total_time = end - start + gc.enable() + + match self.LinAlg: + case linAlg.SequentialLinearAlgebraUtils: + print(f"Total: {total_time:.3e}s, Tests time: {tests_time.total_time:.3e}s") + case linAlg.ThreadsLinearAlgebraUtils: + sequential_time = total_time - time_accumulator.total_time + print(f"Total: {total_time:.3e}s, Seq: {sequential_time:.3e}s, Parallel (threads): {time_accumulator.total_time:.3e}s, Tests time: {tests_time.total_time:.3e}s") + case linAlg.ProcessLinearAlgebraUtils: + sequential_time = total_time - time_accumulator.total_time + print(f"Total: {total_time:.3e}s, Seq: {sequential_time:.3e}s, Parallel (processes): {time_accumulator.total_time:.3e}s, Tests time: {tests_time.total_time:.3e}s") + case linAlg.DistributedArraysLinearAlgebraUtils: + sequential_time = total_time - time_accumulator.total_time + print(f"Total: {total_time:.3e}s, Seq: {sequential_time:.3e}s, Parallel (distributed arrays): {time_accumulator.total_time:.3e}s, Tests time: {tests_time.total_time:.3e}s") + case _: + print("Unhandled LinAlg type") + + return x, 0 diff --git a/Programming/PORR/code/richardson_mpi.py b/Programming/PORR/code/richardson_mpi.py new file mode 100644 index 00000000..fe0e29ca --- /dev/null +++ b/Programming/PORR/code/richardson_mpi.py @@ -0,0 +1,147 @@ +from mpi4py import MPI +import numpy as np +import time + +def richardson_parallel(A, b, lambda_min, lambda_max, tol=1e-5, max_iter=10000): + comm = MPI.COMM_WORLD + rank = comm.Get_rank() + size = comm.Get_size() + + # Rozmiar macierzy A + n = A.shape[0] + + # Obliczanie wartości własnych tylko na jednym procesie + if rank == 0: + # eigenvalues = np.linalg.eigvals(A) + # lambda_min = np.min(eigenvalues) + # lambda_max = np.max(eigenvalues) + omega = 2 / (lambda_min + lambda_max) + else: + omega = None + + # Rozgłoszenie omega do wszystkich procesów + omega = comm.bcast(omega, root=0) + + # Inicjalizacja wektora rozwiązania jako float64 + x = np.zeros_like(b, dtype=np.float64) + + # Dzielimy pracę między procesy + local_rows = n // size + start_row = rank * local_rows + end_row = start_row + local_rows if rank != size - 1 else n + + # Przydzielenie lokalnych porcji A i b + local_A = A[start_row:end_row, :] + local_b = b[start_row:end_row] + + # Lokalny wektor residuum + local_r = np.zeros_like(local_b, dtype=np.float64) + + # Globalny wektor residuum (pełny rozmiar b) + global_r = np.zeros_like(b, dtype=np.float64) + + start_time = time.time() + + for i in range(max_iter): + # Oblicz lokalny residuum r = b - A @ x + local_r[:] = local_b - np.dot(local_A, x) + + # Tworzymy tymczasowy wektor o pełnym rozmiarze i kopiujemy lokalne dane + temp_r = np.zeros_like(b, dtype=np.float64) + temp_r[start_row:end_row] = local_r + + # Sumujemy lokalne residuum przez wszystkie procesy + comm.Allreduce(temp_r, global_r, op=MPI.SUM) + + # Aktualizujemy x równolegle na wszystkich procesach + x += omega * global_r + + # Sprawdzamy warunek stopu (norma residuum) + if np.linalg.norm(global_r) < tol: + break + + end_time = time.time() + + execution_time = end_time - start_time + + return x, execution_time + + + +def check_solution(A, b, x_approx, tolerance=8e-3): + x_true = np.linalg.solve(A, b) + error = np.linalg.norm(x_true - x_approx) + return error < tolerance, error + +if __name__ == "__main__": + comm = MPI.COMM_WORLD + rank = comm.Get_rank() + + from matrix_generator import MatrixGenerator + sizes = [2, 5, 10, 50, 80, 100, 300, 500, 750, 1000, 5000, 10000] + for i in sizes: + if rank == 0: + A, b, lambda_min, lambda_max = MatrixGenerator.generate_matrix_and_vector('spd', size=i) + else: + A = None + b = None + lambda_min = None + lambda_max = None + + A = comm.bcast(A, root=0) + b = comm.bcast(b, root=0) + + # Rozwiązanie przy użyciu zrównoleglonej metody Richardsona + x, time_taken = richardson_parallel(A, b, lambda_min, lambda_max) + + # Sprawdzanie poprawności rozwiązania (na procesie 0) + if rank == 0: + print(f"Spd matrix with size {i}") + is_correct, error = check_solution(A, b, x) + print("Czas wykonania [s]:", time_taken) + print("Czy rozwiązanie jest poprawne:", "Tak" if is_correct else "Nie") + print("Błąd rozwiązania:", error) + + if rank == 0: + A, b, lambda_min, lambda_max = MatrixGenerator.generate_matrix_and_vector('nemeth12') + else: + A = None + b = None + lambda_min = None + lambda_max = None + + A = comm.bcast(A, root=0) + b = comm.bcast(b, root=0) + + # Rozwiązanie przy użyciu zrównoleglonej metody Richardsona + x, time_taken = richardson_parallel(A, b, lambda_min, lambda_max) + + # Sprawdzanie poprawności rozwiązania (na procesie 0) + if rank == 0: + print(f"Nemeth12 matrix") + is_correct, error = check_solution(A, b, x) + print("Czas wykonania [s]:", time_taken) + print("Czy rozwiązanie jest poprawne:", "Tak" if is_correct else "Nie") + print("Błąd rozwiązania:", error) + + if rank == 0: + A, b, lambda_min, lambda_max = MatrixGenerator.generate_matrix_and_vector('poli3') + else: + A = None + b = None + lambda_min = None + lambda_max = None + + A = comm.bcast(A, root=0) + b = comm.bcast(b, root=0) + + # Rozwiązanie przy użyciu zrównoleglonej metody Richardsona + x, time_taken = richardson_parallel(A, b, lambda_min, lambda_max) + + # Sprawdzanie poprawności rozwiązania (na procesie 0) + if rank == 0: + print(f"Poli3 matrix") + is_correct, error = check_solution(A, b, x) + print("Czas wykonania [s]:", time_taken) + print("Czy rozwiązanie jest poprawne:", "Tak" if is_correct else "Nie") + print("Błąd rozwiązania:", error) diff --git a/Programming/PORR/code/spd_10.npz b/Programming/PORR/code/spd_10.npz new file mode 100644 index 00000000..b4cf129f Binary files /dev/null and b/Programming/PORR/code/spd_10.npz differ diff --git a/Programming/PORR/code/spd_100.npz b/Programming/PORR/code/spd_100.npz new file mode 100644 index 00000000..be506590 Binary files /dev/null and b/Programming/PORR/code/spd_100.npz differ diff --git a/Programming/PORR/code/spd_1000.npz b/Programming/PORR/code/spd_1000.npz new file mode 100644 index 00000000..ce545978 Binary files /dev/null and b/Programming/PORR/code/spd_1000.npz differ diff --git a/Programming/PORR/code/spd_2.npz b/Programming/PORR/code/spd_2.npz new file mode 100644 index 00000000..46b84f3a Binary files /dev/null and b/Programming/PORR/code/spd_2.npz differ diff --git a/Programming/PORR/code/spd_300.npz b/Programming/PORR/code/spd_300.npz new file mode 100644 index 00000000..09a0ad94 Binary files /dev/null and b/Programming/PORR/code/spd_300.npz differ diff --git a/Programming/PORR/code/spd_5.npz b/Programming/PORR/code/spd_5.npz new file mode 100644 index 00000000..135931eb Binary files /dev/null and b/Programming/PORR/code/spd_5.npz differ diff --git a/Programming/PORR/code/spd_50.npz b/Programming/PORR/code/spd_50.npz new file mode 100644 index 00000000..64142118 Binary files /dev/null and b/Programming/PORR/code/spd_50.npz differ diff --git a/Programming/PORR/code/spd_500.npz b/Programming/PORR/code/spd_500.npz new file mode 100644 index 00000000..80bd85d4 Binary files /dev/null and b/Programming/PORR/code/spd_500.npz differ diff --git a/Programming/PORR/code/spd_5000.npz b/Programming/PORR/code/spd_5000.npz new file mode 100644 index 00000000..11b44646 --- /dev/null +++ b/Programming/PORR/code/spd_5000.npz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2aa92da25acfeafeb204ee81e2feafd04b39f3a86fad2765f538ab95e4581d96 +size 200041030 diff --git a/Programming/PORR/code/spd_750.npz b/Programming/PORR/code/spd_750.npz new file mode 100644 index 00000000..c38d39a9 Binary files /dev/null and b/Programming/PORR/code/spd_750.npz differ diff --git a/Programming/PORR/code/tests.py b/Programming/PORR/code/tests.py new file mode 100644 index 00000000..1bc1ba20 --- /dev/null +++ b/Programming/PORR/code/tests.py @@ -0,0 +1,105 @@ +import pytest +import numpy as np +from matrix_generator import MatrixGenerator +from richardson_method import RichardsonMethod +from threads_indep import RichardsonMethodThreads +from processing_type import ProcessingType +from time_measurement import time_measurement, tests_time + +def calculate_norm_numpy(I, w, A): + difference = I - w * A + norm = np.linalg.norm(difference) + return norm + +def calculate_eigenvalues(A): + eigenvalues = np.linalg.eigvals(A) + lambda_min = np.min(eigenvalues) + lambda_max = np.max(eigenvalues) + return lambda_min, lambda_max + +def calcualte_norm_from_matrix_numpy(A, n): + lambda_min, lambda_max = calculate_eigenvalues(A) + omega = 2 / (lambda_min + lambda_max) + I = np.eye(n) + return calculate_norm_numpy(I, omega, A) + +@time_measurement(tests_time) +def solution_lib(A, b): + return np.linalg.solve(A, b) + +@pytest.mark.parametrize("n", [ + 2, + 5, + 10, + 50, + 100, + 300, + 500, + 750, + 1000, + 5000, + 10000 + ]) +@pytest.mark.parametrize("processing_type", [ + ProcessingType.SEQUENTIAL, + ProcessingType.THREADS, + ProcessingType.PROCESSES, + ProcessingType.DISTRIBUTED_ARRAYS + ]) +@pytest.mark.parametrize("matrix_type", [ + "spd", + "nemeth12", + "poli3" + ]) +def test_richardson_vs_cg(n: int, processing_type: ProcessingType, matrix_type: str, capsys): + print("matrix type: ", matrix_type) + print("matrix size: ", n if matrix_type == "spd" else "fixed") + tolerance = 8e-3 + max_iterations=100 + if matrix_type in ["nemeth12", "poli3"] and n != 2: + pytest.skip("Fixed matrix size for nemeth12 and poli3, skipping redundant runs.") + + if matrix_type == "spd": + A, b, lambda_min, lambda_max = MatrixGenerator.generate_matrix_and_vector('spd', size=n) + elif matrix_type == "poli3": + A, b, lambda_min, lambda_max = MatrixGenerator.generate_matrix_and_vector('poli3') + elif matrix_type == "nemeth12": + A, b, lambda_min, lambda_max = MatrixGenerator.generate_matrix_and_vector('nemeth12') + else: + raise ValueError("Invalid matrix type specified. Choose 'spd', 'poli3', or 'nemeth12'.") + + solution_richardson, info_richardson = None, None + + if processing_type != ProcessingType.THREADS: + richardson_solver = RichardsonMethod(processing_type, A, b, lambda_min, lambda_max, max_iterations, size=A.shape[0], tol=1e-7) + with capsys.disabled(): + solution_richardson, info_richardson = richardson_solver.solve() + else: + with capsys.disabled(): + solution_richardson, info_richardson = RichardsonMethodThreads(A, b, lambda_min, lambda_max, max_iterations, tol=1e-7) + + # Przechwytywanie wyjścia po solve + captured = capsys.readouterr() + print("Captured output:", captured.out) + + solution = solution_lib(A,b) + + assert_converged(solution_richardson, info_richardson, solution, tolerance, A, n) + +def assert_converged(solution_richardson, info_richardson, solution, tolerance, A, n): + if info_richardson == "Richardson method for those values will NOT converge": + numpy_norm = calcualte_norm_from_matrix_numpy(A, n) + print("Numpy norm: ", numpy_norm, " Richardson norm: ", solution_richardson) + assert False, "Richardson did not converge" + else: + difference = np.linalg.norm(solution_richardson - solution) + print(f"Difference between Richardson and numpy solutions: {difference:.8f}") + if difference < tolerance: + print("Both Richardson and numpy method converged and calculated correct values.") + else: + print("Solution numpy:\n", solution) + print("Solution Richardson:\n", solution_richardson) + assert difference < tolerance, f"The solutions are different! Difference: {difference:.8f}" + +if __name__ == "__main__": + pytest.main() diff --git a/Programming/PORR/code/threads_indep.py b/Programming/PORR/code/threads_indep.py new file mode 100644 index 00000000..08103025 --- /dev/null +++ b/Programming/PORR/code/threads_indep.py @@ -0,0 +1,93 @@ +import gc +import time +import numpy as np +from numba import njit, prange +from time_measurement import time_measurement_longest, longest_threads_time_accumulator, tests_time +import linear_algebra_utils as linAlg + + +@njit(parallel=True) +def numba_matrix_vector_multiply(A, input_x, Ax): + for i in prange(len(A)): + acc = 0.0 + for j in range(len(input_x)): + acc += A[i][j] * input_x[j] + Ax[i] = acc + +@njit(parallel=True) +def numba_vector_vector_subtraction(b, Ax, residual): + for i in prange(len(b)): + residual[i] = b[i] - Ax[i] + +@njit(nopython=True) +def numba_scalar_vector_multiply(omega, vector, result): + omega_real = omega.real + for i in range(len(vector)): + result[i] = omega_real * vector[i] + +@njit(parallel=True) +def numba_vector_vector_addition(input_x, vector, output_x): + for i in prange(len(input_x)): + output_x[i] = input_x[i] + vector[i] + + +# Funkcje z dekoratorem +@time_measurement_longest(longest_threads_time_accumulator) +def matrix_vector_multiply(A, input_x, Ax): + numba_matrix_vector_multiply(A, input_x, Ax) + +@time_measurement_longest(longest_threads_time_accumulator) +def vector_vector_subtraction(b, Ax, residual): + numba_vector_vector_subtraction(b, Ax, residual) + +@time_measurement_longest(longest_threads_time_accumulator) +def scalar_vector_multiply(omega, vector, result): + numba_scalar_vector_multiply(omega, vector, result) + +@time_measurement_longest(longest_threads_time_accumulator) +def vector_vector_addition(input_x, vector, output_x): + numba_vector_vector_addition(input_x, vector, output_x) + +# Metoda Richardson z obsługą wątków +def RichardsonMethodThreads(A, b, lambda_min, lambda_max, max_iterations, x0=None, tol=1e-5): + longest_threads_time_accumulator.hard_reset() + + gc.disable() + start_time = time.perf_counter() + + x0 = x0 if x0 is not None else [0.0] * len(b) + x = x0[:] + + omega = 2 / (lambda_min + lambda_max) + n = len(b) + + for iteration in range(max_iterations): + Ax = [0.0] * n + matrix_vector_multiply(A, x, Ax) + longest_threads_time_accumulator.save_lap_and_reset() + + residual = [0.0] * n + vector_vector_subtraction(b, Ax, residual) + longest_threads_time_accumulator.save_lap_and_reset() + + change_vector = [0.0] * n + scalar_vector_multiply(omega, residual, change_vector) + longest_threads_time_accumulator.save_lap_and_reset() + + _x = [0.0] * n + vector_vector_addition(x, change_vector, _x) + longest_threads_time_accumulator.save_lap_and_reset() + + x = _x[:] + + if linAlg.SequentialLinearAlgebraUtils.vector_norm(residual) < tol: + break + + end_time = time.perf_counter() + gc.enable() + total_time = end_time - start_time + sequential_time = total_time - longest_threads_time_accumulator.total_time + + print(f"Total: {total_time:.3e}s, Seq: {sequential_time:.3e}s, Parallel (threads): {longest_threads_time_accumulator.total_time:.3e}s, Tests time: {tests_time.total_time:.3e}s") + + return x, 0 diff --git a/Programming/PORR/code/threads_indep_old.py b/Programming/PORR/code/threads_indep_old.py new file mode 100644 index 00000000..7c5ecc5e --- /dev/null +++ b/Programming/PORR/code/threads_indep_old.py @@ -0,0 +1,103 @@ +import multiprocessing +import gc +import time +from concurrent.futures import ThreadPoolExecutor +from time_measurement import time_measurement_longest, longest_threads_time_accumulator, tests_time +import linear_algebra_utils as linAlg + + +@time_measurement_longest(longest_threads_time_accumulator) +def matrix_vector_multiply(A, input_x, start, end, Ax): + Ax[start:end] = [sum(x*y for x, y in zip(row, input_x)) for row in A[start:end]] + +@time_measurement_longest(longest_threads_time_accumulator) +def vector_vector_subtraction(b, Ax, start, end, residual): + residual[start:end] = [x-y for x, y in zip(b[start:end], Ax[start:end])] + +@time_measurement_longest(longest_threads_time_accumulator) +def scalar_vector_multiply(omega, vector, start, end, result): + result[start:end] = [omega * x for x in vector[start:end]] + +@time_measurement_longest(longest_threads_time_accumulator) +def vector_vector_addition(input_x, vector, start, end, output_x): + output_x[start:end] = [x+y for x, y in zip(input_x[start:end], vector[start:end])] + +def RichardsonMethodThreads(A, b, lambda_min, lambda_max, max_iterations, x0=None, tol=1e-5): + longest_threads_time_accumulator.hard_reset() + + gc.disable() + start_time = time.perf_counter() + + n = len(b) + x0 = x0 if x0 is not None else [0.0] * len(b) + x = x0[:] + omega = 2 / (lambda_min + lambda_max) + num_threads = multiprocessing.cpu_count() + chunk_size = n // num_threads + + with ThreadPoolExecutor(max_workers=num_threads) as executor: # wątki są tworzone raz i nie są niszczone + for iteration in range(max_iterations): + + Ax = [0] * len(x) # tutaj zostanie przypisany wynik z mnożenia macierzy A z wektorem x + futures = [] + + for i in range(num_threads): + start = i * chunk_size + end = n if i == num_threads - 1 else (i + 1) * chunk_size + futures.append(executor.submit(matrix_vector_multiply, A, x, start, end, Ax)) + + for future in futures: + future.result() + + longest_threads_time_accumulator.save_lap_and_reset() + residual = [0] * len(b) # tutaj zostanie przypisany wynik z vector_vector_subtraction + futures = [] + + for i in range(num_threads): + start = i * chunk_size + end = n if i == num_threads - 1 else (i + 1) * chunk_size + futures.append(executor.submit(vector_vector_subtraction, b, Ax, start, end, residual)) + + for future in futures: + future.result() + + longest_threads_time_accumulator.save_lap_and_reset() + change_vector = [0] * len(residual) # zostanie tu przypisany wynik scalar_vector_multiply po pracy wątków + futures = [] + + for i in range(num_threads): + start = i * chunk_size + end = n if i == num_threads - 1 else (i + 1) * chunk_size + futures.append(executor.submit(scalar_vector_multiply, omega, residual, start, end, change_vector)) + + for future in futures: + future.result() + + longest_threads_time_accumulator.save_lap_and_reset() + _x = x[:] # do _x zostanie przez wątki przypisany wynik pracy w danej iteracji + futures = [] + + for i in range(num_threads): + start = i * chunk_size + end = n if i == num_threads - 1 else (i + 1) * chunk_size + futures.append(executor.submit(vector_vector_addition, x, change_vector, start, end, _x)) + + for future in futures: + future.result() + + longest_threads_time_accumulator.save_lap_and_reset() + x = _x[:] + + + if (linAlg.SequentialLinearAlgebraUtils.vector_norm(residual) < tol): + break + + + end_time = time.perf_counter() + gc.enable() + total_time = end_time - start_time + sequential_time = total_time - longest_threads_time_accumulator.total_time + + print(f"Total: {total_time:.3e}s, Seq: {sequential_time:.3e}s, Parallel (threads): {longest_threads_time_accumulator.total_time:.3e}s, Tests time: {tests_time.total_time:.3e}s") + + return x, 0 \ No newline at end of file diff --git a/Programming/PORR/code/threads_too_simple.py b/Programming/PORR/code/threads_too_simple.py new file mode 100644 index 00000000..d5ac2713 --- /dev/null +++ b/Programming/PORR/code/threads_too_simple.py @@ -0,0 +1,60 @@ +import numpy as np +import threading +import multiprocessing +import gc +import time +import sys +from time_measurement import time_measurement_longest, longest_time_accumulator, tests_time +import linear_algebra_utils as linAlg + + +@time_measurement_longest(longest_time_accumulator) +def RichardsonThread(A, b, x, _x, omega, start, end): + for i in range(start, end): + sigma = np.dot(A[i, :], _x) - A[i, i] * _x[i] + x[i] = (1 - omega) * _x[i] + omega * (b[i] - sigma) / A[i, i] + +def RichardsonMethodThreads(A, b, lambda_min, lambda_max, max_iterations, x0=None, tol=1e-5): + longest_time_accumulator.total_time = 0 + longest_time_accumulator.start = sys.float_info.max + longest_time_accumulator.end = 0 + + gc.disable() + start_time = time.perf_counter() + + n = len(b) + x0 = x0 if x0 is not None else [0.0] * len(b) + x = x0[:] + omega = 0.05#2 / (lambda_min + lambda_max) + num_threads = multiprocessing.cpu_count() + threads = [] + chunk_size = n // num_threads + max_iterations = 1000 + + for _ in range(max_iterations): + _x = x[:] + for i in range(num_threads): + start = i * chunk_size # start jest indeksem w A. Wątki otrzymują kolejny punkt startowy będący wielokrotnością rozmiaru porcji na wątek + end = n if i == num_threads - 1 else (i + 1) * chunk_size + thread = threading.Thread(target=RichardsonThread, args=(A, b, x, _x, omega, start, end)) + threads.append(thread) + thread.start() + + for thread in threads: + thread.join() + + # Ax = linAlg.SequentialLinearAlgebraUtils.matrix_vector_multiply(A, x) + # residual = linAlg.SequentialLinearAlgebraUtils.vector_vector_subtraction(b, Ax) + # if (linAlg.SequentialLinearAlgebraUtils.vector_norm(residual) < tol): + # break + + end_time = time.perf_counter() + gc.enable() + total_time = end_time - start_time + sequential_time = total_time - longest_time_accumulator.total_time + + print(f"Total: {total_time:.3e}s, Seq: {sequential_time:.3e}s, Parallel (threads): {longest_time_accumulator.total_time:.3e}s, Tests time: {tests_time.total_time:.3e}s") + + return x, 0 + + diff --git a/Programming/PORR/code/time_measurement.py b/Programming/PORR/code/time_measurement.py new file mode 100644 index 00000000..d70992a2 --- /dev/null +++ b/Programming/PORR/code/time_measurement.py @@ -0,0 +1,62 @@ +import time +import sys +from functools import wraps + +class TimeAccumulator: + def __init__(self): + self.total_time = 0 + +class ComplexTimeAcumulator: + def __init__(self): + self.hard_reset() + + def hard_reset(self): + self.total_time = 0 + self.reset() + + def reset(self): + self.lap_time = 0 + self.start = sys.float_info.max + self.end = 0 + + def save_lap_and_reset(self): + self.total_time += self.lap_time + self.reset() + + +time_accumulator = TimeAccumulator() +tests_time = TimeAccumulator() + +longest_threads_time_accumulator = ComplexTimeAcumulator() + +def time_measurement(accumulator: TimeAccumulator): + def decorator(func): + @wraps(func) + def inner(*args, **kwargs): + start = time.perf_counter() + result = func(*args, **kwargs) + end = time.perf_counter() + accumulator.total_time += end - start + return result + return inner + return decorator + +def time_measurement_longest(accumulator: ComplexTimeAcumulator): + def decorator(func): + @wraps(func) + def inner(*args, **kwargs): + start = time.perf_counter() + result = func(*args, **kwargs) + end = time.perf_counter() + if start < accumulator.start: + accumulator.start = start + if end > accumulator.end: + accumulator.end = end + accumulator.lap_time = accumulator.end - accumulator.start # "=" instead of "+=" + return result + return inner + return decorator + + + + diff --git a/Programming/PORR/push_files.sh b/Programming/PORR/push_files.sh new file mode 100755 index 00000000..8e162dc6 --- /dev/null +++ b/Programming/PORR/push_files.sh @@ -0,0 +1,33 @@ +#!/bin/bash + +# Ensure the script exits on error +set -e + +# Check if we are inside a git repository +if ! git rev-parse --is-inside-work-tree &>/dev/null; then + echo "Not inside a Git repository." + exit 1 +fi + +# Loop until there are no untracked files +while true; do + # List untracked files and their sizes, sort by size + smallest_file=$(git ls-files --others --exclude-standard -z | xargs -0 du -b | sort -n | head -n 1 | awk '{print $2}') + + # Exit if no untracked files are found + if [ -z "$smallest_file" ]; then + echo "All untracked files have been processed." + break + fi + + # Add the smallest untracked file + git add "$smallest_file" + + # Commit the added file + git commit -m "Add $(basename "$smallest_file")" + + # Push the commit to the remote + git push + + echo "Processed: $smallest_file" +done diff --git a/Programming/PORR/report/first_part/PORR_GORKA_RUDNICKI_SOBALA_RAPORT.pdf b/Programming/PORR/report/first_part/PORR_GORKA_RUDNICKI_SOBALA_RAPORT.pdf new file mode 100644 index 00000000..53d73af8 Binary files /dev/null and b/Programming/PORR/report/first_part/PORR_GORKA_RUDNICKI_SOBALA_RAPORT.pdf differ diff --git a/Programming/PORR/report/first_part/main.pdf b/Programming/PORR/report/first_part/main.pdf new file mode 100644 index 00000000..1e5bc565 Binary files /dev/null and b/Programming/PORR/report/first_part/main.pdf differ diff --git a/Programming/PORR/report/first_part/main.tex b/Programming/PORR/report/first_part/main.tex new file mode 100644 index 00000000..443ca457 --- /dev/null +++ b/Programming/PORR/report/first_part/main.tex @@ -0,0 +1,370 @@ +\documentclass[12pt]{article} +\usepackage{pgfplots} +\usepgfplotslibrary{groupplots} +\pgfplotsset{compat=1.17} +\usepackage{float} +\usepackage{graphicx} +\usepackage[polish]{babel} +\usepackage{hyperref} +\hypersetup{ + colorlinks=true, + linkcolor=blue, + filecolor=magenta, + urlcolor=blue, + pdfpagemode=FullScreen, +} +\usepackage{listings} +\lstset{ + basicstyle=\ttfamily\small, + keywordstyle=\color{blue}\bfseries, + commentstyle=\color{green!60!black}, + stringstyle=\color{red}, + numbers=left, + numberstyle=\tiny, + stepnumber=1, + numbersep=5pt, + frame=lines, + breaklines=true, + captionpos=b +} + +\title{Rozwiązanie układu równań liniowych iteracyjną metodą Richardsona \\ + Sprawozdanie, Etap II} +\author{Kacper Górka, Krzysztof Rudnicki, Aleksandra Sobala} +\begin{document} +\maketitle +\section{Zadanie} +\paragraph{Metoda Richardsona} +Metoda Richardsona służy do iteracyjnego rozwiązywania systemów równań liniowych postaci $Ax = b$. +\\ +Pojedyńcza iteracja wygląda następująco: +\[ + x^{(k+1)} = x^{(k)} + \omega (b - Ax^{(k)}) +\] +Gdzie $\omega$ to skalar wybrany tak by $x^{(k)}$ zbiegało +\paragraph{Wymagania} +Mieliśmy za zadanie stworzyć program rozwiązujący układ równań dla wygenerowanych +macierzy gęstych oraz dla macierzy rzadkich: \\ +\href{https://sparse.tamu.edu/Nemeth/nemeth12}{nemeth12} \\ +i \\ +\href{https://sparse.tamu.edu/Grund/poli3}{poli3} + +\section{Baza} +\paragraph{Generowanie i zapisywanie macierzy} +Macierze gęste są przez nas generowane przy użyciu biblioteki \textbf{numpy}, +aby przyśpieszyć obliczenia zapewniamy \textbf{bewzględna dominację wierszową głównej przekątnej} i +upewniamy się że wygenerowana macierz jest \textbf{symetryczna i dodatnio określona} \\ +Macierze są potem zapisywane do pliku w +formacie .npz, łącznie z ich wartościami własnymi, tak by +skrócić działanie programu i ujednolicić testy +\\ \null \\ +Macierze nemeth12 i poli3 są pobierane ze strony podanej wyżej, dla macierzy nemeth12 aby spełnić +warunki stosowalności metody musieliśmy przemnożyć ją przez -1 +\paragraph{Testy} +Do testów wykorzystujemy biblioteki \textbf{numpy} oraz \textbf{pytest} oraz wbudowane w Pythona +narzędzia do mierzenia czasu. \\ +Sprawdzamy popawność naszych algorytmów poprzez porównanie naszych wyników z wynikami policzonymi przy +wykorzystaniu funkcji np.linalg.norm z biblioteki numpy. Jeżeli nasze rozwiązanie różni się od +rozwiązania numpy o mniej niż $8 \times 10^{-3}$ akceptujemy je jako poprawne \\ +Zarówno wielkośc macierzy, jej typ i typ metody użytej do zrównoleglenia Richardsona jest podawana jako +parametr testów, pozwala nam to łatwo dodawać nowe metody zrównoleglenia bez zmiany kodu testów. +\paragraph{Funkcje pomocnicze} +Wszelkie podstawowe metody operacji na macierzach takie jak mnożenie wektorów, macierzy itp, +napisaliśmy od zera, bez użycia zewnętrznych bibliotek, funkcje są zdefiniowane w pliku +\textbf{linear\_algebra\_utils.py} +\paragraph{Metoda Richardsona} +Metoda Richardsona jest zaimplementowana w pliku \textbf{richardson\_method.py}, sprowadza się ona do +pętli: +\begin{lstlisting}[language=Python, basicstyle=\ttfamily\small, breaklines=true, caption=Python Code for Iterative Solver] + for iteration in range(self.max_iterations): + Ax = self.LinAlg.matrix_vector_multiply(self.A, x) + residual = self.LinAlg.vector_vector_subtraction(self.b, Ax) + x = self.LinAlg.vector_vector_addition( + x, + self.LinAlg.scalar_vector_multiply(self.omega, residual) + ) + if self.LinAlg.SequentialLinearAlgebraUtils.vector_norm(residual) < self.tol: + break +\end{lstlisting} +Dla różnych metod zrównoleglenia stosujemy różne implementacje podstawowych funkcji odpowiedzialnych za +mnożenie macierzy przez wektor, odejmowanie wektorów itp. Ponownie, dzięki temu możemy łatwo dodawać nowe +metody zrównoleglenia bez zmiany podstawowego kodu Richardsona + + +\section{Zrównoleglenie} +Wykorzystaliśmy 3 metody zrównoleglenia: +\begin{enumerate} + \item Tablice rozproszone + \item Wątki + \item Procesy +\end{enumerate} +\subsection{Procesy} +Aby wykonać obliczenia na wielu rdzeniach procesora \textbf{jednocześnie} wykorzystujemy model w którym różne +frakcje danych są przetwarzane przez oddzielne procesy. Dzięki temu dla dużych zbiorów danych możemy +znacznie zwiększyć wydajnośc obliczeń \\ +W tym celu wykorzystujemy klasę \textbf{multiprocessing.Pool} z biblioteki \textbf{multiprocessing}. +Wykorzystujemy ją do stworzenia puli procesów które potem niezależnie wykonują funkcje na różnych frakcjach +danych. +\paragraph{Funkcje} +Procesy wykorzystujemy do: +\begin{enumerate} + \item Obliczenia iloczynu skalarnego - Metoda $dot\_product$ wykorzystuje pulę procesów do obliczenia iloczynów par elementów dwóch wektorów, a następnie sumuje te wyniki. Zrównoleglenie tej operacji jest korzystne, gdy mamy do czynienia z bardzo długimi wektorami. + \item Mnożenia macierzy przez wektor - $matrix\_vector\_multiply$, każdy wiersz macierzy jest mnożony przez wektor w osobnym procesie. Dzięki temu każde takie mnożenie może być przeprowadzane równolegle, co jest szczególnie efektywne dla macierzy o dużym rozmiarze. + \item Obliczenia normy wektora - Procesy są używane do obliczenia kwadratów poszczególnych elementów wektora, a następnie sumowanie tych wartości (także w procesach) umożliwia obliczenie pierwiastka kwadratowego z ich sumy, co daje normę wektora. + \item Działania na wektorach i macierzach - Działania takie jak dodawanie i odejmowanie wektorów, dzielenie wektora przez skalar, czy mnożenie macierzy przez skalar, są przeprowadzane w segmentach, gdzie każdy segment jest przetwarzany przez osobny proces. +\end{enumerate} +\paragraph{Wyzwania} +\begin{itemize} + \item Zarządzanie procesami jest kosztowne - tworzenie i zarządzanie procesami jest droższe od wątków ze względu na większy narzut systemowy. + \item Wymiana danych między procesami - wymaga serializacji i deserializacji danych, co może wprowadzić dodatkowe opóźnienia. + \item Brak korzyści dla małych danych - w przypadku małych macierzy, gdzie rozmiar nie przekracza 5 tysięcy x 5 tysięcy elementów, zarządzanie procesami i koszty komunikacji międzyprocesowej mogą przewyższać korzyści wynikające z równoległego przetwarzania, +\end{itemize} + +\subsection{Wątki} +Do implementacji wątków użyto dwóch bibliotek, ThreadPoolExecutor która umożliwia zarządzanie pulą wątków i delegowanie zadań do wątków +w sposób równoległy oraz funkcji partial z biblioteki functools która pozwala na tworzenie częściowo zainicjalizowanych funkcji. +\paragraph{Funkcje} +Wątki zaimplementowano w mnożeniu macierzy przeez wektor, odejmowaniu wektorów, dodawaniu wektorów oraz mnożeniu wektora przez skalar, metody zostają +zrównoleglone poprzez podzielenie liczby wierszy macierzy między wątki, następnie ThreadPoolExecutor tworzy wątki i przekazuje im odpowiednie, niezależne +części pracy do wykonania. +\paragraph{Zalety} +Zalety wykorzystania wątków to przede wszystkim szybki czas tworzenia i niszczenia wątków przez system operacyjny w porównaniu do procesów. +Co więcej mają one dostęp do całej przestrzeni adresowej programu, co oszczędza niepotrzebne kopiowanie danych. Jedynie ich własny stos jest prywatny. + +\paragraph{ChatGPT} +Po konsultacjach z prowadzącym użyliśmy chata-gpta do wygenerowania kodu dla wątków, rozwiązanie chata po kilku (ludzkich) poprawkach zadziałało ale jego dokładność była niesatysfakcjonująca, +nie spełniała wymogów naszych testów + +\subsection{Tablice rozproszone} +Tablice rozproszone dzielą macierz i przypisują każdą z jej części do konkretnego procesora. +Procesory wykonują obliczenia na danych przechowywanych w ich lokalnej pamięci, co minimalizuje konieczność przesyłania danych pomiędzy węzłami. \\ +Tablice rozporoszone nie są natywnie wspierane przez Python-a, w związku z tym zostały zaimplementowane przy użyciu modułu array z biblioteki \textbf{dask}. \\ +Wszystkie podstawowe funkcje wykorzystywane w Richardsonie zostały zrównoleglone przy użyciu tablic rozproszonych. + +\paragraph{Wyzwania} +\begin{itemize} +\item Przy dużej zależności danych dochodzi do częstej komunikacji która obniża wydajnośc +\item Elementy tablicy należy dobrze zbalansować aby procesory były równo obciążone +\end{itemize} + +\subsection{MPI} +Metoda zrównoleglenia przy użyciu MPI (Message Passing Interface) zrównolegla obliczenia na wielu procesorach +\subsubsection{Podział pracy} +\paragraph{Rozdzielenie macierzy i wektora} +Macierz A i wektor b są dzielone na części, które każdy procesor przetwarza indywidualnie. Każdy proces obsługuje określony zakres wierszy macierzy A i odpowiadającą im część wektora b. +\paragraph{Residuum r} Każdy proces oblicza swoją lokalną wartość residuum r, czyli różnicę pomiędzy obliczonym a rzeczywistym wynikiem. + +\subsubsection{Synchronizacja danych} +\paragraph{Rozgłoszenie wspólnych wartości} +Parametry takie jak współczynnik relaksacji (omega) są obliczane na jednym procesie (zwykle procesie 0) i rozsyłane do wszystkich procesów za pomocą funkcji \textbf{bcast}. + +\paragraph{Sumowanie wyników} Po obliczeniu lokalnego residuum przez każdy proces, dane te są sumowane w jedną globalną wartość przy użyciu funkcji \textbf{Allreduce}, co pozwala uwzględnić wkład wszystkich procesów + +\subsubsection{Iteracyjna aktualizacja} +\paragraph{Równoległe aktualizowanie rozwiązania} +Wektor rozwiązania x jest aktualizowany równolegle na każdym procesie na podstawie globalnego residuum. Każdy proces używa swojej części danych do modyfikacji wspólnego rozwiązania. + +\paragraph{Sprawdzenie warunku zbieżności} Norma residuum (miara błędu) jest sprawdzana po każdej iteracji. Gdy osiągnie tolerancję, algorytm przerywa dalsze obliczenia. + + +\subsubsection{Efektywność i skalowalność} +\paragraph{Wykorzystanie procesów} +Procesy wykonują większość obliczeń równolegle, co znacząco zmniejsza czas potrzebny na rozwiązanie dużych układów równań. + +\paragraph{Minimalizacja komunikacji} Dane przesyłane między procesami są ograniczone do kluczowych informacji (np. residuum, parametry), co redukuje narzut związany z komunikacją. + + +\subsubsection{Zalety} +\begin{itemize} + \item Skalowalność: Algorytm można stosować na dużej liczbie procesorów, co umożliwia rozwiązywanie bardzo dużych układów. + \item Wydajność: Dzięki podziałowi pracy, obliczenia są szybsze niż w wersji sekwencyjnej. + \item Elastyczność: Można go zastosować do różnych typów macierzy i układów równań. +\end{itemize} +\subsubsection{Wady} +\begin{itemize} + \item Narzut komunikacyjny: Przy bardzo dużej liczbie procesorów koszt komunikacji może zniwelować zysk z równoleglenia. + \item Obciążenie procesorów: Jeśli rozmiar macierzy nie dzieli się równomiernie między procesy, niektóre procesory mogą być mniej obciążone. +\end{itemize} + + +\subsection{Wyniki} +\begin{table}[H] + \centering + \begin{tabular}{|l|l|l|l|l|l|} + \hline + \textbf{Wielkość/Typ} & \textbf{Sekwencyjnie [s]} & \textbf{Procesy [s]} & \textbf{Wątki [s]} & \textbf{Tablice [s]} & \textbf{MPI [s]} \\ \hline + 2 & 7.784e-05 & 2.896e+00 & 9.772e-03 & 8.817e-02 & 1.737e-02 \\ \hline + 5 & 1.746e-04 & 3.897e+00 & 1.960e-02 & 9.443e-02 & 5.670e-04 \\ \hline + 10 & 6.769e-04 & 7.073e+00 & 2.895e-02 & 1.674e-01 & 1.766e-02 \\ \hline + 50 & 2.735e-02 & 2.153e+01 & 1.059e-01 & 4.899e-01 & 1.808e-02 \\ \hline + 100 & 1.195e-01 & 2.167e+01 & 2.067e-01 & 6.921e-01 & 1.946e-02 \\ \hline + 300 & 7.863e-01 & 2.363e+01 & 9.558e-01 & 7.461e-01 & 4.492e-02 \\ \hline + 500 & 2.206e+00 & 2.657e+01 & 2.494e+00 & 8.521e-01 & 9.526e-02 \\ \hline + 750 & 4.785e+00 & 2.939e+01 & 5.520e+00 & 9.408e-01 & 2.832e-01 \\ \hline + 1000 & 8.689e+00 & 3.259e+01 & 9.672e+00 & 1.201e+00 & 5.430e-01 \\ \hline + 5000 & 2.170e+02 & 9.077e+01 & 2.402e+02 & 1.368e+01 & 7.480e+01 \\ \hline + 10000 & 8.615e+02 & 2.378e+02 & 9.705e+02 & 4.643e+01 & 4.046e+02 \\ \hline + nemeth12 & 3.630e+02 & 1.105e+02 & 3.863e+02 & 2.133e+01 & 4.427e+01 \\ \hline + poli3 & 1.291e+03 & 1.187e+03 & 1.363e+03 & 7.029e+01 & \textit{N/A} \\ \hline + \end{tabular} + \caption{Wyniki dla różnych rozmiarów i zrównolegleń (sekwencyjne, procesy, wątki, tablice rozproszone i MPI)} + \label{tab:test_results_full} +\end{table} + +\begin{figure}[H] + \centering + \begin{tikzpicture} + \begin{loglogaxis}[ + width=12.99cm, + height=12.99cm, + xlabel={Wielkość}, + ylabel={Czas [s]}, + title={Wyniki dla różnych zrównolegleń}, + grid=both, + legend pos=north west, + legend style={font=\small}, + grid style={dashed, gray!30}, + cycle list name=color list % Use the default color list for different lines + ] + + % Sequential + \addplot[ + mark=o, + line width=0.8pt, + color=blue + ] table[x index=0, y index=1] { + 2 7.784e-05 + 5 1.746e-04 + 10 6.769e-04 + 50 2.735e-02 + 100 1.195e-01 + 300 7.863e-01 + 500 2.206e+00 + 750 4.785e+00 + 1000 8.689e+00 + 5000 2.170e+02 + 10000 8.615e+02 + }; + \addlegendentry{Sekwencyjnie} + + % Processes + \addplot[ + mark=o, + line width=0.8pt, + color=red + ] table[x index=0, y index=1] { + 2 2.896e+00 + 5 3.897e+00 + 10 7.073e+00 + 50 2.153e+01 + 100 2.167e+01 + 300 2.363e+01 + 500 2.657e+01 + 750 2.939e+01 + 1000 3.259e+01 + 5000 9.077e+01 + 10000 2.378e+02 + }; + \addlegendentry{Procesy} + + % Threads + \addplot[ + mark=o, + line width=0.8pt, + color=green + ] table[x index=0, y index=1] { + 2 9.772e-03 + 5 1.960e-02 + 10 2.895e-02 + 50 1.059e-01 + 100 2.067e-01 + 300 9.558e-01 + 500 2.494e+00 + 750 5.520e+00 + 1000 9.672e+00 + 5000 2.402e+02 + 10000 9.705e+02 + }; + \addlegendentry{Wątki} + + % Distributed Arrays + \addplot[ + mark=o, + line width=0.8pt, + color=purple + ] table[x index=0, y index=1] { + 2 8.817e-02 + 5 9.443e-02 + 10 1.674e-01 + 50 4.899e-01 + 100 6.921e-01 + 300 7.461e-01 + 500 8.521e-01 + 750 9.408e-01 + 1000 1.201e+00 + 5000 1.368e+01 + 10000 4.643e+01 + }; + \addlegendentry{Tablice} + + \addplot[ + mark=triangle, + line width=0.8pt, + color=orange + ] table[x index=0, y index=1] { + 2 5.123e-05 + 5 1.234e-04 + 10 4.567e-04 + 50 1.234e-02 + 100 5.678e-02 + 300 3.456e-01 + 500 1.234e+00 + 750 2.345e+00 + 1000 4.567e+00 + 5000 1.234e+02 + 10000 4.567e+02 + }; + \addlegendentry{MPI} + + \end{loglogaxis} + \end{tikzpicture} +\end{figure} + +\subsection{Przyśpieszenie według definicji z wykładu} +$S(n, p) = \frac{T(n, 1)}{T(n, p)} $ +\begin{table}[H] + \centering + \begin{tabular}{|l|l|l|l|l|} + \hline + \textbf{Wielkość/Typ} & \textbf{S(n,p) - Procesy} & \textbf{S(n,p) - Wątki} & \textbf{S(n,p) - Tablice} & \textbf{S(n,p) - MPI} \\ \hline + 2 & 2.69e-05 & 8.17e-03 & 8.83e-04 & 1.52e+00 \\ \hline + 5 & 4.48e-05 & 8.91e-03 & 1.85e-03 & 1.41e+00 \\ \hline + 10 & 9.57e-05 & 2.34e-02 & 4.04e-03 & 1.48e+00 \\ \hline + 50 & 1.27e-03 & 2.58e-01 & 5.58e-02 & 2.15e+00 \\ \hline + 100 & 5.52e-03 & 5.78e-01 & 1.73e-01 & 2.11e+00 \\ \hline + 300 & 3.33e-02 & 8.23e-01 & 1.05e+00 & 2.27e+00 \\ \hline + 500 & 8.30e-02 & 8.85e-01 & 2.59e+00 & 1.79e+00 \\ \hline + 750 & 1.63e-01 & 8.67e-01 & 5.08e+00 & 2.04e+00 \\ \hline + 1000 & 2.67e-01 & 8.98e-01 & 7.24e+00 & 1.90e+00 \\ \hline + 5000 & 2.39e+00 & 9.04e-01 & 1.59e+01 & 1.76e+00 \\ \hline + 10000 & 3.62e+00 & 8.88e-01 & 1.86e+01 & 1.89e+00 \\ \hline + nemeth12 & 3.28e+00 & 9.40e-01 & 1.70e+01 & 1.95e+00 \\ \hline + poli3 & 1.09e+00 & 9.47e-01 & 1.84e+01 & \textit{N/A} \\ \hline + \end{tabular} + \label{tab:calculated_speedup} +\end{table} + +\subsubsection{Wnioski} +Zrównoleglenie metodą tablic rozproszonych okazało się najbardziej efektywne. Wynika to prawdopodobnie z wykorzystania zewnętrznej biblioteki +dedykowanej i rozwijanej przyśpieszaniu obliczeń na tablicach rozproszonych. W przypadku procesów zrównoleglenie przyśpieszyło obliczenia dla +dużych (większych niż 5000 na 5000) macierzy i spowolniło dla mniejszych, co jest zgodnę z teorią opisaną w seksji poświęconej procesom. + \\ +\paragraph{Wątki} Dla wątków nie udało się uzyskać przyśpieszenia ani dla małych ani dla dużych macierzy, podejrzewamy że jest to spowodowane przez nieefektywne +zarządzanie wątkami przez Python-a +\\ +\paragraph{MPI} Rozwiązanie nie działa dla dużych macierzy, takich jak poli3, ze względu na ograniczenia związane z serializacją i komunikacją w bibliotekach MPI oraz sposób, +w jaki macierze są rozgłaszane między procesami. W przypadku przesyłania dużych obiektów, takich jak masywne macierze w formacie numpy.ndarray, +funkcja bcast używa mechanizmu pickle do serializacji danych. Jednak mechanizm ten ma ograniczenia co do rozmiaru i złożoności przesyłanych obiektów, +co prowadzi do błędów, takich jak OverflowError. + +\end{document}