From b93c2bc693cab2cd5981bf4611931967e423d5da Mon Sep 17 00:00:00 2001
From: Krzysztof Rudnicki <krzysztofrudnicki0@gmail.com>
Date: Sun, 27 Oct 2024 21:14:26 +0100
Subject: [PATCH] feat: metadrive hello world

---
 README.md               | 14 +++++++++-
 script/main.py          | 58 +++++++----------------------------------
 script/requirements.txt |  7 ++---
 3 files changed, 24 insertions(+), 55 deletions(-)

diff --git a/README.md b/README.md
index 1965c85e..96235e83 100644
--- a/README.md
+++ b/README.md
@@ -7,7 +7,19 @@ najmniej trzech różnych mapach. Omów otrzymane wyniki oraz zwizualizuj dział
 wytrenowanych agentów.
 
 ## How to run:
+
+### Install MetaDrive
+```
+pip install metadrive-simulator
+python -m metadrive.pull_asset
+```
+
+#### Verify
+`python -m metadrive.examples.profile_metadrive`
+
 Install swig, python and pip
-Install libraries required by program:
+
+Install the other libraries required by the program:
+
 `pip install -r requirements.txt`
 
diff --git a/script/main.py b/script/main.py
index 195653d3..7f9f9aa0 100644
--- a/script/main.py
+++ b/script/main.py
@@ -1,50 +1,10 @@
-import ray
-from ray import tune
-from ray.rllib.agents.ppo import PPOTrainer
-from metadrive import MultiAgentTIntersectionEnv
-import random
+from metadrive.envs.metadrive_env import MetaDriveEnv
+import gymnasium as gym
 
-# Initialize Ray
-ray.init(ignore_reinit_error=True)
-
-# Define a custom environment class that switches between three maps
-class MultiMapEnv(MultiAgentTIntersectionEnv):
-    def __init__(self, config):
-        # Define available maps
-        self.maps = ["TIntersection", "Roundabout", "Straight"]
-        super().__init__(config)
-
-    def reset(self):
-        # Randomly choose a map from the available ones at the start of each episode
-        self.config["map"] = random.choice(self.maps)
-        return super().reset()
-
-# Multi-agent configuration with two independent policies
-config = {
-    "env": MultiMapEnv,
-    "env_config": {
-        "num_agents": 2,           # Set to 2 agents for this multi-agent scenario
-    },
-    "framework": "torch",          # Use PyTorch as the backend
-    "num_workers": 1,              # Set to 1 worker for simplicity
-    "multiagent": {
-        "policies": {
-            "policy_1": {},  # Configuration for the first agent's policy
-            "policy_2": {},  # Configuration for the second agent's policy
-        },
-        "policy_mapping_fn": lambda agent_id: "policy_1" if agent_id == "agent_1" else "policy_2",
-    },
-}
-
-# Initialize the trainer with PPO algorithm
-trainer = PPOTrainer(env=MultiMapEnv, config=config)
-
-# Training loop
-print("Starting training for two agents across multiple maps...")
-for i in range(10):  # Number of training iterations
-    result = trainer.train()
-    print(f"Iteration {i + 1}: reward = {result['episode_reward_mean']}")
-
-# Clean up resources
-trainer.cleanup()
-ray.shutdown()
+env = MetaDriveEnv(config={"use_render": True})  # presumably enables the on-screen render window; confirm in MetaDrive docs
+obs, info = env.reset()  # start the first episode
+for i in range(1000):  # 1000 steps in total, spanning as many episodes as fit
+    obs, reward, terminated, truncated, info = env.step(env.action_space.sample())  # act randomly
+    if terminated or truncated:  # episode over for either reason
+        env.reset()  # begin a new episode; the returned (obs, info) is discarded
+env.close()  # NOTE(review): not reached if step()/reset() raises; consider try/finally
\ No newline at end of file
diff --git a/script/requirements.txt b/script/requirements.txt
index 4ffb0465..f84ab97d 100644
--- a/script/requirements.txt
+++ b/script/requirements.txt
@@ -1,5 +1,2 @@
-metadrive
-ray 
-rlib
-ray[rllib]
-metadrive-simulator
\ No newline at end of file
+metadrive-simulator
+gymnasium
\ No newline at end of file