feat: metadrive hello world

2026-07-04 16:43:12 +02:00 · 2024-10-27 21:14:26 +01:00 · 2024-10-27 21:14:26 +01:00 · b93c2bc693
commit b93c2bc693
parent fe31f46096
3 changed files with 24 additions and 55 deletions
--- a/README.md
+++ b/README.md
@@ -7,7 +7,19 @@ najmniej trzech różnych mapach. Omów otrzymane wyniki oraz zwizualizuj dział
 wytrenowanych agentów.
 ## How to run:
 ### Install metadrive 
 ```
 pip install metadrive-simulator
 python -m metadrive.pull_asset
 ```
 #### Verify
 `python -m metadrive.examples.profile_metadrive`
 Install swig, python and pip
-Install libraries required by program:
+
 Install other libraries required by program:
 `pip install -r requirements.txt`
--- a/script/main.py
+++ b/script/main.py
@@ -1,50 +1,10 @@
-import ray
+from metadrive.envs.metadrive_env import MetaDriveEnv
-from ray import tune
+import gymnasium as gym
 from ray.rllib.agents.ppo import PPOTrainer
 from metadrive import MultiAgentTIntersectionEnv
 import random
-# Initialize Ray
+env = MetaDriveEnv(config={"use_render": True})
-ray.init(ignore_reinit_error=True)
+obs, info = env.reset()
-
+for i in range(1000):
-# Define a custom environment class that switches between three maps
+    obs, reward, terminated, truncated, info = env.step(env.action_space.sample())
-class MultiMapEnv(MultiAgentTIntersectionEnv):
+    if terminated or truncated:
-    def __init__(self, config):
+        env.reset()
-        # Define available maps
+env.close()
        self.maps = ["TIntersection", "Roundabout", "Straight"]
        super().__init__(config)
    def reset(self):
        # Randomly choose a map from the available ones at the start of each episode
        self.config["map"] = random.choice(self.maps)
        return super().reset()
 # Multi-agent configuration with two independent policies
 config = {
    "env": MultiMapEnv,
    "env_config": {
        "num_agents": 2,           # Set to 2 agents for this multi-agent scenario
    },
    "framework": "torch",          # Use PyTorch as the backend
    "num_workers": 1,              # Set to 1 worker for simplicity
    "multiagent": {
        "policies": {
            "policy_1": {},  # Configuration for the first agent's policy
            "policy_2": {},  # Configuration for the second agent's policy
        },
        "policy_mapping_fn": lambda agent_id: "policy_1" if agent_id == "agent_1" else "policy_2",
    },
 }
 # Initialize the trainer with PPO algorithm
 trainer = PPOTrainer(env=MultiMapEnv, config=config)
 # Training loop
 print("Starting training for two agents across multiple maps...")
 for i in range(10):  # Number of training iterations
    result = trainer.train()
    print(f"Iteration {i + 1}: reward = {result['episode_reward_mean']}")
 # Clean up resources
 trainer.cleanup()
 ray.shutdown()
--- a/script/requirements.txt
+++ b/script/requirements.txt
@@ -1,5 +1,2 @@
-metadrive
+metadrive-simulator
-ray 
+gymnasium
 rlib
 ray[rllib]
 metadrive-simulator