From b93c2bc693cab2cd5981bf4611931967e423d5da Mon Sep 17 00:00:00 2001 From: Krzysztof Rudnicki Date: Sun, 27 Oct 2024 21:14:26 +0100 Subject: [PATCH] feat: metadrive hello world --- README.md | 14 +++++++++- script/main.py | 58 +++++++---------------------------------- script/requirements.txt | 7 ++--- 3 files changed, 24 insertions(+), 55 deletions(-) diff --git a/README.md b/README.md index 1965c85e..96235e83 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,19 @@ najmniej trzech różnych mapach. Omów otrzymane wyniki oraz zwizualizuj dział wytrenowanych agentów. ## How to run: + +### Install metadrive +``` +pip install metadrive-simulator +python -m metadrive.pull_asset +``` + +#### Verify +`python -m metadrive.examples.profile_metadrive` + Install swig, python and pip -Install libraries required by program: + +Install other libraries required by program: + `pip install -r requirements.txt` diff --git a/script/main.py b/script/main.py index 195653d3..7f9f9aa0 100644 --- a/script/main.py +++ b/script/main.py @@ -1,50 +1,10 @@ -import ray -from ray import tune -from ray.rllib.agents.ppo import PPOTrainer -from metadrive import MultiAgentTIntersectionEnv -import random +from metadrive.envs.metadrive_env import MetaDriveEnv +import gymnasium as gym -# Initialize Ray -ray.init(ignore_reinit_error=True) - -# Define a custom environment class that switches between three maps -class MultiMapEnv(MultiAgentTIntersectionEnv): - def __init__(self, config): - # Define available maps - self.maps = ["TIntersection", "Roundabout", "Straight"] - super().__init__(config) - - def reset(self): - # Randomly choose a map from the available ones at the start of each episode - self.config["map"] = random.choice(self.maps) - return super().reset() - -# Multi-agent configuration with two independent policies -config = { - "env": MultiMapEnv, - "env_config": { - "num_agents": 2, # Set to 2 agents for this multi-agent scenario - }, - "framework": "torch", # Use PyTorch as the backend - "num_workers": 1, # Set to 1 worker for simplicity - "multiagent": { - "policies": { - "policy_1": {}, # Configuration for the first agent's policy - "policy_2": {}, # Configuration for the second agent's policy - }, - "policy_mapping_fn": lambda agent_id: "policy_1" if agent_id == "agent_1" else "policy_2", - }, -} - -# Initialize the trainer with PPO algorithm -trainer = PPOTrainer(env=MultiMapEnv, config=config) - -# Training loop -print("Starting training for two agents across multiple maps...") -for i in range(10): # Number of training iterations - result = trainer.train() - print(f"Iteration {i + 1}: reward = {result['episode_reward_mean']}") - -# Clean up resources -trainer.cleanup() -ray.shutdown() +env = MetaDriveEnv(config={"use_render": True}) +obs, info = env.reset() +for i in range(1000): + obs, reward, terminated, truncated, info = env.step(env.action_space.sample()) + if terminated or truncated: + env.reset() +env.close() \ No newline at end of file diff --git a/script/requirements.txt b/script/requirements.txt index 4ffb0465..f84ab97d 100644 --- a/script/requirements.txt +++ b/script/requirements.txt @@ -1,5 +1,2 @@ -metadrive -ray -rlib -ray[rllib] -metadrive-simulator \ No newline at end of file +metadrive-simulator +gymnasium \ No newline at end of file