feat: metadrive hello world

This commit is contained in:
Krzysztof Rudnicki 2024-10-27 21:14:26 +01:00
parent fe31f46096
commit b93c2bc693
3 changed files with 24 additions and 55 deletions

View File

@@ -7,7 +7,19 @@ najmniej trzech różnych mapach. Omów otrzymane wyniki oraz zwizualizuj dział
wytrenowanych agentów. wytrenowanych agentów.
## How to run: ## How to run:
### Install metadrive
```
pip install metadrive-simulator
python -m metadrive.pull_asset
```
#### Verify
`python -m metadrive.examples.profile_metadrive`
Install swig, python and pip Install swig, python and pip
Install libraries required by program:
Install other libraries required by program:
`pip install -r requirements.txt` `pip install -r requirements.txt`

View File

@@ -1,50 +1,10 @@
import ray from metadrive.envs.metadrive_env import MetaDriveEnv
from ray import tune import gymnasium as gym
from ray.rllib.agents.ppo import PPOTrainer
from metadrive import MultiAgentTIntersectionEnv
import random
# Initialize Ray env = MetaDriveEnv(config={"use_render": True})
ray.init(ignore_reinit_error=True) obs, info = env.reset()
for i in range(1000):
# Define a custom environment class that switches between three maps obs, reward, terminated, truncated, info = env.step(env.action_space.sample())
class MultiMapEnv(MultiAgentTIntersectionEnv): if terminated or truncated:
def __init__(self, config): env.reset()
# Define available maps env.close()
self.maps = ["TIntersection", "Roundabout", "Straight"]
super().__init__(config)
def reset(self):
# Randomly choose a map from the available ones at the start of each episode
self.config["map"] = random.choice(self.maps)
return super().reset()
# Multi-agent configuration with two independent policies
config = {
"env": MultiMapEnv,
"env_config": {
"num_agents": 2, # Set to 2 agents for this multi-agent scenario
},
"framework": "torch", # Use PyTorch as the backend
"num_workers": 1, # Set to 1 worker for simplicity
"multiagent": {
"policies": {
"policy_1": {}, # Configuration for the first agent's policy
"policy_2": {}, # Configuration for the second agent's policy
},
"policy_mapping_fn": lambda agent_id: "policy_1" if agent_id == "agent_1" else "policy_2",
},
}
# Initialize the trainer with PPO algorithm
trainer = PPOTrainer(env=MultiMapEnv, config=config)
# Training loop
print("Starting training for two agents across multiple maps...")
for i in range(10): # Number of training iterations
result = trainer.train()
print(f"Iteration {i + 1}: reward = {result['episode_reward_mean']}")
# Clean up resources
trainer.cleanup()
ray.shutdown()

View File

@@ -1,5 +1,2 @@
metadrive metadrive-simulator
ray gymnasium
rlib
ray[rllib]
metadrive-simulator