mirror of
https://github.com/kuhyx/WUT_Computer_Science.git
synced 2026-07-04 14:43:08 +02:00
feat: metadrive hello world
This commit is contained in:
parent
fe31f46096
commit
b93c2bc693
14
README.md
14
README.md
@ -7,7 +7,19 @@ najmniej trzech różnych mapach. Omów otrzymane wyniki oraz zwizualizuj dział
|
||||
wytrenowanych agentów.
|
||||
|
||||
## How to run:
|
||||
|
||||
### Install metadrive
|
||||
```
|
||||
pip install metadrive-simulator
|
||||
python -m metadrive.pull_asset
|
||||
```
|
||||
|
||||
#### Verify
|
||||
`python -m metadrive.examples.profile_metadrive`
|
||||
|
||||
Install swig, python and pip
|
||||
Install libraries required by program:
|
||||
|
||||
Install other libraries required by program:
|
||||
|
||||
`pip install -r requirements.txt`
|
||||
|
||||
|
||||
@ -1,50 +1,10 @@
|
||||
import ray
|
||||
from ray import tune
|
||||
from ray.rllib.agents.ppo import PPOTrainer
|
||||
from metadrive import MultiAgentTIntersectionEnv
|
||||
import random
|
||||
from metadrive.envs.metadrive_env import MetaDriveEnv
|
||||
import gymnasium as gym
|
||||
|
||||
# Initialize Ray
|
||||
ray.init(ignore_reinit_error=True)
|
||||
|
||||
# Define a custom environment class that switches between three maps
|
||||
class MultiMapEnv(MultiAgentTIntersectionEnv):
    """Multi-agent environment that selects a map name at random on each reset.

    The candidate names are stored in ``self.maps``; every call to
    :meth:`reset` writes one of them into ``self.config["map"]`` before
    delegating to the parent implementation.

    NOTE(review): whether the parent environment actually honours a string
    value under ``config["map"]`` is not visible from this file -- confirm
    against the MetaDrive version in use.
    """

    def __init__(self, config):
        """Store the list of selectable maps, then initialise the base env.

        Args:
            config: Environment configuration forwarded unchanged to the
                parent constructor.
        """
        # Define available maps
        self.maps = ["TIntersection", "Roundabout", "Straight"]
        super().__init__(config)

    def reset(self, *args, **kwargs):
        """Pick a random map for the new episode and reset the environment.

        Any positional or keyword arguments (for example ``seed``) are passed
        straight through to the parent ``reset`` so callers that use the
        Gymnasium-style signature keep working; calling it without arguments
        behaves exactly as before.

        Returns:
            Whatever the parent ``reset`` returns.
        """
        # Randomly choose a map from the available ones at the start of each episode
        self.config["map"] = random.choice(self.maps)
        return super().reset(*args, **kwargs)
|
||||
|
||||
# Multi-agent configuration with two independent policies
|
||||
config = {
    "env": MultiMapEnv,
    "env_config": {
        "num_agents": 2,  # Set to 2 agents for this multi-agent scenario
    },
    "framework": "torch",  # Use PyTorch as the backend
    "num_workers": 1,  # Set to 1 worker for simplicity
    "multiagent": {
        "policies": {
            "policy_1": {},  # Configuration for the first agent's policy
            "policy_2": {},  # Configuration for the second agent's policy
        },
        # Only the literal id "agent_1" is routed to policy_1; every other id
        # falls through to policy_2.
        # NOTE(review): confirm the agent ids the environment really emits.
        # Extra positional/keyword arguments are accepted and ignored so the
        # mapping works whether the caller passes only the agent id or
        # additional context as well.
        "policy_mapping_fn": lambda agent_id, *args, **kwargs: (
            "policy_1" if agent_id == "agent_1" else "policy_2"
        ),
    },
}

try:
    # Initialize the trainer with PPO algorithm
    trainer = PPOTrainer(env=MultiMapEnv, config=config)
    try:
        # Training loop
        print("Starting training for two agents across multiple maps...")
        for i in range(10):  # Number of training iterations
            result = trainer.train()
            print(f"Iteration {i + 1}: reward = {result['episode_reward_mean']}")
    finally:
        # Release the trainer's resources even if a training iteration failed
        trainer.cleanup()
finally:
    # Always shut Ray down, including when trainer construction raised
    ray.shutdown()
|
||||
# Minimal MetaDrive smoke run: open a rendered environment, drive it with
# randomly sampled actions for 1000 steps, and restart the episode whenever
# it terminates or is truncated.
env = MetaDriveEnv(config={"use_render": True})
try:
    obs, info = env.reset()
    for _ in range(1000):
        obs, reward, terminated, truncated, info = env.step(env.action_space.sample())
        if terminated or truncated:
            # Episode is over; begin a fresh one and keep its first observation
            obs, info = env.reset()
finally:
    # Close the environment even if stepping or resetting raised
    env.close()
|
||||
@ -1,5 +1,2 @@
|
||||
metadrive
|
||||
ray
|
||||
rlib
|
||||
ray[rllib]
|
||||
metadrive-simulator
|
||||
metadrive-simulator
|
||||
gymnasium
|
||||
Loading…
Reference in New Issue
Block a user