# Source code for syllabus.curricula.domain_randomization

from typing import Any, List

import numpy as np

from syllabus.core import Curriculum


class DomainRandomization(Curriculum):
    """A simple but strong baseline for curriculum learning that uniformly samples a task from the task space."""

    def _sample_distribution(self) -> List[float]:
        """Return a uniform sampling distribution over the task space.

        Every one of the ``num_tasks`` tasks gets equal probability mass.
        """
        uniform_weight = 1.0 / self.num_tasks
        return [uniform_weight] * self.num_tasks
class BatchedDomainRandomization(Curriculum):
    """A simple but strong baseline for curriculum learning that uniformly samples a task from the task space.

    Unlike plain domain randomization, a sampled task is held fixed until
    ``batch_size`` environment steps have elapsed; only then is a new task drawn.
    """

    def __init__(self, batch_size: int, task_space, warmup_batches: int = 5, **kwargs):
        """
        Args:
            batch_size: Number of environment steps to keep sampling the same task.
            task_space: Task space to sample from.
            warmup_batches: Number of initial batches during which every call
                samples uniformly instead of repeating the held task.
        """
        super().__init__(task_space, **kwargs)
        self.batch_size = batch_size
        self.warmup_batches = warmup_batches
        self.current_task = None
        # Start at batch_size so the very first sample() call draws a new task.
        self._batch_steps = batch_size
        self._batch_count = 0
        # Uniform distribution
        self.distribution = [1.0 / self.num_tasks] * self.num_tasks

    def _sample_distribution(self) -> List[float]:
        """ Returns a sample distribution over the task space. """
        return self.distribution

    def sample(self, k: int = 1) -> Any:
        """Sample ``k`` tasks, repeating the current batch task after warmup."""
        warmup_tasks = None
        if self._batch_count < self.warmup_batches:
            warmup_tasks = super().sample(k=k)
        if self._batch_steps >= self.batch_size:
            # Enough steps have elapsed: draw a fresh task for the next batch.
            # Subtracting (rather than zeroing) carries any overshoot forward.
            self.current_task = super().sample(k=1)
            self._batch_steps -= self.batch_size
            self._batch_count += 1
        if warmup_tasks is not None:
            return warmup_tasks
        return [self.current_task[0]] * k

    def update_on_episode(self, episode_return, length, task, progress, env_id: int = None) -> None:
        """Accumulate elapsed steps so sample() knows when to rotate the batch task."""
        super().update_on_episode(episode_return, length, task, progress, env_id=env_id)
        self._batch_steps += length
class SyncedBatchedDomainRandomization(Curriculum):
    """A simple but strong baseline for curriculum learning that uniformly samples a task from the task space.

    A single "synced" task is repeated until update_batch() is called, with a
    small ``uniform_chance`` probability of substituting a uniformly random task.
    """

    def __init__(self, batch_size: int, task_space, warmup_batches: int = 1, uniform_chance: float = 0.05, **kwargs):
        """
        Args:
            batch_size: Nominal number of steps per batch (stored for callers).
            task_space: Task space to sample from.
            warmup_batches: Number of initial batches sampled uniformly.
            uniform_chance: Per-sample probability of ignoring the synced task
                and drawing a uniformly random one instead.
        """
        super().__init__(task_space, **kwargs)
        self.batch_size = batch_size
        self.warmup_batches = warmup_batches
        self.uniform_chance = uniform_chance
        self.current_task = None
        self._batch_count = 0
        self._should_update = True
        # Uniform distribution
        self.distribution = [1.0 / self.num_tasks] * self.num_tasks

    def _sample_distribution(self) -> List[float]:
        """ Returns a sample distribution over the task space. """
        return self.distribution

    def sample(self, k: int = 1) -> Any:
        """ Sample k tasks from the curriculum."""
        warmup_tasks = None
        if self._batch_count < self.warmup_batches:
            warmup_tasks = super().sample(k=k)
        if self._should_update:
            # Draw one synced task and hold it until the next update_batch().
            self.current_task = super().sample(k=1)[0]
            self._should_update = False
        if warmup_tasks is not None:
            return warmup_tasks
        # Repeat the synced task, occasionally (uniform_chance) swapping in a
        # uniformly random task index.
        return [
            np.random.choice(self.num_tasks)
            if np.random.rand() <= self.uniform_chance
            else self.current_task
            for _ in range(k)
        ]

    def update_batch(self):
        """ Update the current batch."""
        # Flag sample() to draw a new synced task and advance the batch counter.
        self._should_update = True
        self._batch_count += 1