from typing import Any, List
import numpy as np
from syllabus.core import Curriculum


class DomainRandomization(Curriculum):
    """A simple but strong baseline for curriculum learning that uniformly samples a task from the task space."""

    def _sample_distribution(self) -> List[float]:
        """
        Returns a sample distribution over the task space.
        """
        # Uniform distribution
        return [1.0 / self.num_tasks for _ in range(self.num_tasks)]
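

# Illustrative usage sketch (not part of the Syllabus source). It assumes a
# discrete task space can be built via syllabus.task_space.TaskSpace; that
# import path and constructor are assumptions and may differ between versions.
def _example_domain_randomization():
    from syllabus.task_space import TaskSpace  # assumed import path

    task_space = TaskSpace(10)  # ten discrete tasks (assumed constructor)
    curriculum = DomainRandomization(task_space)
    # Every call draws tasks uniformly at random from the task space.
    return curriculum.sample(k=4)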


class BatchedDomainRandomization(Curriculum):
    """A batched variant of domain randomization that uniformly samples a single task and repeats it for a full batch of environment steps before sampling the next one."""

    def __init__(self, batch_size: int, task_space, warmup_batches: int = 5, **kwargs):
        super().__init__(task_space, **kwargs)
        self.batch_size = batch_size
        self.current_task = None
        self._batch_steps = batch_size  # Start by sampling a new task
        self._batch_count = 0
        self.warmup_batches = warmup_batches
        self.distribution = [1.0 / self.num_tasks for _ in range(self.num_tasks)]  # Uniform distribution

    def _sample_distribution(self) -> List[float]:
        """
        Returns a sample distribution over the task space.
        """
        return self.distribution

    def sample(self, k: int = 1) -> Any:
        """Sample k tasks from the curriculum."""
        tasks = None
        # During warmup, fall back to fully uniform sampling.
        if self._batch_count < self.warmup_batches:
            tasks = super().sample(k=k)

        # Once a full batch of steps has elapsed, pick a new batch task.
        if self._batch_steps >= self.batch_size:
            self.current_task = super().sample(k=1)
            self._batch_steps -= self.batch_size
            self._batch_count += 1

        # Outside of warmup, repeat the current batch task k times.
        if tasks is None:
            tasks = [self.current_task[0] for _ in range(k)]
        return tasks

    def update_on_episode(self, episode_return, length, task, progress, env_id: int = None) -> None:
        super().update_on_episode(episode_return, length, task, progress, env_id=env_id)
        self._batch_steps += length
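

# Illustrative sketch (not part of the Syllabus source): with warmup_batches=0
# the curriculum immediately pins one uniformly sampled task and repeats it
# until roughly batch_size environment steps have been reported through
# update_on_episode. The TaskSpace constructor and the episode statistics
# below are assumptions made for illustration.
def _example_batched_domain_randomization():
    from syllabus.task_space import TaskSpace  # assumed import path

    curriculum = BatchedDomainRandomization(
        batch_size=256, task_space=TaskSpace(10), warmup_batches=0
    )
    batch_tasks = curriculum.sample(k=8)  # eight copies of one sampled task
    # Reporting at least batch_size steps of episode data triggers a new batch task.
    curriculum.update_on_episode(
        episode_return=1.0, length=300, task=batch_tasks[0], progress=1.0
    )
    next_batch = curriculum.sample(k=8)  # a freshly sampled batch task
    return batch_tasks, next_batch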


class SyncedBatchedDomainRandomization(Curriculum):
    """Batched domain randomization that holds a single uniformly sampled task until `update_batch` is called, with a small chance of returning a uniformly random task instead."""

    def __init__(self, batch_size: int, task_space, warmup_batches: int = 1, uniform_chance: float = 0.05, **kwargs):
        super().__init__(task_space, **kwargs)
        self.batch_size = batch_size
        self.warmup_batches = warmup_batches
        self.uniform_chance = uniform_chance
        self.current_task = None
        self._batch_count = 0
        self._should_update = True
        self.distribution = [1.0 / self.num_tasks for _ in range(self.num_tasks)]  # Uniform distribution

    def _sample_distribution(self) -> List[float]:
        """
        Returns a sample distribution over the task space.
        """
        return self.distribution

    def sample(self, k: int = 1) -> Any:
        """Sample k tasks from the curriculum."""
        tasks = None
        # During warmup, fall back to fully uniform sampling.
        if self._batch_count < self.warmup_batches:
            tasks = super().sample(k=k)

        # Resample the held batch task once per call to update_batch.
        if self._should_update:
            self.current_task = super().sample(k=1)[0]
            self._should_update = False

        # Outside of warmup, mostly repeat the current batch task, but with
        # probability uniform_chance return a uniformly random task instead.
        if tasks is None:
            tasks = []
            for _ in range(k):
                if np.random.rand() < self.uniform_chance:
                    tasks.append(np.random.choice(self.num_tasks))
                else:
                    tasks.append(self.current_task)
        return tasks

    def update_batch(self):
        """Signal the end of a batch so that the next call to sample picks a new task."""
        self._should_update = True
        self._batch_count += 1
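

# Illustrative sketch (not part of the Syllabus source): the synced variant
# holds one task per batch and only refreshes it when the trainer signals a
# batch boundary through update_batch. The TaskSpace constructor is assumed.
def _example_synced_batched_domain_randomization():
    from syllabus.task_space import TaskSpace  # assumed import path

    curriculum = SyncedBatchedDomainRandomization(batch_size=256, task_space=TaskSpace(10))
    warmup_tasks = curriculum.sample(k=8)  # uniform while still in warmup
    curriculum.update_batch()              # e.g. called after each learner update
    batch_tasks = curriculum.sample(k=8)   # mostly one held task, ~5% uniform
    return warmup_tasks, batch_tasks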