Source code for syllabus.core.task_interface.task_wrapper

import gymnasium as gym
import pettingzoo
from pettingzoo.utils.wrappers.base_parallel import BaseParallelWrapper


class TaskWrapper(gym.Wrapper):
    # TODO: Update to new TaskSpace API
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.task_completion = 0.0
        self.task_space = None
        self.task = None  # TODO: Would making this a property protect from accidental overriding?

    def reset(self, **kwargs):
        new_task = kwargs.pop("new_task", None)
        if new_task is not None:
            self.change_task(new_task)
        obs, info = self.env.reset(**kwargs)
        info["task_completion"] = 0.0
        info["task"] = self.task
        return self.observation(obs), info

    def change_task(self, new_task):
        """
        Changes the task of the existing environment to the new_task.

        Each environment will implement tasks differently. The easiest system would be to call a
        function or set an instance variable to change the task. Some environments may need to be
        reset or even reinitialized to change the task. If you need to reset or re-init the
        environment here, make sure to check that it is not in the middle of an episode to avoid
        unexpected behavior. A minimal illustrative sketch follows this class definition.
        """
        self.task = new_task

    def _task_completion(self, obs, rew, term, trunc, info) -> float:
        """
        Implement this function to indicate whether the selected task has been completed.
        This can be determined using the observation, rewards, term, trunc, info or internal
        values from the environment. Intended to be used for automatic curricula.

        Returns a boolean or float value indicating binary completion or scalar degree of completion.
        """
        return 1.0 if term or trunc else 0.0

    def _encode_goal(self):
        """
        Implement this method to indicate which task is selected to the agent.

        Returns: Numpy array encoding the goal.
        """
        return None

    def observation(self, observation):
        """
        Adds the goal encoding to the observation.
        Override to add additional task-specific observations.
        Returns a modified observation.

        TODO: Complete this implementation and find way to support centralized encodings
        """
        # Add goal to observation
        goal_encoding = self._encode_goal()
        if goal_encoding is not None:
            observation['goal'] = goal_encoding
        return observation

    def step(self, action):
        obs, rew, term, trunc, info = self.env.step(action)

        # Determine completion status of the current task
        self.task_completion = self._task_completion(obs, rew, term, trunc, info)
        info["task_completion"] = self.task_completion
        info["task"] = self.task
        return self.observation(obs), rew, term, trunc, info

    def __getattr__(self, attr):
        # Delegate any missing attribute lookups to the wrapped environment.
        try:
            return getattr(self.env, attr)
        except AttributeError:
            raise AttributeError(f"TaskWrapper and env do not have attribute {attr}") from None
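

# ---------------------------------------------------------------------------
# Illustrative sketch (not part of this module): a minimal TaskWrapper
# subclass for a hypothetical environment whose task is a goal position read
# from `env.unwrapped.goal_position`. That attribute and the success
# criterion below are assumptions made purely for illustration.
# ---------------------------------------------------------------------------
import numpy as np


class GoalPositionTaskWrapper(TaskWrapper):
    """Sketch: the task is a goal position the agent must reach."""

    def change_task(self, new_task):
        # Store the new goal and forward it to the (hypothetical) environment,
        # which is assumed to read `goal_position` on its next reset.
        self.task = new_task
        self.env.unwrapped.goal_position = new_task

    def _encode_goal(self):
        # Expose the current goal to the agent as a numpy array.
        return np.asarray(self.task, dtype=np.float32)

    def _task_completion(self, obs, rew, term, trunc, info) -> float:
        # Treat a positive terminal reward as task success.
        return 1.0 if term and rew > 0 else 0.0


# Example usage (the environment id is hypothetical):
# env = GoalPositionTaskWrapper(gym.make("SomeGoalEnv-v0"))
# obs, info = env.reset(new_task=(3.0, 4.0))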


class PettingZooTaskWrapper(BaseParallelWrapper):
    def __init__(self, env: pettingzoo.ParallelEnv):
        super().__init__(env)
        self.task = None

    @property
    def agents(self):
        return self.env.agents

    def __getattr__(self, attr):
        # Delegate any missing attribute lookups to the wrapped environment.
        env_attr = getattr(self.env, attr, None)
        if env_attr is not None:
            return env_attr
        raise AttributeError(f"PettingZooTaskWrapper and env do not have attribute {attr}")

    def get_current_task(self):
        return self.current_task

    def reset(self, **kwargs):
        new_task = kwargs.pop("new_task", None)
        if new_task is not None:
            self.change_task(new_task)
            self.task = new_task
        obs, info = self.env.reset(**kwargs)
        for agent in info.keys():
            info[agent]["task_completion"] = 0.0
            info[agent]["task_id"] = self.task
        return self.observation(obs), info

    def change_task(self, new_task):
        """
        Changes the task of the existing environment to the new_task.

        Each environment will implement tasks differently. The easiest system would be to call a
        function or set an instance variable to change the task. Some environments may need to be
        reset or even reinitialized to change the task. If you need to reset or re-init the
        environment here, make sure to check that it is not in the middle of an episode to avoid
        unexpected behavior. A minimal illustrative sketch follows this class definition.
        """
        raise NotImplementedError

    def step(self, action):
        obs, rew, term, trunc, info = self.env.step(action)

        # Determine completion status of the current task
        self.task_completion = self._task_completion(obs, rew, term, trunc, info)
        for agent in self.env.possible_agents:
            if agent not in info:
                info[agent] = {}
            info[agent]["task_completion"] = self.task_completion
            info[agent]["task_id"] = self.task
        return obs, rew, term, trunc, info

    def observation(self, observation):
        """
        Adds the goal encoding to the observation.
        Override to add additional task-specific observations.
        Returns a modified observation.

        TODO: Complete this implementation and find way to support centralized encodings
        TODO: Support PettingZoo environments
        TODO: Use TaskSpace for encodings?
        """
        # Add goal to observation
        goal_encoding = self._encode_goal()
        if goal_encoding is not None:
            observation['goal'] = goal_encoding
        return observation

    def _encode_goal(self):
        """
        Implement this method to indicate which task is selected to the agent.

        Returns: Numpy array encoding the goal.
        """
        return None

    def _task_completion(self, obs, rew, term, trunc, info) -> float:
        """
        Implement this function to indicate whether the selected task has been completed.
        This can be determined using the observation, rewards, term, trunc, info or internal
        values from the environment. Intended to be used for automatic curricula.

        Returns a boolean or float value indicating binary completion or scalar degree of completion.
        # TODO: Support PettingZoo environments
        """
        return 0.0
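

# ---------------------------------------------------------------------------
# Illustrative sketch (not part of this module): a minimal PettingZooTaskWrapper
# subclass for a hypothetical parallel environment that exposes a
# `set_map(map_name)` method and reports success through a per-agent
# info["success"] flag. Both the method name and the info key are assumptions
# made purely for illustration.
# ---------------------------------------------------------------------------
class MapSelectionTaskWrapper(PettingZooTaskWrapper):
    """Sketch: each task selects the map that the parallel environment loads."""

    def change_task(self, new_task):
        # Record the task and forward it through the assumed environment API.
        self.task = new_task
        self.env.unwrapped.set_map(new_task)

    def _task_completion(self, obs, rew, term, trunc, info) -> float:
        # Report success if every agent's info dict flags the task as solved.
        if not info:
            return 0.0
        return 1.0 if all(agent_info.get("success", False) for agent_info in info.values()) else 0.0


# Example usage (the environment constructor is hypothetical):
# env = MapSelectionTaskWrapper(SomeParallelEnv())
# obs, info = env.reset(new_task="map_1")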