class ExpandingBox(Curriculum):
    """Curriculum that samples a task range which widens as the agent succeeds.

    The sampled range starts as a band of one step on either side of the
    midpoint of the task space. Each time ``required_successes`` consecutive
    progress values exceed ``success_threshold``, the band grows by one step
    on each side, clamped to the bounds of the task space. Once both bounds
    are reached, further progress updates are ignored.

    NOTE(review): the outer bounds are read from ``gym_space.low[0]`` and
    ``gym_space.high[1]``, which presumably assumes a 2-element Box encoding
    ``(range_min, range_max)`` -- confirm against the task space definition.
    """

    def __init__(
        self,
        *curriculum_args,
        steps: int = 5,
        success_threshold: float = 0.75,
        required_successes: int = 10,
        **curriculum_kwargs,
    ):
        """Set up the initial range around the midpoint of the task space.

        :param curriculum_args: Positional arguments forwarded to the parent class.
        :param steps: Number of steps into which half of the full range is
            divided; determines how much each bound moves per expansion.
        :param success_threshold: Progress must be strictly greater than this
            value to count as a success.
        :param required_successes: Number of consecutive successes needed
            before the range is expanded.
        :param curriculum_kwargs: Keyword arguments forwarded to the parent class.
        :raises ValueError: If ``steps`` is smaller than 1.
        """
        super().__init__(*curriculum_args, **curriculum_kwargs)
        assert isinstance(self.task_space.gym_space, Box), "ExpandingBox only supports Box task spaces."

        # A zero step count would divide by zero below and a negative one
        # would produce an inverted range, so reject both up front.
        if steps < 1:
            raise ValueError(f"steps must be at least 1, got {steps}.")

        self.success_threshold = success_threshold
        self.required_successes = required_successes

        space = self.task_space.gym_space
        full_range = space.high[1] - space.low[0]
        half_range = full_range / 2.0
        midpoint = space.low[0] + half_range

        # Each expansion moves both bounds outwards by this amount.
        self.step_size = half_range / steps
        self.max_range = np.array([midpoint - self.step_size, midpoint + self.step_size])
        self.consecutive_successes = 0
        self.max_reached = False

    def update_task_progress(
        self,
        task: typing.Any,
        progress: Union[float, bool],
        env_id: typing.Optional[int] = None,
    ) -> None:
        """Record one progress value and expand the range after enough successes.

        :param task: The task the progress refers to (not used by this curriculum).
        :param progress: Progress value; counts as a success when strictly
            greater than ``success_threshold``.
        :param env_id: Identifier of the reporting environment (not used here).
        """
        # Nothing left to expand once the full range has been reached.
        if self.max_reached:
            return

        # Any result at or below the threshold resets the streak.
        if progress > self.success_threshold:
            self.consecutive_successes += 1
        else:
            self.consecutive_successes = 0

        if self.consecutive_successes < self.required_successes:
            return

        # Enough successes in a row: widen the range, clamped to the space bounds.
        space = self.task_space.gym_space
        new_low = max(self.max_range[0] - self.step_size, space.low[0])
        new_high = min(self.max_range[1] + self.step_size, space.high[1])
        self.max_range = np.array([new_low, new_high])
        self.consecutive_successes = 0

        # The clamping above returns the bound itself when it is hit, so exact
        # equality is a reliable test here.
        if new_low == space.low[0] and new_high == space.high[1]:
            self.max_reached = True

    def sample(self, k: int = 1) -> Union[List, Any]:
        """Sample k tasks from the curriculum.

        Every sampled task is the current ``[range_min, range_max]`` array.

        :param k: Number of tasks to return.
        :return: List of ``k`` arrays holding the current range.
        """
        # Hand out copies so that a caller modifying a sampled task in place
        # cannot change the curriculum's internal range or the other samples.
        return [self.max_range.copy() for _ in range(k)]

    def log_metrics(self, writer, logs, step=None, log_n_tasks=1):
        """Add the current range bounds to the logs and delegate to the parent.

        :param writer: Passed through to the parent ``log_metrics``.
        :param logs: List of ``(name, value)`` tuples to extend, or ``None``
            to start a new list.
        :param step: Passed through to the parent ``log_metrics``.
        :param log_n_tasks: Passed through to the parent ``log_metrics``.
        :return: Whatever the parent ``log_metrics`` returns.
        """
        logs = [] if logs is None else logs
        logs.append(("range_min", self.max_range[0]))
        logs.append(("range_max", self.max_range[1]))
        return super().log_metrics(writer, logs, step=step, log_n_tasks=log_n_tasks)