dora-rs
/
dora

 
			
							"""Module for integrating gym environments with Dora nodes.

This module provides functionality for running gym environments as Dora nodes,
including replay capabilities for recorded robot actions.
"""

import time
from pathlib import Path

import gym_dora  # noqa: F401
import gymnasium as gym
import pandas as pd

# Build the Dora Aloha environment. The passive environment checker is turned
# off and the episode length is capped at 10000 steps.
env = gym.make(
    "gym_dora/DoraAloha-v0",
    max_episode_steps=10000,
    disable_env_checker=True,
)
# NOTE(review): gymnasium's Env.reset() conventionally returns an
# (observation, info) pair, so `observation` may hold that pair rather than the
# bare observation -- confirm against gym_dora. It is harmless today because
# ReplayPolicy.select_action ignores its `obs` argument.
observation = env.reset()


class ReplayPolicy:
    """A policy class for replaying recorded robot actions.

    The recorded actions of a single episode are loaded from parquet files and
    handed out one at a time, sleeping between consecutive actions so that the
    replay follows the timing of the original recording.
    """

    def __init__(self, example_path, epidode=0):
        """Initialize the replay policy.

        Args:
            example_path: Path to the directory containing ``action.parquet``
                and ``episode_index.parquet``.
            epidode: Index of the episode to replay. (The parameter keeps its
                historical spelling so existing keyword callers keep working.)

        Raises:
            IndexError: If no recorded action belongs to the requested episode.

        """
        df_action = pd.read_parquet(example_path / "action.parquet")
        df_episode_index = pd.read_parquet(example_path / "episode_index.parquet")
        # Tag every action with the most recent episode index recorded at or
        # before the action's timestamp.
        self.df = pd.merge_asof(
            df_action[["timestamp_utc", "action"]],
            df_episode_index[["timestamp_utc", "episode_index"]],
            on="timestamp_utc",
            direction="backward",
        )
        # self.df["episode_index"] = self.df["episode_index"].map(lambda x: x[0])
        self.df = self.df[self.df["episode_index"] == epidode]
        if self.df.empty:
            # Same exception type the bare ``.iloc[0]`` below would raise, but
            # with a message that points at the actual problem.
            raise IndexError(
                f"no recorded actions found for episode {epidode} in {example_path}",
            )
        self.current_time = self.df["timestamp_utc"].iloc[0]
        self.topic = "action"
        self.index = 0
        self.finished = False

    def select_action(self, obs):
        """Select the next action to replay.

        Actions are returned in recording order, starting with the first
        recorded one. Before returning, the call sleeps for the time that
        elapsed between the previously returned action and this one in the
        recording. Once the last action has been handed out, further calls keep
        returning that last action with ``finished`` set to ``True``.

        Args:
            obs: Current observation from the environment (unused)

        Returns:
            tuple: (action, finished) where action is the next action to take
                  and finished is True once the returned action is the last
                  recorded one

        """
        total = len(self.df)
        # Clamp so that calls made after the replay is exhausted re-read the
        # last row instead of indexing past the end of the frame.
        position = min(self.index, total - 1)
        row = self.df.iloc[position]

        # total_seconds() covers the whole gap; ``.microseconds`` would only be
        # the sub-second component and silently drop whole seconds.
        pause = (row["timestamp_utc"] - self.current_time).total_seconds()
        self.current_time = row["timestamp_utc"]
        if pause > 0:
            time.sleep(pause)

        # Advance only after reading, so the first recorded action is replayed
        # and the last one is reported together with finished=True.
        self.index = position + 1
        self.finished = self.index >= total
        return row[self.topic], self.finished


# policy = ReplayPolicy(
    # Path(
        # "/home/rcadene/dora-aloha/aloha/graphs/out/018fa076-ad19-7c77-afa4-49f7f072e86f"
    # )
# )

# Replay the recorded episode against the environment. Everything runs inside
# try/finally so the environment is closed even if loading the recording or
# stepping the environment raises.
try:
    policy = ReplayPolicy(
        Path(
            "/home/rcadene/dora-aloha/aloha/graphs/out/018fa4ad-5942-7235-93d3-3efebe9b8a12",
        ),
    )

    done = False
    while not done:
        actions, finished = policy.select_action(observation)

        observation, reward, terminated, truncated, info = env.step(actions)
        if terminated:
            print(observation, reward, terminated, truncated, info, flush=True)
        # Stop when the environment ends the episode or the recording is
        # exhausted.
        done = bool(terminated or truncated or finished)
finally:
    env.close()