Reinforcement Learning

The following is a simple example of training a reinforcement learning agent on the Lwmr environment using Stable Baselines3’s PPO implementation.

from dataclasses import dataclass

import gymnasium as gym
import lwmr  # noqa: F401  # registers "lwmr/Lwmr-v0"
import tyro
from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.vec_env import SubprocVecEnv


# Command-line configuration for this example, turned into CLI flags by
# `tyro.cli(Args)` in the __main__ block. The class docstring below is shown
# as the description in `--help`, so keep it short and user-facing.
#
# Field overview (as used by the script body):
#   quiet            - forwarded to the training environments and used to pick
#                      the SB3 verbosity level (quiet -> verbose=0, else 1).
#   seed             - random seed exposed on the command line.
#   device           - device string handed to PPO / PPO.load (e.g. "cpu").
#   n_envs           - number of parallel environments built by make_vec_env.
#   n_steps          - PPO `n_steps` argument.
#   total_timesteps  - budget passed to `model.learn`.
#   model_path       - where the trained model is saved and later loaded from.
#   eval_steps       - number of iterations of the evaluation loop.
@dataclass
class Args:
    """Example using Lwmr environment."""

    quiet: bool = True
    seed: int = 47
    device: str = "cpu"

    n_envs: int = 8
    n_steps: int = 512
    total_timesteps: int = 1_000_000

    model_path: str = "ppo_lwmr"
    eval_steps: int = 100


# SubprocVecEnv runs each environment in a worker process. With the
# spawn/forkserver start methods those workers re-import this module, so all
# side-effecting code must stay behind the __main__ guard.
if __name__ == "__main__":
    args = tyro.cli(Args)

    # SB3 verbosity: 0 = silent, 1 = info.
    verbose = 0 if args.quiet else 1

    # ---- Training -------------------------------------------------------
    env_kwargs = {"quiet": args.quiet, "render_mode": "none"}
    train_env = make_vec_env(
        "lwmr/Lwmr-v0",
        n_envs=args.n_envs,
        seed=args.seed,  # make the --seed option actually take effect
        vec_env_cls=SubprocVecEnv,
        env_kwargs=env_kwargs,
    )
    try:
        model = PPO(
            "MlpPolicy",
            train_env,
            n_steps=args.n_steps,
            seed=args.seed,
            verbose=verbose,
            device=args.device,
        )
        model.learn(total_timesteps=args.total_timesteps, progress_bar=True)
        model.save(args.model_path)
    finally:
        # Shut down the worker processes before starting the viewer; without
        # this they linger until the interpreter exits.
        train_env.close()

    # ---- Evaluation -----------------------------------------------------
    eval_env = gym.make("lwmr/Lwmr-v0", quiet=True, render_mode="viser")
    try:
        model = PPO.load(args.model_path, device=args.device)

        # Gymnasium's reset() returns (observation, info).
        obs, _info = eval_env.reset(seed=args.seed)
        for _ in range(args.eval_steps):
            action, _state = model.predict(obs, deterministic=True)
            # Gymnasium's step() returns
            # (observation, reward, terminated, truncated, info).
            obs, _reward, terminated, truncated, _info = eval_env.step(action)
            eval_env.render()
            if terminated or truncated:
                # Stepping a finished episode is undefined; start a new one.
                obs, _info = eval_env.reset()
    finally:
        eval_env.close()
!python ./basic_ppo-sb3.py --help
usage: ./basic_ppo-sb3.py [-h] [OPTIONS]



Example using Lwmr environment.



 options ──────────────────────────────────────────────╮

 -h, --help             show this help message and exit 

 --quiet, --no-quiet    (default: True)                 

 --seed INT             (default: 47)                   

 --device STR           (default: cpu)                  

 --n-envs INT           (default: 8)                    

 --n-steps INT          (default: 512)                  

 --total-timesteps INT  (default: 1000000)              

 --model-path STR       (default: ppo_lwmr)             

 --eval-steps INT       (default: 100)                  

────────────────────────────────────────────────────────
!python ./basic_ppo-sb3.py --quiet --total-timesteps 10000
2026-06-01 08:59:37.692 | WARNING  | lwmr.envs.plane:__init__:65 - CUDA device specified but not available, falling back to CPU

2026-06-01 08:59:37.694 | WARNING  | lwmr.envs.plane:__init__:65 - CUDA device specified but not available, falling back to CPU

2026-06-01 08:59:37.695 | WARNING  | lwmr.envs.plane:__init__:65 - CUDA device specified but not available, falling back to CPU

2026-06-01 08:59:37.696 | WARNING  | lwmr.envs.plane:__init__:65 - CUDA device specified but not available, falling back to CPU

2026-06-01 08:59:37.697 | WARNING  | lwmr.envs.plane:__init__:65 - CUDA device specified but not available, falling back to CPU

2026-06-01 08:59:37.699 | WARNING  | lwmr.envs.plane:__init__:65 - CUDA device specified but not available, falling back to CPU

2026-06-01 08:59:37.703 | WARNING  | lwmr.envs.plane:__init__:65 - CUDA device specified but not available, falling back to CPU

2026-06-01 08:59:37.704 | WARNING  | lwmr.envs.plane:__init__:65 - CUDA device specified but not available, falling back to CPU


   0% ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 0/10,000  [ 0:00:00 < -:--:-- , ? it/s ]

   0% ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 0/10,000  [ 0:00:00 < -:--:-- , ? it/s ]

   0% ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 0/10,000  [ 0:00:00 < -:--:-- , ? it/s ]

   0% ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 8/10,000  [ 0:00:00 < -:--:-- , ? it/s ]

   1% ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 104/10,000  [ 0:00:00 < 0:00:12 , 888 it/s ]

   2% ━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 192/10,000  [ 0:00:00 < 0:00:12 , 869 it/s ]

   3% ━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 280/10,000  [ 0:00:00 < 0:00:12 , 863 it/s ]

   4% ━━━━━━━━━━━━━━━━━━━━━━━━━━━ 368/10,000  [ 0:00:00 < 0:00:12 , 858 it/s ]

   5% ━━━━━━━━━━━━━━━━━━━━━━━━━━━ 456/10,000  [ 0:00:00 < 0:00:12 , 855 it/s ]

   5% ━━━━━━━━━━━━━━━━━━━━━━━━━━━ 544/10,000  [ 0:00:00 < 0:00:12 , 854 it/s ]

   6% ━━━━━━━━━━━━━━━━━━━━━━━━━━━ 632/10,000  [ 0:00:01 < 0:00:11 , 855 it/s ]

   7% ━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 720/10,000  [ 0:00:01 < 0:00:11 , 849 it/s ]

   9% ━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 896/10,000  [ 0:00:01 < 0:00:11 , 850 it/s ]

  10% ━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 984/10,000  [ 0:00:01 < 0:00:11 , 851 it/s ]

  11% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 1,072/10,000  [ 0:00:01 < 0:00:11 , 850 it/s ]

  12% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 1,160/10,000  [ 0:00:01 < 0:00:11 , 851 it/s ]

  12% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 1,248/10,000  [ 0:00:01 < 0:00:11 , 849 it/s ]

  13% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 1,336/10,000  [ 0:00:01 < 0:00:11 , 849 it/s ]

  14% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 1,424/10,000  [ 0:00:01 < 0:00:11 , 847 it/s ]

  15% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 1,512/10,000  [ 0:00:02 < 0:00:11 , 847 it/s ]

  16% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 1,600/10,000  [ 0:00:02 < 0:00:10 , 848 it/s ]

  17% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 1,688/10,000  [ 0:00:02 < 0:00:10 , 847 it/s ]

  18% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 1,776/10,000  [ 0:00:02 < 0:00:10 , 848 it/s ]

  19% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 1,864/10,000  [ 0:00:02 < 0:00:10 , 847 it/s ]

  20% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 1,952/10,000  [ 0:00:02 < 0:00:10 , 847 it/s ]

  20% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 2,040/10,000  [ 0:00:02 < 0:00:10 , 847 it/s ]

  20% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 2,040/10,000  [ 0:00:02 < 0:00:10 , 847 it/s ]

  21% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 2,128/10,000  [ 0:00:02 < 0:00:10 , 810 it/s ]

  22% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 2,216/10,000  [ 0:00:02 < 0:00:10 , 811 it/s ]

  23% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 2,304/10,000  [ 0:00:03 < 0:00:10 , 812 it/s ]

  24% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 2,392/10,000  [ 0:00:03 < 0:00:10 , 813 it/s ]

  25% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 2,480/10,000  [ 0:00:03 < 0:00:10 , 815 it/s ]

  26% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 2,568/10,000  [ 0:00:03 < 0:00:10 , 815 it/s ]

  27% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 2,656/10,000  [ 0:00:03 < 0:00:10 , 816 it/s ]

  27% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 2,744/10,000  [ 0:00:03 < 0:00:09 , 816 it/s ]

  28% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 2,832/10,000  [ 0:00:03 < 0:00:09 , 817 it/s ]

  29% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 2,920/10,000  [ 0:00:03 < 0:00:09 , 817 it/s ]

  30% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 3,008/10,000  [ 0:00:03 < 0:00:09 , 819 it/s ]

  31% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 3,096/10,000  [ 0:00:04 < 0:00:09 , 820 it/s ]

  32% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 3,184/10,000  [ 0:00:04 < 0:00:09 , 820 it/s ]

  33% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 3,272/10,000  [ 0:00:04 < 0:00:09 , 821 it/s ]

  34% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 3,360/10,000  [ 0:00:04 < 0:00:09 , 822 it/s ]

  34% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 3,448/10,000  [ 0:00:04 < 0:00:08 , 822 it/s ]

  35% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 3,536/10,000  [ 0:00:04 < 0:00:08 , 823 it/s ]

  36% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 3,624/10,000  [ 0:00:04 < 0:00:08 , 823 it/s ]

  37% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 3,712/10,000  [ 0:00:04 < 0:00:08 , 824 it/s ]

  38% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 3,800/10,000  [ 0:00:04 < 0:00:08 , 824 it/s ]

  39% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 3,888/10,000  [ 0:00:04 < 0:00:08 , 824 it/s ]

  40% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 3,976/10,000  [ 0:00:05 < 0:00:08 , 825 it/s ]

  41% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 4,064/10,000  [ 0:00:05 < 0:00:08 , 825 it/s ]

  41% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 4,064/10,000  [ 0:00:05 < 0:00:08 , 825 it/s ]

  41% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 4,064/10,000  [ 0:00:05 < 0:00:08 , 825 it/s ]

  41% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 4,064/10,000  [ 0:00:05 < 0:00:08 , 825 it/s ]

  41% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 4,064/10,000  [ 0:00:05 < 0:00:08 , 825 it/s ]

  41% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 4,064/10,000  [ 0:00:05 < 0:00:08 , 825 it/s ]

  41% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 4,064/10,000  [ 0:00:05 < 0:00:08 , 825 it/s ]

  41% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 4,064/10,000  [ 0:00:05 < 0:00:08 , 825 it/s ]

  41% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 4,064/10,000  [ 0:00:06 < 0:00:08 , 825 it/s ]

  42% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 4,152/10,000  [ 0:00:06 < 0:00:09 , 704 it/s ]

  42% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 4,240/10,000  [ 0:00:06 < 0:00:09 , 707 it/s ]

  43% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 4,328/10,000  [ 0:00:06 < 0:00:09 , 709 it/s ]

  44% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 4,416/10,000  [ 0:00:06 < 0:00:08 , 711 it/s ]

  45% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 4,504/10,000  [ 0:00:06 < 0:00:08 , 713 it/s ]

  46% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 4,592/10,000  [ 0:00:06 < 0:00:08 , 715 it/s ]

  47% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 4,680/10,000  [ 0:00:06 < 0:00:08 , 717 it/s ]

  48% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 4,768/10,000  [ 0:00:06 < 0:00:08 , 719 it/s ]

  49% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 4,856/10,000  [ 0:00:06 < 0:00:08 , 721 it/s ]

  49% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 4,944/10,000  [ 0:00:07 < 0:00:07 , 723 it/s ]

  50% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 5,032/10,000  [ 0:00:07 < 0:00:07 , 724 it/s ]

  51% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 5,120/10,000  [ 0:00:07 < 0:00:07 , 726 it/s ]

  52% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 5,208/10,000  [ 0:00:07 < 0:00:07 , 728 it/s ]

  53% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 5,296/10,000  [ 0:00:07 < 0:00:07 , 730 it/s ]

  54% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 5,384/10,000  [ 0:00:07 < 0:00:07 , 731 it/s ]

  55% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 5,472/10,000  [ 0:00:07 < 0:00:07 , 733 it/s ]

  56% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 5,560/10,000  [ 0:00:07 < 0:00:07 , 734 it/s ]

  56% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 5,648/10,000  [ 0:00:07 < 0:00:06 , 735 it/s ]

  57% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 5,736/10,000  [ 0:00:08 < 0:00:06 , 737 it/s ]

  58% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 5,824/10,000  [ 0:00:08 < 0:00:06 , 738 it/s ]

  59% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 5,912/10,000  [ 0:00:08 < 0:00:06 , 740 it/s ]

  60% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 6,000/10,000  [ 0:00:08 < 0:00:06 , 741 it/s ]

  61% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 6,088/10,000  [ 0:00:08 < 0:00:06 , 742 it/s ]

  61% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 6,088/10,000  [ 0:00:08 < 0:00:06 , 742 it/s ]

  62% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 6,176/10,000  [ 0:00:08 < 0:00:06 , 732 it/s ]

  63% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 6,264/10,000  [ 0:00:08 < 0:00:06 , 734 it/s ]

  64% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 6,352/10,000  [ 0:00:08 < 0:00:05 , 735 it/s ]

  64% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 6,440/10,000  [ 0:00:08 < 0:00:05 , 736 it/s ]

  65% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 6,528/10,000  [ 0:00:09 < 0:00:05 , 737 it/s ]

  66% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 6,616/10,000  [ 0:00:09 < 0:00:05 , 738 it/s ]

  67% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 6,704/10,000  [ 0:00:09 < 0:00:05 , 739 it/s ]

  68% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 6,792/10,000  [ 0:00:09 < 0:00:05 , 741 it/s ]

  69% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 6,880/10,000  [ 0:00:09 < 0:00:05 , 742 it/s ]

  70% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 6,968/10,000  [ 0:00:09 < 0:00:05 , 743 it/s ]

  71% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 7,056/10,000  [ 0:00:09 < 0:00:04 , 744 it/s ]

  71% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 7,144/10,000  [ 0:00:09 < 0:00:04 , 745 it/s ]

  72% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 7,232/10,000  [ 0:00:09 < 0:00:04 , 746 it/s ]

  73% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 7,320/10,000  [ 0:00:10 < 0:00:04 , 747 it/s ]

  74% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 7,408/10,000  [ 0:00:10 < 0:00:04 , 748 it/s ]

  75% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 7,496/10,000  [ 0:00:10 < 0:00:04 , 749 it/s ]

  76% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 7,584/10,000  [ 0:00:10 < 0:00:04 , 750 it/s ]

  77% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 7,672/10,000  [ 0:00:10 < 0:00:04 , 751 it/s ]

  78% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 7,760/10,000  [ 0:00:10 < 0:00:03 , 752 it/s ]

  78% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 7,848/10,000  [ 0:00:10 < 0:00:03 , 753 it/s ]

  79% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 7,936/10,000  [ 0:00:10 < 0:00:03 , 754 it/s ]

  80% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 8,024/10,000  [ 0:00:10 < 0:00:03 , 755 it/s ]

  81% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 8,112/10,000  [ 0:00:10 < 0:00:03 , 755 it/s ]

  81% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 8,112/10,000  [ 0:00:11 < 0:00:03 , 755 it/s ]

  81% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 8,112/10,000  [ 0:00:11 < 0:00:03 , 755 it/s ]

  81% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 8,112/10,000  [ 0:00:11 < 0:00:03 , 755 it/s ]

  81% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 8,112/10,000  [ 0:00:11 < 0:00:03 , 755 it/s ]

  81% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 8,112/10,000  [ 0:00:11 < 0:00:03 , 755 it/s ]

  81% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 8,112/10,000  [ 0:00:11 < 0:00:03 , 755 it/s ]

  81% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 8,112/10,000  [ 0:00:11 < 0:00:03 , 755 it/s ]

  81% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 8,112/10,000  [ 0:00:11 < 0:00:03 , 755 it/s ]

  81% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 8,112/10,000  [ 0:00:11 < 0:00:03 , 755 it/s ]

  82% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 8,200/10,000  [ 0:00:12 < 0:00:03 , 699 it/s ]

  83% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 8,296/10,000  [ 0:00:12 < 0:00:03 , 701 it/s ]

  84% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 8,384/10,000  [ 0:00:12 < 0:00:03 , 702 it/s ]

  85% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 8,472/10,000  [ 0:00:12 < 0:00:03 , 704 it/s ]

  86% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 8,560/10,000  [ 0:00:12 < 0:00:03 , 705 it/s ]

  86% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 8,648/10,000  [ 0:00:12 < 0:00:02 , 706 it/s ]

  87% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 8,736/10,000  [ 0:00:12 < 0:00:02 , 707 it/s ]

  88% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 8,824/10,000  [ 0:00:12 < 0:00:02 , 708 it/s ]

  89% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 8,912/10,000  [ 0:00:12 < 0:00:02 , 709 it/s ]

  90% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 9,000/10,000  [ 0:00:12 < 0:00:02 , 710 it/s ]

  91% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 9,088/10,000  [ 0:00:13 < 0:00:02 , 711 it/s ]

  92% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 9,176/10,000  [ 0:00:13 < 0:00:02 , 712 it/s ]

  93% ━━━━━━━━━━━━━━━━━━━━━━━━━ 9,264/10,000  [ 0:00:13 < 0:00:02 , 713 it/s ]

  94% ━━━━━━━━━━━━━━━━━━━━━━━━━ 9,352/10,000  [ 0:00:13 < 0:00:01 , 715 it/s ]

  94% ━━━━━━━━━━━━━━━━━━━━━━━━━ 9,440/10,000  [ 0:00:13 < 0:00:01 , 716 it/s ]

  96% ━━━━━━━━━━━━━━━━━━━━━━━━━ 9,616/10,000  [ 0:00:13 < 0:00:01 , 718 it/s ]

  96% ━━━━━━━━━━━━━━━━━━━━━━━━━ 9,616/10,000  [ 0:00:13 < 0:00:01 , 718 it/s ]

  97% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 9,704/10,000  [ 0:00:13 < 0:00:01 , 718 it/s ]

  98% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 9,792/10,000  [ 0:00:13 < 0:00:01 , 719 it/s ]

  99% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 9,880/10,000  [ 0:00:14 < 0:00:01 , 720 it/s ]

 100% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 9,968/10,000  [ 0:00:14 < 0:00:01 , 721 it/s ]

 100% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 10,056/10,000  [ 0:00:14 < 0:00:00 , 722 it/s ]

 100% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 10,144/10,000  [ 0:00:14 < 0:00:00 , 723 it/s ]

 100% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 10,232/10,000  [ 0:00:14 < 0:00:00 , 724 it/s ]

 100% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 10,232/10,000  [ 0:00:14 < 0:00:00 , 724 it/s ]

 100% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 10,320/10,000  [ 0:00:14 < 0:00:00 , 717 it/s ]

 100% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 10,408/10,000  [ 0:00:14 < 0:00:00 , 718 it/s ]

 100% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 10,496/10,000  [ 0:00:14 < 0:00:00 , 719 it/s ]

 100% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 10,584/10,000  [ 0:00:14 < 0:00:00 , 719 it/s ]

 100% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 10,672/10,000  [ 0:00:14 < 0:00:00 , 720 it/s ]

 100% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 10,760/10,000  [ 0:00:14 < 0:00:00 , 721 it/s ]

 100% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 10,848/10,000  [ 0:00:14 < 0:00:00 , 722 it/s ]

 100% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 10,936/10,000  [ 0:00:14 < 0:00:00 , 723 it/s ]

 100% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 11,024/10,000  [ 0:00:14 < 0:00:00 , 724 it/s ]

 100% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 11,112/10,000  [ 0:00:14 < 0:00:00 , 724 it/s ]

 100% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 11,200/10,000  [ 0:00:14 < 0:00:00 , 725 it/s ]

 100% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 11,288/10,000  [ 0:00:14 < 0:00:00 , 726 it/s ]

 100% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 11,376/10,000  [ 0:00:14 < 0:00:00 , 727 it/s ]

 100% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 11,464/10,000  [ 0:00:14 < 0:00:00 , 728 it/s ]

 100% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 11,552/10,000  [ 0:00:14 < 0:00:00 , 728 it/s ]

 100% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 11,640/10,000  [ 0:00:14 < 0:00:00 , 729 it/s ]

 100% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 11,728/10,000  [ 0:00:14 < 0:00:00 , 730 it/s ]

 100% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 11,816/10,000  [ 0:00:14 < 0:00:00 , 730 it/s ]

 100% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 11,904/10,000  [ 0:00:14 < 0:00:00 , 731 it/s ]

 100% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 11,992/10,000  [ 0:00:14 < 0:00:00 , 732 it/s ]

 100% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 12,080/10,000  [ 0:00:14 < 0:00:00 , 733 it/s ]

 100% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 12,168/10,000  [ 0:00:14 < 0:00:00 , 733 it/s ]

 100% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 12,256/10,000  [ 0:00:14 < 0:00:00 , 734 it/s ]

 100% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 12,256/10,000  [ 0:00:14 < 0:00:00 , 734 it/s ]

 100% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 12,256/10,000  [ 0:00:14 < 0:00:00 , 734 it/s ]

 100% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 12,256/10,000  [ 0:00:14 < 0:00:00 , 734 it/s ]

 100% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 12,256/10,000  [ 0:00:14 < 0:00:00 , 734 it/s ]

 100% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 12,256/10,000  [ 0:00:14 < 0:00:00 , 734 it/s ]

 100% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 12,256/10,000  [ 0:00:14 < 0:00:00 , 734 it/s ]

 100% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 12,256/10,000  [ 0:00:14 < 0:00:00 , 734 it/s ]

 100% ━━━━━━━━━━━━━━━━━━━━━━━━━━ 12,288/10,000  [ 0:00:14 < 0:00:00 , 701 it/s ]

2026-06-01 08:59:56.635 | WARNING  | lwmr.envs.plane:__init__:65 - CUDA device specified but not available, falling back to CPU


  0%|                                                   | 0/100 [00:00<?, ?it/s]

  1%|▍                                          | 1/100 [00:00<00:21,  4.68it/s]

 13%|█████▍                                    | 13/100 [00:00<00:01, 50.78it/s]

 27%|███████████▎                              | 27/100 [00:00<00:00, 81.02it/s]

 41%|█████████████████▏                        | 41/100 [00:00<00:00, 98.52it/s]

 55%|██████████████████████▌                  | 55/100 [00:00<00:00, 109.39it/s]

 68%|███████████████████████████▉             | 68/100 [00:00<00:00, 115.01it/s]

 82%|█████████████████████████████████▌       | 82/100 [00:00<00:00, 119.97it/s]

 95%|██████████████████████████████████████▉  | 95/100 [00:00<00:00, 122.84it/s]

100%|████████████████████████████████████████| 100/100 [00:00<00:00, 101.64it/s]

(viser) Server stopped
import gymnasium as gym
import lwmr  # noqa: F401
from stable_baselines3 import PPO
from tqdm.auto import trange
/Users/ajcd2020/Documents/Repositories/anthonyjclark/simer-tutorial/2026-icra/.venv/lib/python3.14/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html
  from .autonotebook import tqdm as notebook_tqdm
# Replay a previously trained policy in the viser-rendered environment.
model_path = "ppo_lwmr.zip"
device = "cpu"
eval_steps = 200

env = gym.make("lwmr/Lwmr-v0", quiet=True, render_mode="viser")
try:
    model = PPO.load(model_path, device=device)

    # Gymnasium's reset() returns (observation, info).
    obs, info = env.reset()
    for _ in trange(eval_steps):
        action, _ = model.predict(obs, deterministic=True)
        # Gymnasium's step() returns
        # (observation, reward, terminated, truncated, info).
        obs, reward, terminated, truncated, info = env.step(action)
        env.render()
        if terminated or truncated:
            # Stepping a finished episode is undefined; start a new one.
            obs, info = env.reset()
finally:
    # Always stop the environment (and its viewer), even if the loop fails.
    env.close()
2026-06-01 09:00:01.170 | WARNING  | lwmr.envs.plane:__init__:65 - CUDA device specified but not available, falling back to CPU


  0%|          | 0/200 [00:00<?, ?it/s]
  0%|          | 1/200 [00:00<00:43,  4.56it/s]
  7%|▋         | 14/200 [00:00<00:03, 52.66it/s]
 14%|█▎        | 27/200 [00:00<00:02, 79.99it/s]
 20%|██        | 40/200 [00:00<00:01, 95.57it/s]
 26%|██▋       | 53/200 [00:00<00:01, 106.58it/s]
 34%|███▎      | 67/200 [00:00<00:01, 114.36it/s]
 40%|████      | 80/200 [00:00<00:01, 118.89it/s]
 46%|████▋     | 93/200 [00:00<00:00, 121.62it/s]
 54%|█████▎    | 107/200 [00:01<00:00, 124.35it/s]
 60%|██████    | 120/200 [00:01<00:00, 125.98it/s]
 67%|██████▋   | 134/200 [00:01<00:00, 127.21it/s]
 74%|███████▍  | 148/200 [00:01<00:00, 128.22it/s]
 80%|████████  | 161/200 [00:01<00:00, 125.72it/s]
 87%|████████▋ | 174/200 [00:01<00:00, 125.94it/s]
 94%|█████████▎| 187/200 [00:01<00:00, 126.98it/s]
100%|██████████| 200/200 [00:01<00:00, 112.73it/s]
(viser) Server stopped
# !python -m http.server 8047 1>/dev/null 2>&1
# http://localhost:8047/?playbackPath=./recordings/lwmr_plane.viser