Add PID-PI loop functionality (#561)

- Adds PID-PI position/velocity loops. - EMA filter is moved to the outer position loop in cascaded control.

Add PID-PI loop functionality (#561)
89a953c5 · Ruben D'Sa · GitHub · fe8373df · 89a953c5 · 89a953c5
Unverified Commit 89a953c5 authored 4 years ago by Ruben D'Sa Committed by GitHub 4 years ago
--- a/mujoco_py/mjpid.pyx
+++ b/mujoco_py/mjpid.pyx
@@ -48,11 +48,11 @@ cdef PIDOutput _pid(PIDParameters parameters):

    cdef double error = parameters.setpoint - parameters.feedback

-    # clamp error that's within the error deadband
+    # Clamp error that's within the error deadband
    if fabs(error) < parameters.error_deadband:
        error = 0.0

-    # compute derivative error
+    # Compute derivative error
    cdef double derivative_error = (error - parameters.previous_errors.error) / parameters.dt_seconds

    derivative_error = (1.0 - parameters.derivative_gain_smoothing) * parameters.previous_errors.derivative_error + \
@@ -60,7 +60,7 @@ cdef PIDOutput _pid(PIDParameters parameters):

    cdef double derivative_error_term = derivative_error * parameters.derivative_time_const

-    # update and clamp integral error
+    # Update and clamp integral error
    integral_error = parameters.previous_errors.integral_error
    integral_error += error * parameters.dt_seconds
    integral_error = fmax(-parameters.integral_max_clamp, fmin(parameters.integral_max_clamp, integral_error))
@@ -83,8 +83,8 @@ cdef enum USER_DEFINED_ACTUATOR_PARAMS:

 cdef enum USER_DEFINED_CONTROLLER_DATA_PID:
    IDX_INTEGRAL_ERROR = 0,
-    IDX_LAST_ERROR = 1,
-    IDX_DERIVATIVE_ERROR_LAST = 2,
+    IDX_ERROR = 1,
+    IDX_DERIVATIVE_ERROR = 2,
    NUM_USER_DATA_PER_ACT_PID = 3,

 cdef mjtNum c_pid_bias(const mjModel*m, const mjData*d, int id):
@@ -105,13 +105,13 @@ cdef mjtNum c_pid_bias(const mjModel*m, const mjData*d, int id):
        derivative_gain_smoothing=m.actuator_gainprm[id * NGAIN + IDX_DERIVATIVE_GAIN_SMOOTHING],
        derivative_time_const=m.actuator_gainprm[id * NGAIN + IDX_DERIVATIVE_TIME_CONSTANT],
        previous_errors=PIDErrors(
-            error=d.userdata[id * NUM_USER_DATA_PER_ACT + IDX_LAST_ERROR],
-            derivative_error=d.userdata[id * NUM_USER_DATA_PER_ACT + IDX_DERIVATIVE_ERROR_LAST],
+            error=d.userdata[id * NUM_USER_DATA_PER_ACT + IDX_ERROR],
+            derivative_error=d.userdata[id * NUM_USER_DATA_PER_ACT + IDX_DERIVATIVE_ERROR],
            integral_error=d.userdata[id * NUM_USER_DATA_PER_ACT + IDX_INTEGRAL_ERROR],
        )))

-    d.userdata[id * NUM_USER_DATA_PER_ACT + IDX_LAST_ERROR] = result.errors.error
-    d.userdata[id * NUM_USER_DATA_PER_ACT + IDX_DERIVATIVE_ERROR_LAST] = result.errors.derivative_error
+    d.userdata[id * NUM_USER_DATA_PER_ACT + IDX_ERROR] = result.errors.error
+    d.userdata[id * NUM_USER_DATA_PER_ACT + IDX_DERIVATIVE_ERROR] = result.errors.derivative_error
    d.userdata[id * NUM_USER_DATA_PER_ACT + IDX_INTEGRAL_ERROR] = result.errors.integral_error

    f = result.output
@@ -127,59 +127,78 @@ cdef enum USER_DEFINED_ACTUATOR_PARAMS_CASCADE:
    IDX_CAS_PROPORTIONAL_GAIN = 0,
    IDX_CAS_INTEGRAL_TIME_CONSTANT = 1,
    IDX_CAS_INTEGRAL_MAX_CLAMP = 2,
-    IDX_CAS_PROPORTIONAL_GAIN_V = 3,
-    IDX_CAS_INTEGRAL_TIME_CONSTANT_V = 4,
-    IDX_CAS_INTEGRAL_MAX_CLAMP_V = 5,
-    IDX_CAS_EMA_SMOOTH_V = 6,
-    IDX_CAS_MAX_VEL = 7,
+    IDX_CAS_DERIVATIVE_TIME_CONSTANT = 3,
+    IDX_CAS_DERIVATIVE_GAIN_SMOOTHING = 4,
+    IDX_CAS_PROPORTIONAL_GAIN_V = 5,
+    IDX_CAS_INTEGRAL_TIME_CONSTANT_V = 6,
+    IDX_CAS_INTEGRAL_MAX_CLAMP_V = 7,
+    IDX_CAS_EMA_SMOOTH = 8,
+    IDX_CAS_MAX_VEL = 9,

 cdef enum USER_DEFINED_CONTROLLER_DATA_CASCADE:
-    IDX_CAS_INTEGRAL_ERROR = 0,
-    IDX_CAS_INTEGRAL_ERROR_V = 1,
-    IDX_CAS_STORED_EMA_SMOOTH_V = 2,
-    NUM_USER_DATA_PER_ACT_CAS = 3,
+    IDX_CAS_ERROR = 0,
+    IDX_CAS_INTEGRAL_ERROR = 1,
+    IDX_CAS_DERIVATIVE_ERROR = 2,
+    IDX_CAS_INTEGRAL_ERROR_V = 3,
+    IDX_CAS_STORED_EMA_SMOOTH = 4,
+    NUM_USER_DATA_PER_ACT_CAS = 5,

 cdef NUM_USER_DATA_PER_ACT = max(int(NUM_USER_DATA_PER_ACT_PID), int(NUM_USER_DATA_PER_ACT_CAS))

 cdef mjtNum c_pi_cascade_bias(const mjModel*m, const mjData*d, int id):
    """
-    A cascaded PID controller implementation that can control position
+    A cascaded PID-PI controller implementation that can control position
    and velocity setpoints for a given actuator.
    
-    Currently, the cascaded controller is implemented as PI controllers for both the position and 
-    the velocity terms and therefore require Kp, Ti and max_clamp_integral parameters to be 
-    specified for both. Additionally, an exponential moving average smoothing is applied to the 
-    velocity component for added stability, and the controller is gravity compensated via the 
-    `qfrc_bias` term.
-
-    These are provided as part of gainprm in the following order: `gainprm="Kp_pos Ti_pos 
-    max_clamp_pos Kp_vel Ti_vel max_clamp_vel ema_smooth_factor max_vel"`, where ema_smooth_factor 
-    denotes the EMA smoothing factor and max_vel is a velocity constraint applied to the velocity setpoint. 
+    The cascaded controller is implemented as a nested position-velocity controller. A PID loop is 
+    wrapped around desired position and a PI loop is wrapped around desired velocity. An exponential 
+    moving average filter is applied to the commanded position input (d.ctrl). Additionally the controller
+    is gravity compensated via the `qfrc_bias` term.
+
+    The PID-PI gains are set as part of gainprm in the following order: 
+    `gainprm="  Kp_pos              -> Position loop proportional gain
+                Ti_pos              -> Position loop integral time constant
+                Ti_max_clamp_pos    -> Position loop integral error clamp
+                Td_pos              -> Position loop derivative time constant
+                Td_smooth_pos       -> Position loop derivative smoothing (EMA)
+                Kp_vel              -> Velocity loop proportional gain
+                Ti_vel              -> Velocity loop integral time constant
+                max_clamp_vel       -> Velocity loop integral error clamp
+                ema_smooth_factor   -> Exponential moving average (EMA) on desired position
+                max_vel"`           -> Clamped velocity limit (applied in positive and negative direction)  
    """
    cdef double dt_in_sec = m.opt.timestep
    cdef int NGAIN = int(const.NGAIN)

    cdef double Kp_cas = m.actuator_gainprm[id * NGAIN + IDX_CAS_PROPORTIONAL_GAIN]

+    # Apply Exponential Moving Average smoothing to the position setpoint
+    ctrl_ema = d.userdata[id * NUM_USER_DATA_PER_ACT + IDX_CAS_STORED_EMA_SMOOTH]
+    smoothing_factor = m.actuator_gainprm[id * NGAIN + IDX_CAS_EMA_SMOOTH]
+    smooth_pos_setpoint = (smoothing_factor * ctrl_ema) + (1 - smoothing_factor) * d.ctrl[id]
+    d.userdata[id * NUM_USER_DATA_PER_ACT + IDX_CAS_STORED_EMA_SMOOTH] = smooth_pos_setpoint
+
    if Kp_cas != 0:
        # Run a position PID loop and use the result to set a desired velocity signal
        pos_output = _pid(parameters=PIDParameters(
            dt_seconds=dt_in_sec,
-            setpoint=d.ctrl[id],
+            setpoint=smooth_pos_setpoint,
            feedback=d.actuator_length[id],
            Kp=Kp_cas,
            error_deadband=0.0,
            integral_max_clamp=m.actuator_gainprm[id * NGAIN + IDX_CAS_INTEGRAL_MAX_CLAMP],
            integral_time_const=m.actuator_gainprm[id * NGAIN + IDX_CAS_INTEGRAL_TIME_CONSTANT],
-            derivative_gain_smoothing=0.0,
-            derivative_time_const=0.0,
+            derivative_gain_smoothing=m.actuator_gainprm[id * NGAIN + IDX_CAS_DERIVATIVE_GAIN_SMOOTHING],
+            derivative_time_const=m.actuator_gainprm[id * NGAIN + IDX_CAS_DERIVATIVE_TIME_CONSTANT],
            previous_errors=PIDErrors(
-                error=0.0,
-                derivative_error=0.0,
+                error=d.userdata[id * NUM_USER_DATA_PER_ACT + IDX_CAS_ERROR],
+                derivative_error=d.userdata[id * NUM_USER_DATA_PER_ACT + IDX_CAS_DERIVATIVE_ERROR],
                integral_error=d.userdata[id * NUM_USER_DATA_PER_ACT + IDX_CAS_INTEGRAL_ERROR],
            )))

        # Save errors
+        d.userdata[id * NUM_USER_DATA_PER_ACT + IDX_CAS_ERROR] = pos_output.errors.error
+        d.userdata[id * NUM_USER_DATA_PER_ACT + IDX_CAS_DERIVATIVE_ERROR] = pos_output.errors.derivative_error
        d.userdata[id * NUM_USER_DATA_PER_ACT + IDX_CAS_INTEGRAL_ERROR] = pos_output.errors.integral_error
        des_vel = pos_output.output
    else:
@@ -190,17 +209,9 @@ cdef mjtNum c_pi_cascade_bias(const mjModel*m, const mjData*d, int id):
    max_qvel = m.actuator_gainprm[id * NGAIN + IDX_CAS_MAX_VEL]
    des_vel = fmax(-max_qvel, fmin(max_qvel, des_vel))

-    # Apply Exponential Moving Average smoothing to the velocity setpoint
-    ctrl_ema_vel = d.userdata[id * NUM_USER_DATA_PER_ACT + IDX_CAS_STORED_EMA_SMOOTH_V]
-    smoothing_factor = m.actuator_gainprm[id * NGAIN + IDX_CAS_EMA_SMOOTH_V]
-
-    smoothed_vel_setpoint = (smoothing_factor * ctrl_ema_vel) + (1 - smoothing_factor) * des_vel
-
-    d.userdata[id * NUM_USER_DATA_PER_ACT + IDX_CAS_STORED_EMA_SMOOTH_V] = smoothed_vel_setpoint
-
    vel_output = _pid(parameters=PIDParameters(
        dt_seconds=dt_in_sec,
-        setpoint=smoothed_vel_setpoint,
+        setpoint=des_vel,
        feedback=d.actuator_velocity[id],
        Kp=m.actuator_gainprm[id * NGAIN + IDX_CAS_PROPORTIONAL_GAIN_V],
        error_deadband=0.0,
@@ -223,7 +234,7 @@ cdef mjtNum c_pi_cascade_bias(const mjModel*m, const mjData*d, int id):

    f = vel_output.output

-    # gravity compensation
+    # Gravity compensation
    f += d.qfrc_bias[id]

    if effort_limit_low != 0.0 or effort_limit_high != 0.0:
@@ -237,9 +248,9 @@ cdef enum USER_DEFINED_ACTUATOR_DATA:

 cdef mjtNum c_custom_bias(const mjModel*m, const mjData*d, int id) with gil:
    """
-    Switches between PID and Cascaded PI type custom bias computation based on the
+    Switches between PID and Cascaded PID-PI type custom bias computation based on the
    defined actuator's actuator_user field.
-    user="1": Cascade PI
+    user="1": Cascade PID-PI
    default: PID
    :param m: mjModel
    :param d:  mjData

--- a/mujoco_py/tests/test_pid.py
+++ b/mujoco_py/tests/test_pid.py
@@ -16,6 +16,8 @@ MODEL_XML = """
 	<option gravity="0 0 -9.81" integrator="RK4" timestep="0.001"/>
 	<size nstack="3000"/>
 	<worldbody>
+        <camera name="camera1" pos="3 0 0" zaxis="1 0 0" />
+
 		<geom name="rail" pos="0 0 0" quat="0.707 0 0.707 0" rgba="0.3 0.3 0.7 1" size="0.02 1" type="capsule"/>
 		<body name="cart" pos="0 0 0">
 			<geom name="cart" pos="0 0 0" quat="0.707 0 0.707 0" size="0.1 0.1" type="capsule"/>
@@ -35,8 +37,16 @@ PID_ACTUATOR = """
 	<general ctrlrange='-1 1' gaintype="user" biastype="user" forcerange="-100 100" gainprm="200 10 10.0 0.1 0.1 0" joint="hinge" name="a-hinge"/>
 """

-CASCADED_PID_ACTUATOR = """
-	<general ctrlrange='-1 1' gaintype="user" biastype="user" forcerange="-3 3" gainprm="5 0 0 10 .1 1.5 .97 3" joint="hinge" name="a-hinge" user="1"/>
+CASCADED_PIPI_ACTUATOR = """
+	<general ctrlrange='-1 1' gaintype="user" biastype="user" forcerange="-3 3" gainprm="5 0 0 0 0 10 .1 1.5 .97 3" joint="hinge" name="a-hinge" user="1"/>
+"""
+
+CASCADED_PDPI_ACTUATOR = """
+	<general ctrlrange='-1 1' gaintype="user" biastype="user" forcerange="-3 3" gainprm="10 0 0 .1 1 10 .1 1.5 .97 3" joint="hinge" name="a-hinge" user="1"/>
+"""
+
+CASCADED_PDPI_ACTUATOR_NO_D = """
+	<general ctrlrange='-1 1' gaintype="user" biastype="user" forcerange="-3 3" gainprm="10 0 0 0 0 10 .1 1.5 .97 3" joint="hinge" name="a-hinge" user="1"/>
 """

 P_ONLY_ACTUATOR = """
@@ -50,8 +60,7 @@ POSITION_ACTUATOR = """

 def test_mj_pid():
    """
-    To enable PID control in the mujoco, please
-    refer to the setting in the PID_ACTUATOR.
+    To enable PID control in the mujoco, please refer to the setting in the PID_ACTUATOR.

    Here we set Kp = 200, Ti = 10, Td = 0.1 (also iClamp = 10.0, dSmooth be 0.1)
    """
@@ -61,7 +70,7 @@ def test_mj_pid():
    sim = MjSim(model)
    cymj.set_pid_control(sim.model, sim.data)

-    # pertubation of pole to be unbalanced
+    # Perturbation of pole to be unbalanced
    init_pos = 0.1 * (random.random() - 0.5)
    print('init pos', init_pos)
    sim.data.qpos[0] = init_pos
@@ -79,8 +88,8 @@ def test_mj_pid():

 def test_mj_proportional_only():
    """
-    check new PID control is backward compatible with  position control
-    when only has Kp term.
+    Check new PID control is backward compatible with position control
+    when it only has a Kp term.
    """
    model = load_model_from_xml(MODEL_XML.format(actuator=P_ONLY_ACTUATOR, nuser_actuator=1))
    sim = MjSim(model)
@@ -100,22 +109,22 @@ def test_mj_proportional_only():
        assert abs(sim.data.qpos[0] - sim2.data.qpos[0]) <= 1e-7, "%d step violates" % i


-def test_cascaded_pid():
+def test_cascaded_pipi():
    """
-    To enable Cascaded PID control in the mujoco, please
-    refer to the setting in the CASCADED_PID_ACTUATOR. user param should be set to 1
+    To enable Cascaded control in mujoco, please refer to the setting in 
+    the CASCADED_PIPI_ACTUATOR. user param should be set to 1

    Here we set Kp = 5 for the position control loop and Kp =  10 for the velocity control
    Ti = 0.1 and integral_max_clamp=1.5.
    EMA smoothing constant is set to 0.97, and velocity limit is 3 rad/s
    """
    random.seed(30)
-    xml = MODEL_XML.format(actuator=CASCADED_PID_ACTUATOR, nuser_actuator=1)
+    xml = MODEL_XML.format(actuator=CASCADED_PIPI_ACTUATOR, nuser_actuator=1)
    model = load_model_from_xml(xml)
    sim = MjSim(model)
    cymj.set_pid_control(sim.model, sim.data)

-    # pertubation of pole to be unbalanced
+    # Perturbation of pole to be unbalanced
    init_pos = 0.1 * (random.random() - 1.0)
    print('init pos', init_pos)
    sim.data.qpos[0] = init_pos
@@ -133,12 +142,59 @@ def test_cascaded_pid():

    print('final pos', sim.data.qpos[0])
    assert abs(sim.data.qpos[0] - desired_pos) < 1e-3
-    assert max_torque <= 3  # torque limit set on the actuator
+    assert max_torque <= 3  # Torque limit set on the actuator
+    
+def test_cascaded_pdpi():
+    """
+    This tests using a PD-PI loop with the cascaded controller. This is achieved by setting the 
+    integral time constant and clamp equal to zero.
+
+    Here we setup two sims (CASCADED_PDPI_ACTUATOR and CASCADED_PDPI_ACTUATOR_NO_D), one with 
+    derivative gain and one without. With the exception of the derivative term, all other gain 
+    parameters are the same. The goal is to show the stability added by the derivative term. 
+    """
+    random.seed(30)
+    xml = MODEL_XML.format(actuator=CASCADED_PDPI_ACTUATOR, nuser_actuator=1)
+    model = load_model_from_xml(xml)
+    sim = MjSim(model)
+    cymj.set_pid_control(sim.model, sim.data)
+    
+    """
+    sim2 uses the same Kp gain on the position loop as sim but does not have any derivative gain. 
+    It is expected that given this Kp gain and no derivative gain, the system will be unstable 
+    and fail to hold the desired position. 
+    """
+    xml = MODEL_XML.format(actuator=CASCADED_PDPI_ACTUATOR_NO_D, nuser_actuator=1)
+    model2 = load_model_from_xml(xml)
+    sim2 = MjSim(model2)
+    cymj.set_pid_control(sim2.model, sim2.data)
+
+    # Perturbation of pole to be unbalanced
+    init_pos = 0.1 * (random.random() - 1.0)
+    print('init pos', init_pos)
+    sim.data.qpos[0] = sim2.data.qpos[0] = init_pos
+
+    desired_pos = 0.0
+    sim.data.ctrl[0] = sim2.data.ctrl[0] = desired_pos
+    print('desired position:', desired_pos)
+
+    max_torque = 0
+
+    for _ in range(2000):
+        sim.step()
+        sim2.step()
+        if abs(sim.data.actuator_force[0]) > max_torque:
+            max_torque = abs(sim.data.actuator_force[0])
+        
+    print('final pos', sim.data.qpos[0])
+    assert abs(sim2.data.qpos[0] - desired_pos) > 1e-3 # Verify instability without D term
+    assert abs(sim.data.qpos[0] - desired_pos) < 1e-3 # Verify stability with D term
+    assert max_torque <= 3  # Torque limit set on the actuator


 def test_mjsize_exception():
    """nuser_actuator must be set large enough to use custom controllers."""
-    xml = MODEL_XML.format(actuator=CASCADED_PID_ACTUATOR, nuser_actuator=0)
+    xml = MODEL_XML.format(actuator=CASCADED_PIPI_ACTUATOR, nuser_actuator=0)
    model = load_model_from_xml(xml)
    sim = MjSim(model)
    with pytest.raises(Exception):

--- a/mujoco_py/version.py
+++ b/mujoco_py/version.py
 __all__ = ['__version__', 'get_version']

-version_info = (2, 0, 2, 11)
+version_info = (2, 0, 2, 12)
 # format:
 # ('mujoco_major', 'mujoco_minor', 'mujoco_py_major', 'mujoco_py_minor')