diff --git a/mujoco_py/mjpid.pyx b/mujoco_py/mjpid.pyx
index 52d7627b7fddf1907c9575df9585e620b1dbe388..aee279dcf6dda236518d30d510b29eb39f1490c0 100644
--- a/mujoco_py/mjpid.pyx
+++ b/mujoco_py/mjpid.pyx
@@ -48,11 +48,11 @@ cdef PIDOutput _pid(PIDParameters parameters):
 
     cdef double error = parameters.setpoint - parameters.feedback
 
-    # clamp error that's within the error deadband
+    # Clamp error that's within the error deadband
     if fabs(error) < parameters.error_deadband:
         error = 0.0
 
-    # compute derivative error
+    # Compute derivative error
     cdef double derivative_error = (error - parameters.previous_errors.error) / parameters.dt_seconds
 
     derivative_error = (1.0 - parameters.derivative_gain_smoothing) * parameters.previous_errors.derivative_error + \
@@ -60,7 +60,7 @@ cdef PIDOutput _pid(PIDParameters parameters):
 
     cdef double derivative_error_term = derivative_error * parameters.derivative_time_const
 
-    # update and clamp integral error
+    # Update and clamp integral error
     integral_error = parameters.previous_errors.integral_error
     integral_error += error * parameters.dt_seconds
     integral_error = fmax(-parameters.integral_max_clamp, fmin(parameters.integral_max_clamp, integral_error))
@@ -83,8 +83,8 @@ cdef enum USER_DEFINED_ACTUATOR_PARAMS:
 
 cdef enum USER_DEFINED_CONTROLLER_DATA_PID:
     IDX_INTEGRAL_ERROR = 0,
-    IDX_LAST_ERROR = 1,
-    IDX_DERIVATIVE_ERROR_LAST = 2,
+    IDX_ERROR = 1,
+    IDX_DERIVATIVE_ERROR = 2,
     NUM_USER_DATA_PER_ACT_PID = 3,
 
 cdef mjtNum c_pid_bias(const mjModel*m, const mjData*d, int id):
@@ -105,13 +105,13 @@ cdef mjtNum c_pid_bias(const mjModel*m, const mjData*d, int id):
         derivative_gain_smoothing=m.actuator_gainprm[id * NGAIN + IDX_DERIVATIVE_GAIN_SMOOTHING],
         derivative_time_const=m.actuator_gainprm[id * NGAIN + IDX_DERIVATIVE_TIME_CONSTANT],
         previous_errors=PIDErrors(
-            error=d.userdata[id * NUM_USER_DATA_PER_ACT + IDX_LAST_ERROR],
-            derivative_error=d.userdata[id * NUM_USER_DATA_PER_ACT + IDX_DERIVATIVE_ERROR_LAST],
+            error=d.userdata[id * NUM_USER_DATA_PER_ACT + IDX_ERROR],
+            derivative_error=d.userdata[id * NUM_USER_DATA_PER_ACT + IDX_DERIVATIVE_ERROR],
             integral_error=d.userdata[id * NUM_USER_DATA_PER_ACT + IDX_INTEGRAL_ERROR],
         )))
 
-    d.userdata[id * NUM_USER_DATA_PER_ACT + IDX_LAST_ERROR] = result.errors.error
-    d.userdata[id * NUM_USER_DATA_PER_ACT + IDX_DERIVATIVE_ERROR_LAST] = result.errors.derivative_error
+    d.userdata[id * NUM_USER_DATA_PER_ACT + IDX_ERROR] = result.errors.error
+    d.userdata[id * NUM_USER_DATA_PER_ACT + IDX_DERIVATIVE_ERROR] = result.errors.derivative_error
     d.userdata[id * NUM_USER_DATA_PER_ACT + IDX_INTEGRAL_ERROR] = result.errors.integral_error
 
     f = result.output
@@ -127,59 +127,78 @@ cdef enum USER_DEFINED_ACTUATOR_PARAMS_CASCADE:
     IDX_CAS_PROPORTIONAL_GAIN = 0,
     IDX_CAS_INTEGRAL_TIME_CONSTANT = 1,
     IDX_CAS_INTEGRAL_MAX_CLAMP = 2,
-    IDX_CAS_PROPORTIONAL_GAIN_V = 3,
-    IDX_CAS_INTEGRAL_TIME_CONSTANT_V = 4,
-    IDX_CAS_INTEGRAL_MAX_CLAMP_V = 5,
-    IDX_CAS_EMA_SMOOTH_V = 6,
-    IDX_CAS_MAX_VEL = 7,
+    IDX_CAS_DERIVATIVE_TIME_CONSTANT = 3,
+    IDX_CAS_DERIVATIVE_GAIN_SMOOTHING = 4,
+    IDX_CAS_PROPORTIONAL_GAIN_V = 5,
+    IDX_CAS_INTEGRAL_TIME_CONSTANT_V = 6,
+    IDX_CAS_INTEGRAL_MAX_CLAMP_V = 7,
+    IDX_CAS_EMA_SMOOTH = 8,
+    IDX_CAS_MAX_VEL = 9,
 
 cdef enum USER_DEFINED_CONTROLLER_DATA_CASCADE:
-    IDX_CAS_INTEGRAL_ERROR = 0,
-    IDX_CAS_INTEGRAL_ERROR_V = 1,
-    IDX_CAS_STORED_EMA_SMOOTH_V = 2,
-    NUM_USER_DATA_PER_ACT_CAS = 3,
+    IDX_CAS_ERROR = 0,
+    IDX_CAS_INTEGRAL_ERROR = 1,
+    IDX_CAS_DERIVATIVE_ERROR = 2,
+    IDX_CAS_INTEGRAL_ERROR_V = 3,
+    IDX_CAS_STORED_EMA_SMOOTH = 4,
+    NUM_USER_DATA_PER_ACT_CAS = 5,
 
 cdef NUM_USER_DATA_PER_ACT = max(int(NUM_USER_DATA_PER_ACT_PID), int(NUM_USER_DATA_PER_ACT_CAS))
 
 cdef mjtNum c_pi_cascade_bias(const mjModel*m, const mjData*d, int id):
     """
-    A cascaded PID controller implementation that can control position
+    A cascaded PID-PI controller implementation that can control position
     and velocity setpoints for a given actuator.
     
-    Currently, the cascaded controller is implemented as PI controllers for both the position and 
-    the velocity terms and therefore require Kp, Ti and max_clamp_integral parameters to be 
-    specified for both. Additionally, an exponential moving average smoothing is applied to the 
-    velocity component for added stability, and the controller is gravity compensated via the 
-    `qfrc_bias` term.
-
-    These are provided as part of gainprm in the following order: `gainprm="Kp_pos Ti_pos 
-    max_clamp_pos Kp_vel Ti_vel max_clamp_vel ema_smooth_factor max_vel"`, where ema_smooth_factor 
-    denotes the EMA smoothing factor and max_vel is a velocity constraint applied to the velocity setpoint. 
+    The cascaded controller is implemented as a nested position-velocity controller. A PID loop is 
+    wrapped around desired position and a PI loop is wrapped around desired velocity. An exponential 
+    moving average filter is applied to the commanded position input (d.ctrl). Additionally the controller
+    is gravity compensated via the `qfrc_bias` term.
+
+    The PID-PI gains are set as part of gainprm in the following order: 
+    `gainprm="  Kp_pos              -> Position loop proportional gain
+                Ti_pos              -> Position loop integral time constant
+                Ti_max_clamp_pos    -> Position loop integral error clamp
+                Td_pos              -> Position loop derivative time constant
+                Td_smooth_pos       -> Position loop derivative smoothing (EMA)
+                Kp_vel              -> Velocity loop proportional gain
+                Ti_vel              -> Velocity loop integral time constant
+                max_clamp_vel       -> Velocity loop integral error clamp
+                ema_smooth_factor   -> Exponential moving average (EMA) on desired position
+                max_vel"`           -> Clamped velocity limit (applied in positive and negative direction)  
     """
     cdef double dt_in_sec = m.opt.timestep
     cdef int NGAIN = int(const.NGAIN)
 
     cdef double Kp_cas = m.actuator_gainprm[id * NGAIN + IDX_CAS_PROPORTIONAL_GAIN]
 
+    # Apply Exponential Moving Average smoothing to the position setpoint
+    ctrl_ema = d.userdata[id * NUM_USER_DATA_PER_ACT + IDX_CAS_STORED_EMA_SMOOTH]
+    smoothing_factor = m.actuator_gainprm[id * NGAIN + IDX_CAS_EMA_SMOOTH]
+    smooth_pos_setpoint = (smoothing_factor * ctrl_ema) + (1 - smoothing_factor) * d.ctrl[id]
+    d.userdata[id * NUM_USER_DATA_PER_ACT + IDX_CAS_STORED_EMA_SMOOTH] = smooth_pos_setpoint
+
     if Kp_cas != 0:
         # Run a position PID loop and use the result to set a desired velocity signal
         pos_output = _pid(parameters=PIDParameters(
             dt_seconds=dt_in_sec,
-            setpoint=d.ctrl[id],
+            setpoint=smooth_pos_setpoint,
             feedback=d.actuator_length[id],
             Kp=Kp_cas,
             error_deadband=0.0,
             integral_max_clamp=m.actuator_gainprm[id * NGAIN + IDX_CAS_INTEGRAL_MAX_CLAMP],
             integral_time_const=m.actuator_gainprm[id * NGAIN + IDX_CAS_INTEGRAL_TIME_CONSTANT],
-            derivative_gain_smoothing=0.0,
-            derivative_time_const=0.0,
+            derivative_gain_smoothing=m.actuator_gainprm[id * NGAIN + IDX_CAS_DERIVATIVE_GAIN_SMOOTHING],
+            derivative_time_const=m.actuator_gainprm[id * NGAIN + IDX_CAS_DERIVATIVE_TIME_CONSTANT],
             previous_errors=PIDErrors(
-                error=0.0,
-                derivative_error=0.0,
+                error=d.userdata[id * NUM_USER_DATA_PER_ACT + IDX_CAS_ERROR],
+                derivative_error=d.userdata[id * NUM_USER_DATA_PER_ACT + IDX_CAS_DERIVATIVE_ERROR],
                 integral_error=d.userdata[id * NUM_USER_DATA_PER_ACT + IDX_CAS_INTEGRAL_ERROR],
             )))
 
         # Save errors
+        d.userdata[id * NUM_USER_DATA_PER_ACT + IDX_CAS_ERROR] = pos_output.errors.error
+        d.userdata[id * NUM_USER_DATA_PER_ACT + IDX_CAS_DERIVATIVE_ERROR] = pos_output.errors.derivative_error
         d.userdata[id * NUM_USER_DATA_PER_ACT + IDX_CAS_INTEGRAL_ERROR] = pos_output.errors.integral_error
         des_vel = pos_output.output
     else:
@@ -190,17 +209,9 @@ cdef mjtNum c_pi_cascade_bias(const mjModel*m, const mjData*d, int id):
     max_qvel = m.actuator_gainprm[id * NGAIN + IDX_CAS_MAX_VEL]
     des_vel = fmax(-max_qvel, fmin(max_qvel, des_vel))
 
-    # Apply Exponential Moving Average smoothing to the velocity setpoint
-    ctrl_ema_vel = d.userdata[id * NUM_USER_DATA_PER_ACT + IDX_CAS_STORED_EMA_SMOOTH_V]
-    smoothing_factor = m.actuator_gainprm[id * NGAIN + IDX_CAS_EMA_SMOOTH_V]
-
-    smoothed_vel_setpoint = (smoothing_factor * ctrl_ema_vel) + (1 - smoothing_factor) * des_vel
-
-    d.userdata[id * NUM_USER_DATA_PER_ACT + IDX_CAS_STORED_EMA_SMOOTH_V] = smoothed_vel_setpoint
-
     vel_output = _pid(parameters=PIDParameters(
         dt_seconds=dt_in_sec,
-        setpoint=smoothed_vel_setpoint,
+        setpoint=des_vel,
         feedback=d.actuator_velocity[id],
         Kp=m.actuator_gainprm[id * NGAIN + IDX_CAS_PROPORTIONAL_GAIN_V],
         error_deadband=0.0,
@@ -223,7 +234,7 @@ cdef mjtNum c_pi_cascade_bias(const mjModel*m, const mjData*d, int id):
 
     f = vel_output.output
 
-    # gravity compensation
+    # Gravity compensation
     f += d.qfrc_bias[id]
 
     if effort_limit_low != 0.0 or effort_limit_high != 0.0:
@@ -237,9 +248,9 @@ cdef enum USER_DEFINED_ACTUATOR_DATA:
 
 cdef mjtNum c_custom_bias(const mjModel*m, const mjData*d, int id) with gil:
     """
-    Switches between PID and Cascaded PI type custom bias computation based on the
+    Switches between PID and Cascaded PID-PI type custom bias computation based on the
     defined actuator's actuator_user field.
-    user="1": Cascade PI
+    user="1": Cascade PID-PI
     default: PID
     :param m: mjModel
     :param d:  mjData
diff --git a/mujoco_py/tests/test_pid.py b/mujoco_py/tests/test_pid.py
index fb422d5def7f3c63b4cb432e92c2b9328fa75559..21b6d4f0dfba6778c3787c2c66f7cdfe5af7ae7a 100644
--- a/mujoco_py/tests/test_pid.py
+++ b/mujoco_py/tests/test_pid.py
@@ -16,6 +16,8 @@ MODEL_XML = """
 	<option gravity="0 0 -9.81" integrator="RK4" timestep="0.001"/>
 	<size nstack="3000"/>
 	<worldbody>
+        <camera name="camera1" pos="3 0 0" zaxis="1 0 0" />
+
 		<geom name="rail" pos="0 0 0" quat="0.707 0 0.707 0" rgba="0.3 0.3 0.7 1" size="0.02 1" type="capsule"/>
 		<body name="cart" pos="0 0 0">
 			<geom name="cart" pos="0 0 0" quat="0.707 0 0.707 0" size="0.1 0.1" type="capsule"/>
@@ -35,8 +37,16 @@ PID_ACTUATOR = """
 	<general ctrlrange='-1 1' gaintype="user" biastype="user" forcerange="-100 100" gainprm="200 10 10.0 0.1 0.1 0" joint="hinge" name="a-hinge"/>
 """
 
-CASCADED_PID_ACTUATOR = """
-	<general ctrlrange='-1 1' gaintype="user" biastype="user" forcerange="-3 3" gainprm="5 0 0 10 .1 1.5 .97 3" joint="hinge" name="a-hinge" user="1"/>
+CASCADED_PIPI_ACTUATOR = """
+	<general ctrlrange='-1 1' gaintype="user" biastype="user" forcerange="-3 3" gainprm="5 0 0 0 0 10 .1 1.5 .97 3" joint="hinge" name="a-hinge" user="1"/>
+"""
+
+CASCADED_PDPI_ACTUATOR = """
+	<general ctrlrange='-1 1' gaintype="user" biastype="user" forcerange="-3 3" gainprm="10 0 0 .1 1 10 .1 1.5 .97 3" joint="hinge" name="a-hinge" user="1"/>
+"""
+
+CASCADED_PDPI_ACTUATOR_NO_D = """
+	<general ctrlrange='-1 1' gaintype="user" biastype="user" forcerange="-3 3" gainprm="10 0 0 0 0 10 .1 1.5 .97 3" joint="hinge" name="a-hinge" user="1"/>
 """
 
 P_ONLY_ACTUATOR = """
@@ -50,8 +60,7 @@ POSITION_ACTUATOR = """
 
 def test_mj_pid():
     """
-    To enable PID control in the mujoco, please
-    refer to the setting in the PID_ACTUATOR.
+    To enable PID control in the mujoco, please refer to the setting in the PID_ACTUATOR.
 
     Here we set Kp = 200, Ti = 10, Td = 0.1 (also iClamp = 10.0, dSmooth be 0.1)
     """
@@ -61,7 +70,7 @@ def test_mj_pid():
     sim = MjSim(model)
     cymj.set_pid_control(sim.model, sim.data)
 
-    # pertubation of pole to be unbalanced
+    # Perturbation of pole to be unbalanced
     init_pos = 0.1 * (random.random() - 0.5)
     print('init pos', init_pos)
     sim.data.qpos[0] = init_pos
@@ -79,8 +88,8 @@ def test_mj_pid():
 
 def test_mj_proportional_only():
     """
-    check new PID control is backward compatible with  position control
-    when only has Kp term.
+    Check new PID control is backward compatible with position control
+    when it only has a Kp term.
     """
     model = load_model_from_xml(MODEL_XML.format(actuator=P_ONLY_ACTUATOR, nuser_actuator=1))
     sim = MjSim(model)
@@ -100,22 +109,22 @@ def test_mj_proportional_only():
         assert abs(sim.data.qpos[0] - sim2.data.qpos[0]) <= 1e-7, "%d step violates" % i
 
 
-def test_cascaded_pid():
+def test_cascaded_pipi():
     """
-    To enable Cascaded PID control in the mujoco, please
-    refer to the setting in the CASCADED_PID_ACTUATOR. user param should be set to 1
+    To enable Cascaded control in mujoco, please refer to the setting in 
+    the CASCADED_PIPI_ACTUATOR. user param should be set to 1
 
     Here we set Kp = 5 for the position control loop and Kp =  10 for the velocity control
     Ti = 0.1 and integral_max_clamp=1.5.
     EMA smoothing constant is set to 0.97, and velocity limit is 3 rad/s
     """
     random.seed(30)
-    xml = MODEL_XML.format(actuator=CASCADED_PID_ACTUATOR, nuser_actuator=1)
+    xml = MODEL_XML.format(actuator=CASCADED_PIPI_ACTUATOR, nuser_actuator=1)
     model = load_model_from_xml(xml)
     sim = MjSim(model)
     cymj.set_pid_control(sim.model, sim.data)
 
-    # pertubation of pole to be unbalanced
+    # Perturbation of pole to be unbalanced
     init_pos = 0.1 * (random.random() - 1.0)
     print('init pos', init_pos)
     sim.data.qpos[0] = init_pos
@@ -133,12 +142,59 @@ def test_cascaded_pid():
 
     print('final pos', sim.data.qpos[0])
     assert abs(sim.data.qpos[0] - desired_pos) < 1e-3
-    assert max_torque <= 3  # torque limit set on the actuator
+    assert max_torque <= 3  # Torque limit set on the actuator
+    
+def test_cascaded_pdpi():
+    """
+    This tests using a PD-PI loop with the cascaded controller. This is achieved by setting the 
+    integral time constant and clamp equal to zero.
+
+    Here we setup two sims (CASCADED_PDPI_ACTUATOR and CASCADED_PDPI_ACTUATOR_NO_D), one with 
+    derivative gain and one without. With the exception of the derivative term, all other gain 
+    parameters are the same. The goal is to show the stability added by the derivative term. 
+    """
+    random.seed(30)
+    xml = MODEL_XML.format(actuator=CASCADED_PDPI_ACTUATOR, nuser_actuator=1)
+    model = load_model_from_xml(xml)
+    sim = MjSim(model)
+    cymj.set_pid_control(sim.model, sim.data)
+    
+    """
+    sim2 uses the same Kp gain on the position loop as sim but does not have any derivative gain. 
+    It is expected that given this Kp gain and no derivative gain, the system will be unstable 
+    and fail to hold the desired position. 
+    """
+    xml = MODEL_XML.format(actuator=CASCADED_PDPI_ACTUATOR_NO_D, nuser_actuator=1)
+    model2 = load_model_from_xml(xml)
+    sim2 = MjSim(model2)
+    cymj.set_pid_control(sim2.model, sim2.data)
+
+    # Perturbation of pole to be unbalanced
+    init_pos = 0.1 * (random.random() - 1.0)
+    print('init pos', init_pos)
+    sim.data.qpos[0] = sim2.data.qpos[0] = init_pos
+
+    desired_pos = 0.0
+    sim.data.ctrl[0] = sim2.data.ctrl[0] = desired_pos
+    print('desired position:', desired_pos)
+
+    max_torque = 0
+
+    for _ in range(2000):
+        sim.step()
+        sim2.step()
+        if abs(sim.data.actuator_force[0]) > max_torque:
+            max_torque = abs(sim.data.actuator_force[0])
+        
+    print('final pos', sim.data.qpos[0])
+    assert abs(sim2.data.qpos[0] - desired_pos) > 1e-3 # Verify instability without D term
+    assert abs(sim.data.qpos[0] - desired_pos) < 1e-3 # Verify stability with D term
+    assert max_torque <= 3  # Torque limit set on the actuator
 
 
 def test_mjsize_exception():
     """nuser_actuator must be set large enough to use custom controllers."""
-    xml = MODEL_XML.format(actuator=CASCADED_PID_ACTUATOR, nuser_actuator=0)
+    xml = MODEL_XML.format(actuator=CASCADED_PIPI_ACTUATOR, nuser_actuator=0)
     model = load_model_from_xml(xml)
     sim = MjSim(model)
     with pytest.raises(Exception):
diff --git a/mujoco_py/version.py b/mujoco_py/version.py
index 72098a78b95d8999c73b94c1e31b575e55fef932..0371ae31a31fae9d663147e0ddf303ebb8dc0b2c 100644
--- a/mujoco_py/version.py
+++ b/mujoco_py/version.py
@@ -1,6 +1,6 @@
 __all__ = ['__version__', 'get_version']
 
-version_info = (2, 0, 2, 11)
+version_info = (2, 0, 2, 12)
 # format:
 # ('mujoco_major', 'mujoco_minor', 'mujoco_py_major', 'mujoco_py_minor')