diff --git a/mujoco_py/mjpid.pyx b/mujoco_py/mjpid.pyx index 52d7627b7fddf1907c9575df9585e620b1dbe388..aee279dcf6dda236518d30d510b29eb39f1490c0 100644 --- a/mujoco_py/mjpid.pyx +++ b/mujoco_py/mjpid.pyx @@ -48,11 +48,11 @@ cdef PIDOutput _pid(PIDParameters parameters): cdef double error = parameters.setpoint - parameters.feedback - # clamp error that's within the error deadband + # Clamp error that's within the error deadband if fabs(error) < parameters.error_deadband: error = 0.0 - # compute derivative error + # Compute derivative error cdef double derivative_error = (error - parameters.previous_errors.error) / parameters.dt_seconds derivative_error = (1.0 - parameters.derivative_gain_smoothing) * parameters.previous_errors.derivative_error + \ @@ -60,7 +60,7 @@ cdef PIDOutput _pid(PIDParameters parameters): cdef double derivative_error_term = derivative_error * parameters.derivative_time_const - # update and clamp integral error + # Update and clamp integral error integral_error = parameters.previous_errors.integral_error integral_error += error * parameters.dt_seconds integral_error = fmax(-parameters.integral_max_clamp, fmin(parameters.integral_max_clamp, integral_error)) @@ -83,8 +83,8 @@ cdef enum USER_DEFINED_ACTUATOR_PARAMS: cdef enum USER_DEFINED_CONTROLLER_DATA_PID: IDX_INTEGRAL_ERROR = 0, - IDX_LAST_ERROR = 1, - IDX_DERIVATIVE_ERROR_LAST = 2, + IDX_ERROR = 1, + IDX_DERIVATIVE_ERROR = 2, NUM_USER_DATA_PER_ACT_PID = 3, cdef mjtNum c_pid_bias(const mjModel*m, const mjData*d, int id): @@ -105,13 +105,13 @@ cdef mjtNum c_pid_bias(const mjModel*m, const mjData*d, int id): derivative_gain_smoothing=m.actuator_gainprm[id * NGAIN + IDX_DERIVATIVE_GAIN_SMOOTHING], derivative_time_const=m.actuator_gainprm[id * NGAIN + IDX_DERIVATIVE_TIME_CONSTANT], previous_errors=PIDErrors( - error=d.userdata[id * NUM_USER_DATA_PER_ACT + IDX_LAST_ERROR], - derivative_error=d.userdata[id * NUM_USER_DATA_PER_ACT + IDX_DERIVATIVE_ERROR_LAST], + error=d.userdata[id * NUM_USER_DATA_PER_ACT + IDX_ERROR], + derivative_error=d.userdata[id * NUM_USER_DATA_PER_ACT + IDX_DERIVATIVE_ERROR], integral_error=d.userdata[id * NUM_USER_DATA_PER_ACT + IDX_INTEGRAL_ERROR], ))) - d.userdata[id * NUM_USER_DATA_PER_ACT + IDX_LAST_ERROR] = result.errors.error - d.userdata[id * NUM_USER_DATA_PER_ACT + IDX_DERIVATIVE_ERROR_LAST] = result.errors.derivative_error + d.userdata[id * NUM_USER_DATA_PER_ACT + IDX_ERROR] = result.errors.error + d.userdata[id * NUM_USER_DATA_PER_ACT + IDX_DERIVATIVE_ERROR] = result.errors.derivative_error d.userdata[id * NUM_USER_DATA_PER_ACT + IDX_INTEGRAL_ERROR] = result.errors.integral_error f = result.output @@ -127,59 +127,78 @@ cdef enum USER_DEFINED_ACTUATOR_PARAMS_CASCADE: IDX_CAS_PROPORTIONAL_GAIN = 0, IDX_CAS_INTEGRAL_TIME_CONSTANT = 1, IDX_CAS_INTEGRAL_MAX_CLAMP = 2, - IDX_CAS_PROPORTIONAL_GAIN_V = 3, - IDX_CAS_INTEGRAL_TIME_CONSTANT_V = 4, - IDX_CAS_INTEGRAL_MAX_CLAMP_V = 5, - IDX_CAS_EMA_SMOOTH_V = 6, - IDX_CAS_MAX_VEL = 7, + IDX_CAS_DERIVATIVE_TIME_CONSTANT = 3, + IDX_CAS_DERIVATIVE_GAIN_SMOOTHING = 4, + IDX_CAS_PROPORTIONAL_GAIN_V = 5, + IDX_CAS_INTEGRAL_TIME_CONSTANT_V = 6, + IDX_CAS_INTEGRAL_MAX_CLAMP_V = 7, + IDX_CAS_EMA_SMOOTH = 8, + IDX_CAS_MAX_VEL = 9, cdef enum USER_DEFINED_CONTROLLER_DATA_CASCADE: - IDX_CAS_INTEGRAL_ERROR = 0, - IDX_CAS_INTEGRAL_ERROR_V = 1, - IDX_CAS_STORED_EMA_SMOOTH_V = 2, - NUM_USER_DATA_PER_ACT_CAS = 3, + IDX_CAS_ERROR = 0, + IDX_CAS_INTEGRAL_ERROR = 1, + IDX_CAS_DERIVATIVE_ERROR = 2, + IDX_CAS_INTEGRAL_ERROR_V = 3, + IDX_CAS_STORED_EMA_SMOOTH = 4, + NUM_USER_DATA_PER_ACT_CAS = 5, cdef NUM_USER_DATA_PER_ACT = max(int(NUM_USER_DATA_PER_ACT_PID), int(NUM_USER_DATA_PER_ACT_CAS)) cdef mjtNum c_pi_cascade_bias(const mjModel*m, const mjData*d, int id): """ - A cascaded PID controller implementation that can control position + A cascaded PID-PI controller implementation that can control position and velocity setpoints for a given actuator. - Currently, the cascaded controller is implemented as PI controllers for both the position and - the velocity terms and therefore require Kp, Ti and max_clamp_integral parameters to be - specified for both. Additionally, an exponential moving average smoothing is applied to the - velocity component for added stability, and the controller is gravity compensated via the - `qfrc_bias` term. - - These are provided as part of gainprm in the following order: `gainprm="Kp_pos Ti_pos - max_clamp_pos Kp_vel Ti_vel max_clamp_vel ema_smooth_factor max_vel"`, where ema_smooth_factor - denotes the EMA smoothing factor and max_vel is a velocity constraint applied to the velocity setpoint. + The cascaded controller is implemented as a nested position-velocity controller. A PID loop is + wrapped around desired position and a PI loop is wrapped around desired velocity. An exponential + moving average filter is applied to the commanded position input (d.ctrl). Additionally the controller + is gravity compensated via the `qfrc_bias` term. + + The PID-PI gains are set as part of gainprm in the following order: + `gainprm=" Kp_pos -> Position loop proportional gain + Ti_pos -> Position loop integral time constant + Ti_max_clamp_pos -> Position loop integral error clamp + Td_pos -> Position loop derivative time constant + Td_smooth_pos -> Position loop derivative smoothing (EMA) + Kp_vel -> Velocity loop proportional gain + Ti_vel -> Velocity loop integral time constant + max_clamp_vel -> Velocity loop integral error clamp + ema_smooth_factor -> Exponential moving average (EMA) on desired position + max_vel"` -> Clamped velocity limit (applied in positive and negative direction) """ cdef double dt_in_sec = m.opt.timestep cdef int NGAIN = int(const.NGAIN) cdef double Kp_cas = m.actuator_gainprm[id * NGAIN + IDX_CAS_PROPORTIONAL_GAIN] + # Apply Exponential Moving Average smoothing to the position setpoint + ctrl_ema = d.userdata[id * NUM_USER_DATA_PER_ACT + IDX_CAS_STORED_EMA_SMOOTH] + smoothing_factor = m.actuator_gainprm[id * NGAIN + IDX_CAS_EMA_SMOOTH] + smooth_pos_setpoint = (smoothing_factor * ctrl_ema) + (1 - smoothing_factor) * d.ctrl[id] + d.userdata[id * NUM_USER_DATA_PER_ACT + IDX_CAS_STORED_EMA_SMOOTH] = smooth_pos_setpoint + if Kp_cas != 0: # Run a position PID loop and use the result to set a desired velocity signal pos_output = _pid(parameters=PIDParameters( dt_seconds=dt_in_sec, - setpoint=d.ctrl[id], + setpoint=smooth_pos_setpoint, feedback=d.actuator_length[id], Kp=Kp_cas, error_deadband=0.0, integral_max_clamp=m.actuator_gainprm[id * NGAIN + IDX_CAS_INTEGRAL_MAX_CLAMP], integral_time_const=m.actuator_gainprm[id * NGAIN + IDX_CAS_INTEGRAL_TIME_CONSTANT], - derivative_gain_smoothing=0.0, - derivative_time_const=0.0, + derivative_gain_smoothing=m.actuator_gainprm[id * NGAIN + IDX_CAS_DERIVATIVE_GAIN_SMOOTHING], + derivative_time_const=m.actuator_gainprm[id * NGAIN + IDX_CAS_DERIVATIVE_TIME_CONSTANT], previous_errors=PIDErrors( - error=0.0, - derivative_error=0.0, + error=d.userdata[id * NUM_USER_DATA_PER_ACT + IDX_CAS_ERROR], + derivative_error=d.userdata[id * NUM_USER_DATA_PER_ACT + IDX_CAS_DERIVATIVE_ERROR], integral_error=d.userdata[id * NUM_USER_DATA_PER_ACT + IDX_CAS_INTEGRAL_ERROR], ))) # Save errors + d.userdata[id * NUM_USER_DATA_PER_ACT + IDX_CAS_ERROR] = pos_output.errors.error + d.userdata[id * NUM_USER_DATA_PER_ACT + IDX_CAS_DERIVATIVE_ERROR] = pos_output.errors.derivative_error d.userdata[id * NUM_USER_DATA_PER_ACT + IDX_CAS_INTEGRAL_ERROR] = pos_output.errors.integral_error des_vel = pos_output.output else: @@ -190,17 +209,9 @@ cdef mjtNum c_pi_cascade_bias(const mjModel*m, const mjData*d, int id): max_qvel = m.actuator_gainprm[id * NGAIN + IDX_CAS_MAX_VEL] des_vel = fmax(-max_qvel, fmin(max_qvel, des_vel)) - # Apply Exponential Moving Average smoothing to the velocity setpoint - ctrl_ema_vel = d.userdata[id * NUM_USER_DATA_PER_ACT + IDX_CAS_STORED_EMA_SMOOTH_V] - smoothing_factor = m.actuator_gainprm[id * NGAIN + IDX_CAS_EMA_SMOOTH_V] - - smoothed_vel_setpoint = (smoothing_factor * ctrl_ema_vel) + (1 - smoothing_factor) * des_vel - - d.userdata[id * NUM_USER_DATA_PER_ACT + IDX_CAS_STORED_EMA_SMOOTH_V] = smoothed_vel_setpoint - vel_output = _pid(parameters=PIDParameters( dt_seconds=dt_in_sec, - setpoint=smoothed_vel_setpoint, + setpoint=des_vel, feedback=d.actuator_velocity[id], Kp=m.actuator_gainprm[id * NGAIN + IDX_CAS_PROPORTIONAL_GAIN_V], error_deadband=0.0, @@ -223,7 +234,7 @@ cdef mjtNum c_pi_cascade_bias(const mjModel*m, const mjData*d, int id): f = vel_output.output - # gravity compensation + # Gravity compensation f += d.qfrc_bias[id] if effort_limit_low != 0.0 or effort_limit_high != 0.0: @@ -237,9 +248,9 @@ cdef enum USER_DEFINED_ACTUATOR_DATA: cdef mjtNum c_custom_bias(const mjModel*m, const mjData*d, int id) with gil: """ - Switches between PID and Cascaded PI type custom bias computation based on the + Switches between PID and Cascaded PID-PI type custom bias computation based on the defined actuator's actuator_user field. - user="1": Cascade PI + user="1": Cascade PID-PI default: PID :param m: mjModel :param d: mjData diff --git a/mujoco_py/tests/test_pid.py b/mujoco_py/tests/test_pid.py index fb422d5def7f3c63b4cb432e92c2b9328fa75559..21b6d4f0dfba6778c3787c2c66f7cdfe5af7ae7a 100644 --- a/mujoco_py/tests/test_pid.py +++ b/mujoco_py/tests/test_pid.py @@ -16,6 +16,8 @@ MODEL_XML = """ <option gravity="0 0 -9.81" integrator="RK4" timestep="0.001"/> <size nstack="3000"/> <worldbody> + <camera name="camera1" pos="3 0 0" zaxis="1 0 0" /> + <geom name="rail" pos="0 0 0" quat="0.707 0 0.707 0" rgba="0.3 0.3 0.7 1" size="0.02 1" type="capsule"/> <body name="cart" pos="0 0 0"> <geom name="cart" pos="0 0 0" quat="0.707 0 0.707 0" size="0.1 0.1" type="capsule"/> @@ -35,8 +37,16 @@ PID_ACTUATOR = """ <general ctrlrange='-1 1' gaintype="user" biastype="user" forcerange="-100 100" gainprm="200 10 10.0 0.1 0.1 0" joint="hinge" name="a-hinge"/> """ -CASCADED_PID_ACTUATOR = """ - <general ctrlrange='-1 1' gaintype="user" biastype="user" forcerange="-3 3" gainprm="5 0 0 10 .1 1.5 .97 3" joint="hinge" name="a-hinge" user="1"/> +CASCADED_PIPI_ACTUATOR = """ + <general ctrlrange='-1 1' gaintype="user" biastype="user" forcerange="-3 3" gainprm="5 0 0 0 0 10 .1 1.5 .97 3" joint="hinge" name="a-hinge" user="1"/> +""" + +CASCADED_PDPI_ACTUATOR = """ + <general ctrlrange='-1 1' gaintype="user" biastype="user" forcerange="-3 3" gainprm="10 0 0 .1 1 10 .1 1.5 .97 3" joint="hinge" name="a-hinge" user="1"/> +""" + +CASCADED_PDPI_ACTUATOR_NO_D = """ + <general ctrlrange='-1 1' gaintype="user" biastype="user" forcerange="-3 3" gainprm="10 0 0 0 0 10 .1 1.5 .97 3" joint="hinge" name="a-hinge" user="1"/> """ P_ONLY_ACTUATOR = """ @@ -50,8 +60,7 @@ POSITION_ACTUATOR = """ def test_mj_pid(): """ - To enable PID control in the mujoco, please - refer to the setting in the PID_ACTUATOR. + To enable PID control in the mujoco, please refer to the setting in the PID_ACTUATOR. Here we set Kp = 200, Ti = 10, Td = 0.1 (also iClamp = 10.0, dSmooth be 0.1) """ @@ -61,7 +70,7 @@ def test_mj_pid(): sim = MjSim(model) cymj.set_pid_control(sim.model, sim.data) - # pertubation of pole to be unbalanced + # Perturbation of pole to be unbalanced init_pos = 0.1 * (random.random() - 0.5) print('init pos', init_pos) sim.data.qpos[0] = init_pos @@ -79,8 +88,8 @@ def test_mj_pid(): def test_mj_proportional_only(): """ - check new PID control is backward compatible with position control - when only has Kp term. + Check new PID control is backward compatible with position control + when it only has a Kp term. """ model = load_model_from_xml(MODEL_XML.format(actuator=P_ONLY_ACTUATOR, nuser_actuator=1)) sim = MjSim(model) @@ -100,22 +109,22 @@ def test_mj_proportional_only(): assert abs(sim.data.qpos[0] - sim2.data.qpos[0]) <= 1e-7, "%d step violates" % i -def test_cascaded_pid(): +def test_cascaded_pipi(): """ - To enable Cascaded PID control in the mujoco, please - refer to the setting in the CASCADED_PID_ACTUATOR. user param should be set to 1 + To enable Cascaded control in mujoco, please refer to the setting in + the CASCADED_PIPI_ACTUATOR. user param should be set to 1 Here we set Kp = 5 for the position control loop and Kp = 10 for the velocity control Ti = 0.1 and integral_max_clamp=1.5. EMA smoothing constant is set to 0.97, and velocity limit is 3 rad/s """ random.seed(30) - xml = MODEL_XML.format(actuator=CASCADED_PID_ACTUATOR, nuser_actuator=1) + xml = MODEL_XML.format(actuator=CASCADED_PIPI_ACTUATOR, nuser_actuator=1) model = load_model_from_xml(xml) sim = MjSim(model) cymj.set_pid_control(sim.model, sim.data) - # pertubation of pole to be unbalanced + # Perturbation of pole to be unbalanced init_pos = 0.1 * (random.random() - 1.0) print('init pos', init_pos) sim.data.qpos[0] = init_pos @@ -133,12 +142,59 @@ def test_cascaded_pid(): print('final pos', sim.data.qpos[0]) assert abs(sim.data.qpos[0] - desired_pos) < 1e-3 - assert max_torque <= 3 # torque limit set on the actuator + assert max_torque <= 3 # Torque limit set on the actuator + +def test_cascaded_pdpi(): + """ + This tests using a PD-PI loop with the cascaded controller. This is achieved by setting the + integral time constant and clamp equal to zero. + + Here we setup two sims (CASCADED_PDPI_ACTUATOR and CASCADED_PDPI_ACTUATOR_NO_D), one with + derivative gain and one without. With the exception of the derivative term, all other gain + parameters are the same. The goal is to show the stability added by the derivative term. + """ + random.seed(30) + xml = MODEL_XML.format(actuator=CASCADED_PDPI_ACTUATOR, nuser_actuator=1) + model = load_model_from_xml(xml) + sim = MjSim(model) + cymj.set_pid_control(sim.model, sim.data) + + """ + sim2 uses the same Kp gain on the position loop as sim but does not have any derivative gain. + It is expected that given this Kp gain and no derivative gain, the system will be unstable + and fail to hold the desired position. + """ + xml = MODEL_XML.format(actuator=CASCADED_PDPI_ACTUATOR_NO_D, nuser_actuator=1) + model2 = load_model_from_xml(xml) + sim2 = MjSim(model2) + cymj.set_pid_control(sim2.model, sim2.data) + + # Perturbation of pole to be unbalanced + init_pos = 0.1 * (random.random() - 1.0) + print('init pos', init_pos) + sim.data.qpos[0] = sim2.data.qpos[0] = init_pos + + desired_pos = 0.0 + sim.data.ctrl[0] = sim2.data.ctrl[0] = desired_pos + print('desired position:', desired_pos) + + max_torque = 0 + + for _ in range(2000): + sim.step() + sim2.step() + if abs(sim.data.actuator_force[0]) > max_torque: + max_torque = abs(sim.data.actuator_force[0]) + + print('final pos', sim.data.qpos[0]) + assert abs(sim2.data.qpos[0] - desired_pos) > 1e-3 # Verify instability without D term + assert abs(sim.data.qpos[0] - desired_pos) < 1e-3 # Verify stability with D term + assert max_torque <= 3 # Torque limit set on the actuator def test_mjsize_exception(): """nuser_actuator must be set large enough to use custom controllers.""" - xml = MODEL_XML.format(actuator=CASCADED_PID_ACTUATOR, nuser_actuator=0) + xml = MODEL_XML.format(actuator=CASCADED_PIPI_ACTUATOR, nuser_actuator=0) model = load_model_from_xml(xml) sim = MjSim(model) with pytest.raises(Exception): diff --git a/mujoco_py/version.py b/mujoco_py/version.py index 72098a78b95d8999c73b94c1e31b575e55fef932..0371ae31a31fae9d663147e0ddf303ebb8dc0b2c 100644 --- a/mujoco_py/version.py +++ b/mujoco_py/version.py @@ -1,6 +1,6 @@ __all__ = ['__version__', 'get_version'] -version_info = (2, 0, 2, 11) +version_info = (2, 0, 2, 12) # format: # ('mujoco_major', 'mujoco_minor', 'mujoco_py_major', 'mujoco_py_minor')