diff --git a/mujoco_py/mjpid.pyx b/mujoco_py/mjpid.pyx index fc3dbfd717142495c7acc698bb6be9bdf6cfab97..52d7627b7fddf1907c9575df9585e620b1dbe388 100644 --- a/mujoco_py/mjpid.pyx +++ b/mujoco_py/mjpid.pyx @@ -1,21 +1,78 @@ from libc.math cimport fabs, fmax, fmin from mujoco_py.generated import const -""" - Kp == Kp - Ki == Kp/Ti - Kd == Kp*Td +cdef int CONTROLLER_TYPE_PI_CASCADE = 1, - In this situation, Kp is a knob to tune the agressiveness, wheras Ti and Td will - change the response time of the system in a predictable way. Lower Ti or Td means - that the system will respond to error more quickly/agressively. +cdef struct PIDErrors: + double error + double integral_error + double derivative_error - error deadband: if set will shrink error within to 0.0 - clamp on integral term: helps on saturation problem in I. - derivative smoothing term: reduces high frequency noise in D. +cdef struct PIDOutput: + PIDErrors errors + double output + +cdef struct PIDParameters: + double dt_seconds # PID sampling time. + double setpoint + double feedback + double Kp + double error_deadband + double integral_max_clamp + double integral_time_const + double derivative_gain_smoothing + double derivative_time_const + PIDErrors previous_errors + +cdef mjtNum c_zero_gains(const mjModel*m, const mjData*d, int id) with gil: + return 0.0 + +cdef PIDOutput _pid(PIDParameters parameters): + """ + A general purpose PID controller implemented in the standard form. + Kp == Kp + Ki == Kp/Ti + Kd == Kp*Td + + In this situation, Kp is a knob to tune the agressiveness, wheras Ti and Td will + change the response time of the system in a predictable way. Lower Ti or Td means + that the system will respond to error more quickly/agressively. + + error deadband: if set will shrink error within to 0.0 + clamp on integral term: helps on saturation problem in I. + derivative smoothing term: reduces high frequency noise in D. + + :param parameters: PID parameters + :return: A PID output struct containing the control output and the error state + """ + + cdef double error = parameters.setpoint - parameters.feedback + + # clamp error that's within the error deadband + if fabs(error) < parameters.error_deadband: + error = 0.0 + + # compute derivative error + cdef double derivative_error = (error - parameters.previous_errors.error) / parameters.dt_seconds + + derivative_error = (1.0 - parameters.derivative_gain_smoothing) * parameters.previous_errors.derivative_error + \ + parameters.derivative_gain_smoothing * derivative_error + + cdef double derivative_error_term = derivative_error * parameters.derivative_time_const + + # update and clamp integral error + integral_error = parameters.previous_errors.integral_error + integral_error += error * parameters.dt_seconds + integral_error = fmax(-parameters.integral_max_clamp, fmin(parameters.integral_max_clamp, integral_error)) + + cdef double integral_error_term = 0.0 + if parameters.integral_time_const != 0: + integral_error_term = integral_error / parameters.integral_time_const + + f = parameters.Kp * (error + integral_error_term + derivative_error_term) + + return PIDOutput(output=f, errors=PIDErrors(error=error, derivative_error=derivative_error, integral_error=integral_error)) - set in gainprm="Kp Ti Td iClamp errBand iSmooth" in mujoco xml. -""" cdef enum USER_DEFINED_ACTUATOR_PARAMS: IDX_PROPORTIONAL_GAIN = 0, IDX_INTEGRAL_TIME_CONSTANT = 1, @@ -24,77 +81,189 @@ cdef enum USER_DEFINED_ACTUATOR_PARAMS: IDX_DERIVATIVE_GAIN_SMOOTHING = 4, IDX_ERROR_DEADBAND = 5, - -cdef enum USER_DEFINED_CONTROLLER_DATA: +cdef enum USER_DEFINED_CONTROLLER_DATA_PID: IDX_INTEGRAL_ERROR = 0, IDX_LAST_ERROR = 1, IDX_DERIVATIVE_ERROR_LAST = 2, - NUM_USER_DATA_PER_ACT = 3, + NUM_USER_DATA_PER_ACT_PID = 3, + +cdef mjtNum c_pid_bias(const mjModel*m, const mjData*d, int id): + """ + To activate PID, set gainprm="Kp Ti Td iClamp errBand iSmooth" in a general type actuator in mujoco xml + """ + cdef double dt_in_sec = m.opt.timestep + cdef int NGAIN = int(const.NGAIN) + result = _pid(parameters=PIDParameters( + dt_seconds=dt_in_sec, + setpoint=d.ctrl[id], + feedback=d.actuator_length[id], + Kp=m.actuator_gainprm[id * NGAIN + IDX_PROPORTIONAL_GAIN], + error_deadband=m.actuator_gainprm[id * NGAIN + IDX_ERROR_DEADBAND], + integral_max_clamp=m.actuator_gainprm[id * NGAIN + IDX_INTEGRAL_MAX_CLAMP], + integral_time_const=m.actuator_gainprm[id * NGAIN + IDX_INTEGRAL_TIME_CONSTANT], + derivative_gain_smoothing=m.actuator_gainprm[id * NGAIN + IDX_DERIVATIVE_GAIN_SMOOTHING], + derivative_time_const=m.actuator_gainprm[id * NGAIN + IDX_DERIVATIVE_TIME_CONSTANT], + previous_errors=PIDErrors( + error=d.userdata[id * NUM_USER_DATA_PER_ACT + IDX_LAST_ERROR], + derivative_error=d.userdata[id * NUM_USER_DATA_PER_ACT + IDX_DERIVATIVE_ERROR_LAST], + integral_error=d.userdata[id * NUM_USER_DATA_PER_ACT + IDX_INTEGRAL_ERROR], + ))) -cdef mjtNum c_zero_gains(const mjModel* m, const mjData* d, int id) with gil: - return 0.0 + d.userdata[id * NUM_USER_DATA_PER_ACT + IDX_LAST_ERROR] = result.errors.error + d.userdata[id * NUM_USER_DATA_PER_ACT + IDX_DERIVATIVE_ERROR_LAST] = result.errors.derivative_error + d.userdata[id * NUM_USER_DATA_PER_ACT + IDX_INTEGRAL_ERROR] = result.errors.integral_error + + f = result.output + cdef double effort_limit_low = m.actuator_forcerange[id * 2] + cdef double effort_limit_high = m.actuator_forcerange[id * 2 + 1] + + if effort_limit_low != 0.0 or effort_limit_high != 0.0: + f = fmax(effort_limit_low, fmin(effort_limit_high, f)) + return f -cdef mjtNum c_pid_bias(const mjModel* m, const mjData* d, int id) with gil: +cdef enum USER_DEFINED_ACTUATOR_PARAMS_CASCADE: + IDX_CAS_PROPORTIONAL_GAIN = 0, + IDX_CAS_INTEGRAL_TIME_CONSTANT = 1, + IDX_CAS_INTEGRAL_MAX_CLAMP = 2, + IDX_CAS_PROPORTIONAL_GAIN_V = 3, + IDX_CAS_INTEGRAL_TIME_CONSTANT_V = 4, + IDX_CAS_INTEGRAL_MAX_CLAMP_V = 5, + IDX_CAS_EMA_SMOOTH_V = 6, + IDX_CAS_MAX_VEL = 7, + +cdef enum USER_DEFINED_CONTROLLER_DATA_CASCADE: + IDX_CAS_INTEGRAL_ERROR = 0, + IDX_CAS_INTEGRAL_ERROR_V = 1, + IDX_CAS_STORED_EMA_SMOOTH_V = 2, + NUM_USER_DATA_PER_ACT_CAS = 3, + +cdef NUM_USER_DATA_PER_ACT = max(int(NUM_USER_DATA_PER_ACT_PID), int(NUM_USER_DATA_PER_ACT_CAS)) + +cdef mjtNum c_pi_cascade_bias(const mjModel*m, const mjData*d, int id): + """ + A cascaded PID controller implementation that can control position + and velocity setpoints for a given actuator. + + Currently, the cascaded controller is implemented as PI controllers for both the position and + the velocity terms and therefore require Kp, Ti and max_clamp_integral parameters to be + specified for both. Additionally, an exponential moving average smoothing is applied to the + velocity component for added stability, and the controller is gravity compensated via the + `qfrc_bias` term. + + These are provided as part of gainprm in the following order: `gainprm="Kp_pos Ti_pos + max_clamp_pos Kp_vel Ti_vel max_clamp_vel ema_smooth_factor max_vel"`, where ema_smooth_factor + denotes the EMA smoothing factor and max_vel is a velocity constraint applied to the velocity setpoint. + """ cdef double dt_in_sec = m.opt.timestep - cdef double error = d.ctrl[id] - d.actuator_length[id] cdef int NGAIN = int(const.NGAIN) - cdef double Kp = m.actuator_gainprm[id * NGAIN + IDX_PROPORTIONAL_GAIN] - cdef double error_deadband = m.actuator_gainprm[id * NGAIN + IDX_ERROR_DEADBAND] - cdef double integral_max_clamp = m.actuator_gainprm[id * NGAIN + IDX_INTEGRAL_MAX_CLAMP] - cdef double integral_time_const = m.actuator_gainprm[id * NGAIN + IDX_INTEGRAL_TIME_CONSTANT] - cdef double derivative_gain_smoothing = \ - m.actuator_gainprm[id * NGAIN + IDX_DERIVATIVE_GAIN_SMOOTHING] - cdef double derivate_time_const = m.actuator_gainprm[id * NGAIN + IDX_DERIVATIVE_TIME_CONSTANT] + cdef double Kp_cas = m.actuator_gainprm[id * NGAIN + IDX_CAS_PROPORTIONAL_GAIN] - cdef double effort_limit_low = m.actuator_forcerange[id * 2] - cdef double effort_limit_high = m.actuator_forcerange[id * 2 + 1] + if Kp_cas != 0: + # Run a position PID loop and use the result to set a desired velocity signal + pos_output = _pid(parameters=PIDParameters( + dt_seconds=dt_in_sec, + setpoint=d.ctrl[id], + feedback=d.actuator_length[id], + Kp=Kp_cas, + error_deadband=0.0, + integral_max_clamp=m.actuator_gainprm[id * NGAIN + IDX_CAS_INTEGRAL_MAX_CLAMP], + integral_time_const=m.actuator_gainprm[id * NGAIN + IDX_CAS_INTEGRAL_TIME_CONSTANT], + derivative_gain_smoothing=0.0, + derivative_time_const=0.0, + previous_errors=PIDErrors( + error=0.0, + derivative_error=0.0, + integral_error=d.userdata[id * NUM_USER_DATA_PER_ACT + IDX_CAS_INTEGRAL_ERROR], + ))) - if fabs(error) < error_deadband: - error = 0.0 + # Save errors + d.userdata[id * NUM_USER_DATA_PER_ACT + IDX_CAS_INTEGRAL_ERROR] = pos_output.errors.integral_error + des_vel = pos_output.output + else: + # If P gain on position loop is zero, only use the velocity controller + des_vel = d.ctrl[id] - integral_error = d.userdata[id * NUM_USER_DATA_PER_ACT + IDX_INTEGRAL_ERROR] - integral_error += error * dt_in_sec - integral_error = fmax(-integral_max_clamp, fmin(integral_max_clamp, integral_error)) + # Clamp max angular velocity + max_qvel = m.actuator_gainprm[id * NGAIN + IDX_CAS_MAX_VEL] + des_vel = fmax(-max_qvel, fmin(max_qvel, des_vel)) - last_error = d.userdata[id * NUM_USER_DATA_PER_ACT + IDX_LAST_ERROR] - cdef double derivative_error = (error - last_error) / dt_in_sec + # Apply Exponential Moving Average smoothing to the velocity setpoint + ctrl_ema_vel = d.userdata[id * NUM_USER_DATA_PER_ACT + IDX_CAS_STORED_EMA_SMOOTH_V] + smoothing_factor = m.actuator_gainprm[id * NGAIN + IDX_CAS_EMA_SMOOTH_V] - derivative_error_last = d.userdata[id * NUM_USER_DATA_PER_ACT + IDX_DERIVATIVE_ERROR_LAST] + smoothed_vel_setpoint = (smoothing_factor * ctrl_ema_vel) + (1 - smoothing_factor) * des_vel - derivative_error = (1.0 - derivative_gain_smoothing) * derivative_error_last + \ - derivative_gain_smoothing * derivative_error + d.userdata[id * NUM_USER_DATA_PER_ACT + IDX_CAS_STORED_EMA_SMOOTH_V] = smoothed_vel_setpoint - cdef double integral_error_term = 0.0 - if integral_time_const != 0: - integral_error_term = integral_error / integral_time_const + vel_output = _pid(parameters=PIDParameters( + dt_seconds=dt_in_sec, + setpoint=smoothed_vel_setpoint, + feedback=d.actuator_velocity[id], + Kp=m.actuator_gainprm[id * NGAIN + IDX_CAS_PROPORTIONAL_GAIN_V], + error_deadband=0.0, + integral_max_clamp=m.actuator_gainprm[id * NGAIN + IDX_CAS_INTEGRAL_MAX_CLAMP_V], + integral_time_const=m.actuator_gainprm[id * NGAIN + IDX_CAS_INTEGRAL_TIME_CONSTANT_V], + derivative_gain_smoothing=0.0, + derivative_time_const=0.0, + previous_errors=PIDErrors( + error=0.0, + derivative_error=0.0, + integral_error=d.userdata[id * NUM_USER_DATA_PER_ACT + IDX_CAS_INTEGRAL_ERROR_V], + ))) - cdef double derivative_error_term = derivative_error * derivate_time_const + # Save errors + d.userdata[id * NUM_USER_DATA_PER_ACT + IDX_CAS_INTEGRAL_ERROR_V] = vel_output.errors.integral_error - f = Kp * (error + integral_error_term + derivative_error_term) - # print(id, d.ctrl[id], d.actuator_length[id], error, integral_error_term, derivative_error_term, - # derivative_error, dt_in_sec, last_error, integral_error, derivative_error_last, f) + # Limit max torque at the output + cdef double effort_limit_low = m.actuator_forcerange[id * 2] + cdef double effort_limit_high = m.actuator_forcerange[id * 2 + 1] + + f = vel_output.output + + # gravity compensation + f += d.qfrc_bias[id] - d.userdata[id * NUM_USER_DATA_PER_ACT + IDX_LAST_ERROR] = error - d.userdata[id * NUM_USER_DATA_PER_ACT + IDX_DERIVATIVE_ERROR_LAST] = derivative_error - d.userdata[id * NUM_USER_DATA_PER_ACT + IDX_INTEGRAL_ERROR] = integral_error - if effort_limit_low != 0.0 or effort_limit_high != 0.0: f = fmax(effort_limit_low, fmin(effort_limit_high, f)) + return f +cdef enum USER_DEFINED_ACTUATOR_DATA: + IDX_CONTROLLER_TYPE = 0 + NUM_ACTUATOR_DATA = 1 + +cdef mjtNum c_custom_bias(const mjModel*m, const mjData*d, int id) with gil: + """ + Switches between PID and Cascaded PI type custom bias computation based on the + defined actuator's actuator_user field. + user="1": Cascade PI + default: PID + :param m: mjModel + :param d: mjData + :param id: actuator ID + :return: Custom actuator force + """ + controller_type = int(m.actuator_user[id * m.nuser_actuator + IDX_CONTROLLER_TYPE]) + + if controller_type == CONTROLLER_TYPE_PI_CASCADE: + return c_pi_cascade_bias(m, d, id) + return c_pid_bias(m, d, id) def set_pid_control(m, d): global mjcb_act_gain global mjcb_act_bias if m.nuserdata < m.nu * NUM_USER_DATA_PER_ACT: - raise Exception('nuserdata is not set large enough to store PID internal states') + raise Exception('nuserdata is not set large enough to store PID internal states.') + + if m.nuser_actuator < m.nu * NUM_ACTUATOR_DATA: + raise Exception('nuser_actuator is not set large enough to store controller types') for i in range(m.nuserdata): d.userdata[i] = 0.0 mjcb_act_gain = c_zero_gains - mjcb_act_bias = c_pid_bias + mjcb_act_bias = c_custom_bias diff --git a/mujoco_py/tests/test_pid.py b/mujoco_py/tests/test_pid.py index 790fd12412f817e7d1424fad2e9130280975e593..fb422d5def7f3c63b4cb432e92c2b9328fa75559 100644 --- a/mujoco_py/tests/test_pid.py +++ b/mujoco_py/tests/test_pid.py @@ -5,7 +5,7 @@ from mujoco_py import (MjSim, load_model_from_xml, cymj) MODEL_XML = """ <mujoco model="inverted pendulum"> - <size nuserdata="100"/> + <size nuserdata="100" nuser_actuator="{nuser_actuator}"/> <compiler inertiafromgeom="true"/> <default> <joint armature="0" damping="1" limited="true"/> @@ -35,6 +35,10 @@ PID_ACTUATOR = """ <general ctrlrange='-1 1' gaintype="user" biastype="user" forcerange="-100 100" gainprm="200 10 10.0 0.1 0.1 0" joint="hinge" name="a-hinge"/> """ +CASCADED_PID_ACTUATOR = """ + <general ctrlrange='-1 1' gaintype="user" biastype="user" forcerange="-3 3" gainprm="5 0 0 10 .1 1.5 .97 3" joint="hinge" name="a-hinge" user="1"/> +""" + P_ONLY_ACTUATOR = """ <general ctrlrange='-1 1' gaintype="user" biastype="user" gainprm="200" joint="hinge" name="a-hinge"/> """ @@ -44,51 +48,98 @@ POSITION_ACTUATOR = """ """ -""" - To enable PID control in the mujoco, please - refer to the setting in the PID_ACTUATOR. - - Here we set Kp = 200, Ti = 10, Td = 0.1 (also iClamp = 10.0, dSmooth be 0.1) -""" def test_mj_pid(): - xml = MODEL_XML.format(actuator=PID_ACTUATOR) - model = load_model_from_xml(xml) - sim = MjSim(model) - cymj.set_pid_control(sim.model, sim.data) + """ + To enable PID control in the mujoco, please + refer to the setting in the PID_ACTUATOR. - # pertubation of pole to be unbalanced - init_pos = 0.1 * (random.random() - 0.5) - print('init pos', init_pos) - sim.data.qpos[0] = init_pos + Here we set Kp = 200, Ti = 10, Td = 0.1 (also iClamp = 10.0, dSmooth be 0.1) + """ + random.seed(30) + xml = MODEL_XML.format(actuator=PID_ACTUATOR, nuser_actuator=1) + model = load_model_from_xml(xml) + sim = MjSim(model) + cymj.set_pid_control(sim.model, sim.data) - pos = 0.0 - sim.data.ctrl[0] = pos - print('desire position:', pos) + # pertubation of pole to be unbalanced + init_pos = 0.1 * (random.random() - 0.5) + print('init pos', init_pos) + sim.data.qpos[0] = init_pos - for _ in range(100): - sim.step() + pos = 0.0 + sim.data.ctrl[0] = pos + print('desired position:', pos) - print('final pos', sim.data.qpos[0]) - assert abs(sim.data.qpos[0] - pos) < 0.01 + for _ in range(1000): + sim.step() -""" + print('final pos', sim.data.qpos[0]) + assert abs(sim.data.qpos[0] - pos) < 1e-4 + + +def test_mj_proportional_only(): + """ check new PID control is backward compatible with position control - when only has Kp term. -""" -def test_mj_proptional_only(): - model = load_model_from_xml(MODEL_XML.format(actuator=P_ONLY_ACTUATOR)) - sim = MjSim(model) - cymj.set_pid_control(sim.model, sim.data) - - model2 = load_model_from_xml(MODEL_XML.format(actuator=POSITION_ACTUATOR)) - sim2 = MjSim(model2) - - init_pos = 0.1 * (random.random() - 0.5) - sim.data.qpos[0] = sim2.data.qpos[0] = init_pos - sim.data.ctrl[0] = sim2.data.ctrl[0] = 0 - - for i in range(2000): - print(i, sim.data.qpos[0], sim2.data.qpos[0]) - sim.step() - sim2.step() - assert abs(sim.data.qpos[0] - sim2.data.qpos[0]) <= 1e-7, "%d step violates" % i + when only has Kp term. + """ + model = load_model_from_xml(MODEL_XML.format(actuator=P_ONLY_ACTUATOR, nuser_actuator=1)) + sim = MjSim(model) + cymj.set_pid_control(sim.model, sim.data) + + model2 = load_model_from_xml(MODEL_XML.format(actuator=POSITION_ACTUATOR, nuser_actuator=1)) + sim2 = MjSim(model2) + + init_pos = 0.1 * (random.random() - 0.5) + sim.data.qpos[0] = sim2.data.qpos[0] = init_pos + sim.data.ctrl[0] = sim2.data.ctrl[0] = 0 + + for i in range(2000): + print(i, sim.data.qpos[0], sim2.data.qpos[0]) + sim.step() + sim2.step() + assert abs(sim.data.qpos[0] - sim2.data.qpos[0]) <= 1e-7, "%d step violates" % i + + +def test_cascaded_pid(): + """ + To enable Cascaded PID control in the mujoco, please + refer to the setting in the CASCADED_PID_ACTUATOR. user param should be set to 1 + + Here we set Kp = 5 for the position control loop and Kp = 10 for the velocity control + Ti = 0.1 and integral_max_clamp=1.5. + EMA smoothing constant is set to 0.97, and velocity limit is 3 rad/s + """ + random.seed(30) + xml = MODEL_XML.format(actuator=CASCADED_PID_ACTUATOR, nuser_actuator=1) + model = load_model_from_xml(xml) + sim = MjSim(model) + cymj.set_pid_control(sim.model, sim.data) + + # pertubation of pole to be unbalanced + init_pos = 0.1 * (random.random() - 1.0) + print('init pos', init_pos) + sim.data.qpos[0] = init_pos + + desired_pos = 0.0 + sim.data.ctrl[0] = desired_pos + print('desired position:', desired_pos) + + max_torque = 0 + + for _ in range(1000): + sim.step() + if abs(sim.data.actuator_force[0]) > max_torque: + max_torque = abs(sim.data.actuator_force[0]) + + print('final pos', sim.data.qpos[0]) + assert abs(sim.data.qpos[0] - desired_pos) < 1e-3 + assert max_torque <= 3 # torque limit set on the actuator + + +def test_mjsize_exception(): + """nuser_actuator must be set large enough to use custom controllers.""" + xml = MODEL_XML.format(actuator=CASCADED_PID_ACTUATOR, nuser_actuator=0) + model = load_model_from_xml(xml) + sim = MjSim(model) + with pytest.raises(Exception): + cymj.set_pid_control(sim.model, sim.data) diff --git a/mujoco_py/version.py b/mujoco_py/version.py index 4c5dffa74bbeee9cb19acfac4e08ea80ade2b44f..72098a78b95d8999c73b94c1e31b575e55fef932 100644 --- a/mujoco_py/version.py +++ b/mujoco_py/version.py @@ -1,6 +1,6 @@ __all__ = ['__version__', 'get_version'] -version_info = (2, 0, 2, 10) +version_info = (2, 0, 2, 11) # format: # ('mujoco_major', 'mujoco_minor', 'mujoco_py_major', 'mujoco_py_minor')