diff --git a/mujoco_py/cymj.pyx b/mujoco_py/cymj.pyx index 953233c2d81bc4306c218c4982008b1b2beb217e..45cbce3abf30f189b6771e81ebc4cbe951aac2ea 100644 --- a/mujoco_py/cymj.pyx +++ b/mujoco_py/cymj.pyx @@ -14,7 +14,6 @@ from tempfile import TemporaryDirectory import numpy as np from cython cimport view from cython.parallel import parallel, prange - from mujoco_py.generated import const include "generated/wrappers.pxi" @@ -23,6 +22,7 @@ include "mjsim.pyx" include "mjsimstate.pyx" include "mjrendercontext.pyx" include "mjbatchrenderer.pyx" +include "mjpid.pyx" cdef extern from "gl/glshim.h": diff --git a/mujoco_py/mjpid.pyx b/mujoco_py/mjpid.pyx new file mode 100644 index 0000000000000000000000000000000000000000..708f018bc8c58d8eee5bf4bd031e8ae0337ba9ad --- /dev/null +++ b/mujoco_py/mjpid.pyx @@ -0,0 +1,97 @@ +from libc.math cimport fabs, fmax, fmin +from mujoco_py.generated import const + +""" + Kp == Kp + Ki == Kp/Ti + Kd == Kp*Td + + In this situation, Kp is a knob to tune the agressiveness, wheras Ti and Td will + change the response time of the system in a predictable way. Lower Ti or Td means + that the system will respond to error more quickly/agressively. + + error deadband: if set will shrink error within to 0.0 + clamp on integral term: helps on saturation problem in I. + derivative smoothing term: reduces high frequency noise in D. + + set in gainprm="Kp Ti Td iClamp errBand iSmooth" in mujoco xml. +""" +cdef enum USER_DEFINED_ACTUATOR_PARAMS: + IDX_PROPORTIONAL_GAIN = 0, + IDX_INTEGRAL_TIME_CONSTANT = 1, + IDX_INTEGRAL_MAX_CLAMP = 2, + IDX_DERIVATIVE_TIME_CONSTANT = 3, + IDX_DERIVATIVE_GAIN_SMOOTHING = 4, + IDX_ERROR_DEADBAND = 5, + + +cdef enum USER_DEFINED_CONTROLLER_DATA: + IDX_INTEGRAL_ERROR = 0, + IDX_LAST_ERROR = 1, + IDX_DERIVATIVE_ERROR_LAST = 2, + NUM_USER_DATA_PER_ACT = 3, + + +cdef mjtNum c_zero_gains(const mjModel* m, const mjData* d, int id) with gil: + return 0.0 + + +cdef mjtNum c_pid_bias(const mjModel* m, const mjData* d, int id) with gil: + cdef double dt_in_sec = m.opt.timestep + cdef double error = d.ctrl[id] - d.actuator_length[id] + cdef int NGAIN = int(const.NGAIN) + + cdef double Kp = m.actuator_gainprm[id * NGAIN + IDX_PROPORTIONAL_GAIN] + cdef double error_deadband = m.actuator_gainprm[id * NGAIN + IDX_ERROR_DEADBAND] + cdef double integral_max_clamp = m.actuator_gainprm[id * NGAIN + IDX_INTEGRAL_MAX_CLAMP] + cdef double integral_time_const = m.actuator_gainprm[id * NGAIN + IDX_INTEGRAL_TIME_CONSTANT] + cdef double derivative_gain_smoothing = \ + m.actuator_gainprm[id * NGAIN + IDX_DERIVATIVE_GAIN_SMOOTHING] + cdef double derivate_time_const = m.actuator_gainprm[id * NGAIN + IDX_DERIVATIVE_TIME_CONSTANT] + + cdef double effort_limit_low = m.actuator_forcerange[id * 2] + cdef double effort_limit_high = m.actuator_forcerange[id * 2 + 1] + + if fabs(error) < error_deadband: + error = 0.0 + + integral_error = d.userdata[id * NUM_USER_DATA_PER_ACT + IDX_INTEGRAL_ERROR] + integral_error += error * dt_in_sec + integral_error = fmax(-integral_max_clamp, fmin(integral_max_clamp, integral_error)) + + last_error = d.userdata[id * NUM_USER_DATA_PER_ACT + IDX_LAST_ERROR] + cdef double derivative_error = (error - last_error) / dt_in_sec + + derivative_error_last = d.userdata[id * NUM_USER_DATA_PER_ACT + IDX_DERIVATIVE_ERROR_LAST] + + derivative_error = (1.0 - derivative_gain_smoothing) * derivative_error_last + \ + derivative_gain_smoothing * derivative_error + + cdef double integral_error_term = 0.0 + if integral_time_const != 0: + integral_error_term = integral_error / integral_time_const + + cdef double derivative_error_term = derivative_error * derivate_time_const + + f = Kp * (error + integral_error_term + derivative_error_term) + # print(id, d.ctrl[id], d.actuator_length[id], error, integral_error_term, derivative_error_term, + # derivative_error, dt_in_sec, last_error, integral_error, derivative_error_last, f) + + d.userdata[id * NUM_USER_DATA_PER_ACT + IDX_LAST_ERROR] = error + d.userdata[id * NUM_USER_DATA_PER_ACT + IDX_DERIVATIVE_ERROR_LAST] = derivative_error + d.userdata[id * NUM_USER_DATA_PER_ACT + IDX_INTEGRAL_ERROR] = integral_error + return fmax(effort_limit_low, fmin(effort_limit_high, f)) + + +def set_pid_control(m, d): + global mjcb_act_gain + global mjcb_act_bias + + if m.nuserdata < m.nu * NUM_USER_DATA_PER_ACT: + raise Exception('nuserdata is not set large enough to store PID internal states') + + for i in range(m.nuserdata): + d.userdata[i] = 0.0 + + mjcb_act_gain = c_zero_gains + mjcb_act_bias = c_pid_bias diff --git a/mujoco_py/pxd/mujoco.pxd b/mujoco_py/pxd/mujoco.pxd index 304abad8f7679c1e25081318e96b292609a5d37f..5d993c9fa3131fd18308a9cd5205f9bfe5f827ee 100644 --- a/mujoco_py/pxd/mujoco.pxd +++ b/mujoco_py/pxd/mujoco.pxd @@ -26,8 +26,8 @@ cdef extern from "mujoco.h" nogil: # mjfSensor mjcb_sensor; # mjfTime mjcb_time; # mjfAct mjcb_act_dyn; - # mjfAct mjcb_act_gain; - # mjfAct mjcb_act_bias; + mjfAct mjcb_act_gain; + mjfAct mjcb_act_bias; # mjfSolImp mjcb_sol_imp; # mjfSolRef mjcb_sol_ref; # @@ -88,19 +88,19 @@ cdef extern from "mujoco.h" nogil: # Parse XML file in MJCF or URDF format, compile it, return low-level model. # If vfs is not NULL, look up files in vfs before reading from disk. # If error is not NULL, it must have size error_sz. - mjModel* mj_loadXML(const char* filename, const mjVFS* vfs, + mjModel* mj_loadXML(const char* filename, const mjVFS* vfs, char* error, int error_sz); # Update XML data structures with info from low-level model, save as MJCF. # If error is not NULL, it must have size error_sz. - int mj_saveLastXML(const char* filename, const mjModel* m, + int mj_saveLastXML(const char* filename, const mjModel* m, char* error, int error_sz); # Free last XML model if loaded. Called internally at each load. void mj_freeLastXML(); # Print internal XML schema as plain text or HTML, with style-padding or . - int mj_printSchema(const char* filename, char* buffer, int buffer_sz, + int mj_printSchema(const char* filename, char* buffer, int buffer_sz, int flg_html, int flg_pad); @@ -122,7 +122,7 @@ cdef extern from "mujoco.h" nogil: void mj_inverse(const mjModel* m, mjData* d); # Forward dynamics with skip; skipstage is mjtStage. - void mj_forwardSkip(const mjModel* m, mjData* d, + void mj_forwardSkip(const mjModel* m, mjData* d, int skipstage, int skipsensorenergy); # Inverse dynamics with skip; skipstage is mjtStage. @@ -200,17 +200,17 @@ cdef extern from "mujoco.h" nogil: #--------------------- Printing ------------------------------------------------------- # Print model to text file. - void mj_printModel(const mjModel* m, const char* filename); + void mj_printModel(const mjModel* m, const char* filename); # Print data to text file. - void mj_printData(const mjModel* m, mjData* d, const char* filename); + void mj_printData(const mjModel* m, mjData* d, const char* filename); # Print matrix to screen. void mju_printMat(const mjtNum* mat, int nr, int nc); # Print sparse matrix to screen. void mju_printMatSparse(const mjtNum* mat, int nr, - const int* rownnz, const int* rowadr, + const int* rownnz, const int* rowadr, const int* colind); @@ -351,35 +351,35 @@ cdef extern from "mujoco.h" nogil: int mj_isDual(const mjModel* m); # Multiply dense or sparse constraint Jacobian by vector. - void mj_mulJacVec(const mjModel* m, mjData* d, + void mj_mulJacVec(const mjModel* m, mjData* d, mjtNum* res, const mjtNum* vec); # Multiply dense or sparse constraint Jacobian transpose by vector. void mj_mulJacTVec(const mjModel* m, mjData* d, mjtNum* res, const mjtNum* vec); # Compute 3/6-by-nv end-effector Jacobian of global point attached to given body. - void mj_jac(const mjModel* m, const mjData* d, + void mj_jac(const mjModel* m, const mjData* d, mjtNum* jacp, mjtNum* jacr, const mjtNum point[3], int body); # Compute body frame end-effector Jacobian. - void mj_jacBody(const mjModel* m, const mjData* d, + void mj_jacBody(const mjModel* m, const mjData* d, mjtNum* jacp, mjtNum* jacr, int body); # Compute body center-of-mass end-effector Jacobian. - void mj_jacBodyCom(const mjModel* m, const mjData* d, + void mj_jacBodyCom(const mjModel* m, const mjData* d, mjtNum* jacp, mjtNum* jacr, int body); # Compute geom end-effector Jacobian. - void mj_jacGeom(const mjModel* m, const mjData* d, + void mj_jacGeom(const mjModel* m, const mjData* d, mjtNum* jacp, mjtNum* jacr, int geom); # Compute site end-effector Jacobian. - void mj_jacSite(const mjModel* m, const mjData* d, + void mj_jacSite(const mjModel* m, const mjData* d, mjtNum* jacp, mjtNum* jacr, int site); # Compute translation end-effector Jacobian of point, and rotation Jacobian of axis. - void mj_jacPointAxis(const mjModel* m, mjData* d, - mjtNum* jacPoint, mjtNum* jacAxis, + void mj_jacPointAxis(const mjModel* m, mjData* d, + mjtNum* jacPoint, mjtNum* jacAxis, const mjtNum point[3], const mjtNum axis[3], int body); # Get id of object with specified name, return -1 if not found; type is mjtObj. @@ -399,20 +399,20 @@ cdef extern from "mujoco.h" nogil: # Add inertia matrix to destination matrix. # Destination can be sparse uncompressed, or dense when all int* are NULL - void mj_addM(const mjModel* m, mjData* d, mjtNum* dst, + void mj_addM(const mjModel* m, mjData* d, mjtNum* dst, int* rownnz, int* rowadr, int* colind); # Apply cartesian force and torque (outside xfrc_applied mechanism). - void mj_applyFT(const mjModel* m, mjData* d, - const mjtNum* force, const mjtNum* torque, + void mj_applyFT(const mjModel* m, mjData* d, + const mjtNum* force, const mjtNum* torque, const mjtNum* point, int body, mjtNum* qfrc_target); # Compute object 6D velocity in object-centered frame, world/local orientation. - void mj_objectVelocity(const mjModel* m, const mjData* d, + void mj_objectVelocity(const mjModel* m, const mjData* d, int objtype, int objid, mjtNum* res, int flg_local); # Compute object 6D acceleration in object-centered frame, world/local orientation. - void mj_objectAcceleration(const mjModel* m, const mjData* d, + void mj_objectAcceleration(const mjModel* m, const mjData* d, int objtype, int objid, mjtNum* res, int flg_local); # Extract 6D force:torque for one contact, in contact frame. @@ -448,7 +448,7 @@ cdef extern from "mujoco.h" nogil: # Return geomid and distance (x) to nearest surface, or -1 if no intersection. # geomgroup, flg_static are as in mjvOption; geomgroup==NULL skips group exclusion. mjtNum mj_ray(const mjModel* m, const mjData* d, const mjtNum* pnt, const mjtNum* vec, - const mjtByte* geomgroup, mjtByte flg_static, int bodyexclude, + const mjtByte* geomgroup, mjtByte flg_static, int bodyexclude, int* geomid); # Interect ray with hfield, return nearest distance or -1 if no intersection. @@ -460,7 +460,7 @@ cdef extern from "mujoco.h" nogil: const mjtNum* pnt, const mjtNum* vec); # Interect ray with pure geom, return nearest distance or -1 if no intersection. - mjtNum mju_rayGeom(const mjtNum* pos, const mjtNum* mat, const mjtNum* size, + mjtNum mju_rayGeom(const mjtNum* pos, const mjtNum* mat, const mjtNum* size, const mjtNum* pnt, const mjtNum* vec, int geomtype); @@ -473,11 +473,11 @@ cdef extern from "mujoco.h" nogil: void mjv_defaultPerturb(mjvPerturb* pert); # Transform pose from room to model space. - void mjv_room2model(mjtNum* modelpos, mjtNum* modelquat, const mjtNum* roompos, + void mjv_room2model(mjtNum* modelpos, mjtNum* modelquat, const mjtNum* roompos, const mjtNum* roomquat, const mjvScene* scn); # Transform pose from model to room space. - void mjv_model2room(mjtNum* roompos, mjtNum* roomquat, const mjtNum* modelpos, + void mjv_model2room(mjtNum* roompos, mjtNum* roomquat, const mjtNum* modelpos, const mjtNum* modelquat, const mjvScene* scn); # Get camera info in model space; average left and right OpenGL cameras. @@ -495,11 +495,11 @@ cdef extern from "mujoco.h" nogil: void mjv_alignToCamera(mjtNum* res, const mjtNum* vec, const mjtNum* forward); # Move camera with mouse; action is mjtMouse. - void mjv_moveCamera(const mjModel* m, int action, mjtNum reldx, mjtNum reldy, + void mjv_moveCamera(const mjModel* m, int action, mjtNum reldx, mjtNum reldy, const mjvScene* scn, mjvCamera* cam); # Move perturb object with mouse; action is mjtMouse. - void mjv_movePerturb(const mjModel* m, const mjData* d, int action, mjtNum reldx, + void mjv_movePerturb(const mjModel* m, const mjData* d, int action, mjtNum reldx, mjtNum reldy, const mjvScene* scn, mjvPerturb* pert); # Move model with mouse; action is mjtMouse. @@ -507,12 +507,12 @@ cdef extern from "mujoco.h" nogil: const mjtNum* roomup, mjvScene* scn); # Copy perturb pos,quat from selected body; set scale for perturbation. - void mjv_initPerturb(const mjModel* m, const mjData* d, + void mjv_initPerturb(const mjModel* m, const mjData* d, const mjvScene* scn, mjvPerturb* pert); # Set perturb pos,quat in d->mocap when selected body is mocap, and in d->qpos otherwise. # Write d->qpos only if flg_paused and subtree root for selected body has free joint. - void mjv_applyPerturbPose(const mjModel* m, mjData* d, const mjvPerturb* pert, + void mjv_applyPerturbPose(const mjModel* m, mjData* d, const mjvPerturb* pert, int flg_paused); # Set perturb force,torque in d->xfrc_applied, if selected body is dynamic. @@ -540,8 +540,8 @@ cdef extern from "mujoco.h" nogil: # Set (type, size, pos, mat) for connector-type geom between given points. # Assume that mjv_initGeom was already called to set all other properties. - void mjv_makeConnector(mjvGeom* geom, int type, mjtNum width, - mjtNum a0, mjtNum a1, mjtNum a2, + void mjv_makeConnector(mjvGeom* geom, int type, mjtNum width, + mjtNum a0, mjtNum a1, mjtNum a2, mjtNum b0, mjtNum b1, mjtNum b2); # Set default abstract scene. @@ -554,11 +554,11 @@ cdef extern from "mujoco.h" nogil: void mjv_freeScene(mjvScene* scn); # Update entire scene given model state. - void mjv_updateScene(const mjModel* m, mjData* d, const mjvOption* opt, + void mjv_updateScene(const mjModel* m, mjData* d, const mjvOption* opt, const mjvPerturb* pert, mjvCamera* cam, int catmask, mjvScene* scn); # Add geoms from selected categories to existing scene. - void mjv_addGeoms(const mjModel* m, mjData* d, const mjvOption* opt, + void mjv_addGeoms(const mjModel* m, mjData* d, const mjvOption* opt, const mjvPerturb* pert, int catmask, mjvScene* scn); # Make list of lights. @@ -880,7 +880,7 @@ cdef extern from "mujoco.h" nogil: # Coordinate transform of 6D motion or force vector in rotation:translation format. # rotnew2old is 3-by-3, NULL means no rotation; flg_force specifies force or motion type. void mju_transformSpatial(mjtNum res[6], const mjtNum vec[6], int flg_force, - const mjtNum newpos[3], const mjtNum oldpos[3], + const mjtNum newpos[3], const mjtNum oldpos[3], const mjtNum rotnew2old[9]); @@ -904,23 +904,23 @@ cdef extern from "mujoco.h" nogil: const int* rownnz, const int* rowadr, const int* colind); # Multiply sparse matrix and dense vector: res = mat * vec. - void mju_mulMatVecSparse(mjtNum* res, const mjtNum* mat, const mjtNum* vec, int nr, + void mju_mulMatVecSparse(mjtNum* res, const mjtNum* mat, const mjtNum* vec, int nr, const int* rownnz, const int* rowadr, const int* colind); # Compress layout of sparse matrix. - void mju_compressSparse(mjtNum* mat, int nr, int nc, + void mju_compressSparse(mjtNum* mat, int nr, int nc, int* rownnz, int* rowadr, int* colind); # Set dst = a*dst + b*src, return nnz of result, modify dst sparsity pattern as needed. # Both vectors are sparse. The required scratch space is 2*n. int mju_combineSparse(mjtNum* dst, const mjtNum* src, int n, mjtNum a, mjtNum b, - int dst_nnz, int src_nnz, int* dst_ind, const int* src_ind, + int dst_nnz, int src_nnz, int* dst_ind, const int* src_ind, mjtNum* scratch, int nscratch); # Set res = matT * diag * mat if diag is not NULL, and res = matT * mat otherwise. # The required scratch space is 3*nc. The result has uncompressed layout. - void mju_sqrMatTDSparse(mjtNum* res, const mjtNum* mat, const mjtNum* matT, - const mjtNum* diag, int nr, int nc, + void mju_sqrMatTDSparse(mjtNum* res, const mjtNum* mat, const mjtNum* matT, + const mjtNum* diag, int nr, int nc, int* res_rownnz, int* res_rowadr, int* res_colind, const int* rownnz, const int* rowadr, const int* colind, const int* rownnzT, const int* rowadrT, const int* colindT, @@ -974,8 +974,8 @@ cdef extern from "mujoco.h" nogil: #--------------------- Poses ---------------------------------------------------------- # Multiply two poses. - void mju_mulPose(mjtNum posres[3], mjtNum quatres[4], - const mjtNum pos1[3], const mjtNum quat1[4], + void mju_mulPose(mjtNum posres[3], mjtNum quatres[4], + const mjtNum pos1[3], const mjtNum quat1[4], const mjtNum pos2[3], const mjtNum quat2[4]); # Negate pose. diff --git a/mujoco_py/tests/test_pid.py b/mujoco_py/tests/test_pid.py new file mode 100644 index 0000000000000000000000000000000000000000..4d2a51d6a86d25a53e7d5dec6f864c949e31c561 --- /dev/null +++ b/mujoco_py/tests/test_pid.py @@ -0,0 +1,94 @@ +import numpy as np +import pytest +import random +from mujoco_py import (MjSim, load_model_from_xml, cymj) + +MODEL_XML = """ +<mujoco model="inverted pendulum"> + <size nuserdata="100"/> + <compiler inertiafromgeom="true"/> + <default> + <joint armature="0" damping="1" limited="true"/> + <geom contype="0" friction="1 0.1 0.1" rgba="0.7 0.7 0 1"/> + <tendon/> + <motor ctrlrange="-3 3"/> + </default> + <option gravity="0 0 -9.81" integrator="RK4" timestep="0.001"/> + <size nstack="3000"/> + <worldbody> + <geom name="rail" pos="0 0 0" quat="0.707 0 0.707 0" rgba="0.3 0.3 0.7 1" size="0.02 1" type="capsule"/> + <body name="cart" pos="0 0 0"> + <geom name="cart" pos="0 0 0" quat="0.707 0 0.707 0" size="0.1 0.1" type="capsule"/> + <body name="pole" pos="0 0 0"> + <joint axis="0 1 0" name="hinge" pos="0 0 0" range="-90 90" type="hinge"/> + <geom fromto="0 0 0 0.001 0 0.6" name="cpole" rgba="0 0.7 0.7 1" size="0.049 0.3" type="capsule"/> + </body> + </body> + </worldbody> + <actuator> + {actuator} + </actuator> +</mujoco> +""" + +PID_ACTUATOR = """ + <general ctrlrange='-1 1' gaintype="user" biastype="user" forcerange="-100 100" gainprm="200 10 10.0 0.1 0.1 0" joint="hinge" name="a-hinge"/> +""" + +P_ONLY_ACTUATOR = """ + <general ctrlrange='-1 1' gaintype="user" biastype="user" forcerange="-100 100" gainprm="200" joint="hinge" name="a-hinge"/> +""" + +POSITION_ACTUATOR = """ + <position ctrlrange='-1 1' forcerange="-100 100" kp=200 joint="hinge" name="a-hinge"/> +""" + + +""" + To enable PID control in the mujoco, please + refer to the setting in the PID_ACTUATOR. + + Here we set Kp = 200, Ti = 10, Td = 0.1 (also iClamp = 10.0, dSmooth be 0.1) +""" +def test_mj_pid(): + xml = MODEL_XML.format(actuator=PID_ACTUATOR) + model = load_model_from_xml(xml) + sim = MjSim(model) + cymj.set_pid_control(sim.model, sim.data) + + # pertubation of pole to be unbalanced + init_pos = 0.1 * (random.random() - 0.5) + print('init pos', init_pos) + sim.data.qpos[0] = init_pos + + pos = 0.0 + sim.data.ctrl[0] = pos + print('desire position:', pos) + + for _ in range(100): + sim.step() + + print('final pos', sim.data.qpos[0]) + assert abs(sim.data.qpos[0] - pos) < 0.01 + +""" + check new PID control is backward compatible with position control + when only has Kp term. +""" +def test_mj_proptional_only(): + model = load_model_from_xml(MODEL_XML.format(actuator=P_ONLY_ACTUATOR)) + sim = MjSim(model) + cymj.set_pid_control(sim.model, sim.data) + + model2 = load_model_from_xml(MODEL_XML.format(actuator=POSITION_ACTUATOR)) + sim2 = MjSim(model2) + + init_pos = 0.1 * (random.random() - 0.5) + sim.data.qpos[0] = sim2.data.qpos[0] = init_pos + sim.data.ctrl[0] = sim2.data.ctrl[0] = 0 + + for i in range(20): + print(i, sim.data.qpos[0], sim2.data.qpos[0]) + sim.step() + sim2.step() + assert abs(sim.data.qpos[0] - sim2.data.qpos[0]) <= 1e-5, "%d step violates" % i diff --git a/mujoco_py/version.py b/mujoco_py/version.py index ce0a1039c5a47daae9ec3121186ef09e34ffa570..b3bcd4808f55d11f524327c7cb261a4a5b032172 100644 --- a/mujoco_py/version.py +++ b/mujoco_py/version.py @@ -1,6 +1,6 @@ __all__ = ['__version__', 'get_version'] -version_info = (2, 0, 2, 5) +version_info = (2, 0, 2, 6) # format: # ('mujoco_major', 'mujoco_minor', 'mujoco_py_major', 'mujoco_py_minor')