From dea45c7c29ea064de974a3b9b1ab45977a2dbd75 Mon Sep 17 00:00:00 2001 From: He-chien Tsai <depot051@gmail.com> Date: Sat, 19 Mar 2016 08:52:40 +0800 Subject: [PATCH] python 3 compat init --- examples/animation.py | 4 ++-- examples/hmm-EM.py | 7 +++--- examples/hmm.py | 11 +++++----- examples/hsmm.py | 7 +++--- examples/svi.py | 14 ++++++------ pyhsmm/basic/__init__.py | 6 ++--- pyhsmm/basic/abstractions.py | 5 +++-- pyhsmm/internals/hmm_states.py | 27 ++++++++++++----------- pyhsmm/internals/hsmm_inb_states.py | 23 +++++++++---------- pyhsmm/internals/hsmm_states.py | 21 +++++++++--------- pyhsmm/internals/transitions.py | 3 ++- pyhsmm/models.py | 34 +++++++++++++++++------------ pyhsmm/util/__init__.py | 2 +- pyhsmm/util/general.py | 21 ++++++++++-------- pyhsmm/util/profiling.py | 11 +++++----- pyhsmm/util/stats.py | 6 ++--- pyhsmm/util/text.py | 7 +++--- setup.py | 6 ++--- tests/test_hmm_geweke.py | 21 +++++++++--------- tests/test_hmm_likelihood.py | 1 + 20 files changed, 127 insertions(+), 110 deletions(-) diff --git a/examples/animation.py b/examples/animation.py index 9f7170c..cd591c4 100644 --- a/examples/animation.py +++ b/examples/animation.py @@ -1,4 +1,5 @@ from __future__ import division +from builtins import range import os import numpy as np import numpy.random as npr @@ -7,7 +8,6 @@ plt.ion() npr.seed(0) import pyhsmm -from pyhsmm.util.text import progprint_xrange ############### # load data # @@ -32,7 +32,7 @@ obs_hypparams = {'mu_0':np.zeros(obs_dim), # instantiate a Sticky-HDP-HMM obs_distns = [pyhsmm.distributions.Gaussian(**obs_hypparams) - for state in xrange(Nmax)] + for state in range(Nmax)] model = pyhsmm.models.WeakLimitStickyHDPHMM( kappa=50.,alpha=6.,gamma=6.,init_state_concentration=1., obs_distns=obs_distns) diff --git a/examples/hmm-EM.py b/examples/hmm-EM.py index 08b21d6..9294de7 100644 --- a/examples/hmm-EM.py +++ b/examples/hmm-EM.py @@ -1,4 +1,5 @@ from __future__ import division +from builtins import range import numpy as np np.seterr(divide='ignore') # these warnings are usually harmless for this code from matplotlib import pyplot as plt @@ -24,7 +25,7 @@ obs_hypparams = {'mu_0':np.zeros(obs_dim), 'kappa_0':0.25, 'nu_0':obs_dim+2} -obs_distns = [pyhsmm.distributions.Gaussian(**obs_hypparams) for state in xrange(N)] +obs_distns = [pyhsmm.distributions.Gaussian(**obs_hypparams) for state in range(N)] # Build the HMM model that will represent the fitmodel fitmodel = pyhsmm.models.HMM( @@ -32,7 +33,7 @@ fitmodel = pyhsmm.models.HMM( obs_distns=obs_distns) fitmodel.add_data(data) -print 'Gibbs sampling for initialization' +print('Gibbs sampling for initialization') for idx in progprint_xrange(25): fitmodel.resample_model() @@ -41,7 +42,7 @@ plt.figure() fitmodel.plot() plt.gcf().suptitle('Gibbs-sampled initialization') -print 'EM' +print('EM') likes = fitmodel.EM_fit() diff --git a/examples/hmm.py b/examples/hmm.py index 5df1ac6..e15810e 100644 --- a/examples/hmm.py +++ b/examples/hmm.py @@ -1,4 +1,5 @@ from __future__ import division +from builtins import range import numpy as np np.seterr(divide='ignore') # these warnings are usually harmless for this code from matplotlib import pyplot as plt @@ -9,8 +10,7 @@ matplotlib.rcParams['font.size'] = 8 import pyhsmm from pyhsmm.util.text import progprint_xrange -print \ -''' +print(''' This demo shows how HDP-HMMs can fail when the underlying data has state persistence without some kind of temporal regularization (in the form of a sticky bias or duration modeling): without setting the number of states to be @@ -18,8 +18,7 @@ the correct number a priori, lots of extra states can be intsantiated. BUT the effect is much more relevant on real data (when the data doesn't exactly fit the model). Maybe this demo should use multinomial emissions... -''' - +''') ############### # load data # ############### @@ -42,7 +41,7 @@ obs_hypparams = {'mu_0':np.zeros(obs_dim), ### HDP-HMM without the sticky bias -obs_distns = [pyhsmm.distributions.Gaussian(**obs_hypparams) for state in xrange(Nmax)] +obs_distns = [pyhsmm.distributions.Gaussian(**obs_hypparams) for state in range(Nmax)] posteriormodel = pyhsmm.models.WeakLimitHDPHMM(alpha=6.,gamma=6.,init_state_concentration=1., obs_distns=obs_distns) posteriormodel.add_data(data) @@ -55,7 +54,7 @@ plt.gcf().suptitle('HDP-HMM sampled model after 100 iterations') ### Sticky-HDP-HMM -obs_distns = [pyhsmm.distributions.Gaussian(**obs_hypparams) for state in xrange(Nmax)] +obs_distns = [pyhsmm.distributions.Gaussian(**obs_hypparams) for state in range(Nmax)] posteriormodel = pyhsmm.models.WeakLimitStickyHDPHMM( kappa=50.,alpha=6.,gamma=6.,init_state_concentration=1., obs_distns=obs_distns) diff --git a/examples/hsmm.py b/examples/hsmm.py index b3834e7..f39149f 100644 --- a/examples/hsmm.py +++ b/examples/hsmm.py @@ -1,4 +1,4 @@ -from __future__ import division + import numpy as np np.seterr(divide='ignore') # these warnings are usually harmless for this code from matplotlib import pyplot as plt @@ -9,12 +9,11 @@ from pyhsmm.util.text import progprint_xrange SAVE_FIGURES = False -print \ -''' +print(''' This demo shows the HDP-HSMM in action. Its iterations are slower than those for the (Sticky-)HDP-HMM, but explicit duration modeling can be a big advantage for conditioning the prior or for discovering structure in data. -''' +''') ############### # load data # diff --git a/examples/svi.py b/examples/svi.py index 6da60d5..2547ae5 100644 --- a/examples/svi.py +++ b/examples/svi.py @@ -1,4 +1,4 @@ -from __future__ import division + import numpy as np from numpy import newaxis as na from matplotlib import pyplot as plt @@ -14,19 +14,19 @@ datapath = str(join(dirname(__file__),'svi_data.gz')) ### load data if not isfile(datapath): - print 'download svi_data.gz data and put it in examples/' - print 'https://github.com/mattjj/example_data' + print('download svi_data.gz data and put it in examples/') + print('https://github.com/mattjj/example_data') import sys; sys.exit(1) -print 'loading data...' +print('loading data...') alldata = np.loadtxt(datapath) allseqs = np.array_split(alldata,250) datas, heldout = hold_out(allseqs,0.05) training_size = sum(data.shape[0] for data in datas) -print '...done!' +print('...done!') -print '%d total frames' % sum(data.shape[0] for data in alldata) -print 'split into %d training and %d test sequences' % (len(datas),len(heldout)) +print('%d total frames' % sum(data.shape[0] for data in alldata)) +print('split into %d training and %d test sequences' % (len(datas),len(heldout))) ### inference! diff --git a/pyhsmm/basic/__init__.py b/pyhsmm/basic/__init__.py index b379a33..bc4f9b7 100644 --- a/pyhsmm/basic/__init__.py +++ b/pyhsmm/basic/__init__.py @@ -1,3 +1,3 @@ -import models -import distributions -import abstractions +from . import models +from . import distributions +from . import abstractions diff --git a/pyhsmm/basic/abstractions.py b/pyhsmm/basic/abstractions.py index 58a53df..ab8efaf 100644 --- a/pyhsmm/basic/abstractions.py +++ b/pyhsmm/basic/abstractions.py @@ -1,4 +1,6 @@ from __future__ import division +from future.utils import with_metaclass + import abc import numpy as np from matplotlib import pyplot as plt @@ -8,8 +10,7 @@ import pyhsmm from pyhsmm.util.stats import flattendata, sample_discrete, sample_discrete_from_log, combinedata from pyhsmm.util.general import rcumsum -class DurationDistribution(Distribution): - __metaclass__ = abc.ABCMeta +class DurationDistribution(with_metaclass(abc.ABCMeta, Distribution)): # in addition to the methods required by Distribution, we also require a # log_sf implementation diff --git a/pyhsmm/internals/hmm_states.py b/pyhsmm/internals/hmm_states.py index 328ac8a..30ce024 100644 --- a/pyhsmm/internals/hmm_states.py +++ b/pyhsmm/internals/hmm_states.py @@ -1,4 +1,6 @@ from __future__ import division +from builtins import range +from future.utils import with_metaclass import numpy as np from numpy import newaxis as na import abc @@ -16,8 +18,7 @@ from pyhsmm.util.general import rle # Mixins and bases # ###################### -class _StatesBase(object): - __metaclass__ = abc.ABCMeta +class _StatesBase(with_metaclass(abc.ABCMeta, object)): def __init__(self,model,T=None,data=None,stateseq=None, generate=True,initialize_from_prior=True, fixed_stateseq=False): @@ -141,7 +142,7 @@ class _SeparateTransMixin(object): class _PossibleChangepointsMixin(object): def __init__(self,model,data,changepoints=None,**kwargs): changepoints = changepoints if changepoints is not None \ - else [(t,t+1) for t in xrange(data.shape[0])] + else [(t,t+1) for t in range(data.shape[0])] self.changepoints = changepoints self.segmentstarts = np.array([start for start,stop in changepoints],dtype=np.int32) @@ -227,7 +228,7 @@ class HMMStatesPython(_StatesBase): A = self.trans_matrix stateseq = np.zeros(T,dtype=np.int32) - for idx in xrange(T): + for idx in range(T): stateseq[idx] = sample_discrete(nextstate_distn) nextstate_distn = A[stateseq[idx]] @@ -257,7 +258,7 @@ class HMMStatesPython(_StatesBase): betal = np.zeros_like(aBl) - for t in xrange(betal.shape[0]-2,-1,-1): + for t in range(betal.shape[0]-2,-1,-1): betal[t] = logsumexp(Al + betal[t+1] + aBl[t+1],axis=1) np.seterr(**errs) @@ -278,7 +279,7 @@ class HMMStatesPython(_StatesBase): alphal = np.zeros_like(aBl) alphal[0] = np.log(init_state_distn) + aBl[0] - for t in xrange(alphal.shape[0]-1): + for t in range(alphal.shape[0]-1): alphal[t+1] = logsumexp(alphal[t] + Al.T,axis=1) + aBl[t+1] np.seterr(**errs) @@ -300,7 +301,7 @@ class HMMStatesPython(_StatesBase): logtot = 0. betan[-1] = 1. - for t in xrange(T-2,-1,-1): + for t in range(T-2,-1,-1): cmax = aBl[t+1].max() betan[t] = A.dot(betan[t+1] * np.exp(aBl[t+1] - cmax)) norm = betan[t].sum() @@ -327,7 +328,7 @@ class HMMStatesPython(_StatesBase): logtot = 0. in_potential = init_state_distn - for t in xrange(T): + for t in range(T): cmax = aBl[t].max() alphan[t] = in_potential * np.exp(aBl[t] - cmax) norm = alphan[t].sum() @@ -369,7 +370,7 @@ class HMMStatesPython(_StatesBase): stateseq = np.empty(T,dtype=np.int32) nextstate_unsmoothed = init_state_distn - for idx in xrange(T): + for idx in range(T): logdomain = betal[idx] + aBl[idx] logdomain[nextstate_unsmoothed == 0] = -np.inf if np.any(np.isfinite(logdomain)): @@ -392,7 +393,7 @@ class HMMStatesPython(_StatesBase): stateseq = np.empty(T,dtype=np.int32) nextstate_unsmoothed = init_state_distn - for idx in xrange(T): + for idx in range(T): logdomain = aBl[idx] logdomain[nextstate_unsmoothed == 0] = -np.inf stateseq[idx] = sample_discrete(nextstate_unsmoothed * betan * np.exp(logdomain - np.amax(logdomain))) @@ -412,7 +413,7 @@ class HMMStatesPython(_StatesBase): stateseq = np.empty(T,dtype=np.int32) next_potential = np.ones(AT.shape[0]) - for t in xrange(T-1,-1,-1): + for t in range(T-1,-1,-1): stateseq[t] = sample_discrete(next_potential * alphan[t]) next_potential = AT[stateseq[t]] @@ -573,7 +574,7 @@ class HMMStatesPython(_StatesBase): scores = np.zeros_like(aBl) args = np.zeros(aBl.shape,dtype=np.int32) - for t in xrange(scores.shape[0]-2,-1,-1): + for t in range(scores.shape[0]-2,-1,-1): vals = Al + scores[t+1] + aBl[t+1] vals.argmax(axis=1,out=args[t+1]) vals.max(axis=1,out=scores[t]) @@ -588,7 +589,7 @@ class HMMStatesPython(_StatesBase): stateseq = np.empty(T,dtype=np.int32) stateseq[0] = (scores[0] + np.log(init_state_distn) + aBl[0]).argmax() - for idx in xrange(1,T): + for idx in range(1,T): stateseq[idx] = args[idx,stateseq[idx-1]] return stateseq diff --git a/pyhsmm/internals/hsmm_inb_states.py b/pyhsmm/internals/hsmm_inb_states.py index 428051f..e1c6ce2 100644 --- a/pyhsmm/internals/hsmm_inb_states.py +++ b/pyhsmm/internals/hsmm_inb_states.py @@ -1,4 +1,6 @@ from __future__ import division +from builtins import zip, range +from future.utils import with_metaclass import numpy as np import abc import scipy.stats as stats @@ -11,16 +13,15 @@ except ImportError: from ..util.stats import sample_markov from ..util.general import top_eigenvector, cumsum -from hmm_states import HMMStatesPython, HMMStatesEigen, _SeparateTransMixin -from hsmm_states import HSMMStatesEigen +from .hmm_states import HMMStatesPython, HMMStatesEigen, _SeparateTransMixin +from .hsmm_states import HSMMStatesEigen # TODO these classes are currently backed by HMM message passing, but they can # be made much more time and memory efficient. i have the code to do it in some # other branches, but dense matrix multiplies are actually competitive. -class _HSMMStatesIntegerNegativeBinomialBase(HSMMStatesEigen, HMMStatesEigen): - __metaclass__ = abc.ABCMeta +class _HSMMStatesIntegerNegativeBinomialBase(with_metaclass(abc.ABCMeta, HSMMStatesEigen, HMMStatesEigen)): @property def rs(self): @@ -169,7 +170,7 @@ class HSMMStatesIntegerNegativeBinomial(_HSMMStatesIntegerNegativeBinomialBase): pmess = np.zeros(ends[-1]) # betal[-1] is 0 - for t in xrange(T-1,-1,-1): + for t in range(T-1,-1,-1): pmess += self.hmm_aBl[t] betastarl[t] = logsumexp(np.log(foo) + pmess, axis=1) betal[t-1] = logsumexp(Al + betastarl[t], axis=1) @@ -196,7 +197,7 @@ class HSMMStatesIntegerNegativeBinomial(_HSMMStatesIntegerNegativeBinomialBase): self.expected_durations = np.zeros((self.num_states,self.T)) eye = np.eye(self.num_states)/num_r_samples - for i in xrange(num_r_samples): + for i in range(num_r_samples): self.model._resample_from_mf() self.clear_caches() @@ -206,7 +207,7 @@ class HSMMStatesIntegerNegativeBinomial(_HSMMStatesIntegerNegativeBinomialBase): self.expected_transcounts += \ count_transitions(self.stateseq_norep,minlength=self.num_states)\ / num_r_samples - for state in xrange(self.num_states): + for state in range(self.num_states): self.expected_durations[state] += \ np.bincount( self.durations_censored[self.stateseq_norep == state], @@ -225,7 +226,7 @@ class HSMMStatesIntegerNegativeBinomial(_HSMMStatesIntegerNegativeBinomialBase): mf_aBl = self.mf_aBl - for i in xrange(num_r_samples): + for i in range(num_r_samples): for d in self.dur_distns: d._resample_r_from_mf() self.clear_caches() @@ -248,9 +249,9 @@ class HSMMStatesIntegerNegativeBinomial(_HSMMStatesIntegerNegativeBinomialBase): self.expected_transcounts += expected_transcounts / num_r_samples # collect duration statistics by sampling from messages - for j in xrange(num_stateseq_samples_per_r): + for j in range(num_stateseq_samples_per_r): self._resample_from_mf(trans,init,aBl,hmm_alphal,hmm_betal) - for state in xrange(self.num_states): + for state in range(self.num_states): self.expected_durations[state] += \ np.bincount( self.durations_censored[self.stateseq_norep == state], @@ -445,7 +446,7 @@ class HSMMStatesTruncatedIntegerNegativeBinomial(HSMMStatesDelayedIntegerNegativ def bwd_enter_rows(self): As = [np.diag(np.repeat(p,r)) + np.diag(np.repeat(1-p,r-1),k=1) for r,p in zip(self.rs,self.ps)] enters = [stats.binom.pmf(np.arange(r)[::-1],r-1,p) for A,r,p in zip(As,self.rs,self.ps)] - # norms = [sum(v.dot(np.linalg.matrix_power(A,d))[-1]*(1-p) for d in xrange(delay)) + # norms = [sum(v.dot(np.linalg.matrix_power(A,d))[-1]*(1-p) for d in range(delay)) # for A,v,p,delay in zip(As,enters,self.ps,self.delays)] # enters = [v.dot(np.linalg.matrix_power(A,self.delays[state])) / (1.-norm) enters = [v.dot(np.linalg.matrix_power(A,self.delays[state])) diff --git a/pyhsmm/internals/hsmm_states.py b/pyhsmm/internals/hsmm_states.py index c9663d2..a4b4108 100644 --- a/pyhsmm/internals/hsmm_states.py +++ b/pyhsmm/internals/hsmm_states.py @@ -1,4 +1,5 @@ from __future__ import division +from builtins import range, map import numpy as np from numpy import newaxis as na from scipy.misc import logsumexp @@ -6,8 +7,8 @@ from scipy.misc import logsumexp from pyhsmm.util.stats import sample_discrete from pyhsmm.util.general import rle, rcumsum, cumsum -import hmm_states -from hmm_states import _StatesBase, _SeparateTransMixin, \ +from . import hmm_states +from .hmm_states import _StatesBase, _SeparateTransMixin, \ HMMStatesPython, HMMStatesEigen @@ -398,7 +399,7 @@ class HSMMStatesPython(_StatesBase): self.expected_durations = expected_durations = \ np.zeros((self.num_states,self.T)) - for state in xrange(self.num_states): + for state in range(self.num_states): expected_durations[state] += \ np.bincount( self.durations_censored[self.stateseq_norep == state], @@ -476,11 +477,11 @@ class HSMMStatesPython(_StatesBase): dur_potentials,cumulative_obs_potentials, alphastarl,betal,normalizer): if self.trunc is not None: - raise NotImplementedError, "_expected_durations can't handle trunc" + raise NotImplementedError("_expected_durations can't handle trunc") T = self.T logpmfs = -np.inf*np.ones_like(alphastarl) errs = np.seterr(invalid='ignore') - for t in xrange(T): + for t in range(T): cB, offset = cumulative_obs_potentials(t) np.logaddexp(dur_potentials(t) + alphastarl[t] + betal[t:] + cB - (normalizer + offset), @@ -685,7 +686,7 @@ class _PossibleChangepointsMixin(hmm_states._PossibleChangepointsMixin,HSMMState self.expected_durations = expected_durations = \ np.zeros((self.num_states,self.Tfull)) - for state in xrange(self.num_states): + for state in range(self.num_states): expected_durations[state] += \ np.bincount( self.durations_censored[self.stateseq_norep == state], @@ -872,7 +873,7 @@ class HSMMStatesPossibleChangepoints(_PossibleChangepointsMixin,HSMMStatesPython logpmfs = -np.inf*np.ones((self.Tfull,alphastarl.shape[1])) errs = np.seterr(invalid='ignore') # logaddexp(-inf,-inf) # TODO censoring not handled correctly here - for tblock in xrange(self.Tblock): + for tblock in range(self.Tblock): possible_durations = self.segmentlens[tblock:].cumsum()[:self.trunc] cB, offset = cumulative_obs_potentials(tblock) logpmfs[possible_durations -1] = np.logaddexp( @@ -991,7 +992,7 @@ def hsmm_messages_backwards_log( T, _ = betal.shape betal[-1] = 0. - for t in xrange(T-1,-1,-1): + for t in range(T-1,-1,-1): cB, offset = cumulative_obs_potentials(t) dp = dur_potentials(t) betastarl[t] = logsumexp( @@ -1020,7 +1021,7 @@ def hsmm_messages_forwards_log( T, _ = alphal.shape alphastarl[0] = initial_state_potential - for t in xrange(T-1): + for t in range(T-1): cB = reverse_cumulative_obs_potentials(t) alphal[t] = logsumexp( alphastarl[t+1-cB.shape[0]:t+1] + cB + reverse_dur_potentials(t), axis=0) @@ -1098,7 +1099,7 @@ def hsmm_maximizing_assignment( betastar_scores, betastar_args = np.empty((T,N)), np.empty((T,N),dtype=np.int) beta_scores[-1] = 0. - for t in xrange(T-1,-1,-1): + for t in range(T-1,-1,-1): cB, offset = cumulative_obs_potentials(t) vals = beta_scores[t:t+cB.shape[0]] + cB + dur_potentials(t) diff --git a/pyhsmm/internals/transitions.py b/pyhsmm/internals/transitions.py index 8ee8d6b..5921c66 100644 --- a/pyhsmm/internals/transitions.py +++ b/pyhsmm/internals/transitions.py @@ -1,4 +1,5 @@ from __future__ import division +from builtins import range import numpy as np from numpy import newaxis as na np.seterr(invalid='raise') @@ -35,7 +36,7 @@ class _HMMTransitionsBase(object): weights=row) for row in trans_matrix] elif None not in (alpha,self.N) or alphav is not None: self._row_distns = [Multinomial(alpha_0=alpha,K=self.N,alphav_0=alphav) - for n in xrange(self.N)] # sample from prior + for n in range(self.N)] # sample from prior @property def trans_matrix(self): diff --git a/pyhsmm/models.py b/pyhsmm/models.py index eb7c4fb..1de2dd1 100644 --- a/pyhsmm/models.py +++ b/pyhsmm/models.py @@ -1,4 +1,9 @@ from __future__ import division +from future.utils import iteritems, itervalues +from builtins import map + +import sys + import numpy as np import itertools import collections @@ -81,7 +86,7 @@ class _HMMBase(Model): # generating brand new data sequence counts = np.bincount(s.stateseq,minlength=self.num_states) obs = [iter(o.rvs(count)) for o, count in zip(s.obs_distns,counts)] - s.data = np.squeeze(np.vstack([obs[state].next() for state in s.stateseq])) + s.data = np.squeeze(np.vstack([next(obs[state]) for state in s.stateseq])) else: # filling in missing data data = s.data @@ -89,7 +94,7 @@ class _HMMBase(Model): counts = np.bincount(s.stateseq[nan_idx],minlength=self.num_states) obs = [iter(o.rvs(count)) for o, count in zip(s.obs_distns,counts)] for idx, state in zip(nan_idx, s.stateseq[nan_idx]): - data[idx] = obs[state].next() + data[idx] = next(obs[state]) return s.data @@ -141,7 +146,7 @@ class _HMMBase(Model): prev_k = k else: from joblib import Parallel, delayed - import parallel + from . import parallel parallel.cmaxes = cmaxes parallel.alphal = alphal @@ -183,7 +188,8 @@ class _HMMBase(Model): @property def used_states(self): 'a list of the used states in the order they appear' - canonical_ids = collections.defaultdict(itertools.count().next) + c = itertools.count() + canonical_ids = collections.defaultdict(c.__next__ if sys.version_info.major == 3 else c.next) for s in self.states_list: for state in s.stateseq: canonical_ids[state] @@ -471,7 +477,7 @@ class _HMMGibbsSampling(_HMMBase,ModelGibbsSampling): def _joblib_resample_states(self,states_list,num_procs): from joblib import Parallel, delayed - import parallel + from . import parallel # warn('joblib is segfaulting on OS X only, not sure why') @@ -556,7 +562,7 @@ class _HMMMeanField(_HMMBase,ModelMeanField): def _joblib_meanfield_update_states(self,states_list,num_procs): if len(states_list) > 0: from joblib import Parallel, delayed - import parallel + from . import parallel joblib_args = list_split( [self._get_joblib_pair(s) for s in states_list], @@ -1261,13 +1267,13 @@ class _SeparateTransMixin(object): ### Gibbs sampling def resample_trans_distn(self): - for group_id, trans_distn in self.trans_distns.iteritems(): + for group_id, trans_distn in iteritems(self.trans_distns): trans_distn.resample([s.stateseq for s in self.states_list if hash(s.group_id) == hash(group_id)]) self._clear_caches() def resample_init_state_distn(self): - for group_id, init_state_distn in self.init_state_distns.iteritems(): + for group_id, init_state_distn in iteritems(self.init_state_distns): init_state_distn.resample([s.stateseq[0] for s in self.states_list if hash(s.group_id) == hash(group_id)]) self._clear_caches() @@ -1275,13 +1281,13 @@ class _SeparateTransMixin(object): ### Mean field def meanfield_update_trans_distn(self): - for group_id, trans_distn in self.trans_distns.iteritems(): + for group_id, trans_distn in iteritems(self.trans_distns): states_list = [s for s in self.states_list if hash(s.group_id) == hash(group_id)] if len(states_list) > 0: trans_distn.meanfieldupdate([s.expected_transcounts for s in states_list]) def meanfield_update_init_state_distn(self): - for group_id, init_state_distn in self.init_state_distns.iteritems(): + for group_id, init_state_distn in iteritems(self.init_state_distns): states_list = [s for s in self.states_list if hash(s.group_id) == hash(group_id)] if len(states_list) > 0: init_state_distn.meanfieldupdate([s.expected_states[0] for s in states_list]) @@ -1290,23 +1296,23 @@ class _SeparateTransMixin(object): vlb = 0. vlb += sum(s.get_vlb() for s in self.states_list) vlb += sum(trans_distn.get_vlb() - for trans_distn in self.trans_distns.itervalues()) + for trans_distn in itervalues(self.trans_distns)) vlb += sum(init_state_distn.get_vlb() - for init_state_distn in self.init_state_distns.itervalues()) + for init_state_distn in itervalues(self.init_state_distns)) vlb += sum(o.get_vlb() for o in self.obs_distns) return vlb ### SVI def _meanfield_sgdstep_trans_distn(self,mb_states_list,prob,stepsize): - for group_id, trans_distn in self.trans_distns.iteritems(): + for group_id, trans_distn in iteritems(self.trans_distns): trans_distn.meanfield_sgdstep( [s.expected_transcounts for s in mb_states_list if hash(s.group_id) == hash(group_id)], prob,stepsize) def _meanfield_sgdstep_init_state_distn(self,mb_states_list,prob,stepsize): - for group_id, init_state_distn in self.init_state_distns.iteritems(): + for group_id, init_state_distn in iteritems(self.init_state_distns): init_state_distn.meanfield_sgdstep( [s.expected_states[0] for s in mb_states_list if hash(s.group_id) == hash(group_id)], diff --git a/pyhsmm/util/__init__.py b/pyhsmm/util/__init__.py index f09c650..2f3d09d 100644 --- a/pyhsmm/util/__init__.py +++ b/pyhsmm/util/__init__.py @@ -1,2 +1,2 @@ __all__ = ['general','plot','stats','text'] -import general, plot, stats, text +from . import general, plot, stats, text diff --git a/pyhsmm/util/general.py b/pyhsmm/util/general.py index ac85b7f..4b354a5 100644 --- a/pyhsmm/util/general.py +++ b/pyhsmm/util/general.py @@ -1,11 +1,14 @@ from __future__ import division +from builtins import range, map, zip, filter + import numpy as np from numpy.lib.stride_tricks import as_strided as ast import scipy.linalg import copy, collections, os, shutil, hashlib from contextlib import closing -from urllib2 import urlopen -from itertools import izip, chain, count, ifilter +from six.moves.urllib.request import urlopen +from itertools import chain, count +from functools import reduce def solve_psd(A,b,chol=None,overwrite_b=False,overwrite_A=False): if A.shape[0] < 5000 and chol is None: @@ -145,7 +148,7 @@ def _sieve(stream): # just for fun; doesn't work over a few hundred val = stream.next() yield val - for x in ifilter(lambda x: x%val != 0, _sieve(stream)): + for x in filter(lambda x: x%val != 0, _sieve(stream)): yield x def primes(): @@ -172,7 +175,7 @@ def top_eigenvector(A,niter=1000,force_iteration=False): else: x1 = np.repeat(1./n,n) x2 = x1.copy() - for itr in xrange(niter): + for itr in range(niter): np.dot(A.T,x1,out=x2) x2 /= x2.sum() x1,x2 = x2,x1 @@ -203,7 +206,7 @@ def count_transitions(stateseq,minlength=None): if minlength is None: minlength = stateseq.max() + 1 out = np.zeros((minlength,minlength),dtype=np.int32) - for a,b in izip(stateseq[:-1],stateseq[1:]): + for a,b in zip(stateseq[:-1],stateseq[1:]): out[a,b] += 1 return out @@ -223,14 +226,14 @@ def hold_out(datalist,frac): def sgd_passes(tau,kappa,datalist,minibatchsize=1,npasses=1): N = len(datalist) - for superitr in xrange(npasses): + for superitr in range(npasses): if minibatchsize == 1: perm = np.random.permutation(N) - for idx, rho_t in izip(perm,sgd_steps(tau,kappa)): + for idx, rho_t in zip(perm,sgd_steps(tau,kappa)): yield datalist[idx], rho_t else: minibatch_indices = np.array_split(np.random.permutation(N),N/minibatchsize) - for indices, rho_t in izip(minibatch_indices,sgd_steps(tau,kappa)): + for indices, rho_t in zip(minibatch_indices,sgd_steps(tau,kappa)): yield [datalist[idx] for idx in indices], rho_t def sgd_sampling(tau,kappa,datalist,minibatchsize=1): @@ -252,7 +255,7 @@ def minibatchsize(lst): def random_subset(lst,sz): perm = np.random.permutation(len(lst)) - return [lst[perm[idx]] for idx in xrange(sz)] + return [lst[perm[idx]] for idx in range(sz)] def get_file(remote_url,local_path): if not os.path.isfile(local_path): diff --git a/pyhsmm/util/profiling.py b/pyhsmm/util/profiling.py index 019daf3..8d20227 100644 --- a/pyhsmm/util/profiling.py +++ b/pyhsmm/util/profiling.py @@ -1,6 +1,7 @@ -from __future__ import division +from __future__ import division, print_function import numpy as np -import sys, StringIO, inspect, os, functools, time, collections +import sys, inspect, os, functools, time, collections +from future.utils import iteritems ### use @timed for really basic timing @@ -21,15 +22,15 @@ def show_timings(stream=None): if len(_timings) > 0: results = [(inspect.getsourcefile(f),f.__name__, len(vals),np.sum(vals),np.mean(vals),np.std(vals)) - for f, vals in _timings.iteritems()] + for f, vals in iteritems(_timings)] filename_lens = max(len(filename) for filename, _, _, _, _, _ in results) name_lens = max(len(name) for _, name, _, _, _, _ in results) fmt = '{:>%d} {:>%d} {:>10} {:>10} {:>10} {:>10}' % (filename_lens, name_lens) - print >>stream, fmt.format('file','name','ncalls','tottime','avg time','std dev') + print(fmt.format('file','name','ncalls','tottime','avg time','std dev'), file=stream) fmt = '{:>%d} {:>%d} {:>10} {:>10.3} {:>10.3} {:>10.3}' % (filename_lens, name_lens) - print >>stream, '\n'.join(fmt.format(*tup) for tup in sorted(results)) + print('\n'.join(fmt.format(*tup) for tup in sorted(results)), file=stream) ### use @line_profiled for a thin wrapper around line_profiler diff --git a/pyhsmm/util/stats.py b/pyhsmm/util/stats.py index 3052414..ae7922a 100644 --- a/pyhsmm/util/stats.py +++ b/pyhsmm/util/stats.py @@ -1,5 +1,5 @@ from __future__ import division -from itertools import izip +from builtins import zip, range import numpy as np from numpy.random import random na = np.newaxis @@ -107,7 +107,7 @@ def diag_whiten(datalist): def count_transitions(stateseq, num_states): out = np.zeros((num_states,num_states),dtype=np.int32) - for i,j in izip(stateseq[:-1],stateseq[1:]): + for i,j in zip(stateseq[:-1],stateseq[1:]): out[i,j] += 1 return out @@ -227,7 +227,7 @@ def sample_crp_tablecounts(concentration,customers,colweights): for (i,j), n in np.ndenumerate(customers): w = colweights[j] - for k in xrange(n): + for k in range(n): m[i,j] += randseq[starts[i,j]+k] \ < (concentration * w) / (k + concentration * w) diff --git a/pyhsmm/util/text.py b/pyhsmm/util/text.py index d219338..b92e451 100644 --- a/pyhsmm/util/text.py +++ b/pyhsmm/util/text.py @@ -1,3 +1,4 @@ +from builtins import range import numpy as np import sys, time @@ -20,7 +21,7 @@ def sec2str(seconds): return '%0.2f' % seconds def progprint_xrange(*args,**kwargs): - xr = xrange(*args) + xr = range(*args) return progprint(xr,total=len(xr),**kwargs) def progprint(iterator,total=None,perline=25,show_times=True): @@ -50,8 +51,8 @@ def progprint(iterator,total=None,perline=25,show_times=True): sys.stdout.write(' [ %d ]\n' % (idx+1)) idx += 1 sys.stdout.flush() - print '' + print('') if show_times and len(times) > 0: total = sec2str(seconds=np.sum(times)) - print '%7.2fsec avg, %s total\n' % (np.mean(times),total) + print('%7.2fsec avg, %s total\n' % (np.mean(times),total)) diff --git a/setup.py b/setup.py index 39682e0..5226901 100644 --- a/setup.py +++ b/setup.py @@ -8,7 +8,7 @@ from warnings import warn import os import sys from glob import glob -from six.moves.urllib.request import urlretrieve +from future.moves.urllib.request import urlretrieve import tarfile import shutil @@ -109,8 +109,8 @@ setup(name='pyhsmm', keywords=['bayesian', 'inference', 'mcmc', 'time-series', 'monte-carlo', 'variational inference', 'mean field', 'vb'], install_requires=[ - "numpy", "scipy", "matplotlib", "nose", "pybasicbayes >= 0.1.3"], - setup_requires=['numpy'], + "numpy", "scipy", "matplotlib", "nose", "pybasicbayes >= 0.1.3", "future"], + setup_requires=['numpy', "future"], ext_modules=ext_modules, classifiers=[ 'Development Status :: 4 - Beta', diff --git a/tests/test_hmm_geweke.py b/tests/test_hmm_geweke.py index 31e6c3d..264d993 100644 --- a/tests/test_hmm_geweke.py +++ b/tests/test_hmm_geweke.py @@ -1,4 +1,5 @@ from __future__ import division +from builtins import range import numpy as np from functools import wraps from nose.plugins.attrib import attr @@ -65,7 +66,7 @@ def discrete_geweke_test(fig): # generate state sequences and parameters from the prior prior_stateseqs = [] prior_weights = [] - for itr in xrange(num_iter): + for itr in range(num_iter): hmm.resample_model() # sample parameters from the prior _, stateseq = hmm.generate(T,keep=False) prior_stateseqs.append(stateseq) @@ -79,7 +80,7 @@ def discrete_geweke_test(fig): gibbs_stateseqs = [] gibbs_weights = [] - for itr in xrange(num_iter): + for itr in range(num_iter): s.data = None hmm._generate_obs(s) # resamples data given state sequence, obs params hmm.resample_model() # resamples everything else as usual @@ -90,7 +91,7 @@ def discrete_geweke_test(fig): # test that they look similar by checking probability of co-assignment time_indices = np.arange(T) - for itr in xrange(num_checks): + for itr in range(num_checks): i,j = np.random.choice(time_indices,replace=False,size=2) prior_prob_of_coassignment = (prior_stateseqs[:,i] == prior_stateseqs[:,j]).std() gibbs_prob_of_coassignment = (gibbs_stateseqs[:,i] == gibbs_stateseqs[:,j]).std() @@ -126,12 +127,12 @@ def discrete_geweke_multiple_seqs_test(fig): obs_distns=obs_distns) # generate state sequences and parameters from the prior - prior_stateseqss = [[] for _ in xrange(num_seqs)] + prior_stateseqss = [[] for _ in range(num_seqs)] prior_weights = [] - for itr in xrange(num_iter): + for itr in range(num_iter): hmm.resample_model() # sample parameters from the prior - for itr2 in xrange(num_seqs): + for itr2 in range(num_seqs): _, stateseq = hmm.generate(T,keep=False) prior_stateseqss[itr2].append(stateseq) @@ -142,13 +143,13 @@ def discrete_geweke_multiple_seqs_test(fig): prior_weights = np.array(prior_weights) # generate state sequences and parameters using Gibbs - for itr in xrange(num_seqs): + for itr in range(num_seqs): hmm.generate(T,keep=True) assert len(hmm.states_list) == num_seqs - gibbs_stateseqss = [[] for _ in xrange(num_seqs)] + gibbs_stateseqss = [[] for _ in range(num_seqs)] gibbs_weights = [] - for itr in xrange(num_iter): + for itr in range(num_iter): for s in hmm.states_list: s.data = None hmm._generate_obs(s) # resamples data given state sequence, obs params @@ -166,7 +167,7 @@ def discrete_geweke_multiple_seqs_test(fig): # test that they look similar by checking probability of co-assignment time_indices = np.arange(T) seq_indices = np.arange(num_seqs) - for itr in xrange(num_checks): + for itr in range(num_checks): i,j = np.random.choice(time_indices,replace=False,size=2) si,sj = np.random.choice(seq_indices,replace=True,size=2) prior_prob_of_coassignment = \ diff --git a/tests/test_hmm_likelihood.py b/tests/test_hmm_likelihood.py index 5ffb248..58af94b 100644 --- a/tests/test_hmm_likelihood.py +++ b/tests/test_hmm_likelihood.py @@ -1,4 +1,5 @@ from __future__ import division +from builtins import range import numpy as np from numpy import newaxis as na from inspect import getargspec -- GitLab