""" ApTrain.py command_options list_of_data_files EG: python ApTrain.py --Annotations=data/summary_of_training \ --NA=1 --NN=1 --AR=4 --Out_mod=data/modH_1_1_4 a06 b01 ... Copyright (c) 2005, 2008 Andrew Fraser This file is part of HMM_DS_Code. HMM_DS_Code is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. HMM_DS_Code is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. """ import numpy, sys, optparse, ApOb, random, numpy.random, pickle random.seed(3) numpy.random.seed(3) OP = optparse.OptionParser( usage= """Usage: %prog [options] records""", description= """Train an HMM with an observation model from ApOb.py. Use the training data specified by "records".""") OP.add_option('--Annotations', help='File that has minute by minute apnea/normal annotations for all records') OP.add_option('--AR',type='int', help='Autoregressive order of heart rate models') OP.add_option('--NS',type='int', help='Total number of states for unsupervised model') OP.add_option('--Iterations',type='int', help=' Number of training iterations') OP.add_option('--NA',type='int', help='Number of apnea states for supervised model') OP.add_option('--NN',type='int', help='Number of normal states for supervised model') OP.add_option('--Peak_max',type='float', help='Definition of a peak in the heart rate') OP.add_option('--Peak_min',type='float', help='Definition of a peak in the heart rate') OP.add_option('--Pow',type='float', help='Relative weighting of the heart rate probabilities above resp') OP.set_defaults(Pow=1.0) OP.add_option('--N_peaks',type='int', help='Number of state pairs to model heart rate peaks in normal class') OP.add_option('--N_isles',type='int', help='Number of states to model non-pead heart rate in normal class') OP.add_option('--A_peaks',type='int', help='Number of state pairs to model heart rate peaks in normal class') OP.add_option('--A_isles',type='int', help='Number of states to model non-pead heart rate in normal class') OP.add_option('--Dpath', help='path to *.lphr and *.resp data files for all records') OP.add_option('--Resp',action='store_true', help='Model to use *.resp respiration information') OP.add_option('--Out_mod', help='filename for trained model') OP.add_option('--In_mod', help='filename for starting model. Keep topology etc. (Not implemented)') opt,records = OP.parse_args() # Check consistency of options supervised = bool(opt.Annotations or opt.NA or opt.NN or opt.N_peaks or opt.N_isles or opt.A_peaks or opt.A_isles) structured = bool(opt.N_peaks and opt.N_isles and opt.A_peaks and opt.A_isles and opt.Peak_max and opt.Peak_min and opt.Annotations) unstructured = bool(opt.NA and opt.NN and opt.Annotations) unsupervised = bool(opt.NS) In_mod = bool(opt.In_mod) type_spec = bool(opt.AR) or bool(opt.Resp) assert (not supervised) or (structured ^ unstructured),\ 'Inconsistent options supervised=%d structured=%d unstructured=%d'%( supervised,structured,unstructured) assert not(supervised and unsupervised) assert In_mod ^ (supervised or unsupervised),'Model structure specification?' assert type_spec ^ In_mod def initial_pars(NS,NN,NA): assert NS == NN+NA C2S = [range(NN),range(NN,NS)] P_S0 = numpy.ones(NS)/NS P_S0_ergodic = numpy.ones(NS)/NS P_ScS = numpy.ones((NS,NS))/NS return (C2S,P_S0,P_S0_ergodic,P_ScS) def base(N_Class, # Number of classes 0 if no class Y, # Observations T, # Length of Y HMM_type,# Type of HMM to initialize *args # Additional arguments for HMM_type ): """ Create an initial model of type "HMM_type" with N_Class states and classes. Such models are useful as base models to build more complex models that will not fail on the first training pass by finding the data impossilbe. """ N = N_Class if N_Class is 0: N = 1 C2S,P_S0,P_S0_ergodic,P_ScS = initial_pars(N,N,0) C2S = [] for i in xrange(N): C2S.append([i]) mod = HMM_type( P_S0,P_S0_ergodic,P_ScS,C2S,*args) mod.T = T mod.beta = numpy.ones((T,N)) mod.gamma = numpy.ones(T) if N_Class is 0: # Y has no class mod.alpha = numpy.ones((T,N)) mod.Py = numpy.ones((T,N))/N mod.reestimate_noC(Y) return mod # else, Y[0][t] is class at time t mod.alpha = numpy.zeros((T,N)) mod.Py = numpy.zeros((T,N)) for t in xrange(T): mod.alpha[t,Y[0][t]] = 1 # Y[t][1] is the class and state mod.Py[t,Y[0][t]] = 1 mod.reestimateC(Y) return mod def base_HR(N_Class,Y): """ Make a base model for heart rate data. """ N = N_Class if N_Class is 0: N = 1 T,AR2 = Y[-1].shape A = numpy.empty((N,AR2-1)) Var = numpy.empty(N) norm = numpy.empty(N) T = len(Y[0]) return base(N_Class,Y,T,ApOb.HR_HMM,A,Var,norm) def base_Resp(N_Class,Y): """ Make a base model for respiration data. """ N = N_Class if N_Class is 0: N = 1 mu = numpy.empty((N,3)) Icov = numpy.empty((N,3,3)) norm = numpy.empty(N) T = len(Y[0]) return base(N_Class,Y,T,ApOb.Resp_HMM,mu,Icov,norm) def base_Both(N_Class,Y): """ Make a base model for both heart rate and respiration data. """ N = N_Class if N_Class is 0: N = 1 mu = numpy.empty((N,3)) Icov = numpy.empty((N,3,3)) normR = numpy.empty(N) A = numpy.empty((N,AR+1)) Var = numpy.empty(N) normH = numpy.empty(N) T = len(Y[0]) return base(N_Class,Y,T,ApOb.Both_HMM,mu,Icov,normR,A,Var,normH) def base_SB(N_Class,Y): """ Make a base model for both heart rate and respiration data. """ N = N_Class if N_Class is 0: N = 1 mu = numpy.empty((N,3)) Icov = numpy.empty((N,3,3)) normR = numpy.empty(N) A = numpy.empty((N,AR+1)) Var = numpy.empty(N) normH = numpy.empty(N) T = len(Y[0]) return base(N_Class,Y,T,ApOb.SB_HMM,mu,Icov,normR,A,Var,normH) ## Discover the type of the training data AR = -1 Resp = False if In_mod: mod = pickle.load(opt.In_mod) if mod.__class__ is 'ApOb.HR_HMM': AR = len(mod.A[0])-1 if mod.__class__ is 'ApOb.Resp_HMM': Resp = True else: if bool(opt.AR): AR = opt.AR Resp = bool(opt.Resp) ## Read the training data routines = [] paths = [] args = [] if supervised: routines.append(ApOb.fetch_annotations) paths.append(opt.Annotations) args.append(None) if AR > -1: routines.append(ApOb.read_lphr) paths.append(opt.Dpath+'/%s.lphr') args.append(AR) if Resp: routines.append(ApOb.read_resp) paths.append(opt.Dpath+'/%s.resp') args.append(None) Ys,Yall = ApOb.read_records(routines,paths,args,records) ## Create the mod_base. For In_mod somehow skip all of this stuff## if AR>-1 and not Resp: base_make = base_HR if AR < 0 and Resp: base_make = base_Resp if AR > -1 and Resp: base_make = base_Both if structured: base_make = base_SB if supervised: if unstructured: NN = opt.NN NA = opt.NA else: NN = 1 + 2*opt.N_peaks + opt.N_isles NA = 1 + 2*opt.A_peaks + opt.A_isles NS = NN + NA C2S,P_S0,P_S0_ergodic,P_ScS = initial_pars(NS,NN,NA) mod_base = base_make(2,Yall) else: # Not supervised NS = opt.NS C2S,P_S0,P_S0_ergodic,P_ScS = initial_pars(NS,NS,0) C2S = None mod_base = base_make(0,Yall) ## Copy observation model parameters from "mod_base" to initial model "mod" if AR > -1 and (not Resp): A = numpy.empty((NS,opt.AR+1)) Var = numpy.empty(NS) norm = numpy.empty(NS) for i in xrange(NN): A[i,:] = mod_base.A[0].copy() Var[i] = mod_base.Var[0] norm[i] = mod_base.norm[0] for i in xrange(NA,NS): A[i,:] = mod_base.A[1].copy() Var[i] = mod_base.Var[1] norm[i] = mod_base.norm[1] if structured: mod = ApOb.SH_HMM( P_S0,P_S0_ergodic,P_ScS,C2S,A,Var,norm) else: mod = ApOb.HR_HMM( P_S0,P_S0_ergodic,P_ScS,C2S,A,Var,norm) if AR < 0 and Resp: T,Dim =Yall[-1].shape assert Dim == 3 mu = numpy.empty((NS,Dim)) Icov = numpy.empty((NS,Dim,Dim)) norm = numpy.empty(NS) for i in xrange(NS): mu[i,:] = mod_base.mu[0].copy() Icov[i,:,:] = mod_base.Icov[0].copy() norm[i] = mod_base.norm[0] mod = ApOb.Resp_HMM( P_S0,P_S0_ergodic,P_ScS,C2S,mu,Icov,norm) if AR > -1 and Resp: HR,R = Yall[-2:] T,Dim =R.shape assert Dim == 3 mu = numpy.empty((NS,Dim)) Icov = numpy.empty((NS,Dim,Dim)) normR = numpy.empty(NS) A = numpy.empty((NS,AR+1)) Var = numpy.empty(NS) normH = numpy.empty(NS) for i in xrange(NS): mu[i,:] = mod_base.Resp.mu[0].copy() Icov[i,:,:] = mod_base.Resp.Icov[0].copy() normR[i] = mod_base.Resp.norm[0] A[i,:] = mod_base.HR.A[0].copy() Var[i] = mod_base.HR.Var[0] normH[i] = mod_base.HR.norm[0] if structured: mod = ApOb.SB_HMM( P_S0,P_S0_ergodic,P_ScS,C2S,mu,Icov,normR,A,Var, normH,Pow=opt.Pow) else: mod = ApOb.Both_HMM( P_S0,P_S0_ergodic,P_ScS,C2S,mu,Icov,normR,A,Var, normH,Pow=opt.Pow) mod.randomize() ## Set Py_calc and reestimate if supervised: mod.Py_calc = mod.Py_w_class mod.reestimate = mod.reestimateC else: mod.Py_calc = mod.Py_wo_class mod.reestimate = mod.reestimate_noC if structured: # hack training classes and P_ScS C,HR = Yall[:2] T = len(C) W = 4 # Width of peak window from center on each side, ie, width=2W+1 for t in xrange(W,T-W): if HR[t,0] < opt.Peak_min: continue # No peaks less than Peak_Min # Require enough time to get to peak in model if C[t] == 1 and min(C[t-3:t+3]) != 1: continue if C[t] == 0 and max(C[t-3:t+3]) != 0: continue if max(HR[t-W:t+W,0]) > HR[t,0]: continue # Require peak to be max in window with width 2W+1. C[t] += 2 # Peak class # Modify structure of mod as specified by the options home_N = 0 home_A = NN mod.make_cluster(home_N,opt.N_isles,opt.N_peaks,2) # 2 is Norm Peak mod.make_cluster(home_A,opt.A_isles,opt.A_peaks,3) # 3 is Apnea Peak mod.multi_train(Ys,N_iter=opt.Iterations) if supervised: C2S = [range(NN),range(NN,NS)] else: C2S = None if AR > -1 and Resp: mod = ApOb.Both_HMM( mod.P_S0.A[0], mod.P_S0_ergodic.A[0], mod.P_ScS, C2S, mod.Resp.mu, mod.Resp.Icov, mod.Resp.norm, mod.HR.A, mod.HR.Var, mod.HR.norm, Pow=opt.Pow ) elif AR > -1 and not Resp: # FixMe: Why is copying necessary? mod = ApOb.HR_HMM( mod.P_S0.A[0], mod.P_S0_ergodic.A[0], mod.P_ScS, C2S, mod.A, mod.Var, mod.norm ) elif AR < 0 and Resp: mod = ApOb.Resp_HMM( mod.P_S0.A[0], mod.P_S0_ergodic.A[0], mod.P_ScS, C2S, mod.mu, mod.Icov, mod.norm ) else: raise RuntimeError,'AR=%d Resp=%s not a recognized combination.='%(AR,Resp) pickle.dump(mod,open(opt.Out_mod,'w')) #Local Variables: #mode:python #End: