old musi stuff

This commit is contained in:
2025-10-29 21:50:43 -05:00
parent c3b0526f59
commit 6897f996ee
11 changed files with 1319 additions and 7 deletions

utils.py Normal file

@@ -0,0 +1,745 @@
# Utility functions for working with 3D gaze data.
# Authors: Brendan David-John, Candace Peacock
import glob
import itertools
import json
import math
import os
import sys

import numpy as np
import pandas as pd
from sklearn.preprocessing import scale
from sklearn.utils import resample
# Subjects to exclude from analysis
subjs_skip = []
# Features treated as discrete event flags rather than continuous signals
discrete_feats = ['sac_det', 'fix_det']
# Physiological upper bound on saccade velocity (deg/s):
# https://pubmed.ncbi.nlm.nih.gov/25713524/ & https://www.sciencedirect.com/science/article/abs/pii/0025556475900759
max_sacc_speed = 800
# https://www.geeksforgeeks.org/python-make-a-list-of-intervals-with-sequential-numbers/
def intervals_extract(iterable):
    iterable = sorted(set(iterable))
    for key, group in itertools.groupby(enumerate(iterable),
                                        lambda t: t[1] - t[0]):
        group = list(group)
        yield [group[0][1], group[-1][1]]
# Find consecutive chunks of True values in bools and return start/end indices.
# Input: 1D np array of booleans
# Output: 2D np array of [start, end] index pairs
def intervals_from_bool(bools):
    # get python list of indices where bools is True
    bool_idx = np.where(bools)[0].tolist()
    # pull out the intervals
    intervals = intervals_extract(bool_idx)
    # flatten the list of [start, end] pairs
    intervals = [item for sublist in intervals for item in sublist]
    # unroll so start and end points fall into the 1st/2nd column
    intervals = np.reshape(intervals, (-1, 2))
    return intervals
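# Illustrative usage sketch (not part of the original pipeline): extract the
# [start, end] index pairs of the True runs in a small boolean array.
def _demo_intervals_from_bool():
    bools = np.array([False, True, True, True, False, False, True, True])
    print(intervals_from_bool(bools))  # [[1 3]
                                       #  [6 7]]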
'''
Compute angular gaze velocity using a numerically stable atan2 method.
Inputs: gaze_vectors: Nx3 array of N 3D unit gaze vectors (x,y,z)
        dt: 1D array with N-1 elements containing the timestep between consecutive samples (e.g. np.diff(time_stamps))
Output: 1D array with N elements containing angular gaze velocity in degrees per second
'''
def compute_gaze_velocity(gaze_vectors, dt):
    # pair each sample i with sample i+1
    gaze_vectors_1 = gaze_vectors[0:-1, :]
    gaze_vectors_2 = gaze_vectors[1:, :]
    # numerically stable angle: 2*atan2(|u-v|, |u+v|), assuming u and v are unit vectors
    gaze_dist = 2 * np.arctan2(np.sqrt(np.square(gaze_vectors_1 - gaze_vectors_2).sum(axis=1)),
                               np.sqrt(np.square(gaze_vectors_1 + gaze_vectors_2).sum(axis=1)))
    gaze_velocity = np.rad2deg(gaze_dist) / dt
    # zero out NaNs and infs
    np.nan_to_num(gaze_velocity, copy=False, posinf=0.0, neginf=0.0)
    # correct outliers over max_sacc_speed deg/s by interpolating across good samples
    gaze_velocity_good_idx = (gaze_velocity <= max_sacc_speed).nonzero()[0]
    gaze_velocity = np.interp(range(gaze_velocity.shape[0]), gaze_velocity_good_idx,
                              gaze_velocity[gaze_velocity_good_idx])
    # prepend zero for the first sample so the output length matches the input
    return np.insert(gaze_velocity, 0, 0.0)
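# Illustrative usage sketch (synthetic data): two unit gaze vectors one degree
# apart, sampled 10 ms apart, give ~100 deg/s at the second sample. Note that dt
# holds the N-1 between-sample timesteps, e.g. np.diff(time_stamps).
def _demo_compute_gaze_velocity():
    ang = np.deg2rad(1.0)
    gaze_vectors = np.array([[0.0, 0.0, 1.0],
                             [np.sin(ang), 0.0, np.cos(ang)]])
    dt = np.array([0.01])
    print(compute_gaze_velocity(gaze_vectors, dt))  # ~[0. 100.]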
'''
Compute angular gaze velocity using the arccos-of-dot-product method.
Inputs: gaze_vectors: Nx3 array of N 3D unit gaze vectors (x,y,z)
        dt: 1D array with N-1 elements containing the timestep between consecutive samples
Output: 1D array with N elements containing angular gaze velocity in degrees per second
'''
def compute_gaze_velocity_naive(gaze_vectors, dt):
    # pair each sample i with sample i+1
    gaze_vectors_1 = gaze_vectors[0:-1, :]
    gaze_vectors_2 = gaze_vectors[1:, :]
    # arccos of dot product, assuming the vectors are normalized; rounding can push
    # the dot product slightly past 1, producing NaNs that are zeroed below
    gaze_dist_naive = np.arccos(np.sum(gaze_vectors_1 * gaze_vectors_2, axis=1))
    gaze_velocity_naive = np.rad2deg(gaze_dist_naive) / dt
    # zero out NaNs and infs
    np.nan_to_num(gaze_velocity_naive, copy=False, posinf=0.0, neginf=0.0)
    # correct outliers over max_sacc_speed deg/s by interpolating across good samples
    gaze_velocity_naive_good_idx = (gaze_velocity_naive <= max_sacc_speed).nonzero()[0]
    gaze_velocity_naive = np.interp(range(gaze_velocity_naive.shape[0]), gaze_velocity_naive_good_idx,
                                    gaze_velocity_naive[gaze_velocity_naive_good_idx])
    # prepend zero for the first sample so the output length matches the input
    return np.insert(gaze_velocity_naive, 0, 0.0)
'''
Classify gaze samples as saccade events using velocity thresholding (I-VT).
Inputs: gaze_velocity: 1D array with N elements representing gaze velocity in degrees per second
        time_stamps: 1D array with N elements representing timestamps in seconds
        ivt_threshold: scalar in degrees per second; samples above this value are classified as saccades
        min_dur: scalar in seconds; saccades shorter than this are discarded
        max_dur: scalar in seconds; saccades longer than this are discarded
Output: 2-element tuple.
        First element: 1D boolean array with N elements, True where a sample is part of a saccade
        Second element: Mx2 array, where M is the number of saccade events detected; first column is start index, second is end index
'''
def saccade_classification_ivt(gaze_velocity, time_stamps, ivt_threshold=100.0, min_dur=0.012, max_dur=2.0):
    # find samples over threshold
    sac_bool = gaze_velocity > ivt_threshold
    sac_intervals = intervals_from_bool(sac_bool)
    # remove saccades outside the allowed duration range
    sacc_durs = time_stamps[sac_intervals[:, 1]] - time_stamps[sac_intervals[:, 0]]
    short_sacc_idx = np.where((sacc_durs < min_dur) | (sacc_durs > max_dur))[0]
    # drop the bad intervals and set them to False in sac_bool
    bad_sac_intervals = sac_intervals[short_sacc_idx, :]
    sac_intervals_clean = np.delete(sac_intervals, short_sacc_idx, 0)
    num_bad_intervals = len(bad_sac_intervals)
    for i in range(num_bad_intervals):
        curr_interval = bad_sac_intervals[i, :]
        sac_bool[curr_interval[0]:curr_interval[1] + 1] = False
    return (sac_bool, sac_intervals_clean)
'''
Classify gaze samples as fixation events using velocity thresholding (I-VT).
Inputs: gaze_velocity: 1D array with N elements representing gaze velocity in degrees per second
        time_stamps: 1D array with N elements representing timestamps in seconds
        ivt_threshold: scalar in degrees per second; samples below this value are classified as fixations
        min_dur: scalar in seconds; fixations shorter than this are discarded
        max_dur: scalar in seconds; fixations longer than this are discarded
Output: 2-element tuple.
        First element: 1D boolean array with N elements, True where a sample is part of a fixation
        Second element: Mx2 array, where M is the number of fixation events detected; first column is start index, second is end index
'''
def fixation_classification_ivt(gaze_velocity, time_stamps, ivt_threshold=20.0, min_dur=0.100, max_dur=2.0):
    # find samples under threshold
    fix_bool = gaze_velocity < ivt_threshold
    fix_intervals = intervals_from_bool(fix_bool)
    # remove fixations outside the allowed duration range
    fix_durs = time_stamps[fix_intervals[:, 1]] - time_stamps[fix_intervals[:, 0]]
    short_fix_idx = np.where((fix_durs < min_dur) | (fix_durs > max_dur))[0]
    # drop the bad intervals and set them to False in fix_bool
    bad_fix_intervals = fix_intervals[short_fix_idx, :]
    fix_intervals_clean = np.delete(fix_intervals, short_fix_idx, 0)
    num_bad_intervals = len(bad_fix_intervals)
    for i in range(num_bad_intervals):
        curr_interval = bad_fix_intervals[i, :]
        fix_bool[curr_interval[0]:curr_interval[1] + 1] = False
    return (fix_bool, fix_intervals_clean)
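# Illustrative usage sketch (synthetic data): split a fabricated 100 Hz velocity
# trace into saccade and fixation samples with the two I-VT classifiers above.
def _demo_ivt_classification():
    time_stamps = np.arange(100) * 0.01            # 1 s of samples at 100 Hz
    gaze_velocity = np.full(100, 5.0)              # slow, fixation-like motion
    gaze_velocity[40:45] = 300.0                   # a brief saccade burst
    sac_bool, sac_intervals = saccade_classification_ivt(gaze_velocity, time_stamps)
    fix_bool, fix_intervals = fixation_classification_ivt(gaze_velocity, time_stamps)
    print(sac_intervals)  # [[40 44]]
    print(fix_intervals)  # [[ 0 39]
                          #  [45 99]]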
'''
Compute angular dispersion over a set of 3D gaze vectors, defined as the maximum
angular displacement of any sample in the window from the (normalized) centroid.
Inputs: gaze_vectors: Nx3 array of N 3D unit gaze vectors (x,y,z)
Output: maximum dispersion in degrees
'''
def compute_dispersion(gaze_vectors):
    # centroid of the gaze directions, renormalized so the unit-vector angle formula applies
    centroid = np.mean(gaze_vectors, 0)
    centroid = centroid / np.linalg.norm(centroid)
    # numerically stable angle from centroid: 2*atan2(|u-v|, |u+v|)
    gaze_dist = np.rad2deg(2 * np.arctan2(np.sqrt(np.square(gaze_vectors - centroid).sum(axis=1)),
                                          np.sqrt(np.square(gaze_vectors + centroid).sum(axis=1))))
    # zero out NaNs and infs
    np.nan_to_num(gaze_dist, copy=False, posinf=0.0, neginf=0.0)
    # return the maximum displacement
    return np.max(gaze_dist)
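# Illustrative usage sketch (synthetic data): two unit vectors two degrees apart
# are each ~1 degree from their centroid, so the dispersion is ~1 degree.
def _demo_compute_dispersion():
    ang = np.deg2rad(2.0)
    gaze_vectors = np.array([[0.0, 0.0, 1.0],
                             [np.sin(ang), 0.0, np.cos(ang)]])
    print(compute_dispersion(gaze_vectors))  # ~1.0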
'''
Classify gaze samples as fixation events using I-DT, by enforcing a maximum angular
dispersion within a time window of at least the minimum duration.
Implementation based on: https://github.com/ecekt/eyegaze/blob/master/gaze.py and https://github.com/ASAPLableni/VR-centred_I-DT_algorithm
Inputs: gaze_vectors: Nx3 array of N 3D unit gaze vectors (x,y,z)
        time_stamps: 1D array with N elements representing timestamps in seconds
        sac_bool: 1D boolean array marking samples already classified as saccades by I-VT, used to ensure samples do not receive more than one label
        min_dur: scalar in seconds; fixations shorter than this are discarded
        max_dur: scalar in seconds; fixations longer than this are discarded
        max_disp: scalar in degrees; maximum spatial dispersion allowed within a time window for a fixation
Output: 2-element tuple.
        First element: 1D boolean array with N elements, True where a sample is part of a fixation
        Second element: Mx2 array, where M is the number of fixation events detected; first column is start index, second is end index
'''
def fixation_classification_idt(gaze_vectors, time_stamps, sac_bool, min_dur=0.100, max_dur=2.0, max_disp=1.0):
    fix_bool = np.zeros(sac_bool.shape, dtype=bool)
    current = 0  # pointer to the current beginning of the window
    last = 0
    while current < len(gaze_vectors):
        t0 = time_stamps[current]  # window start time
        t1 = t0 + min_dur  # time once a minimum fixation duration has been observed
        for r in range(current, len(gaze_vectors)):
            if time_stamps[r] >= t0 and time_stamps[r] <= t1:
                last = r
            elif time_stamps[r] > t1:
                break
        # check the dispersion in this window
        dispersion = compute_dispersion(gaze_vectors[current:last + 1, :])
        if dispersion <= max_disp:
            # grow the window while dispersion stays under the threshold
            while dispersion <= max_disp and last + 1 < len(gaze_vectors):
                last += 1
                dispersion = compute_dispersion(gaze_vectors[current:last + 1, :])
            # dispersion threshold exceeded: mark the fixation over [current, last]
            fix_bool[current:last + 1] = True
            current = last + 1  # move the pointer to a fresh window
        else:
            current += 1
            last = current
    # resolve conflicts with the saccade labels: samples marked as both become non-fixation
    conflict_idx = np.where(fix_bool & sac_bool)[0]
    fix_bool[conflict_idx] = False
    fix_intervals = intervals_from_bool(fix_bool)
    # remove fixations outside the allowed duration range
    fix_durs = time_stamps[fix_intervals[:, 1]] - time_stamps[fix_intervals[:, 0]]
    short_fix_idx = np.where((fix_durs < min_dur) | (fix_durs > max_dur))[0]
    # drop the bad intervals and set them to False in fix_bool
    bad_fix_intervals = fix_intervals[short_fix_idx, :]
    fix_intervals_clean = np.delete(fix_intervals, short_fix_idx, 0)
    num_bad_intervals = len(bad_fix_intervals)
    for i in range(num_bad_intervals):
        curr_interval = bad_fix_intervals[i, :]
        fix_bool[curr_interval[0]:curr_interval[1] + 1] = False
    return (fix_bool, fix_intervals_clean)
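# Illustrative usage sketch (synthetic data): fifty identical gaze vectors over
# 0.5 s form a single zero-dispersion fixation, with no saccade samples present.
def _demo_fixation_idt():
    gaze_vectors = np.tile(np.array([0.0, 0.0, 1.0]), (50, 1))
    time_stamps = np.arange(50) * 0.01
    sac_bool = np.zeros(50, dtype=bool)
    fix_bool, fix_intervals = fixation_classification_idt(gaze_vectors, time_stamps, sac_bool)
    print(fix_intervals)  # [[ 0 49]]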
'''
Summary: Compute a continuous time series of feature values by averaging over events in the last N seconds.
Inputs: features: 2D array with rows corresponding to events and columns corresponding to feature values
        fixation_times: 1D array containing the timestamp of each row of features
        time_stamps: 1D array with the time in seconds of every sample in the output time series
        N: scalar number of seconds of past events to average over
Output: 2D array with a row per sample in time_stamps and a column per feature signal.
'''
def compute_feature_signal_time_avg(features, fixation_times, time_stamps, N):
    num_features = features.shape[1]
    num_rows = len(time_stamps)
    feature_signal = np.zeros((num_rows, num_features), dtype=float)
    # for each time stamp, average the events that occurred within the last N seconds
    for i in range(num_rows):
        curr_time = time_stamps[i]
        # events at most N seconds before curr_time
        curr_events = features[(fixation_times >= curr_time - N) & (fixation_times <= curr_time), :]
        curr_row = np.zeros((1, num_features), dtype=float)
        if len(curr_events) > 0:
            curr_row = np.mean(curr_events, 0)
        feature_signal[i, :] = curr_row
    return feature_signal
'''
Summary: Compute a continuous time series of feature values by averaging over the past N events.
Inputs: features: 2D array with rows corresponding to events and columns corresponding to feature values
        fixation_times: 1D array containing the timestamp of each row of features
        time_stamps: 1D array with the time in seconds of every sample in the output time series
        N: scalar number of past events to average over
Output: 2D array with a row per sample in time_stamps and a column per feature signal.
'''
def compute_feature_signal_event_avg(features, fixation_times, time_stamps, N):
    num_features = features.shape[1]
    num_rows = len(time_stamps)
    feature_signal = np.zeros((num_rows, num_features), dtype=float)
    # for each time stamp, average over the last N events
    for i in range(num_rows):
        curr_time = time_stamps[i]
        # events at or before curr_time
        curr_events = features[(fixation_times <= curr_time), :]
        # take the last N of them if available
        prior_events = curr_events[-N:, :]
        curr_row = np.zeros((1, num_features), dtype=float)
        if len(prior_events) > 0:
            curr_row = np.mean(prior_events, 0)
        feature_signal[i, :] = curr_row
    return feature_signal
# Wrapper to dispatch to the feature-signal methods. Note: the 'event_discrete' branch
# expects a compute_feature_signal_event_discrete implementation, which is not defined
# in the code shown here.
def compute_feature_signal(method, features, event_times, time_stamps, N):
    if method == 'time_avg':
        return compute_feature_signal_time_avg(features, event_times, time_stamps, N)
    elif method == 'event_avg':
        return compute_feature_signal_event_avg(features, event_times, time_stamps, N)
    elif method == 'event_discrete':
        return compute_feature_signal_event_discrete(features, event_times, time_stamps, N)
    else:
        return None
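# Illustrative usage sketch (synthetic data): turn three events carrying a single
# scalar feature into continuous signals with both averaging strategies.
def _demo_compute_feature_signal():
    features = np.array([[1.0], [2.0], [3.0]])     # one feature column, one row per event
    event_times = np.array([0.1, 0.5, 0.9])
    time_stamps = np.array([0.0, 0.25, 0.5, 0.75, 1.0])
    # average events from the last 1.0 s
    print(compute_feature_signal('time_avg', features, event_times, time_stamps, 1.0).ravel())
    # average the last 2 events
    print(compute_feature_signal('event_avg', features, event_times, time_stamps, 2).ravel())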
# Returns an Nx3 np array (x,y,z) of head-corrected gaze vectors.
def head_correction(df, param):
    gaze_vectors = df[['combined.gazeDirection.x', 'combined.gazeDirection.y', 'combined.gazeDirection.z']].values
    num_rows = np.size(gaze_vectors, 0)
    transformed_gaze_vectors = np.zeros(gaze_vectors.shape)
    if param.transform_type == 'coordinate_frame':
        # transform vectors into the head coordinate frame
        head_pos = df[['head.pos.x', 'head.pos.y', 'head.pos.z']].values
        head_up = df[['head.up.x', 'head.up.y', 'head.up.z']].values
        head_right = df[['head.right.x', 'head.right.y', 'head.right.z']].values
        head_dir = df[['head.dir.x', 'head.dir.y', 'head.dir.z']].values
        for i in range(num_rows):
            # current gaze vector and head basis vectors
            gaze_vector = gaze_vectors[i, :]
            curr_head_up = head_up[i, :]
            curr_head_right = head_right[i, :]
            curr_head_dir = head_dir[i, :]
            '''Transformation matrix of the form:
            [ R.x, R.y, R.z, 0,
              U.x, U.y, U.z, 0,
              D.x, D.y, D.z, 0,
              0,   0,   0,   1 ]
            '''
            transform_mat = np.vstack((np.append(curr_head_right, 0),
                                       np.append(curr_head_up, 0),
                                       np.append(curr_head_dir, 0),
                                       np.array([0.0, 0.0, 0.0, 1.0])))
            # add homogeneous coordinate to the vector
            gaze_vector = np.append(gaze_vector, 1.0)
            # right-multiply the row vector with the transpose of the matrix defined above
            transformed_gaze_vector = np.matmul(gaze_vector, transform_mat.T)
            # store the transformed vector without the homogeneous coordinate
            transformed_gaze_vectors[i, :] = transformed_gaze_vector[0:3]
        # renormalize all rows to absorb rounding error
        row_norm_vals = np.sqrt(np.square(transformed_gaze_vectors).sum(axis=1))
        transformed_gaze_vectors = transformed_gaze_vectors / row_norm_vals[:, None]
    elif param.transform_type == 'quaternions':
        q_x = df['RotationX'].values
        q_y = df['RotationY'].values
        q_z = df['RotationZ'].values
        q_w = df['RotationW'].values
        for i in range(num_rows):
            # current gaze vector and quaternion components
            gaze_vector = gaze_vectors[i, :]
            curr_q_x = q_x[i]
            curr_q_y = q_y[i]
            curr_q_z = q_z[i]
            curr_q_w = q_w[i]
            # rotation matrix entries from the (w,x,y,z) quaternion, as per Diaz et al.
            # first column
            one_one = np.square(curr_q_w) + np.square(curr_q_x) - np.square(curr_q_y) - np.square(curr_q_z)
            two_one = 2 * (curr_q_w * curr_q_z + curr_q_x * curr_q_y)
            three_one = 2 * (curr_q_x * curr_q_z - curr_q_w * curr_q_y)
            # second column
            one_two = 2 * (curr_q_x * curr_q_y - curr_q_w * curr_q_z)
            two_two = np.square(curr_q_w) - np.square(curr_q_x) + np.square(curr_q_y) - np.square(curr_q_z)
            three_two = 2 * (curr_q_w * curr_q_x + curr_q_y * curr_q_z)
            # third column
            one_three = 2 * (curr_q_w * curr_q_y + curr_q_x * curr_q_z)
            two_three = 2 * (curr_q_y * curr_q_z - curr_q_w * curr_q_x)
            three_three = np.square(curr_q_w) - np.square(curr_q_x) - np.square(curr_q_y) + np.square(curr_q_z)
            # laying the columns out as rows gives the transpose, i.e. the inverse head rotation
            rot_mat = np.array([[one_one, two_one, three_one],
                                [one_two, two_two, three_two],
                                [one_three, two_three, three_three]])
            transformed_gaze_vectors[i, :] = np.matmul(rot_mat, gaze_vector)
        # renormalize all rows to absorb rounding error
        row_norm_vals = np.sqrt(np.square(transformed_gaze_vectors).sum(axis=1))
        transformed_gaze_vectors = transformed_gaze_vectors / row_norm_vals[:, None]
    return transformed_gaze_vectors
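# Illustrative usage sketch (synthetic data): the quaternion path only needs the
# gaze and Rotation* columns. SimpleNamespace stands in for the project's real
# parameter object (an assumption); an identity quaternion leaves gaze unchanged.
def _demo_head_correction():
    from types import SimpleNamespace
    df = pd.DataFrame({'combined.gazeDirection.x': [0.0],
                       'combined.gazeDirection.y': [0.0],
                       'combined.gazeDirection.z': [1.0],
                       'RotationX': [0.0], 'RotationY': [0.0],
                       'RotationZ': [0.0], 'RotationW': [1.0]})
    param = SimpleNamespace(transform_type='quaternions')
    print(head_correction(df, param))  # [[0. 0. 1.]]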
def window_signal_overlap(X, y, window_size=10):
    M, N = X.shape
    windows = np.array([])
    lbls = []
    # indices of labeled events: false positives (1) and true positives (-1)
    FP_indxs = np.where(y == 1)[0]
    TP_indxs = np.where(y == -1)[0]
    combined = np.hstack((FP_indxs, TP_indxs))
    for i in combined:
        window = np.array([])
        # every window of length window_size that overlaps sample i
        starts = range(max(0, i - window_size + 1), i + 1)
        ends = range(i + 1, min(M, i + window_size))
        for (s, e) in zip(starts, ends):
            window = X[s:e]
            curr_size = window.shape[0]
            window = window.reshape(-1, curr_size * N)
            # window was clipped at the front or back: add zero padding
            if curr_size < window_size:
                padding_size = window_size * N - curr_size * N
                padding = np.zeros(shape=(1, padding_size))
                if i - window_size < 0:
                    window = np.hstack((padding, window))
                else:
                    window = np.hstack((window, padding))
            if windows.shape[0] == 0:
                windows = window
            else:
                windows = np.vstack((windows, window))
            lbl = y[i]
            lbls.append(lbl)
    lbls = np.array(lbls, dtype=int).reshape(-1, 1)
    return (windows, lbls)
def window_signal_center(X, y, window_size=10):
    M, N = X.shape
    windows = np.array([])
    lbls = []
    # indices of labeled events: false positives (1) and true positives (-1)
    FP_indxs = np.where(y == 1)[0]
    TP_indxs = np.where(y == -1)[0]
    combined = np.hstack((FP_indxs, TP_indxs))
    for i in combined:
        # window centered on sample i
        start = max(0, i - int(window_size / 2))
        end = min(M, i + int(window_size / 2))
        window = X[start:end, :]
        curr_size = window.shape[0]
        window = window.reshape(-1, curr_size * N)
        # window was clipped at the front or back: add zero padding
        if curr_size < window_size:
            padding_size = window_size * N - curr_size * N
            padding = np.zeros(shape=(1, padding_size))
            if i - int(window_size / 2) < 0:
                window = np.hstack((padding, window))
            else:
                window = np.hstack((window, padding))
        if windows.shape[0] == 0:
            windows = window
        else:
            windows = np.vstack((windows, window))
        lbl = y[i]
        lbls.append(lbl)
    lbls = np.array(lbls, dtype=int).reshape(-1, 1)
    return (windows, lbls)
def window_signal_start(X, y, window_size=10):
    M, N = X.shape
    windows = np.array([])
    lbls = []
    # indices of labeled events: false positives (1) and true positives (-1)
    FP_indxs = np.where(y == 1)[0]
    TP_indxs = np.where(y == -1)[0]
    combined = np.hstack((FP_indxs, TP_indxs))
    for i in combined:
        # window starting at sample i
        start = i
        end = min(M, i + window_size)
        window = X[start:end, :]
        curr_size = window.shape[0]
        window = window.reshape(-1, curr_size * N)
        # window was clipped at the back: add zero padding
        if curr_size < window_size:
            padding_size = window_size * N - curr_size * N
            padding = np.zeros(shape=(1, padding_size))
            window = np.hstack((window, padding))
        if windows.shape[0] == 0:
            windows = window
        else:
            windows = np.vstack((windows, window))
        lbl = y[i]
        lbls.append(lbl)
    lbls = np.array(lbls, dtype=int).reshape(-1, 1)
    return (windows, lbls)
# Wrapper to dispatch to the windowing methods.
def window_signal(X, y, mode='center', window_size=10):
    if mode == 'center':
        return window_signal_center(X, y, window_size=window_size)
    elif mode == 'start':
        return window_signal_start(X, y, window_size=window_size)
    elif mode == 'overlap':
        return window_signal_overlap(X, y, window_size=window_size)
    else:
        return None
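# Illustrative usage sketch (synthetic data): extract centered windows around the
# two labeled samples (+1 false positive, -1 true positive) of a 2-feature signal.
def _demo_window_signal():
    X = np.arange(20, dtype=float).reshape(10, 2)
    y = np.zeros(10, dtype=int)
    y[3], y[7] = 1, -1
    windows, lbls = window_signal(X, y, mode='center', window_size=4)
    print(windows.shape, lbls.ravel())  # (2, 8) [ 1 -1]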
def get_header(features, window_size):
    # build a CSV header of feature names suffixed with their window offset, plus a label column
    header = ""
    for i in range(window_size):
        h = ['%s_%d' % (f, i) for f in features]
        h = ",".join(h)
        if not len(header):
            header = h
            continue
        header = '%s,%s' % (header, h)
    header = '%s,%s' % (header, 'label')
    return header
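# Illustrative usage sketch: CSV header for two features over a 3-sample window.
def _demo_get_header():
    print(get_header(['vel', 'disp'], 3))
    # vel_0,disp_0,vel_1,disp_1,vel_2,disp_2,label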
def is_discrete_feat(feat):
    # True if the feature name contains any of the discrete feature tags
    isdisc = False
    for f in discrete_feats:
        if f in feat:
            isdisc = True
            break
    return isdisc
def read_data(path):
    files = glob.glob(os.path.join(path, '*'))
    y = []
    X = []
    subjs = []
    discrete_cols = []
    cols = []
    for file in files:
        # subject id is the leading token of the file name
        subj = file.split(os.sep)[-1]
        subj = subj.split('.')[0]
        subj = subj.split('_')[0]
        if subj in subjs_skip:
            continue
        data = pd.read_csv(file)
        # record the column names and which of them are discrete on the first file
        if not len(discrete_cols):
            cols = data.columns
            discrete_cols = []
            for i in range(len(cols)):
                if is_discrete_feat(cols[i]):
                    discrete_cols.append(cols[i])
        if not len(y):
            y = data['label'].to_numpy().reshape(-1, 1)
        else:
            y = np.vstack((y, data['label'].to_numpy().reshape(-1, 1)))
        curr_X = data[data.columns[data.columns != 'label']].to_numpy()
        if not len(X):
            X = curr_X
        else:
            X = np.vstack((X, curr_X))
        M = curr_X.shape[0]
        curr_subjs = np.repeat(subj, M).reshape(-1, 1)
        if not len(subjs):
            subjs = curr_subjs
        else:
            subjs = np.vstack((subjs, curr_subjs))
    return (X, y, subjs, discrete_cols, cols)
def read_feat_data(path):
    '''Read the feature data files in directory `path` into a single pandas DataFrame.'''
    files = glob.glob(os.path.join(path, '*'))
    combined = pd.DataFrame()
    for file in files:
        data = pd.read_csv(file, dtype={'selection_type': 'str'}, index_col=0)
        # file names encode subject and block as '<subject>_<block>.<ext>'
        filename = file.split(os.sep)[-1]
        filename = filename.split('.')[0]
        subj = filename.split('_')[0]
        data['sbj'] = subj
        block = filename.split('_')[1]
        data['block'] = block
        # DataFrame.append was removed in pandas 2.0; concat is the supported equivalent
        combined = pd.concat([combined, data])
    return combined
def read_window_data(main_dir, MODE, WINDOW_SIZE, data_type='train'):
    data_dir = os.path.join(main_dir, 'w_%s_%d' % (MODE, WINDOW_SIZE))
    data_dir = os.path.join(data_dir, data_type)
    # read the windowed split from data_dir
    X, y, subjs, discrete_cols, cols = read_data(data_dir)
    return X, y, subjs, discrete_cols, cols
def normalize_subj_data(X, y, subjs, exclude_idxs):
    X_out, y_out = [], []
    sbjs = np.unique(subjs)
    for sbj in sbjs:
        subj_idxs = np.where(subjs == sbj)[0]
        X_subj, y_subj = X[subj_idxs], y[subj_idxs]
        if X_subj.shape[0] < 1:
            continue
        if len(exclude_idxs):
            # z-score all columns except the excluded ones (e.g. discrete features)
            mask = ~np.isin(np.arange(X.shape[1]), exclude_idxs)
            X_subj[:, mask] = scale(X_subj[:, mask])
        else:
            X_subj = scale(X_subj)
        if not len(X_out):
            X_out = X_subj
            y_out = y_subj
        else:
            X_out = np.vstack((X_out, X_subj))
            y_out = np.vstack((y_out, y_subj))
    return X_out, y_out
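# Illustrative usage sketch (synthetic data): z-score each subject's rows
# independently while leaving feature column 1 unscaled.
def _demo_normalize_subj_data():
    X = np.random.rand(6, 3)
    y = np.array([0, 1, 0, 1, 0, 1]).reshape(-1, 1)
    subjs = np.array(['s1'] * 3 + ['s2'] * 3).reshape(-1, 1)
    X_out, y_out = normalize_subj_data(X, y, subjs, exclude_idxs=[1])
    print(X_out.shape, y_out.shape)  # (6, 3) (6, 1)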
def get_feat_indxs(columns, query_cols):
    # match windowed column names (e.g. 'feat_3') against base feature names
    indxs = []
    for i in range(len(columns)):
        c = columns[i]
        c = c.split('_')
        c.pop()  # drop the trailing window-offset suffix
        c = '_'.join(c)
        if c in query_cols:
            indxs.append(i)
    return indxs
def filter_features(featfilepath, X, headers, count_thresh=1):
    # keep only features selected more than count_thresh times in the importance file;
    # squeeze=True was dropped here as it was removed in pandas 2.0 and the columns
    # are indexed by name below anyway
    feat_counter = pd.read_csv(featfilepath)
    imp_indxs = np.where(feat_counter['num'] > count_thresh)[0]
    important_feats = feat_counter['feat'].values[imp_indxs]
    indxs = get_feat_indxs(headers, important_feats)
    X = X[:, indxs]
    return X