old musi stuff
This commit is contained in:
745 utils.py Normal file
@@ -0,0 +1,745 @@
# Utility functions for working with 3D gaze data.

# Authors: Brendan David-John, Candace Peacock

import glob
import itertools
import json
import math
import os
import sys

import numpy as np
import pandas as pd
from sklearn.preprocessing import scale
from sklearn.utils import resample


subjs_skip = []
discrete_feats = ['sac_det','fix_det']

# Peak saccade velocity cap (deg/s), based on:
# https://pubmed.ncbi.nlm.nih.gov/25713524/ & https://www.sciencedirect.com/science/article/abs/pii/0025556475900759
max_sacc_speed = 800

# https://www.geeksforgeeks.org/python-make-a-list-of-intervals-with-sequential-numbers/
def intervals_extract(iterable):
    iterable = sorted(set(iterable))
    for key, group in itertools.groupby(enumerate(iterable),
                                        lambda t: t[1] - t[0]):
        group = list(group)
        yield [group[0][1], group[-1][1]]

# Find consecutive chunks of True values in bools, and return start and end indices.
# Input: 1D np array
# Output: 2D np array with indices
def intervals_from_bool(bools):
    # get python list of indices
    bool_idx = np.where(bools)[0].tolist()

    # pull out the intervals
    intervals = intervals_extract(bool_idx)

    # convert to flat np array
    intervals = [item for sublist in intervals for item in sublist]

    # unroll it so start and end points fall into 1st/2nd column
    intervals = np.reshape(intervals, (-1, 2))

    return intervals

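# Illustrative sketch (not part of the original module): how the interval
# helpers behave on a small boolean array. The `_demo_` name is hypothetical.
def _demo_intervals_from_bool():
    bools = np.array([False, True, True, False, True, True, True, False])
    # True runs cover indices 1-2 and 4-6, so we expect [[1, 2], [4, 6]].
    print(intervals_from_bool(bools))
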
'''
Function to compute angular gaze velocity using the numerically stable atan2 method.
Inputs: gaze_vectors: Nx3 array with N 3D gaze vectors (x,y,z),
        dt: 1D array with N-1 elements containing the timestep between consecutive gaze samples
Output: 1D array with N elements containing angular gaze velocity in degrees/second
'''
def compute_gaze_velocity(gaze_vectors,dt):
    # pair each vector i with vector i+1
    gaze_vectors_1 = gaze_vectors[0:-1,:]
    gaze_vectors_2 = gaze_vectors[1:,:]

    # numerically stable method: 2*atan2(norm(u-v),norm(u+v)), assuming u and v are already normalized
    gaze_dist = 2*np.arctan2(np.sqrt(np.square(gaze_vectors_1-gaze_vectors_2).sum(axis=1)),np.sqrt(np.square(gaze_vectors_1+gaze_vectors_2).sum(axis=1)))
    gaze_velocity = np.rad2deg(gaze_dist) / dt

    # make nans and infs zero
    np.nan_to_num(gaze_velocity,copy=False,posinf=0.0,neginf=0.0)

    # correct outliers over 800 deg/s by interpolating across good samples
    gaze_velocity_good_idx = (gaze_velocity <= max_sacc_speed).nonzero()[0]
    gaze_velocity = np.interp(range(gaze_velocity.shape[0]),gaze_velocity_good_idx,gaze_velocity[gaze_velocity_good_idx])

    # prepend zero for the first element, so size matches input
    return np.insert(gaze_velocity,0,0.0)

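# Illustrative sketch (assumed usage, not from the original code): build unit
# gaze vectors from a synthetic horizontal sweep and compute their velocity.
def _demo_compute_gaze_velocity():
    t = np.linspace(0.0, 1.0, 100)                      # 100 samples over 1 s
    az = np.deg2rad(10.0 * t)                           # 10 deg/s horizontal sweep
    gaze = np.column_stack((np.sin(az), np.zeros_like(az), np.cos(az)))
    vel = compute_gaze_velocity(gaze, np.diff(t))       # dt has N-1 entries
    print(vel.mean())                                   # roughly 10 deg/s after the leading zero
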
'''
Function to compute angular gaze velocity using the arccos dot product method.
Inputs: gaze_vectors: Nx3 array with N 3D gaze vectors (x,y,z),
        dt: 1D array with N-1 elements containing the timestep between consecutive gaze samples
Output: 1D array with N elements containing angular gaze velocity in degrees/second
'''
def compute_gaze_velocity_naive(gaze_vectors,dt):
    # pair each vector i with vector i+1
    gaze_vectors_1 = gaze_vectors[0:-1,:]
    gaze_vectors_2 = gaze_vectors[1:,:]

    # arccos of dot product method, assuming vectors are normalized
    gaze_dist_naive = np.arccos(np.sum(gaze_vectors_1*gaze_vectors_2, axis=1))
    gaze_velocity_naive = np.rad2deg(gaze_dist_naive) / dt

    # make nans and infs zero (arccos returns nan when rounding pushes the dot product past 1)
    np.nan_to_num(gaze_velocity_naive,copy=False,posinf=0.0,neginf=0.0)

    # correct outliers over 800 deg/s by interpolating across good samples
    gaze_velocity_naive_good_idx = (gaze_velocity_naive <= max_sacc_speed).nonzero()[0]
    gaze_velocity_naive = np.interp(range(gaze_velocity_naive.shape[0]),gaze_velocity_naive_good_idx,gaze_velocity_naive[gaze_velocity_naive_good_idx])

    # prepend zero for the first element, so size matches input
    return np.insert(gaze_velocity_naive,0,0.0)

'''
Function to classify gaze samples as saccade events using velocity thresholding (I-VT).
Inputs: gaze_velocity: 1D array with N elements representing gaze velocity in degrees/second,
        time_stamps: 1D array with N elements representing timestamps in seconds,
        ivt_threshold: scalar in degrees/second; samples above this value are classified as saccade
        min_dur: scalar in seconds; saccades shorter than this are discarded
        max_dur: scalar in seconds; saccades longer than this are discarded
Output: 2 element tuple.
        First element: 1D boolean array with N elements where True indicates a sample is part of a saccade, and False if not
        Second element: Mx2 array, where M is the number of saccade events detected; the first column is the start index, the second the end index
'''
def saccade_classification_ivt(gaze_velocity,time_stamps,ivt_threshold=100.0,min_dur=.012,max_dur=2.0):
    # find samples over threshold
    sac_bool = gaze_velocity > ivt_threshold
    sac_intervals = intervals_from_bool(sac_bool)

    # flag saccades outside the allowed duration range
    sacc_durs = time_stamps[sac_intervals[:,1]] - time_stamps[sac_intervals[:,0]]
    short_sacc_idx = np.where((sacc_durs<min_dur)|(sacc_durs>max_dur))[0]

    # use short_sacc_idx to remove the bad intervals, and set those bad intervals to False in sac_bool
    bad_sac_intervals = sac_intervals[short_sacc_idx,:]
    sac_intervals_clean = np.delete(sac_intervals,short_sacc_idx,0)

    num_bad_intervals = len(bad_sac_intervals)
    for i in range(num_bad_intervals):
        curr_interval = bad_sac_intervals[i,:]
        sac_bool[curr_interval[0]:curr_interval[1]+1] = False

    return (sac_bool,sac_intervals_clean)

'''
Function to classify gaze samples as fixation events using velocity thresholding (I-VT).
Inputs: gaze_velocity: 1D array with N elements representing gaze velocity in degrees/second,
        time_stamps: 1D array with N elements representing timestamps in seconds,
        ivt_threshold: scalar in degrees/second; samples below this value are classified as fixation
        min_dur: scalar in seconds; fixations shorter than this are discarded
        max_dur: scalar in seconds; fixations longer than this are discarded
Output: 2 element tuple.
        First element: 1D boolean array with N elements where True indicates a sample is part of a fixation, and False if not
        Second element: Mx2 array, where M is the number of fixation events detected; the first column is the start index, the second the end index
'''
def fixation_classification_ivt(gaze_velocity,time_stamps,ivt_threshold=20.0,min_dur=0.100,max_dur=2.0):
    # find samples under threshold
    fix_bool = gaze_velocity < ivt_threshold

    fix_intervals = intervals_from_bool(fix_bool)

    # flag fixations outside the allowed duration range
    fix_durs = time_stamps[fix_intervals[:,1]] - time_stamps[fix_intervals[:,0]]
    short_fix_idx = np.where((fix_durs<min_dur) | (fix_durs>max_dur))[0]

    # use short_fix_idx to remove the bad intervals, and set those bad intervals to False in fix_bool
    bad_fix_intervals = fix_intervals[short_fix_idx,:]
    fix_intervals_clean = np.delete(fix_intervals,short_fix_idx,0)

    num_bad_intervals = len(bad_fix_intervals)
    for i in range(num_bad_intervals):
        curr_interval = bad_fix_intervals[i,:]
        fix_bool[curr_interval[0]:curr_interval[1]+1] = False

    return (fix_bool,fix_intervals_clean)

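# Illustrative sketch (assumed usage, synthetic data): run the full I-VT
# pipeline, velocity followed by saccade and fixation classification. The
# ramp parameters below are made up to trigger one clear saccade.
def _demo_ivt_pipeline():
    t = np.arange(0.0, 2.0, 0.01)                      # 200 samples at 100 Hz
    # 10 deg sweep over 0.05 s starting at t=1.0 -> ~200 deg/s during the ramp
    az = np.deg2rad(10.0 * np.clip((t - 1.0) / 0.05, 0.0, 1.0))
    gaze = np.column_stack((np.sin(az), np.zeros_like(az), np.cos(az)))
    vel = compute_gaze_velocity(gaze, np.diff(t))
    sac_bool, sac_intervals = saccade_classification_ivt(vel, t)
    fix_bool, fix_intervals = fixation_classification_ivt(vel, t)
    print(sac_intervals, fix_intervals)                # one saccade, two fixations
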
'''
Compute angular dispersion over an input set of 3D gaze vectors, based on the maximum angular displacement
of a sample in the window from the centroid.
Inputs: gaze_vectors: Nx3 array with N 3D gaze vectors (x,y,z)
Output: Maximum dispersion in degrees
'''
def compute_dispersion(gaze_vectors):
    # get gaze distances in angle from the centroid of the gaze positions
    centroid = np.mean(gaze_vectors,0)
    # re-normalize the centroid so the equal-norm assumption of the atan2 formula below holds
    centroid = centroid / np.linalg.norm(centroid)

    # numerically stable method: 2*atan2(norm(u-v),norm(u+v)), assuming u and v are already normalized
    gaze_dist = np.rad2deg(2*np.arctan2(np.sqrt(np.square(gaze_vectors-centroid).sum(axis=1)),np.sqrt(np.square(gaze_vectors+centroid).sum(axis=1))))

    # make nans and infs zero
    np.nan_to_num(gaze_dist,copy=False,posinf=0.0,neginf=0.0)

    # return maximum displacement
    return np.max(gaze_dist)

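# Illustrative sketch (not from the original code): the dispersion of three
# unit vectors spread one degree apart around the z axis should come out
# near 1 degree, the max displacement from their centroid.
def _demo_compute_dispersion():
    az = np.deg2rad(np.array([-1.0, 0.0, 1.0]))
    gaze = np.column_stack((np.sin(az), np.zeros_like(az), np.cos(az)))
    print(compute_dispersion(gaze))  # roughly 1.0
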
'''
Function to classify gaze samples as fixation events using I-DT, by enforcing a maximum angular dispersion within a time window of minimum duration.
Implementation based on: https://github.com/ecekt/eyegaze/blob/master/gaze.py and https://github.com/ASAPLableni/VR-centred_I-DT_algorithm
Inputs: gaze_vectors: Nx3 array with N 3D gaze vectors (x,y,z)
        time_stamps: 1D array with N elements representing timestamps in seconds,
        sac_bool: 1D boolean array indicating which samples were marked as saccades by I-VT, used to ensure samples do not get more than one label
        min_dur: scalar in seconds; fixations shorter than this are discarded
        max_dur: scalar in seconds; fixations longer than this are discarded
        max_disp: scalar in degrees; maximum spatial dispersion allowed within a time window for a fixation
Output: 2 element tuple.
        First element: 1D boolean array with N elements where True indicates a sample is part of a fixation, and False if not
        Second element: Mx2 array, where M is the number of fixation events detected; the first column is the start index, the second the end index
'''
def fixation_classification_idt(gaze_vectors,time_stamps,sac_bool,min_dur=0.100,max_dur=2.0,max_disp=1.0):
    fix_bool = np.zeros(sac_bool.shape, dtype=bool)
    window_range = [0,0]

    current = 0 # pointer to the current beginning point of the window
    last = 0
    fix_idx = []

    while (current < len(gaze_vectors)):
        t0 = time_stamps[current] # beginning time
        t1 = t0 + min_dur # time after a minimum fixation duration has been observed

        for r in range(current, len(gaze_vectors)):
            if (time_stamps[r] >= t0 and time_stamps[r] <= t1):
                last = r
            elif time_stamps[r] > t1:
                break

        window_range = [current,last]

        # now check the dispersion in this window
        dispersion = compute_dispersion(gaze_vectors[current:last+1,:])

        if (dispersion <= max_disp):
            # grow the window while dispersion stays under the threshold
            while (dispersion <= max_disp and last + 1 < len(gaze_vectors)):
                last += 1
                window_range = [current,last]
                dispersion = compute_dispersion(gaze_vectors[current:last+1,:])

            # dispersion threshold is exceeded: mark a fixation over [current,last]
            fix_bool[current:last+1] = True

            current = last + 1 # move the pointer to a novel window

        else:
            current += 1
            last = current

    # correct with the saccade bool: find points marked as both, change them to False in fix_bool
    conflict_idx = np.where(fix_bool & sac_bool)[0]
    fix_bool[conflict_idx] = False

    fix_intervals = intervals_from_bool(fix_bool)

    # flag fixations shorter than the minimum duration or longer than the maximum
    fix_durs = time_stamps[fix_intervals[:,1]] - time_stamps[fix_intervals[:,0]]
    short_fix_idx = np.where((fix_durs<min_dur) | (fix_durs>max_dur))[0]

    # use short_fix_idx to remove the bad intervals, and set those bad intervals to False in fix_bool
    bad_fix_intervals = fix_intervals[short_fix_idx,:]
    fix_intervals_clean = np.delete(fix_intervals,short_fix_idx,0)

    num_bad_intervals = len(bad_fix_intervals)
    for i in range(num_bad_intervals):
        curr_interval = bad_fix_intervals[i,:]
        fix_bool[curr_interval[0]:curr_interval[1]+1] = False

    return (fix_bool,fix_intervals_clean)

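# Illustrative sketch (assumed usage): I-DT on a perfectly still synthetic
# gaze with no pre-labeled saccade samples; the whole recording should come
# back as a single fixation.
def _demo_fixation_idt():
    t = np.arange(0.0, 1.0, 0.01)
    gaze = np.tile(np.array([0.0, 0.0, 1.0]), (len(t), 1))
    sac_bool = np.zeros(len(t), dtype=bool)
    fix_bool, fix_intervals = fixation_classification_idt(gaze, t, sac_bool)
    print(fix_intervals)  # expect [[0 99]]
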
'''
Summary: Compute a continuous time series of feature values by averaging over events in the last N seconds.
Inputs: features: 2D array with rows corresponding to events, and columns corresponding to feature values
        fixation_times: 1D array containing the timestamps corresponding to each row of features
        time_stamps: 1D array representing every sample in the time series, containing times in seconds
        N: scalar indicating how many seconds into the past events are considered when computing average feature values
Output: 2D array, with rows corresponding to each sample in time_stamps and a column for each feature signal.
'''
def compute_feature_signal_time_avg(features,fixation_times,time_stamps,N):
    num_features = features.shape[1]
    num_rows = len(time_stamps)
    feature_signal = np.zeros((num_rows,num_features),dtype=float)

    # loop over each time stamp, grab the events that fall in [curr_time-N, curr_time], average, and add to the row
    for i in range(num_rows):
        curr_time = time_stamps[i]

        # get events up to N seconds prior to curr_time
        curr_events = features[(fixation_times>=curr_time-N) & (fixation_times<=curr_time),:]

        curr_row = np.zeros((1,num_features),dtype=float)
        if len(curr_events) > 0:
            curr_row = np.mean(curr_events,0)
        feature_signal[i,:] = curr_row

    return feature_signal

'''
Summary: Compute a continuous time series of feature values by averaging over the past N events.
Inputs: features: 2D array with rows corresponding to events, and columns corresponding to feature values
        fixation_times: 1D array containing the timestamps corresponding to each row of features
        time_stamps: 1D array representing every sample in the time series, containing times in seconds
        N: scalar indicating the number of past events to consider when computing average feature values
Output: 2D array, with rows corresponding to each sample in time_stamps and a column for each feature signal.
'''
def compute_feature_signal_event_avg(features,fixation_times,time_stamps,N):
    num_features = features.shape[1]
    num_rows = len(time_stamps)
    feature_signal = np.zeros((num_rows,num_features),dtype=float)

    # loop over each time stamp, grab the past N events, average, and add to the row
    for i in range(num_rows):
        curr_time = time_stamps[i]

        # get events prior to curr_time
        curr_events = features[(fixation_times<=curr_time),:]

        # grab the last N of them if possible
        prior_events = curr_events[-N:,:]

        curr_row = np.zeros((1,num_features),dtype=float)
        if len(prior_events) > 0:
            curr_row = np.mean(prior_events,0)
        feature_signal[i,:] = curr_row

    return feature_signal

# Wrapper function to dispatch to the feature signal methods.
def compute_feature_signal(method,features,event_times,time_stamps,N):
    if method == 'time_avg':
        return compute_feature_signal_time_avg(features,event_times,time_stamps,N)
    elif method == 'event_avg':
        return compute_feature_signal_event_avg(features,event_times,time_stamps,N)
    elif method == 'event_discrete':
        # note: compute_feature_signal_event_discrete is not defined in this file
        return compute_feature_signal_event_discrete(features,event_times,time_stamps,N)
    else:
        return None

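# Illustrative sketch (assumed usage): turn two fixation-level feature rows
# into a per-sample signal by averaging events from the last 1.0 s. All
# values here are synthetic.
def _demo_compute_feature_signal():
    features = np.array([[1.0, 10.0], [3.0, 30.0]])   # two events, two features
    event_times = np.array([0.2, 0.7])
    time_stamps = np.linspace(0.0, 1.0, 11)
    signal = compute_feature_signal('time_avg', features, event_times, time_stamps, 1.0)
    print(signal.shape)  # (11, 2); rows before 0.2 s stay zero
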
# Returns a three-column np array (x,y,z) containing head-corrected gaze vectors.
def head_correction(df,param):
    gaze_vectors = df[['combined.gazeDirection.x','combined.gazeDirection.y','combined.gazeDirection.z']].values

    num_rows = np.size(gaze_vectors,0)
    transformed_gaze_vectors = np.zeros(gaze_vectors.shape)

    if param.transform_type == 'coordinate_frame':
        # transform vectors
        head_pos = df[['head.pos.x','head.pos.y','head.pos.z']].values
        head_up = df[['head.up.x','head.up.y','head.up.z']].values
        head_right = df[['head.right.x','head.right.y','head.right.z']].values
        head_dir = df[['head.dir.x','head.dir.y','head.dir.z']].values

        for i in range(num_rows):
            # get current gaze vector and transforms
            gaze_vector = gaze_vectors[i,:]
            curr_head_up = head_up[i,:]
            curr_head_right = head_right[i,:]
            curr_head_dir = head_dir[i,:]

            '''Transformation matrix of the form:
               [ R.x, R.y, R.z, 0,
                 U.x, U.y, U.z, 0,
                 D.x, D.y, D.z, 0,
                 0,   0,   0,   1 ]
            '''
            # make transformation matrix
            transform_mat = np.vstack((np.append(curr_head_right,0),
                                       np.append(curr_head_up,0),
                                       np.append(curr_head_dir,0),
                                       np.array([0.0, 0.0, 0.0, 1.0])))

            # add homogeneous coordinate to vector
            gaze_vector = np.append(gaze_vector,1.0)

            # right-multiply row vector with transpose of the matrix as defined above
            transformed_gaze_vector = np.matmul(gaze_vector,transform_mat.T)

            # place transformed gaze_vector back into gaze_vectors w/o homogeneous coordinate
            transformed_gaze_vectors[i,:] = transformed_gaze_vector[0:3]

        # normalize all rows again to account for any rounding errors
        row_norm_vals = np.sqrt(np.square(transformed_gaze_vectors).sum(axis=1))
        transformed_gaze_vectors = transformed_gaze_vectors/row_norm_vals[:,None]

    elif param.transform_type == 'quaternions':
        q_x = df['RotationX'].values
        q_y = df['RotationY'].values
        q_z = df['RotationZ'].values
        q_w = df['RotationW'].values

        for i in range(num_rows):
            # get current gaze vector and transforms
            gaze_vector = gaze_vectors[i,:]

            curr_q_x = q_x[i]
            curr_q_y = q_y[i]
            curr_q_z = q_z[i]
            curr_q_w = q_w[i]

            # first column
            one_one = np.square(curr_q_w) + np.square(curr_q_x) - np.square(curr_q_y) - np.square(curr_q_z)
            two_one = 2 * (curr_q_w*curr_q_z + curr_q_x*curr_q_y)
            three_one = 2 * (curr_q_x*curr_q_z - curr_q_w*curr_q_y)

            # second column
            one_two = 2 * (curr_q_x*curr_q_y - curr_q_w*curr_q_z)
            two_two = np.square(curr_q_w) - np.square(curr_q_x) + np.square(curr_q_y) - np.square(curr_q_z)
            three_two = 2 * (curr_q_w*curr_q_x - curr_q_y*curr_q_z)

            # third column
            one_three = 2 * (curr_q_w*curr_q_y - curr_q_x*curr_q_z)
            two_three = 2 * (curr_q_y*curr_q_z - curr_q_w*curr_q_x)
            three_three = np.square(curr_q_w) - np.square(curr_q_x) - np.square(curr_q_y) + np.square(curr_q_z)

            # compute rotation matrix as per Diaz et al.
            rot_mat = np.array([[one_one, two_one, three_one],
                                [one_two, two_two, three_two],
                                [one_three, two_three, three_three]])

            transformed_gaze_vectors[i,:] = np.matmul(rot_mat,gaze_vector)

        # normalize all rows again to account for any rounding errors
        row_norm_vals = np.sqrt(np.square(transformed_gaze_vectors).sum(axis=1))
        transformed_gaze_vectors = transformed_gaze_vectors/row_norm_vals[:,None]

    return transformed_gaze_vectors

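# Illustrative sketch (assumed usage): run the quaternion branch with an
# identity rotation, which should return the input gaze vector unchanged.
# The Params class is a hypothetical stand-in for the project's parameter object.
def _demo_head_correction():
    class Params:
        transform_type = 'quaternions'
    df = pd.DataFrame({'combined.gazeDirection.x': [0.0], 'combined.gazeDirection.y': [0.0],
                       'combined.gazeDirection.z': [1.0], 'RotationX': [0.0], 'RotationY': [0.0],
                       'RotationZ': [0.0], 'RotationW': [1.0]})
    print(head_correction(df, Params()))  # expect [[0., 0., 1.]]
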
def window_signal_overlap(X, y, window_size=10):
    M, N = X.shape
    windows = np.array([])
    lbls = []

    FP_indxs = np.where(y==1)[0]
    TP_indxs = np.where(y==-1)[0]
    combined = np.hstack((FP_indxs, TP_indxs))

    for i in combined:
        window = np.array([])
        starts = range(max(0, i-window_size+1), i+1)
        ends = range(i+1, min(M, i+window_size))

        for (s, e) in zip(starts, ends):
            window = X[s:e]
            curr_size = window.shape[0]
            window = window.reshape(-1, curr_size*N)

            # window is clipped from front or back, add zero padding
            if curr_size < window_size:
                padding_size = window_size*N - curr_size*N
                padding = np.zeros(shape=(1,padding_size))
                if i-window_size < 0:
                    window = np.hstack((padding,window))
                else:
                    window = np.hstack((window, padding))

            if windows.shape[0] == 0:
                windows = window
            else:
                windows = np.vstack((windows, window))
            lbl = y[i]
            lbls.append(lbl)

    lbls = np.array(lbls, dtype=int).reshape(-1,1)
    return (windows, lbls)

def window_signal_center(X, y, window_size=10):
    M, N = X.shape
    windows = np.array([])
    lbls = []

    FP_indxs = np.where(y==1)[0]
    TP_indxs = np.where(y==-1)[0]
    combined = np.hstack((FP_indxs, TP_indxs))

    for i in combined:
        start = max(0, i-int(window_size/2))
        end = min(M, i+int(window_size/2))

        window = X[start:end, :]
        curr_size = window.shape[0]
        window = window.reshape(-1, curr_size*N)

        # window is clipped from front or back, add zero padding
        if curr_size < window_size:
            padding_size = window_size*N - curr_size*N
            padding = np.zeros(shape=(1,padding_size))
            if i-int(window_size/2) < 0:
                window = np.hstack((padding,window))
            else:
                window = np.hstack((window, padding))

        if windows.shape[0] == 0:
            windows = window
        else:
            windows = np.vstack((windows, window))
        lbl = y[i]
        lbls.append(lbl)

    lbls = np.array(lbls, dtype=int).reshape(-1,1)
    return (windows, lbls)

def window_signal_start(X, y, window_size=10):
    M, N = X.shape
    windows = np.array([])
    lbls = []

    FP_indxs = np.where(y==1)[0]
    TP_indxs = np.where(y==-1)[0]
    combined = np.hstack((FP_indxs, TP_indxs))

    for i in combined:
        start = i
        end = min(M, i+window_size)

        window = X[start:end, :]
        curr_size = window.shape[0]
        window = window.reshape(-1, curr_size*N)

        # window is clipped from the back, add zero padding
        if curr_size < window_size:
            padding_size = window_size*N - curr_size*N
            padding = np.zeros(shape=(1,padding_size))
            window = np.hstack((window, padding))

        if windows.shape[0] == 0:
            windows = window
        else:
            windows = np.vstack((windows, window))
        lbl = y[i]
        lbls.append(lbl)

    lbls = np.array(lbls, dtype=int).reshape(-1,1)
    return (windows, lbls)

def window_signal(X, y, mode='center', window_size=10):
    if mode == 'center':
        return window_signal_center(X, y, window_size=window_size)
    elif mode == 'start':
        return window_signal_start(X, y, window_size=window_size)
    elif mode == 'overlap':
        return window_signal_overlap(X, y, window_size=window_size)
    else:
        return None

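# Illustrative sketch (assumed usage): window a small labeled signal. Only
# rows labeled 1 or -1 produce windows; everything here is synthetic.
def _demo_window_signal():
    rng = np.random.default_rng(0)
    X = rng.standard_normal((50, 3))
    y = np.zeros((50, 1), dtype=int)
    y[10] = 1
    y[30] = -1
    windows, lbls = window_signal(X, y, mode='center', window_size=10)
    print(windows.shape, lbls.ravel())  # (2, 30): two windows of 10*3 features
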
def get_header(features, window_size):
    header = ""
    for i in range(window_size):
        h = ['%s_%d'%(f, i) for f in features]
        h = ",".join(h)
        if not len(header):
            header = h
            continue
        header = '%s,%s'%(header, h)
    header = '%s,%s'%(header, 'label')
    return header

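# Illustrative example: get_header(['vel', 'disp'], 2)
# returns 'vel_0,disp_0,vel_1,disp_1,label'.
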
def is_discrete_feat(feat):
    isdisc = False
    for f in discrete_feats:
        if f in feat:
            isdisc = True
            break
    return isdisc

def read_data(path):
    files = glob.glob(os.path.join(path, '*'))
    y = []
    X = []
    subjs = []
    discrete_cols = []
    cols = []

    for file in files:
        # subject ID is the filename up to the first '.' or '_'
        subj = file.split(os.sep)[-1]
        subj = subj.split('.')[0]
        subj = subj.split('_')[0]

        if subj in subjs_skip:
            continue

        data = pd.read_csv(file)

        if not len(discrete_cols):
            cols = data.columns
            discrete_cols = []
            for i in range(len(cols)):
                if is_discrete_feat(cols[i]):
                    discrete_cols.append(cols[i])

        if not len(y):
            y = data['label'].to_numpy().reshape(-1, 1)
        else:
            y = np.vstack((y, data['label'].to_numpy().reshape(-1,1)))

        curr_X = data[data.columns[data.columns != 'label']].to_numpy()

        if not len(X):
            X = curr_X
        else:
            X = np.vstack((X, curr_X))

        M = curr_X.shape[0]
        curr_subjs = np.repeat(subj, M).reshape(-1, 1)
        if not len(subjs):
            subjs = curr_subjs
        else:
            subjs = np.vstack((subjs, curr_subjs))

    return (X, y, subjs, discrete_cols, cols)

def read_feat_data(path):
    '''Reads feature data in directory `path` and stores it in a pandas DataFrame.'''
    files = glob.glob(os.path.join(path, '*'))

    combined = pd.DataFrame()
    for file in files:
        data = pd.read_csv(file, dtype={'selection_type': 'str'}, index_col=0)

        # filenames are expected to look like '<subject>_<block>.csv'
        filename = file.split(os.sep)[-1]
        filename = filename.split('.')[0]
        subj = filename.split('_')[0]
        data['sbj'] = subj
        block = filename.split('_')[1]
        data['block'] = block

        combined = pd.concat([combined, data])

    return combined

def read_window_data(main_dir, MODE, WINDOW_SIZE, data_type='train'):
    data_dir = os.path.join(main_dir, 'w_%s_%d'%(MODE, WINDOW_SIZE))
    data_dir = os.path.join(data_dir, data_type)
    X, y, subjs, discrete_cols, cols = read_data(data_dir)
    return X, y, subjs, discrete_cols, cols

def normalize_subj_data(X, y, subjs, exclude_idxs):
    X_out, y_out = [], []
    sbjs = np.unique(subjs)
    for sbj in sbjs:
        subj_idxs = np.where(subjs==sbj)[0]
        X_subj, y_subj = X[subj_idxs], y[subj_idxs]

        if X_subj.shape[0] < 1:
            continue

        # z-score all columns except the excluded (e.g. discrete) ones
        if len(exclude_idxs):
            mask = ~np.isin(np.arange(X.shape[1]), exclude_idxs)
            X_subj[:,mask] = scale(X_subj[:,mask])
        else:
            X_subj = scale(X_subj)

        if not len(X_out):
            X_out = X_subj
            y_out = y_subj
        else:
            X_out = np.vstack((X_out, X_subj))
            y_out = np.vstack((y_out, y_subj))
    return X_out, y_out

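# Illustrative sketch (synthetic data): per-subject z-scoring with no
# excluded columns; each subject block is scaled independently.
def _demo_normalize_subj_data():
    X = np.arange(12, dtype=float).reshape(6, 2)
    y = np.ones((6, 1), dtype=int)
    subjs = np.array(['s1']*3 + ['s2']*3).reshape(-1, 1)
    X_norm, y_norm = normalize_subj_data(X, y, subjs, [])
    print(X_norm.mean(axis=0))  # near zero within each subject block
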
def get_feat_indxs(columns, query_cols):
    indxs = []
    for i in range(len(columns)):
        # strip the trailing '_<window index>' suffix to recover the base feature name
        c = columns[i]
        c = c.split('_')
        c.pop()
        c = '_'.join(c)
        if c in query_cols:
            indxs.append(i)
    return indxs

def filter_features(featfilepath, X, headers, count_thresh=1):
    feat_counter = pd.read_csv(featfilepath)

    # keep only features selected more than count_thresh times
    imp_indxs = np.where(feat_counter['num']>count_thresh)[0]
    important_feats = feat_counter['feat'].values[imp_indxs]
    indxs = get_feat_indxs(headers, important_feats)
    X = X[:,indxs]
    return X