746 lines
28 KiB
Python
746 lines
28 KiB
Python
#Utility functions for working with 3D gaze data.
|
|
#Authors: Brendan David-John, Candace Peacock
|
|
import itertools
|
|
import numpy as np
|
|
import pandas as pd
|
|
import os
|
|
import sys
|
|
import math
|
|
import pandas as pd
|
|
import numpy as np
|
|
import json
|
|
import glob # This was missing
|
|
from sklearn.preprocessing import scale
|
|
from sklearn.utils import resample
|
|
|
|
|
|
subjs_skip = []
|
|
discrete_feats = ['sac_det','fix_det']
|
|
|
|
#https://pubmed.ncbi.nlm.nih.gov/25713524/ & https://www.sciencedirect.com/science/article/abs/pii/0025556475900759
|
|
max_sacc_speed = 800
|
|
|
|
#https://www.geeksforgeeks.org/python-make-a-list-of-intervals-with-sequential-numbers/
|
|
def intervals_extract(iterable):
|
|
iterable = sorted(set(iterable))
|
|
for key, group in itertools.groupby(enumerate(iterable),
|
|
lambda t: t[1] - t[0]):
|
|
group = list(group)
|
|
yield [group[0][1], group[-1][1]]
|
|
|
|
#find consecutive chunks of true values in bools, and return start and end indicies
|
|
#input: 1d np array
|
|
#output: 2d np array with indices
|
|
def intervals_from_bool(bools):
|
|
#get python list of indicies
|
|
bool_idx = np.where(bools == True)[0].tolist()
|
|
|
|
#pull out the intervals
|
|
intervals = intervals_extract(bool_idx)
|
|
|
|
#convert to flat np array
|
|
intervals = [item for sublist in intervals for item in sublist]
|
|
|
|
#unroll it so start and end points fall into 1st/2nd column
|
|
intervals = np.reshape(intervals, (-1, 2))
|
|
|
|
return intervals
|
|
|
|
'''
|
|
Function to compute angular gaze velocity using the a numerically stable atan2 method
|
|
Inputs: gaze_vectors: Nx3 array with N 3D gaze vectors (x,y,z),
|
|
dt: 1D array with N elements containing timestep for each gaze sample
|
|
Output: 1D array with N elements containing angular gaze velocity in degrees
|
|
'''
|
|
def compute_gaze_velocity(gaze_vectors,dt):
|
|
#get array of i+1 vectors
|
|
gaze_vectors_1 = gaze_vectors[0:-1,:]
|
|
gaze_vectors_2 = gaze_vectors[1:,:]
|
|
|
|
#numerically stable method: atan2(norm(u-v),norm(u+v)) assuming u and v are already normalized
|
|
gaze_dist = 2*np.arctan2(np.sqrt(np.square(gaze_vectors_1-gaze_vectors_2).sum(axis=1)),np.sqrt(np.square(gaze_vectors_1+gaze_vectors_2).sum(axis=1))) #2*np.arctan2(mag(u-v),mag(u+v))
|
|
gaze_velocity = np.rad2deg(gaze_dist) / dt
|
|
|
|
#make nans and infs zero
|
|
np.nan_to_num(gaze_velocity,copy=False,posinf=0.0,neginf=0.0)
|
|
|
|
#correct outliers over 800 deg/s by interpolating across good samples
|
|
gaze_velocity_good_idx = (gaze_velocity <= max_sacc_speed).nonzero()[0]
|
|
gaze_velocity = np.interp(range(gaze_velocity.shape[0]),gaze_velocity_good_idx,gaze_velocity[gaze_velocity_good_idx])
|
|
|
|
#append zero for first element, so size matches input
|
|
return np.insert(gaze_velocity,0,0.0)
|
|
|
|
'''
|
|
Function to compute angular gaze velocity using the arccos dot method
|
|
Inputs: gaze_vectors: Nx3 array with N 3D gaze vectors (x,y,z),
|
|
dt :1D array with N elements containing timestep for each gaze sample
|
|
Output: 1D array with N elements containing angular gaze velocity in degrees
|
|
'''
|
|
def compute_gaze_velocity_naive(gaze_vectors,dt):
|
|
#get array of i+1 vectors
|
|
gaze_vectors_1 = gaze_vectors[0:-1,:]
|
|
gaze_vectors_2 = gaze_vectors[1:,:]
|
|
|
|
#arc cos of dot product method, assuming vectors are normalized
|
|
gaze_dist_naive = np.arccos(np.sum(gaze_vectors_1*gaze_vectors_2, axis=1))
|
|
gaze_velocity_naive = np.rad2deg(gaze_dist_naive) / dt
|
|
|
|
#make nans and infs zero
|
|
np.nan_to_num(gaze_velocity_naive,copy=False,posinf=0.0,neginf=0.0)
|
|
|
|
#correct outliers over 800 deg/s by interpolating across good samples
|
|
gaze_velocity_naive_good_idx = (gaze_velocity_naive <= max_sacc_speed).nonzero()[0]
|
|
gaze_velocity_naive = np.interp(range(gaze_velocity_naive.shape[0]),gaze_velocity_naive_good_idx,gaze_velocity_naive[gaze_velocity_naive_good_idx])
|
|
|
|
#append zero for first element, so size matches input
|
|
return np.insert(gaze_velocity_naive,0,0.0)
|
|
|
|
'''
|
|
Function to classify gaze samples as saccade events using velocity thresholding.
|
|
Inputs: gaze_velocity: 1D array with N elements representing gaze velocity in degrees,
|
|
time_stamps: 1D array with N elements representing timestamps in seconds,
|
|
ivt_threshold: scalar in degrees, saccade samples are classified above this value
|
|
min_dur: scalar in seconds, saccades shorter than this are discarded
|
|
Output: 2 element tuple.
|
|
First element: 1D boolean array with N elements where True indiciates a sample is part of saccade, and False if not
|
|
Second element: 2D array that is Mx2, where M is the number of saccade events detected and the first column is start index, second end index
|
|
'''
|
|
def saccade_classification_ivt(gaze_velocity,time_stamps,ivt_threshold=100.0,min_dur=.012,max_dur=2.0):
|
|
#find samples over threshold
|
|
sac_bool = gaze_velocity > ivt_threshold
|
|
sac_intervals = intervals_from_bool(sac_bool)
|
|
|
|
#correct saccades shorter than minimum duration
|
|
sacc_durs = time_stamps[sac_intervals[:,1]] - time_stamps[sac_intervals[:,0]]
|
|
short_sacc_idx = np.where((sacc_durs<min_dur)|(sacc_durs>max_dur))[0]
|
|
|
|
#use short_sacc_idx to remove the bad intervals, and set those bad intervals to False in sac_bool
|
|
bad_sac_intervals = sac_intervals[short_sacc_idx,:]
|
|
|
|
sac_intervals_clean = np.delete(sac_intervals,short_sacc_idx,0)
|
|
|
|
num_bad_intervals = len(bad_sac_intervals)
|
|
for i in range(num_bad_intervals):
|
|
curr_interval = bad_sac_intervals[i,:]
|
|
sac_bool[curr_interval[0]:curr_interval[1]+1] = False
|
|
|
|
return (sac_bool,sac_intervals_clean)
|
|
|
|
'''
|
|
Function to classify gaze samples as fixation events using velocity thresholding.
|
|
Inputs: gaze_velocity: 1D array with N elements representing gaze velocity in degrees,
|
|
time_stamps: 1D array with N elements representing timestamps in seconds,
|
|
ivt_threshold: scalar in degrees, fixation samples are classified below this value
|
|
min_dur: scalar in seconds, fixations shorter than this are discarded
|
|
max_dur: scalar in secodns, fixations longer than this are discarded
|
|
Output: 2 element tuple.
|
|
First element: 1D boolean array with N elements where True indiciates a sample is part of saccade, and False if not
|
|
Second element: 2D array that is Mx2, where M is the number of saccade events detected and the first column is start index, second end index
|
|
'''
|
|
def fixation_classification_ivt(gaze_velocity,time_stamps,ivt_threshold=20.0,min_dur=0.100,max_dur=2.0):
|
|
#find samples over threshold
|
|
fix_bool = gaze_velocity < ivt_threshold
|
|
|
|
fix_intervals = intervals_from_bool(fix_bool)
|
|
|
|
#correct fixation shorter than minimum duration
|
|
fix_durs = time_stamps[fix_intervals[:,1]] - time_stamps[fix_intervals[:,0]]
|
|
short_fix_idx = np.where((fix_durs<min_dur) | (fix_durs>max_dur))[0]
|
|
#short_fix_idx = np.where(fix_durs<min_dur)[0]
|
|
|
|
#use short_fix to remove the bad intervals, and set those bad intervals to False in fix_bool
|
|
bad_fix_intervals = fix_intervals[short_fix_idx,:]
|
|
fix_intervals_clean = np.delete(fix_intervals,short_fix_idx,0)
|
|
|
|
num_bad_intervals = len(bad_fix_intervals)
|
|
for i in range(num_bad_intervals):
|
|
curr_interval = bad_fix_intervals[i,:]
|
|
fix_bool[curr_interval[0]:curr_interval[1]+1] = False
|
|
|
|
return (fix_bool,fix_intervals_clean)
|
|
'''
|
|
Compute angular dispersion over input set of 3D gaze vectors, computed based on max angular displacement
|
|
of a sample in window from the centroid.
|
|
Inputs: gaze_vectors: Nx3 array with N 3D gaze vectors (x,y,z)
|
|
Outputs: Maximum dispersion in degrees
|
|
'''
|
|
def compute_dispersion(gaze_vectors):
|
|
#get gaze_distances in angle from centroid of gaze positions
|
|
centroid = np.mean(gaze_vectors,0)
|
|
|
|
#numerically stable method: atan2(norm(u-v),norm(u+v)) assuming u and v are already normalized
|
|
gaze_dist = np.rad2deg(2*np.arctan2(np.sqrt(np.square(gaze_vectors-centroid)).sum(axis=1),np.sqrt(np.square(gaze_vectors+centroid)).sum(axis=1))) #2*np.arctan2(mag(u-v),mag(u+v))
|
|
|
|
# gaze_dist = 2*np.arctan2(np.sqrt(np.square(gaze_vectors-gaze_vectors).sum(axis=1)),np.sqrt(np.square(gaze_vectors+gaze_vectors).sum(axis=1))) #2*np.arctan2(mag(u-v),mag(u+v))
|
|
#make nans and infs zero
|
|
# print(gaze_dist)
|
|
np.nan_to_num(gaze_dist,copy=False,posinf=0.0,neginf=0.0)
|
|
|
|
|
|
#return maximum
|
|
return np.max(gaze_dist)
|
|
|
|
'''
|
|
Function to classify gaze samples as fixation events using I-DT by enforcing a maximum angular dispersion within a time window of min duration
|
|
Implementation based on: https://github.com/ecekt/eyegaze/blob/master/gaze.py and https://github.com/ASAPLableni/VR-centred_I-DT_algorithm
|
|
Inputs: gaze_vectors: Nx3 array with N 3D gaze vectors (x,y,z)
|
|
time_stamps: 1D array with N elements representing timestamps in seconds,
|
|
sac_bool: 1D boolean array indicating which samples were marked as saccades from I-VT, used to ensure samples do not have more than one label
|
|
min_dur: scalar in seconds, fixations shorter than this are discarded
|
|
max_dur: scalar in secodns, fixations longer than this are discarded
|
|
max_disp: scalar in degrees, maximum spatial dispersion allowed within a time window for a fixation
|
|
window_size: how big of window to bin samples within when classifying dispersion
|
|
Output: 2 element tuple.
|
|
First element: 1D boolean array with N elements where True indiciates a sample is part of saccade, and False if not
|
|
Second element: 2D array that is Mx2, where M is the number of saccade events detected and the first column is start index, second end index
|
|
'''
|
|
def fixation_classification_idt(gaze_vectors,time_stamps,sac_bool,min_dur=0.100,max_dur=2.0,max_disp=1.0):
|
|
fix_bool = np.zeros(sac_bool.shape, dtype=bool)
|
|
window_range = [0,0]
|
|
|
|
current = 0 #pointer to represent the current beginning point of the window
|
|
last = 0
|
|
fix_idx = []
|
|
|
|
while (current < len(gaze_vectors)):
|
|
t0 = time_stamps[current] #beginning time
|
|
t1 = t0 + min_dur #time after a min. fix. threshold has been observed
|
|
|
|
for r in range(current, len(gaze_vectors)):
|
|
if(time_stamps[r]>= t0 and time_stamps[r]<= t1):
|
|
last = r
|
|
elif time_stamps[r] > t1:
|
|
break
|
|
|
|
window_range = [current,last]
|
|
|
|
#now check the dispersion in this window
|
|
dispersion = compute_dispersion(gaze_vectors[current:last+1,:])
|
|
|
|
if (dispersion <= max_disp):
|
|
#add new points
|
|
while(dispersion <= max_disp and last + 1 < len(gaze_vectors)):
|
|
last += 1
|
|
window_range = [current,last]
|
|
#print current, last, "*"
|
|
#print "*"
|
|
dispersion = compute_dispersion(gaze_vectors[current:last+1,:])
|
|
|
|
#dispersion threshold is exceeded
|
|
#fixation at the centroid [current,last]
|
|
fix_bool[current:last+1] = True
|
|
|
|
current = last + 1 #this will move the pointer to a novel window
|
|
|
|
else:
|
|
current += 1
|
|
last = current
|
|
|
|
#correct with saccade bool, find points marked as both, change to False in fix_bool
|
|
conflict_idx = np.where((fix_bool == True) & (sac_bool == True))[0]
|
|
fix_bool[conflict_idx] = False
|
|
|
|
fix_intervals = intervals_from_bool(fix_bool)
|
|
|
|
#correct fixation shorter than minimum duration or longer than max
|
|
fix_durs = time_stamps[fix_intervals[:,1]] - time_stamps[fix_intervals[:,0]]
|
|
short_fix_idx = np.where((fix_durs<min_dur) | (fix_durs>max_dur))[0]
|
|
|
|
#use short_fix to remove the bad intervals, and set those bad intervals to False in fix_bool
|
|
bad_fix_intervals = fix_intervals[short_fix_idx,:]
|
|
fix_intervals_clean = np.delete(fix_intervals,short_fix_idx,0)
|
|
|
|
num_bad_intervals = len(bad_fix_intervals)
|
|
for i in range(num_bad_intervals):
|
|
curr_interval = bad_fix_intervals[i,:]
|
|
fix_bool[curr_interval[0]:curr_interval[1]+1] = False
|
|
|
|
return (fix_bool,fix_intervals_clean)
|
|
|
|
'''
|
|
Summary: Compute a continous time series for feature values by averaging over events in the last N seconds
|
|
Inputs: features: 2D array with rows corresponding to events, and columns corresponding to feature values
|
|
fixation_times: 1D array containing the timestamps corresponding to each row of fixation_times
|
|
time_stamps: 1D array representing every sample in the time series, containing times in seconds
|
|
N: Scalar indicating the number of seconds to consider past events within for computing average feature values
|
|
Output: 2D array, with rows corresponding to each sample in time_stamps and a column for each feature signal.
|
|
'''
|
|
def compute_feature_signal_time_avg(features,fixation_times,time_stamps,N):
|
|
num_features = features.shape[1]
|
|
num_rows = len(time_stamps)
|
|
feature_signal = np.zeros((num_rows,num_features),dtype=float)
|
|
|
|
#loop over each time stamp, grab which elements have time greater than current time -2 but less than current time, average, add to row
|
|
for i in range(num_rows):
|
|
curr_time = time_stamps[i]
|
|
|
|
#get events N seconds prior to curr_time
|
|
curr_events = features[(fixation_times>=curr_time-N) & (fixation_times<=curr_time),:]
|
|
|
|
curr_row = np.zeros((1,num_features),dtype=float)
|
|
if len(curr_events) >0:
|
|
curr_row = np.mean(curr_events,0)
|
|
feature_signal[i,:] = curr_row
|
|
|
|
return feature_signal
|
|
|
|
'''
|
|
Summary: Compute a continous time series for feature values by averaging over the past N events
|
|
Inputs: features: 2D array with rows corresponding to events, and columns corresponding to feature values
|
|
fixation_times: 1D array containing the timestamps corresponding to each row of fixation_times
|
|
time_stamps: 1D array representing every sample in the time series, containing times in seconds
|
|
N: Scalar indicating the number of past events to consider when computing average feature values
|
|
Output: 2D array, with rows corresponding to each sample in time_stamps and a column for each feature signal.
|
|
'''
|
|
def compute_feature_signal_event_avg(features,fixation_times,time_stamps,N):
|
|
num_features = features.shape[1]
|
|
num_rows = len(time_stamps)
|
|
feature_signal = np.zeros((num_rows,num_features),dtype=float)
|
|
|
|
#loop over each time stamp, grab past N elements, average, add to row
|
|
for i in range(num_rows):
|
|
curr_time = time_stamps[i]
|
|
|
|
#get events prior to curr_time
|
|
curr_events = features[(fixation_times<=curr_time),:]
|
|
|
|
#grab last N of them if possible
|
|
prior_events = curr_events[-N:,:]
|
|
|
|
curr_row = np.zeros((1,num_features),dtype=float)
|
|
if len(prior_events) >0:
|
|
curr_row = np.mean(prior_events,0)
|
|
feature_signal[i,:] = curr_row
|
|
|
|
return feature_signal
|
|
|
|
#wrapper function to call feature signal methods
|
|
def compute_feature_signal(method,features,event_times,time_stamps,N):
|
|
if method == 'time_avg':
|
|
return compute_feature_signal_time_avg(features,event_times,time_stamps,N)
|
|
elif method == 'event_avg':
|
|
return compute_feature_signal_event_avg(features,event_times,time_stamps,N)
|
|
elif method == 'event_discrete':
|
|
return compute_feature_signal_event_discrete(features,event_times,time_stamps,N)
|
|
else:
|
|
return None
|
|
|
|
#returns three column np array (x,y,z) containing head corrected gaze vectors
|
|
def head_correction(df,param):
|
|
gaze_vectors = df[['combined.gazeDirection.x','combined.gazeDirection.y','combined.gazeDirection.z']].values
|
|
|
|
num_rows = np.size(gaze_vectors,0)
|
|
transformed_gaze_vectors = np.zeros(gaze_vectors.shape)
|
|
|
|
if param.transform_type == 'coordinate_frame':
|
|
#transform vectors
|
|
head_pos = df[['head.pos.x','head.pos.y','head.pos.z']].values
|
|
head_up = df[['head.right.x','head.right.y','head.right.z']].values
|
|
head_right = df[['head.up.x','head.up.y','head.up.z']].values
|
|
head_dir = df[['head.dir.x','head.dir.y','head.dir.z']].values
|
|
|
|
for i in range(num_rows):
|
|
#get current gaze vector and transforms
|
|
gaze_vector = gaze_vectors[i,:]
|
|
curr_head_up = head_up[i,:]
|
|
curr_head_right = head_right[i,:]
|
|
curr_head_dir = head_dir[i,:]
|
|
|
|
'''Transformation matrix of the form:
|
|
[ R.x, R.y, R.z, 0,
|
|
U.x, U.y, U.z, 0,
|
|
D.x, D.y, D.z, 0,
|
|
0, 0, 0, 1 ]
|
|
'''
|
|
#make transformation matrix
|
|
transform_mat = np.ma.row_stack((np.append(curr_head_right ,0),\
|
|
np.append(curr_head_up ,0),\
|
|
np.append(curr_head_dir,0),\
|
|
np.array([0.0, 0.0, 0.0, 1.0])))
|
|
|
|
|
|
#add homogenous coordinate to vector
|
|
gaze_vector = np.append(gaze_vector,1.0)
|
|
|
|
#right-multiply row vector with transpose of matrix as defined above
|
|
transformed_gaze_vector = np.matmul(gaze_vector,transform_mat.T)
|
|
|
|
#place transformed gaze_vector back into gaze_vectors w/o homogenous coordinate
|
|
transformed_gaze_vectors[i,:] = transformed_gaze_vector[0:3]
|
|
|
|
#normalize all rows again to account for any rounding errors
|
|
row_norm_vals = np.sqrt(np.square(transformed_gaze_vectors).sum(axis=1))
|
|
transformed_gaze_vectors = transformed_gaze_vectors/row_norm_vals[:,None]
|
|
|
|
elif param.transform_type == 'quaternions':
|
|
q_x = df['RotationX'].values
|
|
q_y = df['RotationY'].values
|
|
q_z = df['RotationZ'].values
|
|
q_w = df['RotationW'].values
|
|
|
|
for i in range(num_rows):
|
|
#get current gaze vector and transforms
|
|
gaze_vector = gaze_vectors[i,:]
|
|
|
|
curr_q_x = q_x[i]
|
|
curr_q_y = q_y[i]
|
|
curr_q_z = q_z[i]
|
|
curr_q_w = q_w[i]
|
|
|
|
# first column
|
|
one_one = np.square(curr_q_w) + np.square(curr_q_x) - np.square(curr_q_y) - np.square(curr_q_z)
|
|
two_one = 2 * (curr_q_w*curr_q_z + curr_q_x*curr_q_y)
|
|
three_one = 2 * (curr_q_x*curr_q_z - curr_q_w*curr_q_y)
|
|
|
|
# second column
|
|
one_two = 2* (curr_q_x*curr_q_y - curr_q_w*curr_q_z)
|
|
two_two = np.square(curr_q_w) + np.square(curr_q_x) - np.square(curr_q_y) - np.square(curr_q_z)
|
|
three_two = 2* (curr_q_w*curr_q_x - curr_q_y*curr_q_z)
|
|
|
|
# third column
|
|
one_three = 2* (curr_q_w*curr_q_y - curr_q_x*curr_q_z)
|
|
two_three = 2* (curr_q_y*curr_q_z - curr_q_w*curr_q_x)
|
|
three_three = np.square(curr_q_w) + np.square(curr_q_x) - np.square(curr_q_y) - np.square(curr_q_z)
|
|
|
|
# compute rotation matrix as per Diaz et al.
|
|
rot_mat = np.array([[one_one, two_one, three_one],\
|
|
[one_two, two_two, three_two],\
|
|
[one_three, two_three, three_three]])
|
|
|
|
transformed_gaze_vectors[i,:] = np.matmul(rot_mat,gaze_vector)
|
|
|
|
#normalize all rows again to account for any rounding errors
|
|
row_norm_vals = np.sqrt(np.square(transformed_gaze_vectors).sum(axis=1))
|
|
transformed_gaze_vectors = transformed_gaze_vectors/row_norm_vals[:,None]
|
|
|
|
return transformed_gaze_vectors
|
|
|
|
def window_signal_overlap(X, y, window_size=10):
|
|
M, N = X.shape
|
|
windows = np.array([])
|
|
lbls = []
|
|
|
|
FP_indxs = np.where(y==1)[0]
|
|
TP_indxs = np.where(y==-1)[0]
|
|
combined = np.hstack((FP_indxs, TP_indxs))
|
|
|
|
for i in combined:
|
|
window = np.array([])
|
|
starts = range(max(0, i-window_size+1), i+1)
|
|
ends = range(i+1, min(M, i+window_size))
|
|
|
|
for (s, e) in zip(starts, ends):
|
|
window = X[s:e]
|
|
curr_size = window.shape[0]
|
|
window = window.reshape(-1, curr_size*N)
|
|
|
|
# window is clipped from front or back, add zero padding
|
|
if curr_size < window_size:
|
|
padding_size = window_size*N - curr_size*N
|
|
padding = np.zeros(shape=(1,padding_size))
|
|
if i-window_size < 0:
|
|
window = np.hstack((padding,window))
|
|
else:
|
|
window = np.hstack((window, padding))
|
|
|
|
if windows.shape[0] == 0:
|
|
windows = window
|
|
else:
|
|
windows = np.vstack((windows, window))
|
|
lbl = y[i]
|
|
lbls.append(lbl)
|
|
|
|
|
|
lbls = np.array(lbls, dtype=int).reshape(-1,1)
|
|
return (windows, lbls)
|
|
|
|
|
|
def window_signal_center(X, y, window_size=10):
|
|
M, N = X.shape
|
|
windows = np.array([])
|
|
lbls = []
|
|
|
|
FP_indxs = np.where(y==1)[0]
|
|
TP_indxs = np.where(y==-1)[0]
|
|
combined = np.hstack((FP_indxs, TP_indxs))
|
|
|
|
for i in combined:
|
|
start = max(0, i-int(window_size/2))
|
|
end = min(M, i+int(window_size/2))
|
|
|
|
window = X[start:end, :]
|
|
curr_size = window.shape[0]
|
|
window = window.reshape(-1, curr_size*N)
|
|
|
|
# window is clipped from front or back, add zero padding
|
|
if curr_size < window_size:
|
|
padding_size = window_size*N - curr_size*N
|
|
padding = np.zeros(shape=(1,padding_size))
|
|
if i-int(window_size/2) < 0:
|
|
window = np.hstack((padding,window))
|
|
else:
|
|
window = np.hstack((window, padding))
|
|
|
|
if windows.shape[0] == 0:
|
|
windows = window
|
|
else:
|
|
windows = np.vstack((windows, window))
|
|
lbl = y[i]
|
|
lbls.append(lbl)
|
|
|
|
lbls = np.array(lbls, dtype=int).reshape(-1,1)
|
|
return (windows, lbls)
|
|
|
|
def window_signal_start(X, y, window_size=10):
|
|
M, N = X.shape
|
|
windows = np.array([])
|
|
lbls = []
|
|
|
|
FP_indxs = np.where(y==1)[0]
|
|
TP_indxs = np.where(y==-1)[0]
|
|
combined = np.hstack((FP_indxs, TP_indxs))
|
|
|
|
for i in combined:
|
|
start = i
|
|
end = min(M, i+window_size)
|
|
|
|
window = X[start:end, :]
|
|
curr_size = window.shape[0]
|
|
window = window.reshape(-1, curr_size*N)
|
|
|
|
# window is clipped from front or back, add zero padding
|
|
if curr_size < window_size:
|
|
padding_size = window_size*N - curr_size*N
|
|
padding = np.zeros(shape=(1,padding_size))
|
|
window = np.hstack((window, padding))
|
|
|
|
if windows.shape[0] == 0:
|
|
windows = window
|
|
else:
|
|
windows = np.vstack((windows, window))
|
|
lbl = y[i]
|
|
lbls.append(lbl)
|
|
|
|
lbls = np.array(lbls, dtype=int).reshape(-1,1)
|
|
return (windows, lbls)
|
|
|
|
def window_signal(X, y, mode='center', window_size=10):
|
|
if mode == 'center':
|
|
return window_signal_center(X, y, window_size=window_size)
|
|
elif mode == 'start':
|
|
return window_signal_start(X, y, window_size=window_size)
|
|
elif mode == 'overlap':
|
|
return window_signal_overlap(X, y, window_size=window_size)
|
|
|
|
def get_header(features, window_size):
|
|
header = ""
|
|
for i in range(window_size):
|
|
h = ['%s_%d'%(f, i) for f in features]
|
|
h = ",".join(h)
|
|
if not len(header):
|
|
header = h
|
|
continue
|
|
header = '%s,%s'%(header, h)
|
|
header = '%s,%s'%(header, 'label')
|
|
return header
|
|
|
|
def is_discrete_feat(feat):
|
|
isdisc = False
|
|
for f in discrete_feats:
|
|
if f in feat:
|
|
isdisc = True
|
|
break
|
|
return isdisc
|
|
|
|
def read_data(path):
|
|
files = glob.glob(os.path.join(path, '*'))
|
|
y = []
|
|
X = []
|
|
subjs = []
|
|
discrete_cols = []
|
|
cols = []
|
|
|
|
for file in files:
|
|
subj = file.split(os.sep)[-1]
|
|
subj = subj.split('.')[0]
|
|
subj = subj.split('_')[0]
|
|
|
|
if subj in subjs_skip:
|
|
continue
|
|
|
|
data = pd.read_csv(file)
|
|
|
|
if not len(discrete_cols):
|
|
cols = data.columns
|
|
discrete_cols = []
|
|
for i in range(len(cols)):
|
|
if is_discrete_feat(cols[i]):
|
|
discrete_cols.append(cols[i])
|
|
|
|
|
|
if not len(y):
|
|
y = data['label'].to_numpy().reshape(-1, 1)
|
|
else:
|
|
y = np.vstack((y, data['label'].to_numpy().reshape(-1,1)))
|
|
|
|
curr_X = data[data.columns[data.columns != 'label']].to_numpy()
|
|
|
|
if not len(X):
|
|
X = curr_X
|
|
else:
|
|
X = np.vstack((X, curr_X))
|
|
|
|
M = curr_X.shape[0]
|
|
curr_subjs = np.repeat(subj, M).reshape(-1, 1)
|
|
if not len(subjs):
|
|
subjs = curr_subjs
|
|
else:
|
|
subjs = np.vstack((subjs, curr_subjs))
|
|
|
|
return (X, y, subjs, discrete_cols, cols)
|
|
|
|
|
|
def read_feat_data(path):
|
|
''' Reads feature data in director 'path' and stores them in PD data frame'''
|
|
files = glob.glob(os.path.join(path, '*'))
|
|
|
|
combined = pd.DataFrame()
|
|
for file in files:
|
|
data = pd.read_csv(file, dtype={'selection_type': 'str'}, index_col=0)
|
|
|
|
filename = file.split(os.sep)[-1]
|
|
filename = filename.split('.')[0]
|
|
subj = filename.split('_')[0]
|
|
data['sbj'] = subj
|
|
block = filename.split('_')[1]
|
|
data['block'] = block
|
|
|
|
combined = combined.append(data)
|
|
|
|
return combined
|
|
|
|
def read_window_data(main_dir, MODE, WINDOW_SIZE, data_type='train'):
|
|
data_dir = os.path.join(main_dir, 'w_%s_%d'%(MODE, WINDOW_SIZE))
|
|
data_dir = os.path.join(data_dir, data_type)
|
|
X, y, subjs, discrete_cols, cols = read_data(main_dir)
|
|
return X, y, subjs, discrete_cols, cols
|
|
|
|
|
|
def normalize_subj_data(X, y, subjs, exclude_idxs):
|
|
X_out, y_out = [], []
|
|
sbjs = np.unique(subjs)
|
|
for sbj in sbjs:
|
|
subj_idxs = np.where(subjs==sbj)[0]
|
|
X_subj, y_subj = X[subj_idxs], y[subj_idxs]
|
|
|
|
if X_subj.shape[0] < 1:
|
|
continue
|
|
|
|
if len(exclude_idxs):
|
|
mask = ~np.isin(np.arange(X.shape[1]), exclude_idxs)
|
|
X_subj[:,mask] = scale(X_subj[:,mask])
|
|
else:
|
|
X_Subj = scale(X_subj)
|
|
|
|
if not len(X_out):
|
|
X_out = X_subj
|
|
y_out = y_subj
|
|
else:
|
|
X_out = np.vstack((X_out, X_subj))
|
|
y_out = np.vstack((y_out, y_subj))
|
|
return X_out, y_out
|
|
|
|
def get_feat_indxs(columns, query_cols):
|
|
indxs = []
|
|
for i in range(len(columns)):
|
|
c = columns[i]
|
|
c = c.split('_')
|
|
c.pop()
|
|
c = '_'.join(c)
|
|
if c in query_cols:
|
|
indxs.append(i)
|
|
return indxs
|
|
|
|
def filter_features(featfilepath, X, headers, count_thresh=1):
|
|
feat_counter = pd.read_csv(featfilepath,squeeze=True)
|
|
|
|
imp_indxs = np.where(feat_counter['num']>count_thresh)[0]
|
|
important_feats = feat_counter['feat'].values[imp_indxs]
|
|
indxs = get_feat_indxs(headers, important_feats)
|
|
X = X[:,indxs]
|
|
return X
|
|
|
|
|
|
def compute_feature_signal_time_avg(features,fixation_times,time_stamps,N):
|
|
num_features = features.shape[1]
|
|
num_rows = len(time_stamps)
|
|
feature_signal = np.zeros((num_rows,num_features),dtype=float)
|
|
|
|
#loop over each time stamp, grab which elements have time greater than current time -2 but less than current time, average, add to row
|
|
for i in range(num_rows):
|
|
curr_time = time_stamps[i]
|
|
|
|
#get events N seconds prior to curr_time
|
|
curr_events = features[(fixation_times>=curr_time-N) & (fixation_times<=curr_time),:]
|
|
|
|
curr_row = np.zeros((1,num_features),dtype=float)
|
|
if len(curr_events) >0:
|
|
curr_row = np.mean(curr_events,0)
|
|
feature_signal[i,:] = curr_row
|
|
|
|
return feature_signal
|
|
|
|
'''
|
|
Summary: Compute a continous time series for feature values by averaging over the past N events
|
|
Inputs: features: 2D array with rows corresponding to events, and columns corresponding to feature values
|
|
fixation_times: 1D array containing the timestamps corresponding to each row of fixation_times
|
|
time_stamps: 1D array representing every sample in the time series, containing times in seconds
|
|
N: Scalar indicating the number of past events to consider when computing average feature values
|
|
Output: 2D array, with rows corresponding to each sample in time_stamps and a column for each feature signal.
|
|
'''
|
|
def compute_feature_signal_event_avg(features,fixation_times,time_stamps,N):
|
|
num_features = features.shape[1]
|
|
num_rows = len(time_stamps)
|
|
feature_signal = np.zeros((num_rows,num_features),dtype=float)
|
|
|
|
#loop over each time stamp, grab past N elements, average, add to row
|
|
for i in range(num_rows):
|
|
curr_time = time_stamps[i]
|
|
|
|
#get events prior to curr_time
|
|
curr_events = features[(fixation_times<=curr_time),:]
|
|
|
|
#grab last N of them if possible
|
|
prior_events = curr_events[-N:,:]
|
|
|
|
curr_row = np.zeros((1,num_features),dtype=float)
|
|
if len(prior_events) >0:
|
|
curr_row = np.mean(prior_events,0)
|
|
feature_signal[i,:] = curr_row
|
|
|
|
return feature_signal
|
|
|
|
#wrapper function to call feature signal methods
|
|
def compute_feature_signal(method,features,event_times,time_stamps,N):
|
|
if method == 'time_avg':
|
|
return compute_feature_signal_time_avg(features,event_times,time_stamps,N)
|
|
elif method == 'event_avg':
|
|
return compute_feature_signal_event_avg(features,event_times,time_stamps,N)
|
|
elif method == 'event_discrete':
|
|
return compute_feature_signal_event_discrete(features,event_times,time_stamps,N)
|
|
else:
|
|
return None
|
|
|
|
def get_header(features, window_size):
|
|
header = ""
|
|
for i in range(window_size):
|
|
h = ['%s_%d'%(f, i) for f in features]
|
|
h = ",".join(h)
|
|
if not len(header):
|
|
header = h
|
|
continue
|
|
header = '%s,%s'%(header, h)
|
|
header = '%s,%s'%(header, 'label')
|
|
return header
|