import numpy as np import cv2 from sklearn.preprocessing import StandardScaler from sklearn.decomposition import PCA from scipy import spatial from itertools import combinations import time import sys sys.path.append("C://Users//xbrjos//Desktop//Python") from gepard.analysis.particleContainer import ParticleContainer from gepard.analysis import particleAndMeasurement as pm from methods import SubsamplingMethod def get_pca(data: np.ndarray, numComp: int = 2) -> np.ndarray: standardizedData = StandardScaler().fit_transform(data) pca = PCA(n_components=numComp) princComp: np.ndarray = pca.fit_transform(np.transpose(standardizedData)) return princComp class ChemometricSubsampling(SubsamplingMethod): def __init__(self, particleContainer: ParticleContainer, desiredFraction: float): super(ChemometricSubsampling, self).__init__(particleContainer, desiredFraction) def label(self) -> str: return 'Chemometric Selection' def apply_subsampling_method(self) -> list: return [] def _get_particle_featurematrix(self) -> np.ndarray: vectors: list = [] for particle in self.particleContainer.particles: extractor: FeatureExtractor = FeatureExtractor(particle) vectors.append(extractor.get_characteristic_vector()) vectors: np.ndarray = np.transpose(np.array(vectors)) return vectors class FeatureExtractor(object): def __init__(self, particle: pm.Particle): super(FeatureExtractor, self).__init__() self.particle: pm.Particle = particle def get_characteristic_vector(self) -> np.ndarray: return self._get_log_hu_moments() def _get_log_hu_moments(self) -> np.ndarray: moments: dict = cv2.moments(self.particle.contour) resultMoments: np.ndarray = np.zeros((7, 1)) for index, mom in enumerate(cv2.HuMoments(moments)): if mom != 0: resultMoments[index] = -1 * np.copysign(1.0, mom) * np.log10(abs(mom)) else: resultMoments[index] = 0 return resultMoments[:, 0] class KennardStone(object): def __init__(self, data: np.ndarray, desiredFraction: float): super(KennardStone, self).__init__() self.data: np.ndarray = data self.fraction: float = desiredFraction def get_sampled_indices(self) -> list: t0 = time.time() selectedIndices: set = set([]) numIndices: int = round(self.data.shape[0] * self.fraction) if numIndices < 2: raise ValueError(f'Not enough indices to generate (min = 2), requested: {numIndices}') else: distMat = spatial.distance_matrix(self.data, self.data) i, j = np.unravel_index(distMat.argmax(), distMat.shape) remainingIndices: set = set(np.arange(self.data.shape[0])) selectedIndices.add(i) selectedIndices.add(j) remainingIndices.remove(i) remainingIndices.remove(j) for _ in range(numIndices-2): minDist = 0.0 for j in remainingIndices: dist = np.min([distMat[j][i] for i in selectedIndices]) if dist > minDist: minj = j minDist = dist selectedIndices.add(minj) remainingIndices.remove(minj) assert len(np.unique(list(selectedIndices))) == len(selectedIndices) print('selecting indices time:', np.round(time.time() - t0, 2), 'seconds') return list(selectedIndices) # def get_sampled_indices(self) -> list: # t0 = time.time() # numIndices: int = round(self.data.shape[0] * self.fraction) # if numIndices < 2: # raise ValueError(f'Not enough indices to generate (min = 2), requested: {numIndices}') # else: # startInd = self._get_start_indices() # selectedPoints = np.zeros((numIndices, 2)) # selectedPoints[0, :] = self.data[startInd[0], :] # selectedPoints[1, :] = self.data[startInd[1], :] # # if numIndices > 2: # data: np.ndarray = np.delete(self.data, startInd, 0) # for i in range(numIndices-2): # newIndex: int = self._get_point_furthest_from_other_points(selectedPoints[:i+2, :], data) # selectedPoints[i+2, :] = data[newIndex, :] # data = np.delete(data, newIndex, 0) # # selectedIndices = [] # assert numIndices == selectedPoints.shape[0] # for i in range(numIndices): # newInd = np.where(self.data == selectedPoints[i])[0][0] # selectedIndices.append(newInd) # # assert len(np.unique(selectedIndices)) == len(selectedIndices) # print('selecting indices time:', np.round(time.time()-t0, 2), 'seconds') # return selectedIndices def _get_start_indices(self) -> list: """ Finds the indices of the points that are furthest apart. Adapted from https://stackoverflow.com/questions/50468643/finding-two-most-far-away-points-in-plot-with-many-points-in-python/50469147 :return: """ assert self.data.shape[1] == 2 candidates = self.data[spatial.ConvexHull(self.data).vertices] dist_mat = spatial.distance_matrix(candidates, candidates) i, j = np.unravel_index(dist_mat.argmax(), dist_mat.shape) index1 = np.where(self.data == candidates[i])[0][0] index2 = np.where(self.data == candidates[j])[0][0] assert index1 != index2 return sorted([index1, index2]) # def _get_point_furthest_from_other_points(self, refPoints: np.ndarray, otherPoints: np.ndarray) -> int: # assert refPoints.shape[1] == 2 # assert otherPoints.shape[1] == 2 # dist_mat = spatial.distance_matrix(refPoints, otherPoints) # i, index = np.unravel_index(dist_mat.argmax(), dist_mat.shape) # return index class BoundingAreaHierarchy(object): def __init__(self, points: np.ndarray): super(BoundingAreaHierarchy, self).__init__() self.points: np.ndarray = points self.tree: BAHNode = BAHNode(points) self._populate_tree() # def _populate_tree(self) -> None: # # class BAHNode(object): # def __init__(self, parent, startxy: tuple, width: float, height: float, points: np.ndarray) -> None: # super(BAHNode, self).__init__() # self.maxPointsPerNode: int = 10 # self.parent: BAHNode = parent # self.children: list = [] # if empty, we reached the lowest node level # self.points: np.ndarray = np.array([]) # if empty, we are not at the lowest level and have to check children # self.x0: float = startxy[0] # self.y0: float = startxy[1] # self.width: float = width # self.height: float = height # self.x1: float = self.x0 + width # self.y1: float = self.y0 + height # self._create_child_nodes(points) # # def _create_child_nodes(self, points:np.ndarray): # if points.shape[0] > 0: # avoid testing for children in case of a testrun (empty array is provided as points) # pointsInNode: np.ndarray = self._get_points_in_area(points) # if pointsInNode.shape[0] > self.maxPointsPerNode: # childWidth: float = self.width/2 # childHeight:float = self.height/2 # # self.children.append(BAHNode(self, (self.x0, self.y1), # childWidth, childHeight, pointsInNode)) # # self.children.append(BAHNode(self, (self.x0 + childWidth, self.y1), # childWidth, childHeight, pointsInNode)) # # self.children.append(BAHNode(self, (self.x0, self.y1 + childHeight), # childWidth, childHeight, pointsInNode)) # # self.children.append(BAHNode(self, (self.x0 + childWidth, self.y1 + childHeight), # childWidth, childHeight, pointsInNode)) # # # def _get_points_in_area(self, points:np.ndarray) -> np.ndarray: # assert points.shape[1] == 2 # cond1: np.ndarray = np.logical_and(points[:, 0] >= self.x0, points[:, 0] < self.x1) # cond2: np.ndarray = np.logical_and(points[:, 1] >= self.y0, points[:, 1] < self.y1) # return points[np.logical_and(cond1, cond2)]