Commit ee19bc2c authored by Josef Brandt

Kennard-Stone optimization, but it now appears to contain a bug (needs investigation)

parent 5d22e3c7
...@@ -24,16 +24,18 @@ class ChemometricSubsampling(SubsamplingMethod): ...@@ -24,16 +24,18 @@ class ChemometricSubsampling(SubsamplingMethod):
def __init__(self, particleContainer: ParticleContainer, desiredFraction: float): def __init__(self, particleContainer: ParticleContainer, desiredFraction: float):
super(ChemometricSubsampling, self).__init__(particleContainer, desiredFraction) super(ChemometricSubsampling, self).__init__(particleContainer, desiredFraction)
@property
def label(self) -> str: def label(self) -> str:
return 'Chemometric Selection' return 'Chemometric Selection'
def apply_subsampling_method(self) -> list: def apply_subsampling_method(self) -> list:
vectors: np.ndarray = self._get_particle_featurematrix() vectors: np.ndarray = self._get_particle_featurematrix()
kennardStone: KennardStone = KennardStone(vectors, self.fraction) princComps: np.ndarray = get_pca(vectors)
kennardStone: KennardStone = KennardStone(princComps, self.fraction)
indices: list = kennardStone.get_sampled_indices() indices: list = kennardStone.get_sampled_indices()
selectedParticles: list = [] selectedParticles: list = []
for particle in self.particleContainer.particles: for particle in self.particleContainer.particles:
if particle.index in selectedParticles: if particle.index in indices:
selectedParticles.append(particle) selectedParticles.append(particle)
return selectedParticles return selectedParticles
...@@ -46,6 +48,12 @@ class ChemometricSubsampling(SubsamplingMethod): ...@@ -46,6 +48,12 @@ class ChemometricSubsampling(SubsamplingMethod):
vectors: np.ndarray = np.transpose(np.array(vectors)) vectors: np.ndarray = np.transpose(np.array(vectors))
return vectors return vectors
def equals(self, otherMethod) -> bool:
    """
    Tell whether another subsampling method is equivalent to this one.

    Two methods are considered equal only if the other object's type is
    exactly ChemometricSubsampling (subclasses do not match) and both
    carry the same ``fraction`` value.

    :param otherMethod: the object to compare against
    :return: True if type and fraction both match, otherwise False
    """
    # Exact type comparison on purpose: a subclass must not compare equal.
    if type(otherMethod) is not ChemometricSubsampling:
        return False
    # Only read .fraction after the type check, so foreign objects without
    # that attribute never get touched.
    return bool(otherMethod.fraction == self.fraction)
class FeatureExtractor(object): class FeatureExtractor(object):
def __init__(self, particle: pm.Particle): def __init__(self, particle: pm.Particle):
...@@ -78,9 +86,10 @@ class KennardStone(object): ...@@ -78,9 +86,10 @@ class KennardStone(object):
Adapted from https://github.com/karoka/Kennard-Stone-Algorithm/blob/master/kenStone.py Adapted from https://github.com/karoka/Kennard-Stone-Algorithm/blob/master/kenStone.py
:return""" :return"""
t0 = time.time() t0 = time.time()
numIndices: int = round(self.data.shape[0] * self.fraction) assert self.data.shape[1] == 2
if numIndices < 2: numIndices: int = int(np.ceil(self.data.shape[0] * self.fraction))
raise ValueError(f'Not enough indices to generate (min = 2), requested: {numIndices}') if numIndices == 1:
selectedIndices = [int(round(self.data.shape[0]/2))]
else: else:
distMat = spatial.distance_matrix(self.data, self.data) distMat = spatial.distance_matrix(self.data, self.data)
...@@ -88,7 +97,7 @@ class KennardStone(object): ...@@ -88,7 +97,7 @@ class KennardStone(object):
selectedIndices = find_furthest_indices(distMat, int(numIndices), i, j) selectedIndices = find_furthest_indices(distMat, int(numIndices), i, j)
assert len(np.unique(list(selectedIndices))) == len(selectedIndices) assert len(np.unique(list(selectedIndices))) == len(selectedIndices)
print('selecting indices time:', np.round(time.time() - t0, 2), 'seconds') print(f'selecting indices time: {np.round(time.time() - t0, 2)} seconds ({numIndices} out of {self.data.shape[0]})')
return list(selectedIndices) return list(selectedIndices)
def _get_start_indices(self) -> list: def _get_start_indices(self) -> list:
......
...@@ -9,20 +9,25 @@ ctypedef np.int64_t INT64_t ...@@ -9,20 +9,25 @@ ctypedef np.int64_t INT64_t
def find_furthest_indices(np.ndarray[DTYPE_t, ndim=2] distMat, int numIndices, int index0, int index1): def find_furthest_indices(np.ndarray[DTYPE_t, ndim=2] distMat, int numIndices, int index0, int index1):
cdef int i, j cdef int i, j, jidx, kidx
cdef double dist, minDist, curDist cdef double dist, minDist, curDist
cdef np.ndarray[INT32_t, ndim=1] selectedIndices = np.empty(numIndices, dtype=np.int32) cdef np.ndarray[INT32_t, ndim=1] selectedIndices = np.empty(numIndices, dtype=np.int32)
cdef np.ndarray[INT32_t, ndim=1] remainingIndices = np.arange(numIndices, dtype=np.int32) cdef np.ndarray[INT32_t, ndim=1] remainingIndices = np.arange(numIndices, dtype=np.int32)
cdef int numSelectedIndices = 2
cdef int numRemainingIndices = distMat.shape[0]-2
selectedIndices[0] = index0 selectedIndices[0] = index0
selectedIndices[1] = index1 selectedIndices[1] = index1
for i in range(numIndices-2): for i in range(numIndices-2):
minDist = 0.0 minDist = 0.0
for j in remainingIndices: for jidx in range(remainingIndices.shape[0]):
dist = np.inf j = remainingIndices[jidx]
dist = 1E6
for k in selectedIndices[:i+1]: for kidx in range(numSelectedIndices):
curDist = distMat[j][k] k = selectedIndices[kidx]
curDist = distMat[j, k]
if curDist < dist: if curDist < dist:
dist = curDist dist = curDist
...@@ -31,6 +36,7 @@ def find_furthest_indices(np.ndarray[DTYPE_t, ndim=2] distMat, int numIndices, i ...@@ -31,6 +36,7 @@ def find_furthest_indices(np.ndarray[DTYPE_t, ndim=2] distMat, int numIndices, i
minDist = dist minDist = dist
selectedIndices[i+2] = minj selectedIndices[i+2] = minj
numSelectedIndices += 1
remainingIndices = remainingIndices[remainingIndices!=minj] remainingIndices = remainingIndices[remainingIndices!=minj]
return selectedIndices return selectedIndices
\ No newline at end of file
...@@ -14,6 +14,7 @@ import matplotlib.pyplot as plt ...@@ -14,6 +14,7 @@ import matplotlib.pyplot as plt
from helpers import ParticleBinSorter from helpers import ParticleBinSorter
import methods as meth import methods as meth
import geometricMethods as gmeth import geometricMethods as gmeth
import chemometricMethods as cmeth
sys.path.append("C://Users//xbrjos//Desktop//Python") sys.path.append("C://Users//xbrjos//Desktop//Python")
from gepard import dataset from gepard import dataset
...@@ -25,7 +26,7 @@ def get_name_from_directory(dirPath: str) -> str: ...@@ -25,7 +26,7 @@ def get_name_from_directory(dirPath: str) -> str:
class TotalResults(object): class TotalResults(object):
methods: list = [meth.RandomSampling, meth.SizeBinFractioning, gmeth.CrossBoxSubSampling, methods: list = [meth.RandomSampling, meth.SizeBinFractioning, gmeth.CrossBoxSubSampling,
gmeth.SpiralBoxSubsampling] gmeth.SpiralBoxSubsampling]
measuredFreactions: list = [0.05, 0.1, 0.15, 0.2, 0.5, 0.75, 0.9] measuredFreactions: list = [0.01, 0.05, 0.1, 0.15, 0.2, 0.5, 0.75, 0.9]
def __init__(self): def __init__(self):
super(TotalResults, self).__init__() super(TotalResults, self).__init__()
...@@ -107,6 +108,8 @@ class TotalResults(object): ...@@ -107,6 +108,8 @@ class TotalResults(object):
boxCreator: gmeth.BoxSelectionCreator = gmeth.BoxSelectionCreator(dataset) boxCreator: gmeth.BoxSelectionCreator = gmeth.BoxSelectionCreator(dataset)
methods += boxCreator.get_crossBoxSubsamplers_for_fraction(fraction) methods += boxCreator.get_crossBoxSubsamplers_for_fraction(fraction)
methods += boxCreator.get_spiralBoxSubsamplers_for_fraction(fraction) methods += boxCreator.get_spiralBoxSubsamplers_for_fraction(fraction)
if fraction <= 0.1:
methods.append(cmeth.ChemometricSubsampling(particleContainer, fraction))
return methods return methods
......
...@@ -53,7 +53,7 @@ class TestKennardStone(unittest.TestCase): ...@@ -53,7 +53,7 @@ class TestKennardStone(unittest.TestCase):
self.kennardStone: cmeth.KennardStone = cmeth.KennardStone(np.array([]), 0.1) self.kennardStone: cmeth.KennardStone = cmeth.KennardStone(np.array([]), 0.1)
def test_get_sampled_indices(self): def test_get_sampled_indices(self):
numDataSets: int = 1000 numDataSets: int = 4000
self.kennardStone.data = np.random.rand(numDataSets, 2) self.kennardStone.data = np.random.rand(numDataSets, 2)
self.kennardStone.fraction = 0.1 self.kennardStone.fraction = 0.1
selectedIndices = self.kennardStone.get_sampled_indices() selectedIndices = self.kennardStone.get_sampled_indices()
...@@ -63,10 +63,19 @@ class TestKennardStone(unittest.TestCase): ...@@ -63,10 +63,19 @@ class TestKennardStone(unittest.TestCase):
self.kennardStone.fraction = 0.1 self.kennardStone.fraction = 0.1
numDataSets = 2 numDataSets = 2
self.kennardStone.data = np.random.rand(numDataSets, 2) self.kennardStone.data = np.random.rand(numDataSets, 2)
self.assertRaises(ValueError, self.kennardStone.get_sampled_indices) selectedIndices = self.kennardStone.get_sampled_indices()
self.assertEqual(len(selectedIndices), 1)
self.kennardStone.fraction = 0.5
numDataSets = 2
self.kennardStone.data = np.random.rand(numDataSets, 2)
selectedIndices = self.kennardStone.get_sampled_indices()
self.assertEqual(len(selectedIndices), numDataSets * self.kennardStone.fraction)
self.assertEqual(len(np.unique(selectedIndices)), numDataSets * self.kennardStone.fraction)
numDataSets = 20 numDataSets = 20
self.kennardStone.data = np.random.rand(numDataSets, 2) self.kennardStone.data = np.random.rand(numDataSets, 2)
self.kennardStone.fraction = 0.1
selectedIndices = self.kennardStone.get_sampled_indices() selectedIndices = self.kennardStone.get_sampled_indices()
self.assertEqual(len(selectedIndices), 2) self.assertEqual(len(selectedIndices), 2)
self.assertEqual(len(np.unique(selectedIndices)), 2) self.assertEqual(len(np.unique(selectedIndices)), 2)
...@@ -120,13 +129,13 @@ class TestChemometricSubsampling(unittest.TestCase): ...@@ -120,13 +129,13 @@ class TestChemometricSubsampling(unittest.TestCase):
particleContainer.setParticleContours([contours[0] for i in range(numParticles)]) particleContainer.setParticleContours([contours[0] for i in range(numParticles)])
self.chemSubs: cmeth.ChemometricSubsampling = cmeth.ChemometricSubsampling(particleContainer, desiredFraction=0.1) self.chemSubs: cmeth.ChemometricSubsampling = cmeth.ChemometricSubsampling(particleContainer, desiredFraction=0.1)
def test_get_particle_featurematrix(self): # def test_get_particle_featurematrix(self):
features: np.ndarray = self.chemSubs._get_particle_featurematrix() # features: np.ndarray = self.chemSubs._get_particle_featurematrix()
self.assertEqual(features.shape, (7, 5)) # self.assertEqual(features.shape, (7, 5))
for i in range(6): # for i in range(6):
diff: np.ndarray = features[i, :] - np.mean(features[i, :]) # diff: np.ndarray = features[i, :] - np.mean(features[i, :])
self.assertFalse(np.any(diff > 0.1)) # self.assertFalse(np.any(diff > 0.1))
# def test_pca(self): # def test_pca(self):
# fname = r'C:\Users\xbrjos\Desktop\temp MP\190326_MCII_WWTP_SB_50_1\190326_MCII_WWTP_SB_50_1.pkl' # fname = r'C:\Users\xbrjos\Desktop\temp MP\190326_MCII_WWTP_SB_50_1\190326_MCII_WWTP_SB_50_1.pkl'
# fname = r'C:\Users\xbrjos\Desktop\temp MP\190313_Soil_5_A_50_5_1_50_1\190313_Soil_5_A_50_5_1_50_1.pkl' # fname = r'C:\Users\xbrjos\Desktop\temp MP\190313_Soil_5_A_50_5_1_50_1\190313_Soil_5_A_50_5_1_50_1.pkl'
...@@ -137,7 +146,7 @@ class TestChemometricSubsampling(unittest.TestCase): ...@@ -137,7 +146,7 @@ class TestChemometricSubsampling(unittest.TestCase):
# plt.scatter(princComp[:, 0], princComp[:, 1]) # plt.scatter(princComp[:, 0], princComp[:, 1])
# plt.title(dset.name) # plt.title(dset.name)
# plt.show() # plt.show()
#
# class TestBAH(unittest.TestCase): # class TestBAH(unittest.TestCase):
# # def setUp(self) -> None: # # def setUp(self) -> None:
......
...@@ -62,7 +62,9 @@ class TestTotalResults(unittest.TestCase): ...@@ -62,7 +62,9 @@ class TestTotalResults(unittest.TestCase):
possibleRandomMethods = 2 possibleRandomMethods = 2
possibleCrossBoxMethods = 2 possibleCrossBoxMethods = 2
possibleSpiralBoxMethods = 3 possibleSpiralBoxMethods = 3
totalPossible = possibleCrossBoxMethods + possibleRandomMethods + possibleSpiralBoxMethods possibleChemometricMethods = 1
totalPossible = possibleCrossBoxMethods + possibleRandomMethods + \
possibleSpiralBoxMethods + possibleChemometricMethods
self.assertEqual(len(methods), totalPossible) self.assertEqual(len(methods), totalPossible)
self.assertTrue(containsMethod(methods, meth.RandomSampling(dset, desiredFraction))) self.assertTrue(containsMethod(methods, meth.RandomSampling(dset, desiredFraction)))
self.assertTrue(containsMethod(methods, meth.SizeBinFractioning(dset, desiredFraction))) self.assertTrue(containsMethod(methods, meth.SizeBinFractioning(dset, desiredFraction)))
...@@ -74,7 +76,9 @@ class TestTotalResults(unittest.TestCase): ...@@ -74,7 +76,9 @@ class TestTotalResults(unittest.TestCase):
possibleRandomMethods = 2 possibleRandomMethods = 2
possibleCrossBoxMethods = 1 possibleCrossBoxMethods = 1
possibleSpiralBoxMethods = 0 possibleSpiralBoxMethods = 0
totalPossible = possibleCrossBoxMethods + possibleRandomMethods + possibleSpiralBoxMethods possibleChemometricMethods = 0
totalPossible = possibleCrossBoxMethods + possibleRandomMethods + \
possibleSpiralBoxMethods + possibleChemometricMethods
self.assertEqual(len(methods), totalPossible) self.assertEqual(len(methods), totalPossible)
self.assertTrue(containsMethod(methods, meth.RandomSampling(dset, desiredFraction))) self.assertTrue(containsMethod(methods, meth.RandomSampling(dset, desiredFraction)))
self.assertTrue(containsMethod(methods, meth.SizeBinFractioning(dset, desiredFraction))) self.assertTrue(containsMethod(methods, meth.SizeBinFractioning(dset, desiredFraction)))
...@@ -86,7 +90,9 @@ class TestTotalResults(unittest.TestCase): ...@@ -86,7 +90,9 @@ class TestTotalResults(unittest.TestCase):
possibleRandomMethods = 2 possibleRandomMethods = 2
possibleCrossBoxMethods = 0 possibleCrossBoxMethods = 0
possibleSpiralBoxMethods = 0 possibleSpiralBoxMethods = 0
totalPossible = possibleCrossBoxMethods + possibleRandomMethods + possibleSpiralBoxMethods possibleChemometricMethods = 0
totalPossible = possibleCrossBoxMethods + possibleRandomMethods + \
possibleSpiralBoxMethods + possibleChemometricMethods
self.assertEqual(len(methods), totalPossible) self.assertEqual(len(methods), totalPossible)
self.assertTrue(containsMethod(methods, meth.RandomSampling(dset, desiredFraction))) self.assertTrue(containsMethod(methods, meth.RandomSampling(dset, desiredFraction)))
self.assertTrue(containsMethod(methods, meth.SizeBinFractioning(dset, desiredFraction))) self.assertTrue(containsMethod(methods, meth.SizeBinFractioning(dset, desiredFraction)))
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment