...
 
Commits (2)
...@@ -20,3 +20,7 @@ cythonModules/build/ ...@@ -20,3 +20,7 @@ cythonModules/build/
chemometrics/Assignments.txt chemometrics/Assignments.txt
chemometrics/Data.txt chemometrics/Data.txt
chemometrics/Assignments_all.txt
chemometrics/Data_all.txt
...@@ -5,7 +5,7 @@ from sklearn.decomposition import PCA ...@@ -5,7 +5,7 @@ from sklearn.decomposition import PCA
from sklearn.cluster import DBSCAN from sklearn.cluster import DBSCAN
# from scipy import spatial # from scipy import spatial
# from itertools import combinations # from itertools import combinations
from random import sample from random import sample, random
import pickle import pickle
from sklearn.ensemble import RandomForestClassifier from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier from sklearn.neural_network import MLPClassifier
...@@ -19,7 +19,6 @@ from gepard.helperfunctions import cv2imread_fix ...@@ -19,7 +19,6 @@ from gepard.helperfunctions import cv2imread_fix
from methods import SubsamplingMethod from methods import SubsamplingMethod
from helpers import timingDecorator from helpers import timingDecorator
def get_pca(data: np.ndarray, numComp: int = 2) -> np.ndarray: def get_pca(data: np.ndarray, numComp: int = 2) -> np.ndarray:
try: try:
standardizedData = StandardScaler().fit_transform(data.copy()) standardizedData = StandardScaler().fit_transform(data.copy())
...@@ -224,7 +223,7 @@ def get_curvature_ft(contour: np.ndarray, angularSegment: float = 20, numHarmoni ...@@ -224,7 +223,7 @@ def get_curvature_ft(contour: np.ndarray, angularSegment: float = 20, numHarmoni
tan_before: float = 0 tan_before: float = 0
if x - x_before != 0: if x - x_before != 0:
tan_before = np.rad2deg(np.arctan((y-y_before) / (x-x_before))) tanr_before = np.rad2deg(np.arctan((y-y_before) / (x-x_before)))
tan_after: float = 0 tan_after: float = 0
if x_after - x != 0: if x_after - x != 0:
...@@ -252,12 +251,18 @@ def get_curvature_ft(contour: np.ndarray, angularSegment: float = 20, numHarmoni ...@@ -252,12 +251,18 @@ def get_curvature_ft(contour: np.ndarray, angularSegment: float = 20, numHarmoni
class TrainedSubsampling(SubsamplingMethod): class TrainedSubsampling(SubsamplingMethod):
def __init__(self, particleContainer: ParticleContainer, desiredFraction: float, def __init__(self, particleContainer: ParticleContainer, desiredFraction: float,
path: str = r'C:\Users\xbrjos\Desktop\Python\Subsampling\chemometrics\RandomForestClassifier, score 0.79.pkl'): path: str = r'C:\Users\xbrjos\Desktop\Python\Subsampling\chemometrics\RandomForestClassifier, score 0.79.pkl',
fakeScore: float = 0.8):
super(TrainedSubsampling, self).__init__(particleContainer, desiredFraction) super(TrainedSubsampling, self).__init__(particleContainer, desiredFraction)
self.score: float = None self.score: float = None
self.clf = None self.clf = None
self.clfPath: str = path self.clfPath: str = path
self.fraction = desiredFraction self.fraction = desiredFraction
self.fakeClassifier: bool = True
self.fakeScore: float = fakeScore
self.fractionForExtrapolation: float = 0.0
self.predictedMPIndices: list = []
self._predict_MP_Indices()
def equals(self, otherMethod) -> bool: def equals(self, otherMethod) -> bool:
isEqual: bool = False isEqual: bool = False
...@@ -266,29 +271,63 @@ class TrainedSubsampling(SubsamplingMethod): ...@@ -266,29 +271,63 @@ class TrainedSubsampling(SubsamplingMethod):
isEqual = True isEqual = True
return isEqual return isEqual
@property @property
def label(self) -> str: def label(self) -> str:
return 'Trained Random Sampling' label: str = 'Dummy Trained Random Sampling'
if self.fakeClassifier:
label += f' (score {self.fakeScore})'
else:
label += f' (score {self.score})'
return label
def _predict_MP_Indices(self) -> None:
    """
    Fills self.predictedMPIndices with the particle indices that subsampling may draw from.

    With self.fakeClassifier set, a classifier of accuracy self.fakeScore is simulated:
    each particle gets its correct label (as given by evaluation.is_MP_particle) with
    probability fakeScore and the wrong label otherwise, and self.score is set to fakeScore.
    Otherwise the pickled classifier is loaded and applied to the particle feature matrix
    computed from the dataset's full image.

    All particles predicted as MP are kept. Particles predicted as non-MP are added as a random
    subset whose share rises linearly with self.fraction: 0 at fraction <= 0.05, everything at
    fraction >= 0.15.
    :return:
    """
    from evaluation import is_MP_particle

    if self.fakeClassifier:
        self.score = self.fakeScore
        allParticles: list = self.particleContainer.particles
        predictions: np.ndarray = np.zeros(len(allParticles))
        for position, currentParticle in enumerate(allParticles):
            if is_MP_particle(currentParticle):
                # true MP particle: labelled correctly with probability fakeScore
                flaggedAsMP: bool = random() <= self.fakeScore
            else:
                # non-MP particle: labelled wrongly (as MP) with probability 1 - fakeScore
                flaggedAsMP: bool = random() > self.fakeScore
            if flaggedAsMP:
                predictions[position] = 1
    else:
        self._load_classifier()
        fullimgpath: str = r'C:\Users\xbrjos\Desktop\temp MP\Fullimages'
        dsetname: str = self.particleContainer.datasetParent.name
        fullimg = cv2imread_fix(os.path.join(fullimgpath, dsetname + '.tif'))
        features: np.ndarray = get_particle_featurematrix(self.particleContainer, fullimg)
        predictions: np.ndarray = self.clf.predict(features)

    predictedMP: list = list(np.where(predictions == 1)[0])
    predictedNonMP: list = list(np.where(predictions == 0)[0])

    # linear ramp in the desired fraction, clipped to [0, 1]
    fracNonMPToTake: float = float(np.clip(-0.5 + 1/0.1 * self.fraction, 0.0, 1.0))
    numNonMPToTake: int = int(round(fracNonMPToTake * len(predictedNonMP)))
    self.predictedMPIndices = predictedMP + sample(predictedNonMP, numNonMPToTake)
def get_maximum_achievable_fraction(self) -> float: def get_maximum_achievable_fraction(self) -> float:
return 1.0 maxFrac: float = 0.10
return maxFrac
def apply_subsampling_method(self) -> list: def apply_subsampling_method(self) -> list:
self._load_classifier() numParticlesToSelect = round(len(self.particleContainer.particles) * self.fraction)
fullimgpath: str = r'C:\Users\xbrjos\Desktop\temp MP\Fullimages' if numParticlesToSelect > len(self.predictedMPIndices):
dsetname: str = self.particleContainer.datasetParent.name numParticlesToSelect = len(self.predictedMPIndices)
imgPath: str = os.path.join(fullimgpath, dsetname + '.tif')
fullimg = cv2imread_fix(imgPath) indicesToSelect = sample(self.predictedMPIndices, numParticlesToSelect)
features: np.ndarray = get_particle_featurematrix(self.particleContainer, fullimg)
predictions: np.ndarray = self.clf.predict(features)
indicesToSelect: set = self._get_measure_indices(list(predictions))
selectedParticles: list = [] selectedParticles: list = []
for particle in self.particleContainer.particles: for index in indicesToSelect:
if particle.index in indicesToSelect: selectedParticles.append(self.particleContainer.getParticleOfIndex(index))
selectedParticles.append(particle)
numOrigParticles = len(self.particleContainer.particles)
numEnhancedParticles = len(self.predictedMPIndices)
self.fractionForExtrapolation = self.fraction * (numOrigParticles/numEnhancedParticles)
return selectedParticles return selectedParticles
def _load_classifier(self) -> None: def _load_classifier(self) -> None:
...@@ -306,12 +345,12 @@ class TrainedSubsampling(SubsamplingMethod): ...@@ -306,12 +345,12 @@ class TrainedSubsampling(SubsamplingMethod):
mpIndices: list = list(np.where(assignments == 1)[0]) mpIndices: list = list(np.where(assignments == 1)[0])
nonMpIndices: list = list(np.where(assignments == 0)[0]) nonMpIndices: list = list(np.where(assignments == 0)[0])
numEstimMPParticles: int = len(mpIndices) numPredictedMP: int = len(mpIndices)
numParticlesToMeasure = round(len(predictedAssignments) * self.fraction) numParticlesToMeasure = round(len(predictedAssignments) * self.fraction)
if numParticlesToMeasure <= numEstimMPParticles: if numParticlesToMeasure <= numPredictedMP:
indicesToMeasure = set(sample(mpIndices, numParticlesToMeasure)) indicesToMeasure = set(sample(mpIndices, numParticlesToMeasure))
else: else:
remainingIndices: int = int(numParticlesToMeasure - numEstimMPParticles) remainingIndices: int = int(numParticlesToMeasure - numPredictedMP)
indicesToMeasure = set(mpIndices + sample(nonMpIndices, remainingIndices)) indicesToMeasure = set(mpIndices + sample(nonMpIndices, remainingIndices))
assert len(indicesToMeasure) == numParticlesToMeasure assert len(indicesToMeasure) == numParticlesToMeasure
...@@ -324,11 +363,18 @@ class TrainedSubsampling(SubsamplingMethod): ...@@ -324,11 +363,18 @@ class TrainedSubsampling(SubsamplingMethod):
It is used for extrapolating the mpCount of the subsampled particle list. It is used for extrapolating the mpCount of the subsampled particle list.
:return: :return:
""" """
score: float = self.score # score: float = self.score
diff: float = 1/self.fraction - 1 # i.e., from 50 % score to 100 % score # diff: float = 1/self.fraction - 1 # i.e., from 50 % score to 100 % score
factor: float = 1 + (1 - score)/0.5 * diff # factor: float = 1 + (1 - score)/0.5 * diff
return 1 / factor # pow: float = (1-self.fakeScore)
# return self.fraction # theoreticFactor: float = (1/factor**pow)
# print('actual fraction, theor. factor is', self.fraction, theoreticFactor)
# return theoreticFactor
return self.fractionForExtrapolation
# class ChemometricSubsampling(SubsamplingMethod): # class ChemometricSubsampling(SubsamplingMethod):
...@@ -463,4 +509,28 @@ class TrainedSubsampling(SubsamplingMethod): ...@@ -463,4 +509,28 @@ class TrainedSubsampling(SubsamplingMethod):
# assert abs(totalPointsAdded - numPointsToSelect) <= 1 # assert abs(totalPointsAdded - numPointsToSelect) <= 1
# for clusterIndex in pointsPerCluster.keys(): # for clusterIndex in pointsPerCluster.keys():
# assert 0 <= pointsPerCluster[clusterIndex] <= len(labels[labels == clusterIndex]) # assert 0 <= pointsPerCluster[clusterIndex] <= len(labels[labels == clusterIndex])
# return pointsPerCluster # return pointsPerCluster
\ No newline at end of file
if __name__ == '__main__':
    # Visual check only: for five classifier scores between 0.5 and 1.0, plot the value
    # 1 / factor**0.2 against the measured fraction, where
    # factor = 1 + (1 - score) / 0.5 * (1 / fraction - 1).
    import matplotlib.pyplot as plt

    fractions: np.ndarray = np.linspace(0.01, 1, 100)
    scores: np.ndarray = np.linspace(0.5, 1.0, 5)

    plt.clf()
    for score in scores:
        theorFractions = [1 / (1 + (1 - score) / 0.5 * (1 / frac - 1)) ** 0.2
                          for frac in fractions]
        plt.plot(fractions, theorFractions, label=str(score))

    plt.legend()
    plt.show()
\ No newline at end of file
import sys import sys
sys.path.append("C://Users//xbrjos//Desktop//Python") sys.path.append("C://Users//xbrjos//Desktop//Python")
from gepard.helperfunctions import cv2imread_fix from gepard.helperfunctions import cv2imread_fix
from gepard.dataset import loadData from gepard.dataset import DataSet,loadData
from gepard.analysis.particleContainer import ParticleContainer from gepard.analysis.particleContainer import ParticleContainer
import cv2 import cv2
import numpy as np import numpy as np
...@@ -9,43 +9,73 @@ from scipy import spatial ...@@ -9,43 +9,73 @@ from scipy import spatial
import os import os
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
from helpers import get_filterDimensions_from_dataset, get_center_from_filter_dimensions, convert_length_to_pixels from helpers import get_filterDimensions_from_dataset, get_center_from_filter_dimensions, convert_length_to_pixels
from graphs import get_distance_point_histogramdata # from evaluation import is_MP_particle
import evaluation
def get_particle_patchiness(dataset: DataSet, numCells: int = 50, onlyMP=False) -> float:
    """
    Computes a patchiness value for the particle distribution of a dataset.

    Particle centers (mean of the contour x and y coordinates) are counted on a grid of square
    tiles. Only the tiles inside a filled circle derived from the filter dimensions are
    evaluated; the returned value is std / mean of their counts, rounded to two decimals.

    :param dataset: The dataset whose particleContainer is evaluated
    :param numCells: Number of tiles along the larger of the two filter extents
    :param onlyMP: If True, only particles accepted by evaluation.is_MP_particle are counted
    :return: ratio of standard deviation to mean of the per-tile particle counts
    """
    offset, diameter, [width, height] = get_filterDimensions_from_dataset(dataset)
    center: np.ndarray = get_center_from_filter_dimensions(offset, diameter)
    width: float = convert_length_to_pixels(dataset, width)
    height: float = convert_length_to_pixels(dataset, height)

    # edge length of one tile in pixels, taken from the larger extent
    tileSizeX: int = int(round(width/numCells))
    tileSizeY: int = int(round(height/numCells))
    pixelsPerTile: int = max(tileSizeX, tileSizeY)

    # circle center and radius expressed in tile units
    centerX: int = int(round(convert_length_to_pixels(dataset, center[0] / pixelsPerTile)))
    centerY: int = int(round(convert_length_to_pixels(dataset, center[1] / pixelsPerTile)))
    radius: int = int(round(convert_length_to_pixels(dataset, diameter / pixelsPerTile * 0.5)))

    gridShape: tuple = (int(np.ceil(height / pixelsPerTile)) + 1,
                        int(np.ceil(width / pixelsPerTile)) + 1)
    densityImage: np.ndarray = np.zeros(gridShape)

    for particle in dataset.particleContainer.particles:
        if onlyMP and not evaluation.is_MP_particle(particle):
            continue
        meanX = np.mean(particle.contour[:, 0, 0])
        meanY = np.mean(particle.contour[:, 0, 1])
        row: int = int(round(meanY / pixelsPerTile))
        col: int = int(round(meanX / pixelsPerTile))
        densityImage[row, col] += 1

    mask: np.ndarray = np.zeros_like(densityImage)
    # NOTE(review): cv2.circle takes its center as (x, y), i.e. (col, row) of the mask. Passing
    # (centerY, centerX) looks transposed if center[0] is the x coordinate - confirm.
    cv2.circle(mask, (centerY, centerX), radius, 1, -1)

    countsInCircle: np.ndarray = densityImage[mask > 0]
    mean: np.ndarray = np.round(np.mean(countsInCircle), 2)
    std: np.ndarray = np.round(np.std(countsInCircle), 2)
    # NOTE(review): a mean of 0 (no counted particle inside the circle) is not handled here.
    return round(std/mean, 2)
if __name__ == '__main__': if __name__ == '__main__':
# imgpath: str = r'C:\Users\xbrjos\Desktop\temp MP\Fullimages' # imgpath: str = r'C:\Users\xbrjos\Desktop\temp MP\Fullimages'
# imgname: str = '181120_MCI_2_ds1+2_all_ kleiner500_10_1.tif' # imgname: str = '181120_MCI_2_ds1+2_all_ kleiner500_10_1.tif'
# imgname: str = '190619_5_PTPH_sld_190321_ds1_50_1_neu.tif' # imgname: str = '190619_5_PTPH_sld_190321_ds1_50_1_neu.tif'
# #191213_P190814_TPHZ_ds1_50_1
# img: np.ndarray = cv2imread_fix(os.path.join(imgpath, imgname)) # img: np.ndarray = cv2imread_fix(os.path.join(imgpath, imgname))
# gray: np.ndarray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY) # gray: np.ndarray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
# _, binimg = cv2.threshold(gray, 20, 1, cv2.THRESH_BINARY_INV) # _, binimg = cv2.threshold(gray, 20, 1, cv2.THRESH_BINARY_INV)
# distmap: np.ndarray = cv2.distanceTransform(binimg, cv2.DIST_L1, 3) # distmap: np.ndarray = cv2.distanceTransform(binimg, cv2.DIST_L1, 3)
# plt.imshow(distmap, cmap='gray') # plt.imshow(distmap, cmap='gray')
paths: list = [r'C:\Users\xbrjos\Desktop\temp MP\NewDatasets\water\181120_MCI_2_ds1+2_all_ kleiner500_10_1.pkl', paths: list = [r'C:\Users\xbrjos\Desktop\temp MP\NewDatasets\water\181120_MCI_2_ds1+2_all_ kleiner500_10_1.pkl',
r'C:\Users\xbrjos\Desktop\temp MP\NewDatasets\wastewater, slush\190619_5_PTPH_sld_190321_ds1_50_1_neu.pkl'] r'C:\Users\xbrjos\Desktop\temp MP\NewDatasets\wastewater, water\191213_P190814_TPHZ_ds1_50_1.pkl',
# paths.append(r'C:\Users\xbrjos\Desktop\temp MP\NewDatasets\water\190306_MCII_1_2_50.pkl') r'C:\Users\xbrjos\Desktop\temp MP\NewDatasets\Air\191119_RW6_Solling_50_2_neu.pkl']
# paths.append(r'C:\Users\xbrjos\Desktop\temp MP\NewDatasets\water\190222_MCII_1_1_50_1.pkl')
distances: list = [] distances: list = []
allParticles = []
for path in paths: for path in paths:
dset = loadData(path) dset = loadData(path)
particleContainer: ParticleContainer = dset.particleContainer get_particle_patchiness(dset, 50, onlyMP=True)
# particleCenters: list = [] for particle in dset.particleContainer.particles:
# for particle in particleContainer.particles: if evaluation.is_MP_particle(particle):
# particleCenters.append([np.mean(particle.contour[:, 0, 0]), np.mean(particle.contour[:, 0, 1])]) allParticles.append(particle.getParticleAssignment())
#
# closest_particle_distances: np.ndarray = np.zeros(len(particleCenters)) print(set(allParticles))
# particleCenters: np.ndarray = np.array(particleCenters) \ No newline at end of file
# print('particle centers done')
# distMat: np.ndarray = spatial.distance_matrix(particleCenters, particleCenters)
# print('distmat computed')
# for i in range(distMat.shape[0]):
# if i == 0:
# closest_particle_distances[i] = np.min(distMat[i, 1:])
# elif i == distMat.shape[0]-1:
# closest_particle_distances[i] = np.min(distMat[i, :-1])
# else:
# closest_particle_distances[i] = np.min([np.min(distMat[i, :i]), np.min(distMat[i, i+1:])])
# distances.append(closest_particle_distances)
# plt.boxplot(distances)
import numpy as np
cimport numpy as np
cimport cython
from cython cimport boundscheck, wraparound
@boundscheck(False)
@wraparound(False)
cdef bint box_overlaps_contour(unsigned int[:] boxTopLeftXY, unsigned int boxSize, unsigned int[:, :, :] contourData):
    """
    Checks whether a contour counts as overlapping a square box.

    The contour is accepted when its bounding box lies strictly inside the box after the box
    has been enlarged by half the contour width in x and half the contour height in y.

    :param boxTopLeftXY: (x, y) of the top left corner of the box
    :param boxSize: edge length of the square box
    :param contourData: contour points; x is read from [:, 0, 0], y from [:, 0, 1]
    :return: True if the contour overlaps the box
    """
    # All arithmetic is done in double precision. With unsigned integers, an expression such as
    # boxXmin - width/2 wraps around to a huge value as soon as it would become negative,
    # which rejects every contour near the left or top border.
    cdef double xmin, xmax, ymin, ymax, halfWidth, halfHeight
    cdef double boxXmin, boxXmax, boxYmin, boxYmax

    # x extent comes from column 0 of the last axis (column 1 holds y)
    xmin = np.min(contourData[:, 0, 0])
    xmax = np.max(contourData[:, 0, 0])
    halfWidth = (xmax - xmin) / 2.0
    boxXmin = boxTopLeftXY[0]
    boxXmax = boxXmin + boxSize
    if xmin <= boxXmin - halfWidth or xmax >= boxXmax + halfWidth:
        return False

    ymin = np.min(contourData[:, 0, 1])
    ymax = np.max(contourData[:, 0, 1])
    halfHeight = (ymax - ymin) / 2.0
    boxYmin = boxTopLeftXY[1]
    boxYmax = boxYmin + boxSize
    # both y bounds use the contour height (not its width) as tolerance
    return ymin > boxYmin - halfHeight and ymax < boxYmax + halfHeight
@boundscheck(False)
@wraparound(False)
def def_get_indices_of_overlapping_particles(contours not None, unsigned int[:, :] topLefts, unsigned int boxSize):
    """
    Collects the indices of all contours that overlap at least one of the given boxes.

    :param contours: sequence of contour arrays, each assignable to an unsigned int[:, :, :] view
    :param topLefts: one row per box, holding the top left (x, y) of that box
    :param boxSize: edge length of the square boxes
    :return: unsigned int memoryview with the indices of the overlapping contours, in ascending
             order; each index occurs at most once
    """
    cdef Py_ssize_t particleIdx, boxIdx
    cdef unsigned int numFound = 0
    cdef unsigned int numParticles = len(contours)
    cdef unsigned int numBoxes = topLefts.shape[0]
    # sized for the case that every particle overlaps; trimmed to numFound on return
    cdef unsigned int[:] foundIndices = np.zeros(numParticles, dtype=np.uint32)
    cdef unsigned int[:, :, :] contour

    for particleIdx in range(numParticles):
        contour = contours[particleIdx]
        for boxIdx in range(numBoxes):
            if box_overlaps_contour(topLefts[boxIdx, :], boxSize, contour):
                foundIndices[numFound] = particleIdx
                numFound += 1
                break  # one overlapping box is enough for this particle

    return foundIndices[:numFound]
...@@ -30,7 +30,7 @@ def get_random_topLefts(int numBoxes, double boxSize, double radius, double maxA ...@@ -30,7 +30,7 @@ def get_random_topLefts(int numBoxes, double boxSize, double radius, double maxA
cdef int outerCounter, counter, x, y, i, j, diffX, diffY, successfullyAdded cdef int outerCounter, counter, x, y, i, j, diffX, diffY, successfullyAdded
cdef bint validSolutionFound, boxOverlaps cdef bint validSolutionFound, boxOverlaps
srand(42) # setting seed srand(seed) # setting seed
assert RAND_MAX == 32767 # this value is used in the random-function above. For performance-reasons, it is directly typed in there as a number assert RAND_MAX == 32767 # this value is used in the random-function above. For performance-reasons, it is directly typed in there as a number
maxDist = radius - np.sqrt((boxSize/2)**2 + (boxSize/2)**2) maxDist = radius - np.sqrt((boxSize/2)**2 + (boxSize/2)**2)
outerCounter = 0 outerCounter = 0
......
...@@ -19,8 +19,14 @@ if len(sys.argv) == 1: ...@@ -19,8 +19,14 @@ if len(sys.argv) == 1:
# ext = Extension("getRandomTopLefts", ["getRandomTopLefts.pyx"], extra_compile_args=['-O3'],) # ext = Extension("getRandomTopLefts", ["getRandomTopLefts.pyx"], extra_compile_args=['-O3'],)
# setup(
# name="get a given number of random topLefts",
# ext_modules=cythonize("randoms.pyx", annotate=True), # accepts a glob pattern
# include_dirs=[np.get_include()]
# )
setup( setup(
name="get a given number of random topLefts", name="checks which particle contours overlap the boxes",
ext_modules=cythonize("randoms.pyx", annotate=True), # accepts a glob pattern ext_modules=cythonize("particleBoxOverlap.pyx", annotate=True), # accepts a glob pattern
include_dirs=[np.get_include()] include_dirs=[np.get_include()]
) )
\ No newline at end of file
...@@ -8,6 +8,7 @@ Created on Wed Jan 22 13:57:28 2020 ...@@ -8,6 +8,7 @@ Created on Wed Jan 22 13:57:28 2020
# import pickle # import pickle
import os import os
import numpy as np import numpy as np
import time
# import matplotlib.pyplot as plt # import matplotlib.pyplot as plt
import concurrent.futures import concurrent.futures
import operator import operator
...@@ -20,6 +21,7 @@ from helpers import ParticleBinSorter ...@@ -20,6 +21,7 @@ from helpers import ParticleBinSorter
import methods as meth import methods as meth
import geometricMethods as gmeth import geometricMethods as gmeth
from chemometrics import chemometricMethods as cmeth from chemometrics import chemometricMethods as cmeth
from chemometrics.imageOperations import get_particle_patchiness
from datasetOperations import ParticleVariations from datasetOperations import ParticleVariations
...@@ -33,8 +35,9 @@ def get_methods_to_test(dataset: dataset.DataSet, fractions: list = [], maxTries ...@@ -33,8 +35,9 @@ def get_methods_to_test(dataset: dataset.DataSet, fractions: list = [], maxTries
:return: list of measurement Objects that are applicable :return: list of measurement Objects that are applicable
""" """
if len(fractions) == 0: if len(fractions) == 0:
fractions: list = [0.02, 0.05, 0.1, 0.25, 0.5, 0.7, 0.9] fractions: list = [0.02, 0.03, 0.05, 0.1, 0.25, 0.5, 0.7, 0.9]
# fractions: list = [0.1, 0.3, 0.5] # fractions: list = [0.02, 0.06, 0.15, 0.2, 0.5]
# fractions: list = [0.01, 0.1, 0.5, 0.9]
methods: list = [] methods: list = []
particleContainer = dataset.particleContainer particleContainer = dataset.particleContainer
...@@ -47,15 +50,19 @@ def get_methods_to_test(dataset: dataset.DataSet, fractions: list = [], maxTries ...@@ -47,15 +50,19 @@ def get_methods_to_test(dataset: dataset.DataSet, fractions: list = [], maxTries
methods += boxCreator.get_spiralBoxSubsamplers_for_fraction(fraction) methods += boxCreator.get_spiralBoxSubsamplers_for_fraction(fraction)
methods += boxCreator.get_randomBoxSubsamplers_for_fraction(fraction, maxTries=maxTries) methods += boxCreator.get_randomBoxSubsamplers_for_fraction(fraction, maxTries=maxTries)
methods += boxCreator.get_randomQuarterBoxSubsamplers_for_fraction(fraction, maxTries=maxTries) methods += boxCreator.get_randomQuarterBoxSubsamplers_for_fraction(fraction, maxTries=maxTries)
# methods.append(cmeth.TrainedSubsampling(particleContainer, fraction)) for fakeScore in [0.6, 0.7, 0.8]:
# methods.append(cmeth.ChemometricSubsampling(particleContainer, fraction)) trainedSampling = cmeth.TrainedSubsampling(particleContainer, fraction, fakeScore=fakeScore)
if trainedSampling.config_is_valid():
methods.append(trainedSampling)
return methods return methods
def update_sample(sample, force: bool, index: int): def update_sample(sample, force: bool, index: int):
sample.load_dataset() sample.load_dataset()
t0 = time.time()
methods: list = get_methods_to_test(sample.dataset) methods: list = get_methods_to_test(sample.dataset)
print('getting methods for sample', sample.dataset.name, 'took', round(time.time()-t0, 2), 'seconds')
sample.update_result_with_methods(methods, force) sample.update_result_with_methods(methods, force)
return sample, index return sample, index
...@@ -131,34 +138,58 @@ class TotalResults(object): ...@@ -131,34 +138,58 @@ class TotalResults(object):
self.sampleResults[index] = updatedResult self.sampleResults[index] = updatedResult
print(f'done updating {updatedResult.dataset.name} at index {index}') print(f'done updating {updatedResult.dataset.name} at index {index}')
def get_error_vs_fraction_data(self, attributes: list = [], methods: list = []) -> dict: def get_error_vs_fraction_data(self, attributes: list = [], methods: list = [], partCount: tuple = (0, np.inf)) -> tuple:
""" """
:param attributes: A list of attributes that should be used for filtering the samples. Only samples with an :param attributes: A list of attributes that should be used for filtering the samples. Only samples with an
attribute from within that list are considered. attribute from within that list are considered.
:param methods: A list of methods to extract
:param patchiness: Tuple, first val: min patchiness, second val, max patchiness
:return: Dict: Key: Method Label, :return: Dict: Key: Method Label,
Value: {Dict: Key:Measured Fraction, Value: Tuple (averaged MPCountError, StDev MPCountError) over all samples} Value: {Dict: Key:Measured Fraction, Value: Tuple (averaged MPCountError, StDev MPCountError) over all samples}
""" """
result: dict = {} result: dict = {}
allParticles: list = []
numSamples: int = 0 numSamples: int = 0
for sample in self.sampleResults: for sample in self.sampleResults:
sample: SampleResult = sample sample: SampleResult = sample
if attributes == [] or sample.has_any_attribute(attributes): if attributes == [] or sample.has_any_attribute(attributes):
numSamples += 1 if partCount[0] == 0 and partCount[1] == np.inf:
for res in sample.results: samplePartCount: int = 1
res: SubsamplingResult = res # samplePatchiness: float = 1.0 # doesn't matter in this case
method: meth.SubsamplingMethod = res.method else:
if methods == [] or method.matches_any_pattern(methods): if sample.dataset is None:
label: str = method.label sample.load_dataset()
frac: float = method.fraction samplePartCount: int = len(sample.dataset.particleContainer.particles)
error: float = res.mpCountError # samplePatchiness: float = sample.get_patchiness()
stdev: float = res.mpCountErrorStDev # print(sample.sampleName, samplePatchiness)
if partCount[0] <= samplePartCount < partCount[1]:
if label not in result.keys(): numSamples += 1
result[label] = {frac: [(error, stdev)]} if sample.dataset is None:
elif frac not in result[label].keys(): sample.load_dataset()
result[label][frac] = [(error, stdev)]
else: for particle in sample.dataset.particleContainer.particles:
result[label][frac].append((error, stdev)) allParticles.append(particle)
for res in sample.results:
res: SubsamplingResult = res
method: meth.SubsamplingMethod = res.method
if methods == [] or method.matches_any_pattern(methods):
label: str = method.label
frac: float = method.fraction
error: float = res.mpCountError
stdev: float = res.mpCountErrorStDev
if label not in result.keys():
result[label] = {frac: [(error, stdev)]}
elif frac not in result[label].keys():
result[label][frac] = [(error, stdev)]
else:
result[label][frac].append((error, stdev))
numMPParticles: float = get_number_of_MP_particles(allParticles)
stats: dict = {'numSamples': numSamples,
'meanParticleCount': round(len(allParticles) / numSamples),
'meanMPFrac': round(numMPParticles / len(allParticles) * 100, 1)}
for method in result.keys(): for method in result.keys():
methodRes: dict = result[method] methodRes: dict = result[method]
...@@ -167,7 +198,7 @@ class TotalResults(object): ...@@ -167,7 +198,7 @@ class TotalResults(object):
meanStd = np.mean([i[1] for i in methodRes[fraction]]) meanStd = np.mean([i[1] for i in methodRes[fraction]])
methodRes[fraction] = (meanError, meanStd) methodRes[fraction] = (meanError, meanStd)
return numSamples, result return stats, result
class SubsamplingResult(object): class SubsamplingResult(object):
...@@ -262,13 +293,14 @@ class SampleResult(object): ...@@ -262,13 +293,14 @@ class SampleResult(object):
""" """
An object the stores all generated results per sample and can update and report on them. An object the stores all generated results per sample and can update and report on them.
""" """
def __init__(self, filepath: str, numVariations: int = 10): def __init__(self, filepath: str, numVariations: int = 20):
super(SampleResult, self).__init__() super(SampleResult, self).__init__()
self.filepath: str = filepath self.filepath: str = filepath
self.dataset: dataset.DataSet = None self.dataset: dataset.DataSet = None
self.results: list = [] self.results: list = []
self.attributes: list = [] self.attributes: list = []
self.numVariations: int = numVariations # how often the sample is altered for each method self.numVariations: int = numVariations # how often the sample is altered for each method
self.patchiness: float = -1
@property @property
def sampleName(self) -> str: def sampleName(self) -> str:
...@@ -278,6 +310,13 @@ class SampleResult(object): ...@@ -278,6 +310,13 @@ class SampleResult(object):
self.dataset = dataset.loadData(self.filepath) self.dataset = dataset.loadData(self.filepath)
assert self.dataset is not None assert self.dataset is not None
def get_patchiness(self) -> float:
    """
    Returns the patchiness of the sample's dataset, computing and caching it on first use.

    A missing patchiness attribute or the placeholder value -1 both trigger the computation.
    The dataset is loaded first if it is not in memory yet.
    :return: the cached or freshly computed patchiness
    """
    notYetComputed: bool = getattr(self, "patchiness", -1) == -1
    if notYetComputed:
        if self.dataset is None:
            self.load_dataset()
        self.patchiness = get_particle_patchiness(self.dataset)
    return self.patchiness
def update_result_with_methods(self, methods: list, force: bool = False) -> list: def update_result_with_methods(self, methods: list, force: bool = False) -> list:
""" """
Updates result with the given method (contains desiredFraction already) Updates result with the given method (contains desiredFraction already)
...@@ -289,8 +328,8 @@ class SampleResult(object): ...@@ -289,8 +328,8 @@ class SampleResult(object):
self.load_dataset() self.load_dataset()
updatedMethods: list = [] updatedMethods: list = []
t0 = time.time()
particleVariations: ParticleVariations = ParticleVariations(self.dataset, numVariations=self.numVariations) particleVariations: ParticleVariations = ParticleVariations(self.dataset, numVariations=self.numVariations)
needsToBeUpdated: dict = {method: False for method in methods} needsToBeUpdated: dict = {method: False for method in methods}
for index, particleContainer in enumerate(particleVariations.get_particleContainer_variations()): for index, particleContainer in enumerate(particleVariations.get_particleContainer_variations()):
...@@ -312,11 +351,10 @@ class SampleResult(object): ...@@ -312,11 +351,10 @@ class SampleResult(object):
if needsToBeUpdated[method]: if needsToBeUpdated[method]:
subParticles = method.apply_subsampling_method() subParticles = method.apply_subsampling_method()
result.add_result(method.particleContainer.particles, subParticles) result.add_result(method.particleContainer.particles, subParticles)
if method not in updatedMethods: updatedMethods.append(method)
updatedMethods.append(method)
# print(f'updated {self.sampleName} with {method.label} at fraction {method.fraction}, ' # print(f'updated {self.sampleName} with {method.label} at fraction {method.fraction}, '
# f'iteration {index+1}') # f'iteration {index+1}, took {round(time.time()-t0, 2)}, seconds')
print(f'finished updating sample {self.sampleName}, it took {round(time.time()-t0, 2)} seconds')
return updatedMethods return updatedMethods
def set_attribute(self, newAttribute: str) -> None: def set_attribute(self, newAttribute: str) -> None:
......
import time
import numpy as np import numpy as np
from itertools import combinations from itertools import combinations
from methods import SubsamplingMethod from methods import SubsamplingMethod
...@@ -6,7 +7,7 @@ import sys ...@@ -6,7 +7,7 @@ import sys
sys.path.append("C://Users//xbrjos//Desktop//Python") sys.path.append("C://Users//xbrjos//Desktop//Python")
from gepard import dataset from gepard import dataset
import helpers import helpers
from cythonModules import randoms from cythonModules import randoms, particleBoxOverlap
def box_overlaps_other_box(topLeft1: list, topLeft2: list, boxSize: float) -> bool: def box_overlaps_other_box(topLeft1: list, topLeft2: list, boxSize: float) -> bool:
...@@ -53,16 +54,27 @@ class BoxSelectionSubsamplingMethod(SubsamplingMethod): ...@@ -53,16 +54,27 @@ class BoxSelectionSubsamplingMethod(SubsamplingMethod):
return abs(topleft[0] - cntStart[0]) + abs(topleft[1] - cntStart[1]) return abs(topleft[0] - cntStart[0]) + abs(topleft[1] - cntStart[1])
subParticles: list = [] subParticles: list = []
topLefts: list = self.get_topLeft_of_boxes() topLefts: np.ndarray = np.array(self.get_topLeft_of_boxes())
boxWidthHeight: tuple = (self.boxSize, self.boxSize) cython: bool = False
if cython:
topLefts = np.round(topLefts).astype(np.uint32)
# contours: np.ndarray = np.array(self.particleContainer.getParticleContours())
contours = [cnt.astype(np.uint32) for cnt in self.particleContainer.getParticleContours()]
boxSize: np.uint32 = np.uint32(round(self.boxSize))
indices = particleBoxOverlap.def_get_indices_of_overlapping_particles(contours, topLefts, boxSize)
for index in indices:
subParticles.append(self.particleContainer.getParticleOfIndex(index))
for particle in self.particleContainer.particles: else:
cntStart: tuple = (particle.contour[0, 0, 0], particle.contour[0, 0, 1]) boxWidthHeight: tuple = (self.boxSize, self.boxSize)
sortedTopLefts = sorted(topLefts, key=distanceToCnt) for particle in self.particleContainer.particles:
cntStart: tuple = (particle.contour[0, 0, 0], particle.contour[0, 0, 1])
sortedTopLefts = sorted(topLefts, key=distanceToCnt)
for topLeftXY in sortedTopLefts: for topLeftXY in sortedTopLefts:
if helpers.box_overlaps_contour(topLeftXY, boxWidthHeight, particle.contour): if helpers.box_overlaps_contour(topLeftXY, boxWidthHeight, particle.contour):
subParticles.append(particle) subParticles.append(particle)
break
return subParticles return subParticles
...@@ -263,6 +275,7 @@ class CrossBoxSubSampling(BoxSelectionSubsamplingMethod): ...@@ -263,6 +275,7 @@ class CrossBoxSubSampling(BoxSelectionSubsamplingMethod):
numBoxes: int = 2 * self.numBoxesAcross - 1 numBoxes: int = 2 * self.numBoxesAcross - 1
totalBoxArea: float = numBoxes * (maxBoxSize ** 2) totalBoxArea: float = numBoxes * (maxBoxSize ** 2)
maxFraction: float = totalBoxArea / self.filterArea maxFraction: float = totalBoxArea / self.filterArea
return maxFraction return maxFraction
def equals(self, otherMethod) -> bool: def equals(self, otherMethod) -> bool:
...@@ -386,7 +399,7 @@ class RandomBoxSampling(BoxSelectionSubsamplingMethod): ...@@ -386,7 +399,7 @@ class RandomBoxSampling(BoxSelectionSubsamplingMethod):
@property
def label(self) -> str:
    """Return the display label of this method, including the number of boxes."""
    return f'Boxes Random layout ({self.numBoxes} boxes)'
def equals(self, otherMethod) -> bool: def equals(self, otherMethod) -> bool:
equals: bool = False equals: bool = False
...@@ -396,7 +409,6 @@ class RandomBoxSampling(BoxSelectionSubsamplingMethod): ...@@ -396,7 +409,6 @@ class RandomBoxSampling(BoxSelectionSubsamplingMethod):
return equals return equals
def get_topLeft_of_boxes(self) -> list: def get_topLeft_of_boxes(self) -> list:
valid, topLefts = randoms.get_random_topLefts(self.numBoxes, self.boxSize, valid, topLefts = randoms.get_random_topLefts(self.numBoxes, self.boxSize,
self.filterDiameter/2, self.__maxAngle, self.filterDiameter/2, self.__maxAngle,
seed=self.randomSeed, maxTries=self.maxTries) seed=self.randomSeed, maxTries=self.maxTries)
...@@ -413,7 +425,7 @@ class RandomQuarterBoxes(RandomBoxSampling): ...@@ -413,7 +425,7 @@ class RandomQuarterBoxes(RandomBoxSampling):
@property
def label(self) -> str:
    """Return the display label of the quarter variant, including the number of boxes."""
    return f'Boxes Random layout (quarter) ({self.numBoxes} boxes)'
def determine_max_achievable_frac(method: BoxSelectionSubsamplingMethod, numBoxes: int) -> float: def determine_max_achievable_frac(method: BoxSelectionSubsamplingMethod, numBoxes: int) -> float:
......
This diff is collapsed.
...@@ -172,3 +172,14 @@ def convert_length_to_pixels(dataset: dataset.DataSet, length: float) -> float: ...@@ -172,3 +172,14 @@ def convert_length_to_pixels(dataset: dataset.DataSet, length: float) -> float:
pixelScale: float = (dataset.pixelscale_df if imgMode == 'df' else dataset.pixelscale_bf) pixelScale: float = (dataset.pixelscale_df if imgMode == 'df' else dataset.pixelscale_bf)
length /= pixelScale length /= pixelScale
return length return length
if __name__ == '__main__':
    # Ad-hoc script: print the result of get_Anger_fraction for a few
    # hard-coded particle counts and MP fractions.
    counts = [2600, 14662, 9472, 16533]
    mpfracs = [4.7, 0.9, 0.5, 0.5]  # divided by 100 before being passed as mpFraction
    errorMargins = [0.2]
    sigma = 0.7

    # counts and mpfracs are paired element-wise; every pair is evaluated
    # once per error margin.
    for count, mpFrac in zip(counts, mpfracs):
        for margin in errorMargins:
            angerFrac = get_Anger_fraction(count, sigma=sigma, mpFraction=mpFrac/100, errorMargin=margin)
            # The last column relates the returned value to the particle count.
            print(count, mpFrac, margin, angerFrac, angerFrac / count)
\ No newline at end of file
...@@ -4,11 +4,11 @@ import time ...@@ -4,11 +4,11 @@ import time
from evaluation import TotalResults, SampleResult from evaluation import TotalResults, SampleResult
from input_output import get_pkls_from_directory, get_attributes_from_foldername, save_results, load_results from input_output import get_pkls_from_directory, get_attributes_from_foldername, save_results, load_results
from graphs import get_error_vs_frac_plot, get_distance_hist_plots from graphs import get_error_vs_frac_plot, get_distance_hist_plots, get_error_vs_mpfrac_plot
""" """
IMPORTANT!!! IMPORTANT!!!
SET GEPARD TO EVALUATION BRANCH (WITHOUT THE TILING STUFF), OTHERWISE SOME OF THE LEGACY CONVERTS MIGHT FAIL.. SET GEPARD TO DEVELOPMENT BRANCH (WITHOUT THE TILING STUFF), OTHERWISE SOME OF THE LEGACY CONVERTS MIGHT FAIL..
""" """
if __name__ == '__main__': if __name__ == '__main__':
...@@ -17,7 +17,7 @@ if __name__ == '__main__': ...@@ -17,7 +17,7 @@ if __name__ == '__main__':
# counter = 0 # counter = 0
# for folder in pklsInFolders.keys(): # for folder in pklsInFolders.keys():
# for samplePath in pklsInFolders[folder]: # for samplePath in pklsInFolders[folder]:
# if counter < 100: # if counter < 50:
# newSampleResult: SampleResult = results.add_sample(samplePath) # newSampleResult: SampleResult = results.add_sample(samplePath)
# for attr in get_attributes_from_foldername(folder): # for attr in get_attributes_from_foldername(folder):
# newSampleResult.set_attribute(attr) # newSampleResult.set_attribute(attr)
...@@ -27,18 +27,22 @@ if __name__ == '__main__': ...@@ -27,18 +27,22 @@ if __name__ == '__main__':
# results.update_all(multiprocessing=True) # results.update_all(multiprocessing=True)
# print('updating all took', time.time()-t0, 'seconds') # print('updating all took', time.time()-t0, 'seconds')
# #
# save_results('results2.res', results) # save_results('results5.res', results)
results: TotalResults = load_results('results2.res')
#
plot: Figure = get_error_vs_frac_plot(results,
attributes=[['air'], ['water'], ['sediment', 'soil', 'beach'], ['slush']],
# # attributes=[['$ mpFraction < 0.02'], ['$ mpFraction >= 0.02']],
# # methods=[['random layout (7', 'random layout (1']]*2)
methods=[['random sub', 'cross', 'layout (10']]*4)
# # methods=[['Random Subsampling', 'Sizebin']] * 2)
# # methods=[['layout (5', 'layout (10', 'layout (15', 'cross', 'random subsampling', 'sizebin']] * 2)
# plot.show()
results: TotalResults = load_results('results4.res')
plot: Figure = get_error_vs_frac_plot(results,
# attributes=[],
# attributes=[['air'], ['water'], ['sediment', 'soil', 'beach'], ['slush']],
partCounts=[(0, 2000), (2000, 5000), (5000, 10000), (10000, 40000)],
# methods=[['random subs', 'dummy']]*2,
# methods=[['random subsampling', 'random']]*2,
methods=[['layout (10', 'crossLayout (3', 'random subsampling', 'quarter) (10']] * 4,
poissonRef=False, fill=True)
plot.show()
#
# plot2: Figure = get_distance_hist_plots(results, # plot2: Figure = get_distance_hist_plots(results,
# attributes=[['air'], ['water'], ['sediment', 'soil', 'beach'], ['slush']]) # attributes=[['air'], ['water'], ['sediment', 'soil', 'beach'], ['slush']])
# plot2.show() # plot2.show()
# plot3: Figure = get_error_vs_mpfrac_plot(results, attributes=[['air'], ['water'], ['sediment', 'soil', 'beach'], ['slush']])
# plot3.show()
\ No newline at end of file
...@@ -18,6 +18,7 @@ def get_default_DataSet() -> DataSet: ...@@ -18,6 +18,7 @@ def get_default_DataSet() -> DataSet:
dset.imagedim_df = [10, 10] dset.imagedim_df = [10, 10]
dset.pixelscale_df = 1.0 dset.pixelscale_df = 1.0
setMaxDim(dset, 10, 0, 10, 0, 10) setMaxDim(dset, 10, 0, 10, 0, 10)
dset.particleContainer = get_default_ParticleContainer()
return dset return dset
......
...@@ -127,8 +127,8 @@ class TestTrainedSubsampling(unittest.TestCase): ...@@ -127,8 +127,8 @@ class TestTrainedSubsampling(unittest.TestCase):
self.assertEqual(type(self.trainedSampling.label), str) self.assertEqual(type(self.trainedSampling.label), str)
def test_load_classifier(self): def test_load_classifier(self):
self.assertTrue(self.trainedSampling.clf is None) # self.assertTrue(self.trainedSampling.clf is None)
self.assertTrue(self.trainedSampling.score is None) # self.assertTrue(self.trainedSampling.score is None) # TODO: REIMPLEMENT
self.trainedSampling.clfPath = r'C:\Users\xbrjos\Desktop\Python\Subsampling\chemometrics\RandomForestClassifier, score 0.72.pkl' self.trainedSampling.clfPath = r'C:\Users\xbrjos\Desktop\Python\Subsampling\chemometrics\RandomForestClassifier, score 0.72.pkl'
self.trainedSampling._load_classifier() self.trainedSampling._load_classifier()
...@@ -160,16 +160,16 @@ class TestTrainedSubsampling(unittest.TestCase): ...@@ -160,16 +160,16 @@ class TestTrainedSubsampling(unittest.TestCase):
for index in range(numMPParticles): # all MP Particles should be measured for index in range(numMPParticles): # all MP Particles should be measured
self.assertTrue(index in indicesToMeasure) self.assertTrue(index in indicesToMeasure)
def test_get_theoretic_fraction(self): # def test_get_theoretic_fraction(self):
for frac in [0.1, 0.3, 0.5, 0.9, 1.0]: # for frac in [0.1, 0.3, 0.5, 0.9, 1.0]:
for score in [0.5, 0.7, 1.0]: # for score in [0.5, 0.7, 1.0]:
self.trainedSampling.fraction = frac # self.trainedSampling.fraction = frac
self.trainedSampling.score = score # self.trainedSampling.score = score
#
score: float = self.trainedSampling.score # score: float = self.trainedSampling.score
diff: float = 1 / self.trainedSampling.fraction - 1 # i.e., from 50 % score to 100 % score # diff: float = 1 / self.trainedSampling.fraction - 1 # i.e., from 50 % score to 100 % score
factor: float = 1 + (1 - score) / 0.5 * diff # factor: float = 1 + (1 - score) / 0.5 * diff
self.assertEqual(self.trainedSampling.get_theoretic_frac(), 1/factor) # self.assertEqual(self.trainedSampling.get_theoretic_frac(), 1/factor)
# def test_make_subparticles_match_fraction(self): # def test_make_subparticles_match_fraction(self):
......
...@@ -43,8 +43,10 @@ class TestTotalResults(unittest.TestCase): ...@@ -43,8 +43,10 @@ class TestTotalResults(unittest.TestCase):
def test_get_error_vs_fraction_data(self): def test_get_error_vs_fraction_data(self):
firstSample: SampleResult = self.totalResults.add_sample('sample1.pkl') firstSample: SampleResult = self.totalResults.add_sample('sample1.pkl')
firstSample.set_attribute('to be used') firstSample.set_attribute('to be used')
firstSample.dataset = get_default_DataSet()
secondSample: SampleResult = self.totalResults.add_sample('sample2.pkl') secondSample: SampleResult = self.totalResults.add_sample('sample2.pkl')
secondSample.set_attribute('not to be used') secondSample.set_attribute('not to be used')
secondSample.dataset = get_default_DataSet()
firstMethod: meth.RandomSampling = meth.RandomSampling(None, 0.1) firstMethod: meth.RandomSampling = meth.RandomSampling(None, 0.1)
firstResult: SubsamplingResult = SubsamplingResult(firstMethod) firstResult: SubsamplingResult = SubsamplingResult(firstMethod)
...@@ -76,8 +78,8 @@ class TestTotalResults(unittest.TestCase): ...@@ -76,8 +78,8 @@ class TestTotalResults(unittest.TestCase):
firstSample.results = [firstResult, secondResult, thirdResult, thirdResult3] firstSample.results = [firstResult, secondResult, thirdResult, thirdResult3]
secondSample.results = [firstResult, secondResult, thirdResult2, thirdResult3] secondSample.results = [firstResult, secondResult, thirdResult2, thirdResult3]
numSamples, resultDict = self.totalResults.get_error_vs_fraction_data() stats, resultDict = self.totalResults.get_error_vs_fraction_data()
self.assertEqual(numSamples, 2) self.assertEqual(stats['numSamples'], 2)
self.assertEqual(list(resultDict.keys()), [firstMethod.label, secondMethod.label, thirdMethod.label]) self.assertEqual(list(resultDict.keys()), [firstMethod.label, secondMethod.label, thirdMethod.label])
for i, key in enumerate(resultDict.keys()): for i, key in enumerate(resultDict.keys()):
...@@ -101,8 +103,8 @@ class TestTotalResults(unittest.TestCase): ...@@ -101,8 +103,8 @@ class TestTotalResults(unittest.TestCase):
self.assertAlmostEqual(mean, 50) self.assertAlmostEqual(mean, 50)
self.assertAlmostEqual(stdev, 16.32993161855452) self.assertAlmostEqual(stdev, 16.32993161855452)
numSamples, filteredResultDict = self.totalResults.get_error_vs_fraction_data(attributes=['to be used']) stats, filteredResultDict = self.totalResults.get_error_vs_fraction_data(attributes=['to be used'])
self.assertEqual(numSamples, 1) self.assertEqual(stats['numSamples'], 1)
self.assertEqual(list(filteredResultDict.keys()), [firstMethod.label, secondMethod.label, thirdMethod.label]) self.assertEqual(list(filteredResultDict.keys()), [firstMethod.label, secondMethod.label, thirdMethod.label])
for i, key in enumerate(filteredResultDict.keys()): for i, key in enumerate(filteredResultDict.keys()):
...@@ -129,16 +131,16 @@ class TestTotalResults(unittest.TestCase): ...@@ -129,16 +131,16 @@ class TestTotalResults(unittest.TestCase):
self.assertAlmostEqual(mean, 50) self.assertAlmostEqual(mean, 50)
self.assertAlmostEqual(stdev, 16.32993161855452) self.assertAlmostEqual(stdev, 16.32993161855452)
numSamples, filteredResultDict = self.totalResults.get_error_vs_fraction_data(methods=['cross']) stats, filteredResultDict = self.totalResults.get_error_vs_fraction_data(methods=['cross'])
self.assertEqual(numSamples, 2) self.assertEqual(stats['numSamples'], 2)
self.assertEqual(list(filteredResultDict.keys()), [secondMethod.label, thirdMethod.label]) self.assertEqual(list(filteredResultDict.keys()), [secondMethod.label, thirdMethod.label])
numSamples, filteredResultDict = self.totalResults.get_error_vs_fraction_data(methods=['Cross']) stats, filteredResultDict = self.totalResults.get_error_vs_fraction_data(methods=['Cross'])
self.assertEqual(numSamples, 2) self.assertEqual(stats['numSamples'], 2)
self.assertEqual(list(filteredResultDict.keys()), [secondMethod.label, thirdMethod.label]) self.assertEqual(list(filteredResultDict.keys()), [secondMethod.label, thirdMethod.label])
numSamples, filteredResultDict = self.totalResults.get_error_vs_fraction_data(methods=['random']) stats, filteredResultDict = self.totalResults.get_error_vs_fraction_data(methods=['random'])
self.assertEqual(numSamples, 2) self.assertEqual(stats['numSamples'], 2)
self.assertEqual(list(filteredResultDict.keys()), [firstMethod.label]) self.assertEqual(list(filteredResultDict.keys()), [firstMethod.label])
...@@ -279,7 +281,7 @@ class TestSampleResult(unittest.TestCase): ...@@ -279,7 +281,7 @@ class TestSampleResult(unittest.TestCase):
possibleSpiralBoxMethods = 3 possibleSpiralBoxMethods = 3
possibleRandomBoxMethods = 3 possibleRandomBoxMethods = 3
possibleQuarterRandomBoxMethods = 3 possibleQuarterRandomBoxMethods = 3
possibleChemometricMethods = 0 possibleChemometricMethods = 3 # i.e., the dummy classifier methods
totalPossible = possibleCrossBoxMethods + possibleRandomMethods + \ totalPossible = possibleCrossBoxMethods + possibleRandomMethods + \
possibleSpiralBoxMethods + possibleChemometricMethods + \ possibleSpiralBoxMethods + possibleChemometricMethods + \
possibleRandomBoxMethods + possibleQuarterRandomBoxMethods possibleRandomBoxMethods + possibleQuarterRandomBoxMethods
...@@ -330,7 +332,7 @@ class TestSampleResult(unittest.TestCase): ...@@ -330,7 +332,7 @@ class TestSampleResult(unittest.TestCase):
possibleRandomMethods = 4 possibleRandomMethods = 4
possibleCrossBoxMethods = 3 possibleCrossBoxMethods = 3
possibleSpiralBoxMethods = 3 possibleSpiralBoxMethods = 3
possibleChemometricMethods = 0 possibleChemometricMethods = 3
possibleRandomBoxMethods = 3 possibleRandomBoxMethods = 3
possibleQuarterRandomBoxMethods = 3 possibleQuarterRandomBoxMethods = 3
totalPossible = possibleCrossBoxMethods + possibleRandomMethods + \ totalPossible = possibleCrossBoxMethods + possibleRandomMethods + \
...@@ -369,7 +371,7 @@ class TestSampleResult(unittest.TestCase): ...@@ -369,7 +371,7 @@ class TestSampleResult(unittest.TestCase):
self.assertEqual(len(result.mpCountErrors), 0) # because the added results haven't set any self.assertEqual(len(result.mpCountErrors), 0) # because the added results haven't set any
updatedMethods = self.sampleResult.update_result_with_methods(methods, force=True) updatedMethods = self.sampleResult.update_result_with_methods(methods, force=True)
self.assertEqual(len(updatedMethods), 2) # because now we force the update self.assertEqual(len(updatedMethods), 2*numVariations) # because now we force the update
self.assertTrue(method1 in updatedMethods) self.assertTrue(method1 in updatedMethods)
self.assertTrue(method2 in updatedMethods) self.assertTrue(method2 in updatedMethods)
......