Commits (2)
@@ -20,3 +20,7 @@ cythonModules/build/
chemometrics/Assignments.txt
chemometrics/Data.txt
chemometrics/Assignments_all.txt
chemometrics/Data_all.txt
@@ -5,7 +5,7 @@ from sklearn.decomposition import PCA
from sklearn.cluster import DBSCAN
# from scipy import spatial
# from itertools import combinations
from random import sample
from random import sample, random
import pickle
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
@@ -19,7 +19,6 @@ from gepard.helperfunctions import cv2imread_fix
from methods import SubsamplingMethod
from helpers import timingDecorator
def get_pca(data: np.ndarray, numComp: int = 2) -> np.ndarray:
try:
standardizedData = StandardScaler().fit_transform(data.copy())
@@ -224,7 +223,7 @@ def get_curvature_ft(contour: np.ndarray, angularSegment: float = 20, numHarmoni
tan_before: float = 0
if x - x_before != 0:
tan_before = np.rad2deg(np.arctan((y-y_before) / (x-x_before)))
tanr_before = np.rad2deg(np.arctan((y-y_before) / (x-x_before)))
tan_after: float = 0
if x_after - x != 0:
@@ -252,12 +251,18 @@ def get_curvature_ft(contour: np.ndarray, angularSegment: float = 20, numHarmoni
class TrainedSubsampling(SubsamplingMethod):
def __init__(self, particleContainer: ParticleContainer, desiredFraction: float,
path: str = r'C:\Users\xbrjos\Desktop\Python\Subsampling\chemometrics\RandomForestClassifier, score 0.79.pkl'):
path: str = r'C:\Users\xbrjos\Desktop\Python\Subsampling\chemometrics\RandomForestClassifier, score 0.79.pkl',
fakeScore: float = 0.8):
super(TrainedSubsampling, self).__init__(particleContainer, desiredFraction)
self.score: float = None
self.clf = None
self.clfPath: str = path
self.fraction = desiredFraction
self.fakeClassifier: bool = True
self.fakeScore: float = fakeScore
self.fractionForExtrapolation: float = 0.0
self.predictedMPIndices: list = []
self._predict_MP_Indices()
def equals(self, otherMethod) -> bool:
isEqual: bool = False
@@ -266,29 +271,63 @@ class TrainedSubsampling(SubsamplingMethod):
isEqual = True
return isEqual
@property
def label(self) -> str:
return 'Trained Random Sampling'
label: str = 'Dummy Trained Random Sampling'
if self.fakeClassifier:
label += f' (score {self.fakeScore})'
else:
label += f' (score {self.score})'
return label
def _predict_MP_Indices(self) -> None:
from evaluation import is_MP_particle
if not self.fakeClassifier:
self._load_classifier()
fullimgpath: str = r'C:\Users\xbrjos\Desktop\temp MP\Fullimages'
dsetname: str = self.particleContainer.datasetParent.name
imgPath: str = os.path.join(fullimgpath, dsetname + '.tif')
fullimg = cv2imread_fix(imgPath)
features: np.ndarray = get_particle_featurematrix(self.particleContainer, fullimg)
predictions: np.ndarray = self.clf.predict(features)
else:
self.score = self.fakeScore
particles: list = self.particleContainer.particles
predictions: np.ndarray = np.zeros(len(particles))
for index, particle in enumerate(particles):
if is_MP_particle(particle):
if random() <= self.fakeScore:
predictions[index] = 1
else:
if random() > self.fakeScore:
predictions[index] = 1
mpIndices = list(np.where(predictions == 1)[0])
nonMPIndices = list(np.where(predictions == 0)[0])
numNonMPIndices = len(nonMPIndices)
fracNonMPToTake: float = float(np.clip(-0.5 + 1/0.1 * self.fraction, 0.0, 1.0))
numNonMPToTake: int = int(round(fracNonMPToTake * numNonMPIndices))
self.predictedMPIndices = mpIndices + sample(nonMPIndices, numNonMPToTake)
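# A standalone sketch of the dummy-classifier logic above, decoupled from the
# ParticleContainer (assumption: a 0/1 numpy array stands in for is_MP_particle).
# Each true label is flipped with probability (1 - fakeScore), so the emulated
# classifier is correct in a fraction fakeScore of all cases, for MP and non-MP alike:
import numpy as np

def fake_predictions(trueLabels: np.ndarray, fakeScore: float) -> np.ndarray:
    rng = np.random.default_rng()
    flip = rng.random(trueLabels.shape[0]) > fakeScore  # True where the label gets flipped
    return np.where(flip, 1 - trueLabels, trueLabels)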
def get_maximum_achievable_fraction(self) -> float:
return 1.0
maxFrac: float = 0.10
return maxFrac
def apply_subsampling_method(self) -> list:
self._load_classifier()
fullimgpath: str = r'C:\Users\xbrjos\Desktop\temp MP\Fullimages'
dsetname: str = self.particleContainer.datasetParent.name
imgPath: str = os.path.join(fullimgpath, dsetname + '.tif')
fullimg = cv2imread_fix(imgPath)
features: np.ndarray = get_particle_featurematrix(self.particleContainer, fullimg)
predictions: np.ndarray = self.clf.predict(features)
indicesToSelect: set = self._get_measure_indices(list(predictions))
numParticlesToSelect = round(len(self.particleContainer.particles) * self.fraction)
if numParticlesToSelect > len(self.predictedMPIndices):
numParticlesToSelect = len(self.predictedMPIndices)
indicesToSelect = sample(self.predictedMPIndices, numParticlesToSelect)
selectedParticles: list = []
for particle in self.particleContainer.particles:
if particle.index in indicesToSelect:
selectedParticles.append(particle)
for index in indicesToSelect:
selectedParticles.append(self.particleContainer.getParticleOfIndex(index))
numOrigParticles = len(self.particleContainer.particles)
numEnhancedParticles = len(self.predictedMPIndices)
self.fractionForExtrapolation = self.fraction * (numOrigParticles/numEnhancedParticles)
return selectedParticles
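    # Worked example for the extrapolation fraction above: with 10000 particles in
    # total, desiredFraction = 0.1 and 2000 particles surviving the classifier
    # pre-selection (predictedMPIndices), round(10000 * 0.1) = 1000 particles are
    # measured, i.e. half of the enhanced list; the count extrapolation therefore
    # uses 0.1 * (10000 / 2000) = 0.5 instead of the nominal 0.1.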
def _load_classifier(self) -> None:
@@ -306,12 +345,12 @@ class TrainedSubsampling(SubsamplingMethod):
mpIndices: list = list(np.where(assignments == 1)[0])
nonMpIndices: list = list(np.where(assignments == 0)[0])
numEstimMPParticles: int = len(mpIndices)
numPredictedMP: int = len(mpIndices)
numParticlesToMeasure = round(len(predictedAssignments) * self.fraction)
if numParticlesToMeasure <= numEstimMPParticles:
if numParticlesToMeasure <= numPredictedMP:
indicesToMeasure = set(sample(mpIndices, numParticlesToMeasure))
else:
remainingIndices: int = int(numParticlesToMeasure - numEstimMPParticles)
remainingIndices: int = int(numParticlesToMeasure - numPredictedMP)
indicesToMeasure = set(mpIndices + sample(nonMpIndices, remainingIndices))
assert len(indicesToMeasure) == numParticlesToMeasure
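        # i.e. all predicted-MP particles are always measured; only the remaining
        # measurement budget is filled with randomly drawn predicted-non-MP particles.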
@@ -324,11 +363,18 @@ class TrainedSubsampling(SubsamplingMethod):
It is used for extrapolating the mpCount of the subsampled particle list.
:return:
"""
score: float = self.score
diff: float = 1/self.fraction - 1 # i.e., from 50 % score to 100 % score
factor: float = 1 + (1 - score)/0.5 * diff
return 1 / factor
# return self.fraction
# score: float = self.score
# diff: float = 1/self.fraction - 1 # i.e., from 50 % score to 100 % score
# factor: float = 1 + (1 - score)/0.5 * diff
# pow: float = (1-self.fakeScore)
# theoreticFactor: float = (1/factor**pow)
# print('actual fraction, theor. factor is', self.fraction, theoreticFactor)
# return theoreticFactor
return self.fractionForExtrapolation
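    # Worked example of the superseded estimate kept above for reference: with
    # score = 0.8 and fraction = 0.1, diff = 1/0.1 - 1 = 9 and
    # factor = 1 + (1 - 0.8)/0.5 * 9 = 4.6, i.e. a theoretic fraction of
    # 1/4.6 ≈ 0.217 (before the commented-out **0.2 damping). The method now simply
    # returns the fraction rescaled to the classifier-enhanced particle list, as
    # computed in apply_subsampling_method().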
# class ChemometricSubsampling(SubsamplingMethod):
@@ -463,4 +509,28 @@ class TrainedSubsampling(SubsamplingMethod):
# assert abs(totalPointsAdded - numPointsToSelect) <= 1
# for clusterIndex in pointsPerCluster.keys():
# assert 0 <= pointsPerCluster[clusterIndex] <= len(labels[labels == clusterIndex])
# return pointsPerCluster
\ No newline at end of file
# return pointsPerCluster
if __name__ == '__main__':
import matplotlib.pyplot as plt
fractions: np.ndarray = np.linspace(0.01, 1, 100)
scores: np.ndarray = np.linspace(0.5, 1.0, 5)
plt.clf()
for score in scores:
# if score == 0.5:
# theorFractions = fractions
# a, b, n = 1, 1, 1.5
# data1 = a * fractions**n / (fractions**n + b)
# data1 -= data1.min()
# data1 /= data1.max()
# theorFactors = 0.5 + 0.5*data1
theorFractions = []
for frac in fractions:
diff: float = 1 / frac - 1 # i.e., from 50 % score to 100 % score
factor: float = 1 + (1 - score) / 0.5 * diff
theorFractions.append(1/factor**0.2)
plt.plot(fractions, theorFractions, label=str(score))
plt.legend()
plt.show()
\ No newline at end of file
import sys
sys.path.append("C://Users//xbrjos//Desktop//Python")
from gepard.helperfunctions import cv2imread_fix
from gepard.dataset import loadData
from gepard.dataset import DataSet,loadData
from gepard.analysis.particleContainer import ParticleContainer
import cv2
import numpy as np
@@ -9,43 +9,73 @@ from scipy import spatial
import os
import matplotlib.pyplot as plt
from helpers import get_filterDimensions_from_dataset, get_center_from_filter_dimensions, convert_length_to_pixels
from graphs import get_distance_point_histogramdata
# from evaluation import is_MP_particle
import evaluation
def get_particle_patchiness(dataset: DataSet, numCells: int = 50, onlyMP=False) -> float:
offset, diameter, [width, height] = get_filterDimensions_from_dataset(dataset)
center: np.ndarray = get_center_from_filter_dimensions(offset, diameter)
width: float = convert_length_to_pixels(dataset, width)
height: float = convert_length_to_pixels(dataset, height)
pixelsPerTile: int = max(int(round(width/numCells)), int(round(height/numCells)))
centerX: int = int(round(convert_length_to_pixels(dataset, center[0] / pixelsPerTile)))
centerY: int = int(round(convert_length_to_pixels(dataset, center[1] / pixelsPerTile)))
radius: int = int(round(convert_length_to_pixels(dataset, diameter / pixelsPerTile * 0.5)))
numRows: int = int(np.ceil(height / pixelsPerTile)) + 1
numCols: int = int(np.ceil(width / pixelsPerTile)) + 1
partCount: int = 0
densityImage: np.ndarray = np.zeros((numRows, numCols))
for particle in dataset.particleContainer.particles:
if (onlyMP and evaluation.is_MP_particle(particle)) or not onlyMP:
particleCenter: tuple = np.mean(particle.contour[:, 0, 0]), np.mean(particle.contour[:, 0, 1])
row: int = int(round(particleCenter[1] / pixelsPerTile))
col: int = int(round(particleCenter[0] / pixelsPerTile))
densityImage[row, col] += 1
partCount += 1
mask: np.ndarray = np.zeros_like(densityImage)
cv2.circle(mask, (centerY, centerX), radius, 1, -1)
relevantData: np.ndarray = densityImage[mask > 0]
mean: np.ndarray = np.round(np.mean(relevantData), 2)
std: np.ndarray = np.round(np.std(relevantData), 2)
ratio: float = round(std/mean, 2)
# plt.imshow(densityImage)
# plt.title(f'MP particle count: {partCount},\ndensity mean: {mean}, density std: {std},\npatchiness = {ratio}')
# plt.axis('off')
# plt.tight_layout()
# plt.show()
# print(f'sample: {dataset.name}, mean: {mean}, std: {std}, ratio = {ratio}')
return ratio
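# A minimal sketch of the patchiness measure above, omitting the circular filter
# mask and the GEPARD dataset (assumption: particle centers are available as an
# (N, 2) array of pixel coordinates). Patchiness is the ratio of the standard
# deviation to the mean of the per-tile particle counts:
import numpy as np

def patchiness_from_points(points: np.ndarray, numCells: int = 50) -> float:
    counts, _, _ = np.histogram2d(points[:, 0], points[:, 1], bins=numCells)
    return round(float(np.std(counts) / np.mean(counts)), 2)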
if __name__ == '__main__':
# imgpath: str = r'C:\Users\xbrjos\Desktop\temp MP\Fullimages'
# imgname: str = '181120_MCI_2_ds1+2_all_ kleiner500_10_1.tif'
# imgname: str = '190619_5_PTPH_sld_190321_ds1_50_1_neu.tif'
#
#191213_P190814_TPHZ_ds1_50_1
# img: np.ndarray = cv2imread_fix(os.path.join(imgpath, imgname))
# gray: np.ndarray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
# _, binimg = cv2.threshold(gray, 20, 1, cv2.THRESH_BINARY_INV)
# distmap: np.ndarray = cv2.distanceTransform(binimg, cv2.DIST_L1, 3)
# plt.imshow(distmap, cmap='gray')
paths: list = [r'C:\Users\xbrjos\Desktop\temp MP\NewDatasets\water\181120_MCI_2_ds1+2_all_ kleiner500_10_1.pkl',
r'C:\Users\xbrjos\Desktop\temp MP\NewDatasets\wastewater, slush\190619_5_PTPH_sld_190321_ds1_50_1_neu.pkl']
# paths.append(r'C:\Users\xbrjos\Desktop\temp MP\NewDatasets\water\190306_MCII_1_2_50.pkl')
# paths.append(r'C:\Users\xbrjos\Desktop\temp MP\NewDatasets\water\190222_MCII_1_1_50_1.pkl')
r'C:\Users\xbrjos\Desktop\temp MP\NewDatasets\wastewater, water\191213_P190814_TPHZ_ds1_50_1.pkl',
r'C:\Users\xbrjos\Desktop\temp MP\NewDatasets\Air\191119_RW6_Solling_50_2_neu.pkl']
distances: list = []
allParticles = []
for path in paths:
dset = loadData(path)
particleContainer: ParticleContainer = dset.particleContainer
# particleCenters: list = []
# for particle in particleContainer.particles:
# particleCenters.append([np.mean(particle.contour[:, 0, 0]), np.mean(particle.contour[:, 0, 1])])
#
# closest_particle_distances: np.ndarray = np.zeros(len(particleCenters))
# particleCenters: np.ndarray = np.array(particleCenters)
# print('particle centers done')
# distMat: np.ndarray = spatial.distance_matrix(particleCenters, particleCenters)
# print('distmat computed')
# for i in range(distMat.shape[0]):
# if i == 0:
# closest_particle_distances[i] = np.min(distMat[i, 1:])
# elif i == distMat.shape[0]-1:
# closest_particle_distances[i] = np.min(distMat[i, :-1])
# else:
# closest_particle_distances[i] = np.min([np.min(distMat[i, :i]), np.min(distMat[i, i+1:])])
# distances.append(closest_particle_distances)
# plt.boxplot(distances)
get_particle_patchiness(dset, 50, onlyMP=True)
for particle in dset.particleContainer.particles:
if evaluation.is_MP_particle(particle):
allParticles.append(particle.getParticleAssignment())
print(set(allParticles))
\ No newline at end of file
import numpy as np
cimport numpy as np
cimport cython
from cython cimport boundscheck, wraparound
@boundscheck(False)
@wraparound(False)
cdef bint box_overlaps_contour(unsigned int[:] boxTopLeftXY, unsigned int boxSize, unsigned int[:, :, :] contourData):
cdef bint isOverlapping = False
cdef unsigned int xmin, xmax, width, boxXmin, boxXmax, ymin, ymax, height, boxYmin, boxYmax
    xmin = np.min(contourData[:, 0, 0])
    xmax = np.max(contourData[:, 0, 0])
width = xmax - xmin
boxXmin = boxTopLeftXY[0]
boxXmax = boxTopLeftXY[0] + boxSize
if xmin > (boxXmin-width/2):
if xmax < (boxXmax+width/2):
ymin = np.min(contourData[:, 0, 1])
ymax = np.max(contourData[:, 0, 1])
height = ymax - ymin
boxYmin = boxTopLeftXY[1]
boxYmax = boxTopLeftXY[1] + boxSize
if ymin > (boxYmin-height/2):
                if ymax < (boxYmax+height/2):
isOverlapping = True
return isOverlapping
@boundscheck(False)
@wraparound(False)
def def_get_indices_of_overlapping_particles(contours not None, unsigned int[:, :] topLefts, unsigned int boxSize):
cdef Py_ssize_t i, j
cdef unsigned int counter, numParticles, numTopLefts
numParticles = len(contours)
numTopLefts = topLefts.shape[0]
cdef unsigned int[:] overlappingIndices = np.zeros(numParticles, dtype=np.uint32)
cdef unsigned int[:, :, :] currentContour
counter = 0
for i in range(numParticles):
currentContour = contours[i]
for j in range(numTopLefts):
if box_overlaps_contour(topLefts[j, :], boxSize, currentContour):
overlappingIndices[counter] = i
counter += 1
break
return overlappingIndices[:counter]
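# Expected call pattern once compiled (it mirrors the usage added to
# geometricMethods.py further below; topLeftList here stands for the list
# returned by get_topLeft_of_boxes(); both inputs must be cast to unsigned
# 32-bit integers first):
#     contours = [cnt.astype(np.uint32) for cnt in particleContainer.getParticleContours()]
#     topLefts = np.round(np.array(topLeftList)).astype(np.uint32)
#     indices = particleBoxOverlap.def_get_indices_of_overlapping_particles(
#         contours, topLefts, np.uint32(round(boxSize)))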
@@ -30,7 +30,7 @@ def get_random_topLefts(int numBoxes, double boxSize, double radius, double maxA
cdef int outerCounter, counter, x, y, i, j, diffX, diffY, successfullyAdded
cdef bint validSolutionFound, boxOverlaps
srand(42) # setting seed
srand(seed) # setting seed
    assert RAND_MAX == 32767  # this value is used by the random() function above; for performance it is hard-coded there as a literal
maxDist = radius - np.sqrt((boxSize/2)**2 + (boxSize/2)**2)
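    # i.e. the farthest a box center may lie from the filter center: the box
    # half-diagonal sqrt(2) * boxSize/2 must still fit inside the circular filter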
outerCounter = 0
@@ -19,8 +19,14 @@ if len(sys.argv) == 1:
# ext = Extension("getRandomTopLefts", ["getRandomTopLefts.pyx"], extra_compile_args=['-O3'],)
# setup(
# name="get a given number of random topLefts",
# ext_modules=cythonize("randoms.pyx", annotate=True), # accepts a glob pattern
# include_dirs=[np.get_include()]
# )
setup(
name="get a given number of random topLefts",
ext_modules=cythonize("randoms.pyx", annotate=True), # accepts a glob pattern
name="checks which particle contours overlap the boxes",
ext_modules=cythonize("particleBoxOverlap.pyx", annotate=True), # accepts a glob pattern
include_dirs=[np.get_include()]
)
)
\ No newline at end of file
@@ -8,6 +8,7 @@ Created on Wed Jan 22 13:57:28 2020
# import pickle
import os
import numpy as np
import time
# import matplotlib.pyplot as plt
import concurrent.futures
import operator
@@ -20,6 +21,7 @@ from helpers import ParticleBinSorter
import methods as meth
import geometricMethods as gmeth
from chemometrics import chemometricMethods as cmeth
from chemometrics.imageOperations import get_particle_patchiness
from datasetOperations import ParticleVariations
@@ -33,8 +35,9 @@ def get_methods_to_test(dataset: dataset.DataSet, fractions: list = [], maxTries
:return: list of measurement Objects that are applicable
"""
if len(fractions) == 0:
fractions: list = [0.02, 0.05, 0.1, 0.25, 0.5, 0.7, 0.9]
# fractions: list = [0.1, 0.3, 0.5]
fractions: list = [0.02, 0.03, 0.05, 0.1, 0.25, 0.5, 0.7, 0.9]
# fractions: list = [0.02, 0.06, 0.15, 0.2, 0.5]
# fractions: list = [0.01, 0.1, 0.5, 0.9]
methods: list = []
particleContainer = dataset.particleContainer
@@ -47,15 +50,19 @@ def get_methods_to_test(dataset: dataset.DataSet, fractions: list = [], maxTries
methods += boxCreator.get_spiralBoxSubsamplers_for_fraction(fraction)
methods += boxCreator.get_randomBoxSubsamplers_for_fraction(fraction, maxTries=maxTries)
methods += boxCreator.get_randomQuarterBoxSubsamplers_for_fraction(fraction, maxTries=maxTries)
# methods.append(cmeth.TrainedSubsampling(particleContainer, fraction))
# methods.append(cmeth.ChemometricSubsampling(particleContainer, fraction))
for fakeScore in [0.6, 0.7, 0.8]:
trainedSampling = cmeth.TrainedSubsampling(particleContainer, fraction, fakeScore=fakeScore)
if trainedSampling.config_is_valid():
methods.append(trainedSampling)
return methods
def update_sample(sample, force: bool, index: int):
sample.load_dataset()
t0 = time.time()
methods: list = get_methods_to_test(sample.dataset)
print('getting methods for sample', sample.dataset.name, 'took', round(time.time()-t0, 2), 'seconds')
sample.update_result_with_methods(methods, force)
return sample, index
@@ -131,34 +138,58 @@ class TotalResults(object):
self.sampleResults[index] = updatedResult
print(f'done updating {updatedResult.dataset.name} at index {index}')
def get_error_vs_fraction_data(self, attributes: list = [], methods: list = []) -> dict:
def get_error_vs_fraction_data(self, attributes: list = [], methods: list = [], partCount: tuple = (0, np.inf)) -> tuple:
"""
:param attributes: A list of attributes that should be used for filtering the samples. Only samples with an
attribute from within that list are considered.
:param methods: A list of methods to extract
        :param partCount: tuple (min, max); only samples whose total particle count lies in this range are considered
:return: Dict: Key: Method Label,
Value: {Dict: Key:Measured Fraction, Value: Tuple (averaged MPCountError, StDev MPCountError) over all samples}
"""
result: dict = {}
allParticles: list = []
numSamples: int = 0
for sample in self.sampleResults:
sample: SampleResult = sample
if attributes == [] or sample.has_any_attribute(attributes):
numSamples += 1
for res in sample.results:
res: SubsamplingResult = res
method: meth.SubsamplingMethod = res.method
if methods == [] or method.matches_any_pattern(methods):
label: str = method.label
frac: float = method.fraction
error: float = res.mpCountError
stdev: float = res.mpCountErrorStDev
if label not in result.keys():
result[label] = {frac: [(error, stdev)]}
elif frac not in result[label].keys():
result[label][frac] = [(error, stdev)]
else:
result[label][frac].append((error, stdev))
if partCount[0] == 0 and partCount[1] == np.inf:
samplePartCount: int = 1
# samplePatchiness: float = 1.0 # doesn't matter in this case
else:
if sample.dataset is None:
sample.load_dataset()
samplePartCount: int = len(sample.dataset.particleContainer.particles)
# samplePatchiness: float = sample.get_patchiness()
# print(sample.sampleName, samplePatchiness)
if partCount[0] <= samplePartCount < partCount[1]:
numSamples += 1
if sample.dataset is None:
sample.load_dataset()
for particle in sample.dataset.particleContainer.particles:
allParticles.append(particle)
for res in sample.results:
res: SubsamplingResult = res
method: meth.SubsamplingMethod = res.method
if methods == [] or method.matches_any_pattern(methods):
label: str = method.label
frac: float = method.fraction
error: float = res.mpCountError
stdev: float = res.mpCountErrorStDev
if label not in result.keys():
result[label] = {frac: [(error, stdev)]}
elif frac not in result[label].keys():
result[label][frac] = [(error, stdev)]
else:
result[label][frac].append((error, stdev))
numMPParticles: float = get_number_of_MP_particles(allParticles)
stats: dict = {'numSamples': numSamples,
'meanParticleCount': round(len(allParticles) / numSamples),
'meanMPFrac': round(numMPParticles / len(allParticles) * 100, 1)}
for method in result.keys():
methodRes: dict = result[method]
@@ -167,7 +198,7 @@ class TotalResults(object):
meanStd = np.mean([i[1] for i in methodRes[fraction]])
methodRes[fraction] = (meanError, meanStd)
return numSamples, result
return stats, result
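    # Shape of the returned tuple, with assumed illustrative values:
    #   stats  = {'numSamples': 12, 'meanParticleCount': 8432, 'meanMPFrac': 1.3}
    #   result = {'Random Subsampling': {0.1: (23.5, 4.2), 0.5: (9.1, 1.8)}, ...}
    # i.e. per method label a dict mapping each measured fraction to the tuple
    # (mean mpCountError, mean mpCountErrorStDev) over all matching samples.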
class SubsamplingResult(object):
@@ -262,13 +293,14 @@ class SampleResult(object):
"""
An object the stores all generated results per sample and can update and report on them.
"""
def __init__(self, filepath: str, numVariations: int = 10):
def __init__(self, filepath: str, numVariations: int = 20):
super(SampleResult, self).__init__()
self.filepath: str = filepath
self.dataset: dataset.DataSet = None
self.results: list = []
self.attributes: list = []
self.numVariations: int = numVariations # how often the sample is altered for each method
self.patchiness: float = -1
@property
def sampleName(self) -> str:
@@ -278,6 +310,13 @@
self.dataset = dataset.loadData(self.filepath)
assert self.dataset is not None
def get_patchiness(self) -> float:
if not hasattr(self, "patchiness") or self.patchiness == -1:
if self.dataset is None:
self.load_dataset()
self.patchiness = get_particle_patchiness(self.dataset)
return self.patchiness
def update_result_with_methods(self, methods: list, force: bool = False) -> list:
"""
Updates result with the given method (contains desiredFraction already)
@@ -289,8 +328,8 @@
self.load_dataset()
updatedMethods: list = []
t0 = time.time()
particleVariations: ParticleVariations = ParticleVariations(self.dataset, numVariations=self.numVariations)
needsToBeUpdated: dict = {method: False for method in methods}
for index, particleContainer in enumerate(particleVariations.get_particleContainer_variations()):
@@ -312,11 +351,10 @@
if needsToBeUpdated[method]:
subParticles = method.apply_subsampling_method()
result.add_result(method.particleContainer.particles, subParticles)
if method not in updatedMethods:
updatedMethods.append(method)
updatedMethods.append(method)
# print(f'updated {self.sampleName} with {method.label} at fraction {method.fraction}, '
# f'iteration {index+1}')
# f'iteration {index+1}, took {round(time.time()-t0, 2)}, seconds')
print(f'finished updating sample {self.sampleName}, it took {round(time.time()-t0, 2)} seconds')
return updatedMethods
def set_attribute(self, newAttribute: str) -> None:
import time
import numpy as np
from itertools import combinations
from methods import SubsamplingMethod
@@ -6,7 +7,7 @@ import sys
sys.path.append("C://Users//xbrjos//Desktop//Python")
from gepard import dataset
import helpers
from cythonModules import randoms
from cythonModules import randoms, particleBoxOverlap
def box_overlaps_other_box(topLeft1: list, topLeft2: list, boxSize: float) -> bool:
@@ -53,16 +54,27 @@ class BoxSelectionSubsamplingMethod(SubsamplingMethod):
return abs(topleft[0] - cntStart[0]) + abs(topleft[1] - cntStart[1])
subParticles: list = []
topLefts: list = self.get_topLeft_of_boxes()
boxWidthHeight: tuple = (self.boxSize, self.boxSize)
topLefts: np.ndarray = np.array(self.get_topLeft_of_boxes())
cython: bool = False
if cython:
topLefts = np.round(topLefts).astype(np.uint32)
# contours: np.ndarray = np.array(self.particleContainer.getParticleContours())
contours = [cnt.astype(np.uint32) for cnt in self.particleContainer.getParticleContours()]
boxSize: np.uint32 = np.uint32(round(self.boxSize))
indices = particleBoxOverlap.def_get_indices_of_overlapping_particles(contours, topLefts, boxSize)
for index in indices:
subParticles.append(self.particleContainer.getParticleOfIndex(index))
for particle in self.particleContainer.particles:
cntStart: tuple = (particle.contour[0, 0, 0], particle.contour[0, 0, 1])
sortedTopLefts = sorted(topLefts, key=distanceToCnt)
else:
boxWidthHeight: tuple = (self.boxSize, self.boxSize)
for particle in self.particleContainer.particles:
cntStart: tuple = (particle.contour[0, 0, 0], particle.contour[0, 0, 1])
sortedTopLefts = sorted(topLefts, key=distanceToCnt)
for topLeftXY in sortedTopLefts:
if helpers.box_overlaps_contour(topLeftXY, boxWidthHeight, particle.contour):
subParticles.append(particle)
for topLeftXY in sortedTopLefts:
if helpers.box_overlaps_contour(topLeftXY, boxWidthHeight, particle.contour):
subParticles.append(particle)
break
return subParticles
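    # Design note: the compiled Cython path collects the indices of all
    # box-overlapping contours in a single pass, while the pure-Python fallback
    # checks each particle against the boxes sorted by distance to the contour
    # start point, so that an overlap is typically found early.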
@@ -263,6 +275,7 @@ class CrossBoxSubSampling(BoxSelectionSubsamplingMethod):
numBoxes: int = 2 * self.numBoxesAcross - 1
totalBoxArea: float = numBoxes * (maxBoxSize ** 2)
maxFraction: float = totalBoxArea / self.filterArea
return maxFraction
def equals(self, otherMethod) -> bool:
@@ -386,7 +399,7 @@ class RandomBoxSampling(BoxSelectionSubsamplingMethod):
@property
def label(self) -> str:
return f'Boxes random layout ({self.numBoxes} boxes)'
return f'Boxes Random layout ({self.numBoxes} boxes)'
def equals(self, otherMethod) -> bool:
equals: bool = False
@@ -396,7 +409,6 @@ class RandomBoxSampling(BoxSelectionSubsamplingMethod):
return equals
def get_topLeft_of_boxes(self) -> list:
valid, topLefts = randoms.get_random_topLefts(self.numBoxes, self.boxSize,
self.filterDiameter/2, self.__maxAngle,
seed=self.randomSeed, maxTries=self.maxTries)
@@ -413,7 +425,7 @@ class RandomQuarterBoxes(RandomBoxSampling):
@property
def label(self) -> str:
return f'Boxes random layout (quarter) ({self.numBoxes} boxes)'
return f'Boxes Random layout (quarter) ({self.numBoxes} boxes)'
def determine_max_achievable_frac(method: BoxSelectionSubsamplingMethod, numBoxes: int) -> float:
from matplotlib.figure import Figure
import matplotlib.pyplot as plt
from matplotlib.ticker import ScalarFormatter, FixedLocator
from scipy import optimize
from mpl_toolkits.mplot3d import Axes3D
import numpy as np
from evaluation import TotalResults, SampleResult
from evaluation import TotalResults, get_number_of_MP_particles, is_MP_particle
from chemometrics.imageOperations import get_particle_patchiness
from helpers import get_filterDimensions_from_dataset, get_center_from_filter_dimensions, convert_length_to_pixels
def get_error_vs_frac_plot(totalResults: TotalResults, attributes: list = [], methods: list = [],
standarddevs=True, fill=True) -> Figure:
def get_error_vs_frac_plot(totalResults: TotalResults, attributes: list = [], methods: list = [], partCounts: list = [],
standarddevs=True, fill=True, poissonRef=True) -> Figure:
if len(attributes) == 0 and len(methods) != 0:
attributes = [[]]*len(methods)
elif len(methods) == 0 and len(attributes) != 0:
methods = [[]]*len(attributes)
    if len(partCounts) == 0:
        partCounts = [[]]*len(attributes)
assert len(attributes) == len(methods)
fig: Figure = plt.figure(figsize=(10, 5))
numRows: int = 1
numCols: int = 1
if len(attributes) == 0:
attributes = methods = [[]]
elif len(attributes) <= 2:
@@ -24,34 +31,81 @@ def get_error_vs_frac_plot(totalResults: TotalResults, attributes: list = [], me
numRows = 2
numCols = np.ceil(len(attributes)/numRows)
fig: Figure = plt.figure(figsize=(14, 3.5 * numRows))
index = 0
for attrs, meths in zip(attributes, methods):
for attrs, meths, pcounts in zip(attributes, methods, partCounts):
ax = fig.add_subplot(numRows, numCols, index + 1)
numSamples, errorPerFraction = totalResults.get_error_vs_fraction_data(attributes=attrs, methods=meths)
if pcounts != []:
stats, errorPerFraction = totalResults.get_error_vs_fraction_data(attributes=attrs, methods=meths,
partCount=pcounts)
else:
stats, errorPerFraction = totalResults.get_error_vs_fraction_data(attributes=attrs, methods=meths)
numSamples = stats['numSamples']
meanParticleCount = stats['meanParticleCount']
meanMPFrac = stats['meanMPFrac']
if poissonRef:
            firstMethodLabel: str = list(errorPerFraction.keys())[0]
            fractions: list = list(errorPerFraction[firstMethodLabel].keys())
meansCounts: np.ndarray = np.array([frac * meanParticleCount * meanMPFrac/100 for frac in fractions])
            stdevs: np.ndarray = 1 / np.sqrt(meansCounts)  # mean = variance = stdev**2 in a Poisson distribution
            means = stdevs**2
            # conversion to %
means *= 100
stdevs *= 100
fractions = [frac*100 for frac in fractions]
if not standarddevs:
ax.plot(fractions, means, label='Poisson', marker='s', alpha=0.3)
else:
line = ax.errorbar(fractions, means, stdevs, label='Poisson', marker='s', capsize=5, alpha=0.3)
if fill:
color = line[0].get_color()
ax.fill_between(fractions, means-stdevs, means+stdevs, alpha=0.1, facecolor=color)
# print('errorbars:', means-stdevs, means+stdevs)
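            # Example with assumed round numbers: at meanParticleCount = 10000,
            # meanMPFrac = 1 % and a measured fraction of 10 %, the expected MP count
            # is lambda = 0.1 * 10000 * 0.01 = 10, giving a relative Poisson standard
            # deviation of 1/sqrt(10), roughly 32 %.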
for methodLabel in errorPerFraction.keys():
errorDict: dict = errorPerFraction[methodLabel]
fractions: list = list(errorDict.keys())
errors: np.ndarray = np.array([errorDict[fraction][0] for fraction in fractions])
stdevs: np.ndarray = np.array([errorDict[fraction][1] for fraction in fractions])
fractions = [i * 100 for i in fractions] # convert to % for plotting
alphascale: float = 1 if methodLabel.find('Random Subsa') == -1 else 0.3
if not standarddevs:
ax.plot(fractions, errors, label=methodLabel, marker='s')
else:
line = ax.errorbar(fractions, errors, stdevs, label=methodLabel, marker='s', capsize=5)
line = ax.errorbar(fractions, errors, stdevs, label=methodLabel, marker='s', capsize=5, alpha=alphascale)
if fill:
color = line[0].get_color()
ax.fill_between(fractions, errors-stdevs, errors+stdevs, alpha=0.2, facecolor=color)
ax.fill_between(fractions, errors-stdevs, errors+stdevs, alpha=0.2*alphascale, facecolor=color)
title: str = ''
if len(attrs) > 0:
for i in range(len(attrs)):
if attrs[i] == 'slush':
attrs[i] = 'sludge'
title = ', '.join(attr for attr in attrs)
title += f' ({numSamples} samples)'
        ax.set_title(title, fontsize=15)
elif pcounts != []:
title += f'{pcounts[0]} <= num. Particles < {pcounts[1]}'
meanNumMP: int = int(round(meanParticleCount * meanMPFrac/100, 0))
title += f' ({numSamples} filters)\nAverage: {meanParticleCount} particles, {meanMPFrac} % MP, {meanNumMP} MP particles'
        ax.set_title(title, fontsize=13)
ax.set_xscale('log')
ax.set_xlabel('measured fraction', fontsize=12)
ax.set_ylabel('mpCountError (%)', fontsize=12)
minX, maxX = 0.9 * min(fractions), 1.05
ax.xaxis.set_major_formatter(ScalarFormatter())
# ax.xaxis.set_major_locator(FixedLocator([0.02, 0.05, 0.1, 0.2, 0.5, 1.0]))
ax.xaxis.set_major_locator(FixedLocator([2, 5, 10, 20, 50, 100]))
ax.set_xlabel('measured fraction (%)', fontsize=12)
ax.set_ylabel('subsampling-error (%)', fontsize=12)
minX, maxX = 0.9 * min(fractions), 105
ax.hlines([20, 40, 60, 80], minX, maxX, colors='gray', alpha=0.5)
ax.set_xlim([minX, maxX])
ax.set_ylim([0, 100])
@@ -60,12 +114,11 @@ def get_error_vs_frac_plot(totalResults: TotalResults, attributes: list = [], me
index += 1
fig.tight_layout()
return fig
def get_distance_hist_plots(totalResults: TotalResults, attributes: list = []) -> Figure:
fig: Figure = plt.figure(figsize=(10, 5))
fig: Figure = plt.figure(figsize=(7, 5))
numRows: int = 1
numCols: int = 1
if len(attributes) == 0:
@@ -76,37 +129,224 @@ def get_distance_hist_plots(totalResults: TotalResults, attributes: list = []) -
numRows = 2
numCols = np.ceil(len(attributes) / numRows)
onlyMP: bool = True
ax = fig.add_subplot()
for index, attrs in enumerate(attributes):
ax = fig.add_subplot(numRows, numCols, index + 1)
# ax = fig.add_subplot(numRows, numCols, index + 1)
allParticles: list = []
densities: list = []
particleCounts: list = []
        patchinesses: list = []
for sampleRes in totalResults.sampleResults:
if sampleRes.has_any_attribute(attrs):
if sampleRes.dataset is None:
sampleRes.load_dataset()
dset = sampleRes.dataset
offset, diameter, [width, height] = get_filterDimensions_from_dataset(dset)
center = get_center_from_filter_dimensions(offset, diameter)
center[0] = convert_length_to_pixels(dset, center[0])
center[1] = convert_length_to_pixels(dset, center[1])
patchiness: float = get_particle_patchiness(dset, onlyMP=onlyMP)
                patchinesses.append(patchiness)
if onlyMP:
particleCount: int = 0
for particle in dset.particleContainer.particles:
if is_MP_particle(particle):
particleCount += 1
particleCounts.append(particleCount)
else:
particleCounts.append(len(dset.particleContainer.particles))
# for particle in dset.particleContainer.particles:
# allParticles.append(particle)
#
# offset, diameter, [width, height] = get_filterDimensions_from_dataset(dset)
# center = get_center_from_filter_dimensions(offset, diameter)
# center[0] = convert_length_to_pixels(dset, center[0])
# center[1] = convert_length_to_pixels(dset, center[1])
histdata = get_distance_point_histogramdata(dset.particleContainer.particles, center)
densities.append(histdata[1])
# histdata = get_distance_point_histogramdata(dset.particleContainer.particles, center)
# densities.append(histdata[1])
# ax.plot(histdata[0], histdata[1])
numSamples = len(densities)
title: str = ''
if len(attrs) > 0:
title = ', '.join(attr for attr in attrs)
title += f' ({numSamples} samples)'
for i in range(len(attrs)):
if attrs[i] == 'slush':
attrs[i] = 'sludge'
ax.scatter(particleCounts, pathinesses, label=', '.join(attr for attr in attrs))
        ax.set_title(title, fontsize=15)
densities: np.ndarray = np.mean(np.array(densities), axis=0)
ax.plot(histdata[0], densities)
# numSamples = len(densities)
# partCounts: list = [len(i) for i in allParticles]
# meanParticleCount: float = round(len(allParticles) / numSamples)
# meanParticleCount: float = round(np.mean(partCounts))
# stdParticleCount: float = round(np.std(partCounts))
# mpFracs: list = [get_number_of_MP_particles(i)/len(i) for i in allParticles]
# meanMPFrac: float = round(np.mean(mpFracs) * 100, 1)
# stdMPFrac: float = round(np.std(mpFracs) * 100, 1)
# numMPParticles: float = get_number_of_MP_particles(allParticles)
# meanMPFrac: float = round(numMPParticles / len(allParticles) * 100, 1)
        # meanPatchiness: float = round(np.mean(patchinesses), 2)
# title: str = ''
# if len(attrs) > 0:
# title = ', '.join(attr for attr in attrs)
# title += f'\n({numSamples} filters, avg. {meanParticleCount} particles, {meanMPFrac} % MP,'
# title += f'\navg. Particle Patchiness {meanPatchiness})'
# ax.set_title(title, fontSize=13)
# densities: np.ndarray = np.mean(np.array(densities), axis=0)
# densities /= densities.max()
# distances = np.array(histdata[0], dtype=np.float) * dset.pixelscale_df
# ax.plot(distances / 1000, densities)
# ax.set_xlabel('distance from filter center (mm)', fontsize=12)
# ax.set_xlim([0, 6])
# ax.set_ylabel('normalized particle density', fontsize=12)
# ax.set_ylim([0.0, 1.05])
ax.legend(fontsize=15)
ax.set_xscale('log')
if not onlyMP:
ax.set_xticks([1000, 5000, 10000, 50000, 100000])
ax.set_xlabel('Particle Count', fontsize=15)
else:
ax.set_xticks([10, 50, 100, 500])
ax.set_xlabel('MP Particle Count', fontsize=15)
ax.xaxis.set_major_formatter(ScalarFormatter())
ax.set_ylabel('Patchiness', fontsize=15)
for tick in ax.xaxis.get_major_ticks():
tick.label.set_fontsize(15)
for tick in ax.yaxis.get_major_ticks():
tick.label.set_fontsize(15)
fig.tight_layout()
return fig
def get_error_vs_mpfrac_plot(totalResults: TotalResults, attributes: list = []) -> Figure:
def quadratic_fit(x, a, b, c):
return a*x**2 + b*x + c
fig: Figure = plt.figure(figsize=(15, 5))
ax1 = fig.add_subplot(121)
ax2 = fig.add_subplot(122)
dataWithFractions: dict = {}
dataWithNumbers: dict = {}
for index, attrs in enumerate(attributes):
for sampleRes in totalResults.sampleResults:
if sampleRes.has_any_attribute(attrs):
if sampleRes.dataset is None:
sampleRes.load_dataset()
dset = sampleRes.dataset
particles: list = dset.particleContainer.particles
totalParticleCount: int = len(particles)
numMPParticles: float = get_number_of_MP_particles(particles)
mpfrac: float = numMPParticles / totalParticleCount * 100
fracsMeasured: np.ndarray = np.unique([result.method.fraction for result in sampleRes.results])
fracsToPlot: dict = {0.03: 0.03,
0.04: 0.03,
0.05: 0.03,
0.06: 0.1,
0.1: 0.1,
0.2: 0.1,
0.25: 0.1,
0.3: 0.5,
0.5: 0.5,
0.7: 0.8,
0.9: 0.8
}
numParticlesMeasured: list = [1000, 2500, 5000]
usedSamples: list = []
for particlesMeasured in numParticlesMeasured:
if particlesMeasured <= totalParticleCount:
fracMeasured: float = particlesMeasured/totalParticleCount
indexOfFracToEvaluate = np.argmin(np.abs(fracsMeasured - fracMeasured))
fracToEvaluate: float = fracsMeasured[indexOfFracToEvaluate]
allErrorsOfThisFrac: list = []
for result in sampleRes.results:
if result.method.label.find('Random Subsampling') != -1 and result.method.fraction == fracToEvaluate:
allErrorsOfThisFrac.append(result.mpCountError)
if mpfrac != 0.0:
if particlesMeasured not in dataWithNumbers:
dataWithNumbers[particlesMeasured] = [(mpfrac, np.mean(allErrorsOfThisFrac))]
else:
dataWithNumbers[particlesMeasured].append((mpfrac, np.mean(allErrorsOfThisFrac)))
fracToEvaluate = fracsToPlot[fracToEvaluate]
if fracToEvaluate not in dataWithFractions.keys():
dataWithFractions[fracToEvaluate] = [(mpfrac, np.mean(allErrorsOfThisFrac))]
else:
dataWithFractions[fracToEvaluate].append((mpfrac, np.mean(allErrorsOfThisFrac)))
# fracsToProcess: list = [0.03, 0.1, 0.5, 0.8]
# for fracToEvaluate in fracsToProcess:
# allErrorsOfThisFrac: list = []
# for result in sampleRes.results:
# if result.method.label.find('Random Subsampling') != -1 and result.method.fraction == fracToEvaluate:
# allErrorsOfThisFrac.append(result.mpCountError)
#
# if mpfrac != 0.0:
# if fracToEvaluate not in dataWithFractions.keys():
# dataWithFractions[fracToEvaluate] = [(mpfrac, np.mean(allErrorsOfThisFrac))]
# else:
# dataWithFractions[fracToEvaluate].append((mpfrac, np.mean(allErrorsOfThisFrac)))
for frac in sorted(dataWithFractions.keys()):
mpfracs: np.ndarray = np.array([i[0] for i in dataWithFractions[frac]])
errors: np.ndarray = np.array([i[1] for i in dataWithFractions[frac]])
ax1.scatter(mpfracs, errors, marker='o', label=f'measured Fraction: {frac}')
        if mpfracs.shape[0] <= 2 or not np.all(np.isfinite(errors)):
            order = np.argsort(mpfracs)
            ax1.plot(mpfracs[order], errors[order])
else:
x_for_fit = np.log10(mpfracs)
try:
params, _ = optimize.curve_fit(quadratic_fit, x_for_fit, errors)
except ValueError:
                print('curve fit failed for this fraction, skipping')
continue
ax1.plot(np.sort(mpfracs), quadratic_fit(np.sort(x_for_fit), params[0], params[1], params[2]))
for numParticles in sorted(dataWithNumbers.keys()):
mpfracs: np.ndarray = np.array([i[0] for i in dataWithNumbers[numParticles]])
errors: np.ndarray = np.array([i[1] for i in dataWithNumbers[numParticles]])
ax2.scatter(mpfracs, errors, marker='o', label=f'measured {numParticles} particles')
if mpfracs.shape[0] > 2:
x_for_fit = np.log10(mpfracs)
params, _ = optimize.curve_fit(quadratic_fit, x_for_fit, errors)
ax2.plot(np.sort(mpfracs), quadratic_fit(np.sort(x_for_fit), params[0], params[1], params[2]))
for axis in [ax1, ax2]:
axis.set_xlabel('microplastic Fraction (%)', fontsize=15)
axis.set_xlim([0.08, 15])
axis.set_xscale('log')
axis.set_ylabel('subsampling error (%)', fontsize=15)
axis.set_ylim([0, 120])
axis.xaxis.set_major_formatter(ScalarFormatter())
axis.hlines([20], 0.08, 15, colors='gray', alpha=0.5)
axis.text(2.5, 22, 'recommended limit', fontsize=14, alpha=0.5)
for tick in axis.xaxis.get_major_ticks():
tick.label.set_fontsize(15)
for tick in axis.yaxis.get_major_ticks():
tick.label.set_fontsize(15)
handles, labels = axis.get_legend_handles_labels()
by_label = dict(zip(labels, handles))
axis.legend(by_label.values(), by_label.keys(), fontsize=15)
ax1.set_title('By Fraction', fontsize=17)
ax2.set_title('By Particle Count', fontsize=17)
fig.tight_layout()
return fig
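# A self-contained sketch of the fit pattern used above: the quadratic is fitted
# in log10(mpfrac) space but plotted against the untransformed fractions
# (made-up data points, only numpy and scipy assumed):
import numpy as np
from scipy import optimize

def quadratic_fit(x, a, b, c):
    return a * x**2 + b * x + c

mpfracs = np.array([0.1, 0.5, 1.0, 5.0, 10.0])      # microplastic fractions in %
errors = np.array([80.0, 45.0, 30.0, 15.0, 10.0])   # mean subsampling errors in %
params, _ = optimize.curve_fit(quadratic_fit, np.log10(mpfracs), errors)
fitted = quadratic_fit(np.log10(mpfracs), *params)  # y-values to plot against mpfracs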
def get_distance_point_histogramdata(particles: list, center: np.ndarray) -> tuple:
"""
:param particles: list of Particles
@@ -130,7 +370,6 @@ def get_distance_point_histogramdata(particles: list, center: np.ndarray) -> tup
    densities: np.ndarray = np.zeros_like(data, dtype=float)
for i in range(len(data)):
densities[i] = float(data[i]) / get_area_of_circle_ring(binMaxima[i], binMaxima[i+1])
densities /= densities.max()
binCenters: list = [np.mean([binMaxima[i], binMaxima[i+1]]) for i in range(len(binMaxima)-1)]
return binCenters, densities
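    # Each raw bin count is divided by the area of its circle ring, so the
    # returned densities are particles per unit area at a given distance from
    # the filter center; without this, the larger outer rings would dominate.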
@@ -172,3 +172,14 @@ def convert_length_to_pixels(dataset: dataset.DataSet, length: float) -> float:
pixelScale: float = (dataset.pixelscale_df if imgMode == 'df' else dataset.pixelscale_bf)
length /= pixelScale
return length
if __name__ == '__main__':
counts = [2600, 14662, 9472, 16533]
mpfracs = [4.7, 0.9, 0.5, 0.5]
errorMargins = [0.2]
sigma = 0.7
for count, mpFrac in zip(counts, mpfracs):
for margin in errorMargins:
angerFrac = get_Anger_fraction(count, sigma=sigma, mpFraction=mpFrac/100, errorMargin=margin)
print(count, mpFrac, margin, angerFrac, angerFrac / count)
\ No newline at end of file
@@ -4,11 +4,11 @@ import time
from evaluation import TotalResults, SampleResult
from input_output import get_pkls_from_directory, get_attributes_from_foldername, save_results, load_results
from graphs import get_error_vs_frac_plot, get_distance_hist_plots
from graphs import get_error_vs_frac_plot, get_distance_hist_plots, get_error_vs_mpfrac_plot
"""
IMPORTANT!!!
SET GEPARD TO EVALUATION BRANCH (WITHOUT THE TILING STUFF), OTHERWISE SOME OF THE LEGACY CONVERTS MIGHT FAIL..
SET GEPARD TO DEVELOPMENT BRANCH (WITHOUT THE TILING STUFF), OTHERWISE SOME OF THE LEGACY CONVERTS MIGHT FAIL..
"""
if __name__ == '__main__':
@@ -17,7 +17,7 @@ if __name__ == '__main__':
# counter = 0
# for folder in pklsInFolders.keys():
# for samplePath in pklsInFolders[folder]:
# if counter < 100:
# if counter < 50:
# newSampleResult: SampleResult = results.add_sample(samplePath)
# for attr in get_attributes_from_foldername(folder):
# newSampleResult.set_attribute(attr)
@@ -27,18 +27,22 @@ if __name__ == '__main__':
# results.update_all(multiprocessing=True)
# print('updating all took', time.time()-t0, 'seconds')
#
# save_results('results2.res', results)
results: TotalResults = load_results('results2.res')
#
plot: Figure = get_error_vs_frac_plot(results,
attributes=[['air'], ['water'], ['sediment', 'soil', 'beach'], ['slush']],
# # attributes=[['$ mpFraction < 0.02'], ['$ mpFraction >= 0.02']],
# # methods=[['random layout (7', 'random layout (1']]*2)
methods=[['random sub', 'cross', 'layout (10']]*4)
# # methods=[['Random Subsampling', 'Sizebin']] * 2)
# # methods=[['layout (5', 'layout (10', 'layout (15', 'cross', 'random subsampling', 'sizebin']] * 2)
# plot.show()
# save_results('results5.res', results)
results: TotalResults = load_results('results4.res')
plot: Figure = get_error_vs_frac_plot(results,
# attributes=[],
# attributes=[['air'], ['water'], ['sediment', 'soil', 'beach'], ['slush']],
partCounts=[(0, 2000), (2000, 5000), (5000, 10000), (10000, 40000)],
# methods=[['random subs', 'dummy']]*2,
# methods=[['random subsampling', 'random']]*2,
methods=[['layout (10', 'crossLayout (3', 'random subsampling', 'quarter) (10']] * 4,
poissonRef=False, fill=True)
plot.show()
#
# plot2: Figure = get_distance_hist_plots(results,
# attributes=[['air'], ['water'], ['sediment', 'soil', 'beach'], ['slush']])
# plot2.show()
# plot3: Figure = get_error_vs_mpfrac_plot(results, attributes=[['air'], ['water'], ['sediment', 'soil', 'beach'], ['slush']])
# plot3.show()
\ No newline at end of file
@@ -18,6 +18,7 @@ def get_default_DataSet() -> DataSet:
dset.imagedim_df = [10, 10]
dset.pixelscale_df = 1.0
setMaxDim(dset, 10, 0, 10, 0, 10)
dset.particleContainer = get_default_ParticleContainer()
return dset
@@ -127,8 +127,8 @@ class TestTrainedSubsampling(unittest.TestCase):
self.assertEqual(type(self.trainedSampling.label), str)
def test_load_classifier(self):
self.assertTrue(self.trainedSampling.clf is None)
self.assertTrue(self.trainedSampling.score is None)
# self.assertTrue(self.trainedSampling.clf is None)
# self.assertTrue(self.trainedSampling.score is None) # TODO: REIMPLEMENT
self.trainedSampling.clfPath = r'C:\Users\xbrjos\Desktop\Python\Subsampling\chemometrics\RandomForestClassifier, score 0.72.pkl'
self.trainedSampling._load_classifier()
@@ -160,16 +160,16 @@ class TestTrainedSubsampling(unittest.TestCase):
for index in range(numMPParticles): # all MP Particles should be measured
self.assertTrue(index in indicesToMeasure)
def test_get_theoretic_fraction(self):
for frac in [0.1, 0.3, 0.5, 0.9, 1.0]:
for score in [0.5, 0.7, 1.0]:
self.trainedSampling.fraction = frac
self.trainedSampling.score = score
score: float = self.trainedSampling.score
diff: float = 1 / self.trainedSampling.fraction - 1 # i.e., from 50 % score to 100 % score
factor: float = 1 + (1 - score) / 0.5 * diff
self.assertEqual(self.trainedSampling.get_theoretic_frac(), 1/factor)
# def test_get_theoretic_fraction(self):
# for frac in [0.1, 0.3, 0.5, 0.9, 1.0]:
# for score in [0.5, 0.7, 1.0]:
# self.trainedSampling.fraction = frac
# self.trainedSampling.score = score
#
# score: float = self.trainedSampling.score
# diff: float = 1 / self.trainedSampling.fraction - 1 # i.e., from 50 % score to 100 % score
# factor: float = 1 + (1 - score) / 0.5 * diff
# self.assertEqual(self.trainedSampling.get_theoretic_frac(), 1/factor)
# def test_make_subparticles_match_fraction(self):
@@ -43,8 +43,10 @@ class TestTotalResults(unittest.TestCase):
def test_get_error_vs_fraction_data(self):
firstSample: SampleResult = self.totalResults.add_sample('sample1.pkl')
firstSample.set_attribute('to be used')
firstSample.dataset = get_default_DataSet()
secondSample: SampleResult = self.totalResults.add_sample('sample2.pkl')
secondSample.set_attribute('not to be used')
secondSample.dataset = get_default_DataSet()
firstMethod: meth.RandomSampling = meth.RandomSampling(None, 0.1)
firstResult: SubsamplingResult = SubsamplingResult(firstMethod)
@@ -76,8 +78,8 @@ class TestTotalResults(unittest.TestCase):
firstSample.results = [firstResult, secondResult, thirdResult, thirdResult3]
secondSample.results = [firstResult, secondResult, thirdResult2, thirdResult3]
numSamples, resultDict = self.totalResults.get_error_vs_fraction_data()
self.assertEqual(numSamples, 2)
stats, resultDict = self.totalResults.get_error_vs_fraction_data()
self.assertEqual(stats['numSamples'], 2)
self.assertEqual(list(resultDict.keys()), [firstMethod.label, secondMethod.label, thirdMethod.label])
for i, key in enumerate(resultDict.keys()):
@@ -101,8 +103,8 @@ class TestTotalResults(unittest.TestCase):
self.assertAlmostEqual(mean, 50)
self.assertAlmostEqual(stdev, 16.32993161855452)
numSamples, filteredResultDict = self.totalResults.get_error_vs_fraction_data(attributes=['to be used'])
self.assertEqual(numSamples, 1)
stats, filteredResultDict = self.totalResults.get_error_vs_fraction_data(attributes=['to be used'])
self.assertEqual(stats['numSamples'], 1)
self.assertEqual(list(filteredResultDict.keys()), [firstMethod.label, secondMethod.label, thirdMethod.label])
for i, key in enumerate(filteredResultDict.keys()):
@@ -129,16 +131,16 @@ class TestTotalResults(unittest.TestCase):
self.assertAlmostEqual(mean, 50)
self.assertAlmostEqual(stdev, 16.32993161855452)
numSamples, filteredResultDict = self.totalResults.get_error_vs_fraction_data(methods=['cross'])
self.assertEqual(numSamples, 2)
stats, filteredResultDict = self.totalResults.get_error_vs_fraction_data(methods=['cross'])
self.assertEqual(stats['numSamples'], 2)
self.assertEqual(list(filteredResultDict.keys()), [secondMethod.label, thirdMethod.label])
numSamples, filteredResultDict = self.totalResults.get_error_vs_fraction_data(methods=['Cross'])
self.assertEqual(numSamples, 2)
stats, filteredResultDict = self.totalResults.get_error_vs_fraction_data(methods=['Cross'])
self.assertEqual(stats['numSamples'], 2)
self.assertEqual(list(filteredResultDict.keys()), [secondMethod.label, thirdMethod.label])
numSamples, filteredResultDict = self.totalResults.get_error_vs_fraction_data(methods=['random'])
self.assertEqual(numSamples, 2)
stats, filteredResultDict = self.totalResults.get_error_vs_fraction_data(methods=['random'])
self.assertEqual(stats['numSamples'], 2)
self.assertEqual(list(filteredResultDict.keys()), [firstMethod.label])
@@ -279,7 +281,7 @@ class TestSampleResult(unittest.TestCase):
possibleSpiralBoxMethods = 3
possibleRandomBoxMethods = 3
possibleQuarterRandomBoxMethods = 3
possibleChemometricMethods = 0
possibleChemometricMethods = 3 # i.e., the dummy classifier methods
totalPossible = possibleCrossBoxMethods + possibleRandomMethods + \
possibleSpiralBoxMethods + possibleChemometricMethods + \
possibleRandomBoxMethods + possibleQuarterRandomBoxMethods
@@ -330,7 +332,7 @@ class TestSampleResult(unittest.TestCase):
possibleRandomMethods = 4
possibleCrossBoxMethods = 3
possibleSpiralBoxMethods = 3
possibleChemometricMethods = 0
possibleChemometricMethods = 3
possibleRandomBoxMethods = 3
possibleQuarterRandomBoxMethods = 3
totalPossible = possibleCrossBoxMethods + possibleRandomMethods + \
@@ -369,7 +371,7 @@ class TestSampleResult(unittest.TestCase):
self.assertEqual(len(result.mpCountErrors), 0) # because the added results haven't set any
updatedMethods = self.sampleResult.update_result_with_methods(methods, force=True)
self.assertEqual(len(updatedMethods), 2) # because now we force the update
self.assertEqual(len(updatedMethods), 2*numVariations) # because now we force the update
self.assertTrue(method1 in updatedMethods)
self.assertTrue(method2 in updatedMethods)