Commit 5a4bf706 authored by Josef Brandt

Trained Subsampling

parent 386f1214
......@@ -20,3 +20,7 @@ cythonModules/build/
chemometrics/Assignments.txt
chemometrics/Data.txt
chemometrics/Assignments_all.txt
chemometrics/Data_all.txt
......@@ -5,7 +5,7 @@ from sklearn.decomposition import PCA
from sklearn.cluster import DBSCAN
# from scipy import spatial
# from itertools import combinations
from random import sample
from random import sample, random
import pickle
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
......@@ -19,7 +19,6 @@ from gepard.helperfunctions import cv2imread_fix
from methods import SubsamplingMethod
from helpers import timingDecorator
def get_pca(data: np.ndarray, numComp: int = 2) -> np.ndarray:
try:
standardizedData = StandardScaler().fit_transform(data.copy())
......@@ -252,12 +251,18 @@ def get_curvature_ft(contour: np.ndarray, angularSegment: float = 20, numHarmoni
class TrainedSubsampling(SubsamplingMethod):
def __init__(self, particleContainer: ParticleContainer, desiredFraction: float,
path: str = r'C:\Users\xbrjos\Desktop\Python\Subsampling\chemometrics\RandomForestClassifier, score 0.79.pkl'):
path: str = r'C:\Users\xbrjos\Desktop\Python\Subsampling\chemometrics\RandomForestClassifier, score 0.79.pkl',
fakeScore: float = 0.8):
super(TrainedSubsampling, self).__init__(particleContainer, desiredFraction)
self.score: float = None
self.clf = None
self.clfPath: str = path
self.fraction = desiredFraction
self.fakeClassifier: bool = True
self.fakeScore: float = fakeScore
self.fractionForExtrapolation: float = 0.0
self.predictedMPIndices: list = []
self._predict_MP_Indices()
def equals(self, otherMethod) -> bool:
isEqual: bool = False
......@@ -266,29 +271,149 @@ class TrainedSubsampling(SubsamplingMethod):
isEqual = True
return isEqual
@property
def label(self) -> str:
return 'Trained Random Sampling'
label: str = 'Trained Random Sampling'
if self.fakeClassifier:
label += f' faked to score {self.fakeScore}'
return label
def _predict_MP_Indices(self) -> None:
from evaluation import is_MP_particle
if not self.fakeClassifier:
self._load_classifier()
fullimgpath: str = r'C:\Users\xbrjos\Desktop\temp MP\Fullimages'
dsetname: str = self.particleContainer.datasetParent.name
imgPath: str = os.path.join(fullimgpath, dsetname + '.tif')
fullimg = cv2imread_fix(imgPath)
features: np.ndarray = get_particle_featurematrix(self.particleContainer, fullimg)
predictions: np.ndarray = self.clf.predict(features)
else:
self.score = self.fakeScore
particles: list = self.particleContainer.particles
predictions: np.ndarray = np.zeros(len(particles))
falsePositives, falseNegatives = 0, 0
numMP = 0
for index, particle in enumerate(particles):
if is_MP_particle(particle):
numMP += 1
if random() <= self.fakeScore:
predictions[index] = 1
else:
falseNegatives += 1
else:
if random() > self.fakeScore:
predictions[index] = 1
falsePositives += 1
mpIndices = list(np.where(predictions == 1)[0])
nonMPIndices = list(np.where(predictions == 0)[0])
numNonMPIndices = len(nonMPIndices)
fracNonMPToTake: float = float(np.clip(-1 + 1/0.075 * self.fraction, 0.0, 1.0))
numNonMPToTake: int = int(round(fracNonMPToTake * numNonMPIndices))
# numNonMPToTake = int(round(self.fraction**0.8 * numNonMPIndices))
# numNonMPToTake = int(round(self.fraction * numNonMPIndices))
self.predictedMPIndices = mpIndices + sample(nonMPIndices, numNonMPToTake)
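# Note on the fake classifier: each true MP particle is flagged as MP with probability
# fakeScore (otherwise it counts as a false negative), and each non-MP particle is falsely
# flagged with probability 1 - fakeScore. The clipped ramp -1 + fraction/0.075 then adds part
# of the predicted non-MP particles to the candidate pool: it is 0 up to a fraction of 0.075
# and reaches 1 at 0.15 (e.g. fraction 0.1 -> about 0.33 of the non-MP indices). The ramp
# constants appear to be empirical choices.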
def get_maximum_achievable_fraction(self) -> float:
return 1.0
numParticles: int = len(self.particleContainer.particles)
numMPPredicted: int = len(self.predictedMPIndices)
if numParticles > 0:
maxFrac: float = numMPPredicted / numParticles
else:
maxFrac: float = 0.0
return maxFrac
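# The achievable fraction is capped by the candidate pool built in _predict_MP_Indices():
# only particles in predictedMPIndices can be selected, so at most
# len(predictedMPIndices) / len(particles) of the dataset can be measured.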
def apply_subsampling_method(self) -> list:
self._load_classifier()
fullimgpath: str = r'C:\Users\xbrjos\Desktop\temp MP\Fullimages'
dsetname: str = self.particleContainer.datasetParent.name
imgPath: str = os.path.join(fullimgpath, dsetname + '.tif')
fullimg = cv2imread_fix(imgPath)
features: np.ndarray = get_particle_featurematrix(self.particleContainer, fullimg)
predictions: np.ndarray = self.clf.predict(features)
indicesToSelect: set = self._get_measure_indices(list(predictions))
# if not self.fakeClassifier:
# self._load_classifier()
# fullimgpath: str = r'C:\Users\xbrjos\Desktop\temp MP\Fullimages'
# dsetname: str = self.particleContainer.datasetParent.name
# imgPath: str = os.path.join(fullimgpath, dsetname + '.tif')
# fullimg = cv2imread_fix(imgPath)
#
# features: np.ndarray = get_particle_featurematrix(self.particleContainer, fullimg)
# predictions: np.ndarray = self.clf.predict(features)
# else:
# from evaluation import is_MP_particle
# self.score = self.fakeScore
# particles: list = self.particleContainer.particles
# predictions: np.ndarray = np.zeros(len(particles))
# falsePositives, falseNegatives = 0, 0
# numMP = 0
# for index, particle in enumerate(particles):
# if is_MP_particle(particle):
# numMP += 1
# if random() <= self.fakeScore:
# predictions[index] = 1
# else:
# falseNegatives += 1
# # assert predictions[index] == 1
# else:
# if random() > self.fakeScore:
# predictions[index] = 1
# falsePositives += 1
# # assert predictions[index] == 0
# particles = self.particleContainer.particles
# predictedMPIndices = list(np.where(predictions == 1)[0])
# numPredictedMP = len(self.predictedMPIndices)
# numParticlesToMeasure = round(len(particles) * self.fraction)
# origMPFrac = round(numMP/len(particles)*100, 1)
# origNumMP = numMP
# print(f'numTotal: , {len(particles)}, numPredictedMP: {numPredictedMP}, '
# f'numMeasured: {numParticlesToMeasure}')
# print(f'falsePos: {falsePositives}, falseNeg: {falseNegatives}')
# print('measuredFrac is', self.fraction)
# print(f'orig MP Fraction {origMPFrac}, numMPParticles: {numMP}')
numParticlesToSelect = round(len(self.particleContainer.particles) * self.fraction)
if numParticlesToSelect > len(self.predictedMPIndices):
print(numParticlesToSelect, len(self.predictedMPIndices))
numParticlesToSelect = len(self.predictedMPIndices)
# assert numParticlesToSelect <= len(self.predictedMPIndices)
# enhancedParticles = []
# for particle in self.particleContainer.particles:
# if particle.index in self.predictedMPIndices:
# enhancedParticles.append(particle)
indicesToSelect = sample(self.predictedMPIndices, numParticlesToSelect)
# print(f'having to measure according to fraction: {numParticlesToMeasure}, actually measured: {len(indicesToSelect)}')
# indicesToSelect: set = self._get_measure_indices(list(predictions))
selectedParticles: list = []
for particle in self.particleContainer.particles:
if particle.index in indicesToSelect:
selectedParticles.append(particle)
# fac = measuredMPFrac / self.fraction
# if fac > 0:
# self.fractionForExtrapolation = fac
# else:
# self.fractionForExtrapolation = self.fraction
# numEstimMP = numMP / self.fractionForExtrapolation
numOrigParticles = len(self.particleContainer.particles)
numEnhancedParticles = len(self.predictedMPIndices)
self.fractionForExtrapolation = self.fraction * (numOrigParticles/numEnhancedParticles)
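# fractionForExtrapolation rescales the nominal fraction to the reduced candidate pool,
# e.g. 1000 particles, 200 predicted-MP candidates and fraction 0.1 -> 100 particles measured
# and 0.1 * (1000/200) = 0.5 stored, i.e. half of the candidate pool was covered. Presumably
# the MP count of the subsample is later divided by this value to extrapolate to the full sample.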
# print(f'fraction: {self.fraction}, measured mpfrac: {measuredMPFrac}')
# print(self.fraction, self.score, measuredMPFrac)
# fac: float = 0.5 * np.log10(self.fraction) + self.score*1.2 # empirical
# estimOrigMPFraction = measuredMPFrac / fac
# print(self.fraction, origMPFrac, round(estimOrigMPFraction*100, 1))
# estimOrigMPFraction = measuredMPFrac / self.score
# estimNumMPParticles = estimOrigMPFraction * len(particles)
# print(f'orig MP {origNumMP}, estimated MP {round(estimNumMPParticles)}')
# if estimNumMPParticles > 0:
# self.fractionForExtrapolation = numMP/estimNumMPParticles
# else:
# self.fractionForExtrapolation = self.fraction
# self.fractionForExtrapolation = self.fraction / self.score
# print('measured MP frac is', round(measuredMPFrac*100, 1))
# print('estimMPFraction', round(estimOrigMPFration*100, 1))
# print('frac for extrap', self.fractionForExtrapolation)
return selectedParticles
def _load_classifier(self) -> None:
......@@ -306,12 +431,12 @@ class TrainedSubsampling(SubsamplingMethod):
mpIndices: list = list(np.where(assignments == 1)[0])
nonMpIndices: list = list(np.where(assignments == 0)[0])
numEstimMPParticles: int = len(mpIndices)
numPredictedMP: int = len(mpIndices)
numParticlesToMeasure = round(len(predictedAssignments) * self.fraction)
if numParticlesToMeasure <= numEstimMPParticles:
if numParticlesToMeasure <= numPredictedMP:
indicesToMeasure = set(sample(mpIndices, numParticlesToMeasure))
else:
remainingIndices: int = int(numParticlesToMeasure - numEstimMPParticles)
remainingIndices: int = int(numParticlesToMeasure - numPredictedMP)
indicesToMeasure = set(mpIndices + sample(nonMpIndices, remainingIndices))
assert len(indicesToMeasure) == numParticlesToMeasure
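# Selection logic: if the measurement budget fits within the particles predicted as MP, the
# subsample is drawn from those alone; otherwise all predicted-MP indices are taken and the
# remaining budget is filled with randomly drawn non-MP indices.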
......@@ -324,11 +449,18 @@ class TrainedSubsampling(SubsamplingMethod):
It is used for extrapolating the mpCount of the subsampled particle list.
:return:
"""
score: float = self.score
diff: float = 1/self.fraction - 1 # i.e., from 50 % score to 100 % score
factor: float = 1 + (1 - score)/0.5 * diff
return 1 / factor
# return self.fraction
# score: float = self.score
# diff: float = 1/self.fraction - 1 # i.e., from 50 % score to 100 % score
# factor: float = 1 + (1 - score)/0.5 * diff
# pow: float = (1-self.fakeScore)
# theoreticFactor: float = (1/factor**pow)
# print('actual fraction, theor. factor is', self.fraction, theoreticFactor)
# return theoreticFactor
return self.fractionForExtrapolation
# class ChemometricSubsampling(SubsamplingMethod):
......@@ -463,4 +595,28 @@ class TrainedSubsampling(SubsamplingMethod):
# assert abs(totalPointsAdded - numPointsToSelect) <= 1
# for clusterIndex in pointsPerCluster.keys():
# assert 0 <= pointsPerCluster[clusterIndex] <= len(labels[labels == clusterIndex])
# return pointsPerCluster
if __name__ == '__main__':
import matplotlib.pyplot as plt
fractions: np.ndarray = np.linspace(0.01, 1, 100)
scores: np.ndarray = np.linspace(0.5, 1.0, 5)
plt.clf()
for score in scores:
# if score == 0.5:
# theorFractions = fractions
# a, b, n = 1, 1, 1.5
# data1 = a * fractions**n / (fractions**n + b)
# data1 -= data1.min()
# data1 /= data1.max()
# theorFactors = 0.5 + 0.5*data1
theorFractions = []
for frac in fractions:
diff: float = 1 / frac - 1 # i.e., from 50 % score to 100 % score
factor: float = 1 + (1 - score) / 0.5 * diff
theorFractions.append(1/factor**0.2)
plt.plot(fractions, theorFractions, label=str(score))
plt.legend()
plt.show()
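# Quick sanity plot of the (apparently empirical) correction curve
# theorFraction = 1 / factor**0.2 with factor = 1 + (1 - score)/0.5 * (1/fraction - 1).
# Example: score 0.75, fraction 0.1 -> factor = 1 + 0.5*9 = 5.5, theorFraction = 5.5**-0.2 ≈ 0.71.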
import sys
sys.path.append("C://Users//xbrjos//Desktop//Python")
from gepard.helperfunctions import cv2imread_fix
from gepard.dataset import loadData
from gepard.dataset import DataSet,loadData
from gepard.analysis.particleContainer import ParticleContainer
import cv2
import numpy as np
......@@ -9,7 +9,42 @@ from scipy import spatial
import os
import matplotlib.pyplot as plt
from helpers import get_filterDimensions_from_dataset, get_center_from_filter_dimensions, convert_length_to_pixels
from graphs import get_distance_point_histogramdata
def get_particle_heterogeneity(dataset: DataSet, numCells: int = 50) -> float:
offset, diameter, [width, height] = get_filterDimensions_from_dataset(dataset)
center: np.ndarray = get_center_from_filter_dimensions(offset, diameter)
width: float = convert_length_to_pixels(dataset, width)
height: float = convert_length_to_pixels(dataset, height)
pixelsPerTile: int = max(int(round(width/numCells)), int(round(height/numCells)))
centerX: int = int(round(convert_length_to_pixels(dataset, center[0] / pixelsPerTile)))
centerY: int = int(round(convert_length_to_pixels(dataset, center[1] / pixelsPerTile)))
radius: int = int(round(convert_length_to_pixels(dataset, diameter / pixelsPerTile * 0.5)))
numRows: int = int(np.ceil(height / pixelsPerTile)) + 1
numCols: int = int(np.ceil(width / pixelsPerTile)) + 1
densityImage: np.ndarray = np.zeros((numRows, numCols))
for particle in dataset.particleContainer.particles:
particleCenter: tuple = np.mean(particle.contour[:, 0, 0]), np.mean(particle.contour[:, 0, 1])
row: int = int(round(particleCenter[1] / pixelsPerTile))
col: int = int(round(particleCenter[0] / pixelsPerTile))
densityImage[row, col] += 1
mask: np.ndarray = np.zeros_like(densityImage)
cv2.circle(mask, (centerY, centerX), radius, 1, -1)
relevantData: np.ndarray = densityImage[mask > 0]
mean: np.ndarray = np.round(np.mean(relevantData), 2)
std: np.ndarray = np.round(np.std(relevantData), 2)
ratio: float = round(std/mean, 2)
# plt.imshow(densityImage)
# plt.title(f'sample: {dataset.name},\nmean: {mean}, std: {std}, ratio = {ratio}')
# plt.show()
# print(f'sample: {dataset.name}, mean: {mean}, std: {std}, ratio = {ratio}')
return ratio
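# The heterogeneity measure is the coefficient of variation (std/mean) of per-tile particle
# counts: the filter is rasterised into tiles of pixelsPerTile (roughly numCells tiles along
# the larger dimension), particles are binned by their contour centre, and only tiles inside
# the circular filter mask are evaluated. Values near 0 indicate a uniform particle
# distribution, larger values indicate clustering. Note that cv2.circle expects its centre as
# (x, y), i.e. (column, row); whether (centerY, centerX) matches that ordering depends on the
# convention of get_center_from_filter_dimensions.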
if __name__ == '__main__':
......@@ -24,12 +59,13 @@ if __name__ == '__main__':
# plt.imshow(distmap, cmap='gray')
paths: list = [r'C:\Users\xbrjos\Desktop\temp MP\NewDatasets\water\181120_MCI_2_ds1+2_all_ kleiner500_10_1.pkl',
r'C:\Users\xbrjos\Desktop\temp MP\NewDatasets\wastewater, slush\190619_5_PTPH_sld_190321_ds1_50_1_neu.pkl']
# paths.append(r'C:\Users\xbrjos\Desktop\temp MP\NewDatasets\water\190306_MCII_1_2_50.pkl')
# paths.append(r'C:\Users\xbrjos\Desktop\temp MP\NewDatasets\water\190222_MCII_1_1_50_1.pkl')
paths.append(r'C:\Users\xbrjos\Desktop\temp MP\NewDatasets\water\190306_MCII_1_2_50.pkl')
paths.append(r'C:\Users\xbrjos\Desktop\temp MP\NewDatasets\water\190222_MCII_1_1_50_1.pkl')
distances: list = []
for path in paths:
dset = loadData(path)
particleContainer: ParticleContainer = dset.particleContainer
get_particle_heterogeneity(dset)
# particleContainer: ParticleContainer = dset.particleContainer
# particleCenters: list = []
# for particle in particleContainer.particles:
# particleCenters.append([np.mean(particle.contour[:, 0, 0]), np.mean(particle.contour[:, 0, 1])])
......
......@@ -30,7 +30,7 @@ def get_random_topLefts(int numBoxes, double boxSize, double radius, double maxA
cdef int outerCounter, counter, x, y, i, j, diffX, diffY, successfullyAdded
cdef bint validSolutionFound, boxOverlaps
srand(42) # setting seed
srand(seed) # setting seed
assert RAND_MAX == 32767  # this value is used by the rand() function above; for performance reasons it is hard-coded there as a literal
maxDist = radius - np.sqrt((boxSize/2)**2 + (boxSize/2)**2)
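# maxDist is presumably the largest allowed distance of a box centre from the filter centre:
# the filter radius minus half the box diagonal, so that a box of side boxSize still lies
# completely inside the circular filter.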
outerCounter = 0
......
......@@ -5,7 +5,7 @@ sys.path.append("C://Users//xbrjos//Desktop//Python")
from gepard import dataset
from gepard.analysis.particleContainer import ParticleContainer
from cythonModules import rotateContour
from helpers import get_filterDimensions_from_dataset, get_center_from_filter_dimensions, convert_length_to_pixels
from helpers import get_filterDimensions_from_dataset, get_center_from_filter_dimensions, convert_length_to_pixels, timingDecorator
class ParticleVariations(object):
......@@ -15,6 +15,7 @@ class ParticleVariations(object):
self.origParticleContainer: ParticleContainer = self.dataset.particleContainer
self.numVariations = numVariations
@timingDecorator
def get_particleContainer_variations(self) -> ParticleContainer:
if self.numVariations > 0:
offset, diameter, [width, height] = get_filterDimensions_from_dataset(self.dataset)
......
......@@ -8,6 +8,7 @@ Created on Wed Jan 22 13:57:28 2020
# import pickle
import os
import numpy as np
import time
# import matplotlib.pyplot as plt
import concurrent.futures
import operator
......@@ -33,29 +34,36 @@ def get_methods_to_test(dataset: dataset.DataSet, fractions: list = [], maxTries
:return: list of measurement objects that are applicable
"""
if len(fractions) == 0:
fractions: list = [0.02, 0.05, 0.1, 0.25, 0.5, 0.7, 0.9]
# fractions: list = [0.1, 0.3, 0.5]
# fractions: list = [0.02, 0.04, 0.06, 0.1, 0.2, 0.4, 0.5, 0.7, 0.9]
# fractions: list = [0.02, 0.06, 0.15, 0.2, 0.5]
fractions: list = [0.01, 0.1, 0.5, 0.9]
methods: list = []
particleContainer = dataset.particleContainer
for fraction in fractions:
methods.append(meth.RandomSampling(particleContainer, fraction))
methods.append(meth.SizeBinFractioning(particleContainer, fraction))
boxCreator: gmeth.BoxSelectionCreator = gmeth.BoxSelectionCreator(dataset)
methods += boxCreator.get_crossBoxSubsamplers_for_fraction(fraction)
methods += boxCreator.get_spiralBoxSubsamplers_for_fraction(fraction)
methods += boxCreator.get_randomBoxSubsamplers_for_fraction(fraction, maxTries=maxTries)
methods += boxCreator.get_randomQuarterBoxSubsamplers_for_fraction(fraction, maxTries=maxTries)
# methods.append(cmeth.TrainedSubsampling(particleContainer, fraction))
# methods.append(cmeth.ChemometricSubsampling(particleContainer, fraction))
# methods.append(meth.RandomSampling(particleContainer, fraction))
# methods.append(meth.SizeBinFractioning(particleContainer, fraction))
# boxCreator: gmeth.BoxSelectionCreator = gmeth.BoxSelectionCreator(dataset)
# methods += boxCreator.get_crossBoxSubsamplers_for_fraction(fraction)
# methods += boxCreator.get_spiralBoxSubsamplers_for_fraction(fraction)
# methods += boxCreator.get_randomBoxSubsamplers_for_fraction(fraction, maxTries=maxTries)
# methods += boxCreator.get_randomQuarterBoxSubsamplers_for_fraction(fraction, maxTries=maxTries)
for fakeScore in [0.6, 0.7, 0.8]:
trainedSampling = cmeth.TrainedSubsampling(particleContainer, fraction, fakeScore=fakeScore)
if trainedSampling.config_is_valid():
methods.append(trainedSampling)
else:
print('rejecting trained sample with fraction', fraction)
return methods
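# With the default fractions [0.01, 0.1, 0.5, 0.9] this yields up to 4 * 3 = 12
# TrainedSubsampling instances per dataset; config_is_valid() apparently rejects
# fraction/score combinations that are not achievable, which is only logged here.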
def update_sample(sample, force: bool, index: int):
sample.load_dataset()
t0 = time.time()
methods: list = get_methods_to_test(sample.dataset)
print('getting methods for sample', sample.dataset.name, 'took', round(time.time()-t0, 2), 'seconds')
sample.update_result_with_methods(methods, force)
return sample, index
......@@ -131,7 +139,7 @@ class TotalResults(object):
self.sampleResults[index] = updatedResult
print(f'done updating {updatedResult.dataset.name} at index {index}')
def get_error_vs_fraction_data(self, attributes: list = [], methods: list = []) -> dict:
def get_error_vs_fraction_data(self, attributes: list = [], methods: list = []) -> tuple:
"""
:param attributes: A list of attributes that should be used for filtering the samples. Only samples with an
attribute from within that list are considered.
......@@ -139,11 +147,18 @@ class TotalResults(object):
Value: {Dict: Key:Measured Fraction, Value: Tuple (averaged MPCountError, StDev MPCountError) over all samples}
"""
result: dict = {}
allParticles: list = []
numSamples: int = 0
for sample in self.sampleResults:
sample: SampleResult = sample
if attributes == [] or sample.has_any_attribute(attributes):
numSamples += 1
if sample.dataset is None:
sample.load_dataset()
for particle in sample.dataset.particleContainer.particles:
allParticles.append(particle)
for res in sample.results:
res: SubsamplingResult = res
method: meth.SubsamplingMethod = res.method
......@@ -160,6 +175,11 @@ class TotalResults(object):
else:
result[label][frac].append((error, stdev))
numMPParticles: float = get_number_of_MP_particles(allParticles)
stats: dict = {'numSamples': numSamples,
'meanParticleCount': round(len(allParticles) / numSamples),
'meanMPFrac': round(numMPParticles / len(allParticles) * 100, 1)}
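# stats summarises the filtered sample set, e.g. (hypothetical numbers) 4 samples with a total
# of 40000 particles and 200 MP particles give meanParticleCount = 10000 and meanMPFrac = 0.5 (%).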
for method in result.keys():
methodRes: dict = result[method]
for fraction in methodRes.keys():
......@@ -167,7 +187,7 @@ class TotalResults(object):
meanStd = np.mean([i[1] for i in methodRes[fraction]])
methodRes[fraction] = (meanError, meanStd)
return numSamples, result
return stats, result
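# The returned result maps each method label to {fraction: (mean mpCountError, mean stdev)},
# averaged over all samples passing the attribute filter; stats feeds the subplot titles in
# get_error_vs_frac_plot.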
class SubsamplingResult(object):
......@@ -289,8 +309,8 @@ class SampleResult(object):
self.load_dataset()
updatedMethods: list = []
t0 = time.time()
particleVariations: ParticleVariations = ParticleVariations(self.dataset, numVariations=self.numVariations)
needsToBeUpdated: dict = {method: False for method in methods}
for index, particleContainer in enumerate(particleVariations.get_particleContainer_variations()):
......@@ -310,12 +330,12 @@ class SampleResult(object):
needsToBeUpdated[method] = True
if needsToBeUpdated[method]:
t0 = time.time()
subParticles = method.apply_subsampling_method()
result.add_result(method.particleContainer.particles, subParticles)
if method not in updatedMethods:
updatedMethods.append(method)
# print(f'updated {self.sampleName} with {method.label} at fraction {method.fraction}, '
# f'iteration {index+1}')
updatedMethods.append(method)
print(f'updated {self.sampleName} with {method.label} at fraction {method.fraction}, '
f'iteration {index+1}, took {round(time.time()-t0, 2)} seconds')
return updatedMethods
......
......@@ -263,6 +263,7 @@ class CrossBoxSubSampling(BoxSelectionSubsamplingMethod):
numBoxes: int = 2 * self.numBoxesAcross - 1
totalBoxArea: float = numBoxes * (maxBoxSize ** 2)
maxFraction: float = totalBoxArea / self.filterArea
return maxFraction
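# A cross consists of 2 * numBoxesAcross - 1 boxes (presumably a horizontal and a vertical
# row sharing the centre box), so e.g. numBoxesAcross = 3 gives 5 boxes and
# maxFraction = 5 * maxBoxSize**2 / filterArea.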
def equals(self, otherMethod) -> bool:
......@@ -396,7 +397,6 @@ class RandomBoxSampling(BoxSelectionSubsamplingMethod):
return equals
def get_topLeft_of_boxes(self) -> list:
valid, topLefts = randoms.get_random_topLefts(self.numBoxes, self.boxSize,
self.filterDiameter/2, self.__maxAngle,
seed=self.randomSeed, maxTries=self.maxTries)
......
from matplotlib.figure import Figure
import matplotlib.pyplot as plt
from matplotlib.ticker import ScalarFormatter, FixedLocator
import numpy as np
from evaluation import TotalResults, SampleResult
from evaluation import TotalResults, get_number_of_MP_particles
from chemometrics.imageOperations import get_particle_heterogeneity
from helpers import get_filterDimensions_from_dataset, get_center_from_filter_dimensions, convert_length_to_pixels
......@@ -13,7 +16,7 @@ def get_error_vs_frac_plot(totalResults: TotalResults, attributes: list = [], me
methods = [[]]*len(attributes)
assert len(attributes) == len(methods)
fig: Figure = plt.figure(figsize=(10, 5))
fig: Figure = plt.figure(figsize=(14, 7))
numRows: int = 1
numCols: int = 1
if len(attributes) == 0:
......@@ -27,31 +30,44 @@ def get_error_vs_frac_plot(totalResults: TotalResults, attributes: list = [], me
index = 0
for attrs, meths in zip(attributes, methods):
ax = fig.add_subplot(numRows, numCols, index + 1)
numSamples, errorPerFraction = totalResults.get_error_vs_fraction_data(attributes=attrs, methods=meths)
stats, errorPerFraction = totalResults.get_error_vs_fraction_data(attributes=attrs, methods=meths)
for methodLabel in errorPerFraction.keys():
errorDict: dict = errorPerFraction[methodLabel]
fractions: list = list(errorDict.keys())
errors: np.ndarray = np.array([errorDict[fraction][0] for fraction in fractions])
stdevs: np.ndarray = np.array([errorDict[fraction][1] for fraction in fractions])
fractions = [i*100 for i in fractions]
alphascale = 0.3 if methodLabel.find('Random Subsampling') != -1 else 1.0
print(methodLabel, errors)
if not standarddevs:
ax.plot(fractions, errors, label=methodLabel, marker='s')
ax.plot(fractions, errors, label=methodLabel, marker='s', alpha=alphascale)
else:
line = ax.errorbar(fractions, errors, stdevs, label=methodLabel, marker='s', capsize=5)
line = ax.errorbar(fractions, errors, stdevs, label=methodLabel, marker='s', capsize=5, alpha=alphascale)
if fill:
color = line[0].get_color()
ax.fill_between(fractions, errors-stdevs, errors+stdevs, alpha=0.2, facecolor=color)
ax.fill_between(fractions, errors-stdevs, errors+stdevs, alpha=0.2*alphascale, facecolor=color)
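# Curves whose label contains 'Random Subsampling' are drawn with alpha 0.3 so that the
# remaining curves stand out; with `fill` enabled, the shaded band spans +/- one standard
# deviation of the mpCountError.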
numSamples = stats['numSamples']
meanParticleCount = stats['meanParticleCount']
meanMPFrac = stats['meanMPFrac']
title: str = ''
if len(attrs) > 0:
title = ', '.join(attr for attr in attrs)
title += f' ({numSamples} samples)'
title += f'\n({numSamples} samples, avg. {meanParticleCount} particles, {meanMPFrac} % MP)'
ax.set_title(title, fontSize=15)
ax.set_title(title, fontsize=13)
ax.set_xscale('log')
ax.set_xlabel('measured fraction', fontsize=12)
ax.xaxis.set_major_formatter(ScalarFormatter())
# ax.xaxis.set_major_locator(FixedLocator([0.02, 0.05, 0.1, 0.2, 0.5, 1.0]))
ax.xaxis.set_major_locator(FixedLocator([2, 5, 10, 20, 50, 100]))
ax.set_xlabel('measured fraction (%)', fontsize=12)
ax.set_ylabel('mpCountError (%)', fontsize=12)
minX, maxX = 0.9 * min(fractions), 1.05
minX, maxX = 0.9 * min(fractions), 105
ax.hlines([20, 40, 60, 80], minX, maxX, colors='gray', alpha=0.5)
ax.set_xlim([minX, maxX])
ax.set_ylim([0, 100])
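# Fractions are plotted in percent on a logarithmic x-axis with fixed ticks at
# 2, 5, 10, 20, 50 and 100 %; the horizontal lines at 20/40/60/80 % mpCountError
# serve as visual reference levels.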
......@@ -60,12 +76,11 @@ def get_error_vs_frac_plot(totalResults: TotalResults, attributes: list = [], me
index += 1
fig.tight_layout()
return fig
def get_distance_hist_plots(totalResults: TotalResults, attributes: list = []) -> Figure:
fig: Figure = plt.figure(figsize=(10, 5))
fig: Figure = plt.figure(figsize=(14, 7))
numRows: int = 1
numCols: int = 1
if len(attributes) == 0:
......@@ -78,13 +93,20 @@ def get_distance_hist_plots(totalResults: TotalResults, attributes: list = []) -