Commit e2b9a501 authored by Josef Brandt

Included StandardDeviation for MPCountError, several fixes

parent a9e96ce0
......@@ -183,7 +183,7 @@ class ChemometricSubsampling(SubsamplingMethod):
if not abs(totalPointsAdded - numPointsToSelect) <= 1:
print('error')
# assert abs(totalPointsAdded - numPointsToSelect) <= 1
assert abs(totalPointsAdded - numPointsToSelect) <= 1
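# (the ±1 tolerance presumably absorbs the rounding of the per-cluster point counts)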
for clusterIndex in pointsPerCluster.keys():
assert 0 <= pointsPerCluster[clusterIndex] <= len(labels[labels == clusterIndex])
return pointsPerCluster
......
......@@ -20,10 +20,7 @@ class ParticleVariations(object):
offset, diameter, [width, height] = get_filterDimensions_from_dataset(self.dataset)
center: np.ndarray = get_center_from_filter_dimensions(offset, diameter)
partContainer: ParticleContainer = self.origParticleContainer
contours: list = partContainer.getParticleContours()
# center: tuple = round(np.mean(contours[:][0][0])),\
# round(np.mean(contours[:][0][1]))
# center: np.ndarray = np.array(center, dtype=np.int32)
# contours: list = partContainer.getParticleContours()
angles = self._get_angles()
for i in range(self.numVariations):
if i > 0:
......
......@@ -10,7 +10,6 @@ import os
import numpy as np
import matplotlib.pyplot as plt
import concurrent.futures
# from multiprocessing import Pool, Process, Event, Queue
import sys
sys.path.append("C://Users//xbrjos//Desktop//Python")
from gepard import dataset
......@@ -32,7 +31,7 @@ def get_methods_to_test(dataset: dataset.DataSet, fractions: list = []) -> list:
:return: list of measurement objects that are applicable
"""
if len(fractions) == 0:
fractions: list = [0.05, 0.1, 0.3]
fractions: list = [0.02, 0.05, 0.1, 0.2, 0.3, 0.5, 0.7, 0.9]
methods: list = []
particleContainer = dataset.particleContainer
......@@ -82,43 +81,27 @@ class TotalResults(object):
:param force: Whether to force an update of an already existing method.
:return:
"""
print('about to update all')
#############################################################
# p = Pool(processes=len(forceList))
# results = p.map(update_sample, indices)
# p.close()
# print(results)
#############################################################
forceList: list = [force]*len(self.sampleResults)
indices: list = list(np.arange(len(self.sampleResults)))
###########################################################
numSamples: int = len(forceList)
numWorkers: int = 4  # seems reasonable for a quad-core processor
chunksize: int = numSamples // numWorkers
with concurrent.futures.ProcessPoolExecutor() as executor:
results = executor.map(update_sample, self.sampleResults, forceList, indices)
# results = executor.map(update_sample, indices)
results = executor.map(update_sample, self.sampleResults, forceList, indices, chunksize=chunksize)
for index, res in enumerate(results):
updatedSample, processid = res
print(f'returned from process {processid}, iteration index {index}')
self.sampleResults[index] = updatedSample
# for index, sample in enumerate(self.sampleResults):
# sample.load_dataset()
# possibleMethods: list = []
# for fraction in self.measuredFractions:
# for method in self._get_methods_for_fraction(sample.dataset, fraction):
# possibleMethods.append(method)
#
# sample.update_result_with_methods(possibleMethods, force=force)
# print(f'processed {index+1} of {len(self.sampleResults)} samples')
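A side note on the new chunksize handling above (an editorial sketch, not part of the commit): ProcessPoolExecutor.map requires chunksize >= 1, so the integer division numSamples // numWorkers yields 0 whenever there are fewer samples than workers and the call would raise a ValueError. A minimal guard, assuming the same variable names and update_sample signature as in the method:

import concurrent.futures

numWorkers: int = 4
chunksize: int = max(1, numSamples // numWorkers)  # never let the chunk size drop below 1
with concurrent.futures.ProcessPoolExecutor(max_workers=numWorkers) as executor:
    results = executor.map(update_sample, self.sampleResults, forceList, indices,
                           chunksize=chunksize)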
def get_error_vs_fraction_data(self, attributes: list = [], methods: list = []) -> dict:
"""
Returns Dict: Key: Method Label, Value: (Dict: Key:Measured Fraction, Value: averaged MPCountError over all samples)
:param attributes: A list of attributes that should be used for filtering the samples. Only samples with an
attribute from within that list are considered.
:return:
:return: Dict: Key: Method Label,
Value: {Dict: Key:Measured Fraction, Value: Tuple (averaged MPCountError, StDev MPCountError) over all samples}
"""
result: dict = {}
for sample in self.sampleResults:
......@@ -131,18 +114,21 @@ class TotalResults(object):
label: str = method.label
frac: float = method.fraction
error: float = res.mpCountError
stdev: float = res.mpCountErrorStDev
if label not in result.keys():
result[label] = {frac: [error]}
result[label] = {frac: [(error, stdev)]}
elif frac not in result[label].keys():
result[label][frac] = [error]
result[label][frac] = [(error, stdev)]
else:
result[label][frac].append(error)
result[label][frac].append((error, stdev))
for method in result.keys():
methodRes: dict = result[method]
for fraction in methodRes.keys():
methodRes[fraction] = np.mean(methodRes[fraction])
meanError = np.mean([i[0] for i in methodRes[fraction]])
meanStd = np.mean([i[1] for i in methodRes[fraction]])
methodRes[fraction] = (meanError, meanStd)
return result
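To make the updated docstring concrete, this is roughly the shape of the dictionary returned by get_error_vs_fraction_data after this change; the method labels and numbers below are purely illustrative:

# {method label: {measured fraction: (mean MPCountError, mean MPCountError stdev)}}
exampleResult: dict = {
    'random': {0.1: (80.0, 10.0), 0.3: (35.0, 5.0)},
    'sizeBin': {0.1: (60.0, 10.0)},
}
meanError, meanStdev = exampleResult['random'][0.1]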
......@@ -190,7 +176,12 @@ class SubsamplingResult(object):
:param subParticles:
:return:
"""
self.mpCountErrors.append(self._get_mp_count_error(origParticles, subParticles, self.method.fraction))
# if type(self.method) == cmeth.ChemometricSubsampling:
# print('chemometric subsampling found')
# error: float = self._get_mp_count_error(origParticles, subParticles, 1.0)
# else:
error: float = self._get_mp_count_error(origParticles, subParticles, self.method.fraction)
self.mpCountErrors.append(error)
def _get_mp_count_error_per_bin(self, allParticles: list, subParticles: list, fractionMeasured: float) -> tuple:
binSorter = ParticleBinSorter()
......@@ -234,7 +225,7 @@ class SampleResult(object):
"""
An object that actually stores all generated results per sample and can update and report on them.
"""
def __init__(self, filepath: str, numVariations: int = 3):
def __init__(self, filepath: str, numVariations: int = 10):
super(SampleResult, self).__init__()
self.filepath: str = filepath
self.dataset: dataset.DataSet = None
......
import os
import pickle
from evaluation import TotalResults
from helpers import timingDecorator
def load_results(fname: str) -> TotalResults:
# TODO: REMOVE DATASET FROM SAMPLERESULTS, OTHERWISE THE FILE SIZE IS GOING TO BE HUGE
res: TotalResults = None
if os.path.exists(fname):
with open(fname, "rb") as fp:
res = pickle.load(fp)
return res
return None
def save_results(fname: str, result: TotalResults) -> None:
storedDsets: dict = {}
for sampleRes in result.sampleResults:
storedDsets[sampleRes.sampleName] = sampleRes.dataset
sampleRes.dataset = None
with open(fname, "wb") as fp:
pickle.dump(result, fp, protocol=-1)
for sampleRes in result.sampleResults:
sampleRes.dataset = storedDsets[sampleRes.sampleName]
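For orientation, a short usage sketch of the two helpers above (file name illustrative): save_results temporarily detaches the datasets so they are not pickled and re-attaches them afterwards, which is why a freshly loaded TotalResults carries sampleResults whose dataset is None, as the new test at the bottom also checks:

results: TotalResults = TotalResults()
save_results('results1.res', results)      # datasets are stripped only inside the pickle
reloaded: TotalResults = load_results('results1.res')
assert reloaded is not None
assert all(sampleRes.dataset is None for sampleRes in reloaded.sampleResults)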
def get_pkls_from_directory(dirPath: str) -> dict:
"""
......
......@@ -10,34 +10,34 @@ SET GEPARD TO EVALUATION BRANCH (WITHOUT THE TILING STUFF), OTHERWISE SOME OF TH
"""
if __name__ == '__main__':
# results: TotalResults = TotalResults()
# pklsInFolders = get_pkls_from_directory(r'C:\Users\xbrjos\Desktop\temp MP\NewDatasets')
#
# for folder in pklsInFolders.keys():
# for samplePath in pklsInFolders[folder]:
# newSampleResult: SampleResult = results.add_sample(samplePath)
# for attr in get_attributes_from_foldername(folder):
# newSampleResult.set_attribute(attr)
#
# t0 = time.time()
# results.update_all()
# print('updating all took', time.time()-t0, 'seconds')
results: TotalResults = TotalResults()
pklsInFolders = get_pkls_from_directory(r'C:\Users\xbrjos\Desktop\temp MP\NewDatasets')
# save_results('results1.res', results)
results: TotalResults = load_results('results1.res')
# results.update_all(force=True)
for folder in pklsInFolders.keys():
for samplePath in pklsInFolders[folder]:
newSampleResult: SampleResult = results.add_sample(samplePath)
for attr in get_attributes_from_foldername(folder):
newSampleResult.set_attribute(attr)
t0 = time.time()
results.update_all()
print('updating all took', time.time()-t0, 'seconds')
save_results('results1.res', results)
# results: TotalResults = load_results('results1.res')
# save_results('results1.res', results)
plt.clf()
errorPerFraction: dict = results.get_error_vs_fraction_data(attributes=['air', 'water'],
methods=[])
methods=['random', 'sizeBin', 'chemo'])
plt.subplot(121)
for methodLabel in errorPerFraction.keys():
fractions: list = list(errorPerFraction[methodLabel].keys())
errors: list = list(errorPerFraction[methodLabel].values())
plt.plot(fractions, errors)
plt.scatter(fractions, errors, label=methodLabel)
errorDict: dict = errorPerFraction[methodLabel]
fractions: list = list(errorDict.keys())
errors: list = [errorDict[fraction][0] for fraction in fractions]
stdevs: list = [errorDict[fraction][1] for fraction in fractions]
plt.errorbar(fractions, errors, stdevs, label=methodLabel, marker='s', capsize=5)
plt.title('Air/Water sample', fontsize=15)
plt.xscale('log')
......@@ -47,13 +47,14 @@ if __name__ == '__main__':
plt.legend()
errorPerFraction: dict = results.get_error_vs_fraction_data(attributes=['sediment', 'soil', 'beach', 'slush'],
methods=[])
methods=['random', 'sizeBin', 'chemo'])
plt.subplot(122)
for methodLabel in errorPerFraction.keys():
fractions: list = list(errorPerFraction[methodLabel].keys())
errors: list = list(errorPerFraction[methodLabel].values())
plt.plot(fractions, errors)
plt.scatter(fractions, errors, label=methodLabel)
errorDict: dict = errorPerFraction[methodLabel]
fractions: list = list(errorDict.keys())
errors: list = [errorDict[fraction][0] for fraction in fractions]
stdevs: list = [errorDict[fraction][1] for fraction in fractions]
plt.errorbar(fractions, errors, stdevs, label=methodLabel, marker='s', capsize=5)
plt.title('Sediment/Beach/Slush sample', fontsize=15)
plt.xscale('log')
......
......@@ -42,66 +42,6 @@ class TestTotalResults(unittest.TestCase):
self.assertEqual(len(self.totalResults.sampleResults), 2)
self.assertTrue(newResult is None)
# def test_get_methods_for_fraction(self):
# def containsMethod(listOfMethods: list, template: meth.SubsamplingMethod) -> bool:
# contains: bool = False
# for method in listOfMethods:
# if type(method) == type(template) and method.fraction == template.fraction:
# contains = True
# break
# return contains
#
# dset: gepard.dataset.DataSet = gepard.dataset.DataSet('fakepath')
#
# imgdim = 10
# dset.imagescanMode = 'df'
# dset.imagedim_df = [imgdim, imgdim]
# dset.pixelscale_df = 1.0
# minX, maxX, minY, maxY = 0, 1000, 0, 1000
# dset.maxdim = minX + imgdim / 2, maxY - imgdim / 2, maxX - imgdim / 2, minY + imgdim / 2
#
# desiredFraction = 0.1
# methods = self.totalResults._get_methods_for_fraction(dset, desiredFraction)
# possibleRandomMethods = 2
# possibleCrossBoxMethods = 2
# possibleSpiralBoxMethods = 3
# possibleChemometricMethods = 1
# totalPossible = possibleCrossBoxMethods + possibleRandomMethods + \
# possibleSpiralBoxMethods + possibleChemometricMethods
# self.assertEqual(len(methods), totalPossible)
# self.assertTrue(containsMethod(methods, meth.RandomSampling(dset, desiredFraction)))
# self.assertTrue(containsMethod(methods, meth.SizeBinFractioning(dset, desiredFraction)))
# self.assertTrue(containsMethod(methods, gmeth.CrossBoxSubSampling(dset, desiredFraction)))
# self.assertTrue(containsMethod(methods, gmeth.SpiralBoxSubsampling(dset, desiredFraction)))
#
# desiredFraction = 0.5
# methods = self.totalResults._get_methods_for_fraction(dset, desiredFraction)
# possibleRandomMethods = 2
# possibleCrossBoxMethods = 1
# possibleSpiralBoxMethods = 0
# possibleChemometricMethods = 1
# totalPossible = possibleCrossBoxMethods + possibleRandomMethods + \
# possibleSpiralBoxMethods + possibleChemometricMethods
# self.assertEqual(len(methods), totalPossible)
# self.assertTrue(containsMethod(methods, meth.RandomSampling(dset, desiredFraction)))
# self.assertTrue(containsMethod(methods, meth.SizeBinFractioning(dset, desiredFraction)))
# self.assertTrue(containsMethod(methods, gmeth.CrossBoxSubSampling(dset, desiredFraction)))
# self.assertFalse(containsMethod(methods, gmeth.SpiralBoxSubsampling(dset, desiredFraction)))
#
# desiredFraction = 0.9
# methods = self.totalResults._get_methods_for_fraction(dset, desiredFraction)
# possibleRandomMethods = 2
# possibleCrossBoxMethods = 0
# possibleSpiralBoxMethods = 0
# possibleChemometricMethods = 1
# totalPossible = possibleCrossBoxMethods + possibleRandomMethods + \
# possibleSpiralBoxMethods + possibleChemometricMethods
# self.assertEqual(len(methods), totalPossible)
# self.assertTrue(containsMethod(methods, meth.RandomSampling(dset, desiredFraction)))
# self.assertTrue(containsMethod(methods, meth.SizeBinFractioning(dset, desiredFraction)))
# self.assertFalse(containsMethod(methods, gmeth.CrossBoxSubSampling(dset, desiredFraction)))
# self.assertFalse(containsMethod(methods, gmeth.SpiralBoxSubsampling(dset, desiredFraction)))
def test_get_error_vs_fraction_data(self):
firstSample: SampleResult = self.totalResults.add_sample('sample1.pkl')
firstSample.set_attribute('to be used')
......@@ -110,63 +50,84 @@ class TestTotalResults(unittest.TestCase):
firstMethod: meth.RandomSampling = meth.RandomSampling(None, 0.1)
firstResult: SubsamplingResult = SubsamplingResult(firstMethod)
firstResult.mpCountErrors = [80]
firstResult.mpCountErrors = [70, 90] # mean = 80, stdev = 10
secondMethod: gmeth.CrossBoxSubSampling = gmeth.CrossBoxSubSampling(None, 0.1)
secondMethod.numBoxesAcross = 3
secondResult: SubsamplingResult = SubsamplingResult(secondMethod)
secondResult.mpCountErrors = [60]
secondResult.mpCountErrors = [50, 70] # mean = 60, stdev = 10
thirdMethod: gmeth.CrossBoxSubSampling = gmeth.CrossBoxSubSampling(None, 0.1)
thirdMethod.numBoxesAcross = 5
self.assertEqual(thirdMethod.fraction, 0.1)
thirdResult: SubsamplingResult = SubsamplingResult(thirdMethod)
thirdResult.mpCountErrors = [40]
thirdResult.mpCountErrors = [30, 50] # mean = 40, stdev = 10
thirdMethod2: gmeth.CrossBoxSubSampling = gmeth.CrossBoxSubSampling(None, 0.1)
thirdMethod2.numBoxesAcross = 5
self.assertEqual(thirdMethod2.fraction, 0.1)
thirdResult2: SubsamplingResult = SubsamplingResult(thirdMethod)
thirdResult2.mpCountErrors = [80]
thirdResult2.mpCountErrors = [60, 100] # mean = 80, stdev = 20
thirdMethod3: gmeth.CrossBoxSubSampling = gmeth.CrossBoxSubSampling(None, 0.2)
thirdMethod3.numBoxesAcross = 5
self.assertEqual(thirdMethod3.fraction, 0.2)
thirdResult3: SubsamplingResult = SubsamplingResult(thirdMethod3)
thirdResult3.mpCountErrors = [50]
thirdResult3.mpCountErrors = [30, 50, 70] # mean = 50, stdev = 16.32993161855452
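# For reference: the expected stdev values above are population standard deviations
# (ddof=0, numpy's default), e.g. sqrt(((30-50)**2 + 0 + (70-50)**2) / 3) ≈ 16.3299.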
firstSample.results = [firstResult, secondResult, thirdResult, thirdResult3]
secondSample.results = [firstResult, secondResult, thirdResult2, thirdResult3]
resultDict: dict = self.totalResults.get_error_vs_fraction_data()
self.assertEqual(list(resultDict.keys()), [firstMethod.label, secondMethod.label, thirdMethod.label])
for i in range(3):
res: dict = list(resultDict.values())[i]
for i, key in enumerate(resultDict.keys()):
res: dict = resultDict[key]
if i == 0:
self.assertEqual(list(res.keys()), [0.1])
self.assertAlmostEqual(res[0.1], 80)
mean, stdev = res[0.1]
self.assertAlmostEqual(mean, 80)
self.assertAlmostEqual(stdev, 10)
if i == 1:
self.assertEqual(list(res.keys()), [0.1])
self.assertAlmostEqual(res[0.1], 60)
mean, stdev = res[0.1]
self.assertAlmostEqual(mean, 60)
self.assertAlmostEqual(stdev, 10)
if i == 2:
self.assertEqual(list(res.keys()), [0.1, 0.2])
self.assertAlmostEqual(res[0.1], 60) # i.e., mean([40, 808])
self.assertAlmostEqual(res[0.2], 50)
mean, stdev = res[0.1]
self.assertAlmostEqual(mean, 60) # i.e., mean([40, 80])
self.assertAlmostEqual(stdev, 15) # i.e., mean([10, 20])
mean, stdev = res[0.2]
self.assertAlmostEqual(mean, 50)
self.assertAlmostEqual(stdev, 16.32993161855452)
filteredResultDict: dict = self.totalResults.get_error_vs_fraction_data(attributes=['to be used'])
self.assertEqual(list(filteredResultDict.keys()), [firstMethod.label, secondMethod.label, thirdMethod.label])
for i in range(3):
res: dict = list(filteredResultDict.values())[i]
for i, key in enumerate(filteredResultDict.keys()):
res: dict = filteredResultDict[key]
if i == 0:
self.assertEqual(key, firstMethod.label)
self.assertEqual(list(res.keys()), [0.1])
self.assertAlmostEqual(res[0.1], 80)
mean, stdev = res[0.1]
self.assertAlmostEqual(mean, 80)
self.assertAlmostEqual(stdev, 10)
if i == 1:
self.assertEqual(key, secondMethod.label)
self.assertEqual(list(res.keys()), [0.1])
self.assertAlmostEqual(res[0.1], 60)
mean, stdev = res[0.1]
self.assertAlmostEqual(mean, 60)
self.assertAlmostEqual(stdev, 10)
if i == 2:
self.assertEqual(key, thirdMethod.label)
self.assertEqual(list(res.keys()), [0.1, 0.2])
self.assertAlmostEqual(res[0.1], 40) # only the result from the first sample is used, as filtered..
self.assertAlmostEqual(res[0.2], 50)
mean, stdev = res[0.1]
self.assertAlmostEqual(mean, 40) # only the result from the first sample is used, as filtered..
self.assertAlmostEqual(stdev, 10)
mean, stdev = res[0.2]
self.assertAlmostEqual(mean, 50)
self.assertAlmostEqual(stdev, 16.32993161855452)
filteredResultDict: dict = self.totalResults.get_error_vs_fraction_data(methods=['cross'])
self.assertEqual(list(filteredResultDict.keys()), [secondMethod.label, thirdMethod.label])
......
......@@ -2,7 +2,8 @@ import unittest
import os
import shutil
from input_output import get_pkls_from_directory, get_attributes_from_foldername, load_results, save_results
from evaluation import TotalResults
from evaluation import TotalResults, SampleResult
from helpers_for_test import get_default_ParticleContainer, get_default_DataSet
class TestIO(unittest.TestCase):
......@@ -33,12 +34,24 @@ class TestIO(unittest.TestCase):
def test_load_save(self):
newRes: TotalResults = TotalResults()
for _ in range(10):
dset = get_default_DataSet()
dset.particleContainer = get_default_ParticleContainer()
sampleRes: SampleResult = SampleResult('fakepath')
sampleRes.dataset = dset
newRes.sampleResults.append(sampleRes)
fname: str = os.path.join(self.path, 'test.res')
save_results(fname, newRes)
self.assertTrue(os.path.exists(fname))
loadedRes: TotalResults = load_results(fname)
self.assertTrue(loadedRes is not None)
self.assertEqual(type(loadedRes), TotalResults)
for savedSampleRes, loadedSampleRes in zip(newRes.sampleResults, loadedRes.sampleResults):
self.assertTrue(savedSampleRes.dataset is not None)
self.assertTrue(loadedSampleRes.dataset is None)
os.remove(fname)
def test_read_pkls_from_dir(self):
......