Commit c509aa9c authored by Josef Brandt's avatar Josef Brandt

Filter results for sample attributes and/or methods

parent 5e54cb68
......@@ -4,3 +4,5 @@
__pycache__/
*.png
*.res
......@@ -9,6 +9,7 @@ import pickle
import sys
import os
import numpy as np
import matplotlib.pyplot as plt
from helpers import ParticleBinSorter
import methods as meth
......@@ -60,25 +61,31 @@ class TotalResults(object):
sample.update_result_with_method(curMethod)
print(f'processed {index+1} of {len(self.sampleResults)} samples')
def get_error_vs_fraction_data(self) -> dict:
def get_error_vs_fraction_data(self, attributes: list = [], methods: list = []) -> dict:
"""
Returns Dict: Key: Method Label, Value: (Dict: Key:Measured Fraction, Value: averaged MPCountError over all samples)
:param attributes: A list of attributes that should be used for filtering the samples. Only samples with an
attribute from within that list are considered.
:return:
"""
result: dict = {}
for sample in self.sampleResults:
for res in sample.results:
res: SubsamplingResult = res
label: str = res.method.label
frac: float = res.method.fraction
error: float = res.mpCountError
if label not in result.keys():
result[label] = {frac: [error]}
elif frac not in result[label].keys():
result[label][frac] = [error]
else:
result[label][frac].append(error)
sample: SampleResult = sample
if attributes == [] or sample.has_any_attribute(attributes):
for res in sample.results:
res: SubsamplingResult = res
method: meth.SubsamplingMethod = res.method
if methods == [] or method.matches_any_pattern(methods):
label: str = method.label
frac: float = method.fraction
error: float = res.mpCountError
if label not in result.keys():
result[label] = {frac: [error]}
elif frac not in result[label].keys():
result[label][frac] = [error]
else:
result[label][frac].append(error)
for method in result.keys():
methodRes: dict = result[method]
......@@ -151,8 +158,17 @@ class SampleResult(object):
self.attributes.append(newAttribute)
print(f'sample {self.filepath} has now attribute {newAttribute}')
def has_any_attribute(self, listOfAttributes: list) -> bool:
    """
    Tests, whether the sample carries at least one of the given attributes.
    :param listOfAttributes: Attribute names to look for; each one is checked with self.has_attribute.
    :return: True as soon as one of the attributes is found, False otherwise (also for an empty list).
    """
    # any() stops at the first hit, just like the former flag/break loop did.
    return any(self.has_attribute(attr) for attr in listOfAttributes)
def has_attribute(self, attribute: str) -> bool:
    """
    Tests, whether the sample carries the given attribute. The comparison is case insensitive.
    :param attribute: Name of the attribute to look for.
    :return: True if self.attributes contains the attribute, irrespective of upper/lower case.
    """
    # Lower the requested name once, then compare it against each stored attribute.
    wanted: str = attribute.lower()
    return any(attr.lower() == wanted for attr in self.attributes)
def _remove_result_of_method(self, method: meth.SubsamplingMethod) -> None:
"""
......
......@@ -41,6 +41,30 @@ class SubsamplingMethod(object):
"""
raise NotImplementedError
def matches_any_pattern(self, patternList: list) -> bool:
    """
    Tests, whether at least one of the given patterns is matching.
    :param patternList: Patterns, each of which is tested with self.matches_pattern.
    :return: True as soon as one pattern matches, False otherwise (also for an empty list).
    """
    # any() stops at the first matching pattern, just like the former flag/break loop did.
    return any(self.matches_pattern(pattern) for pattern in patternList)
def matches_pattern(self, pattern: str) -> bool:
    """
    Tests, whether the method matches a given pattern, i.e., whether the pattern is contained in the
    method's label. Upper/lower case is ignored. Strings of at least 4 characters are required!
    The pattern 'layout' is rejected in any spelling, as it is considered ambiguous.
    :param pattern: The string to test against
    :return matchesThePattern: True if the pattern is accepted and found in the label, else False.
    """
    needle: str = pattern.lower()
    # Too short patterns and the ambiguous 'layout' never match. The 'layout' check is done on the
    # lowered pattern, so that 'Layout' or 'LAYOUT' cannot slip through.
    if len(pattern) <= 3 or needle == 'layout':
        return False
    return needle in self.label.lower()
class RandomSampling(SubsamplingMethod):
@property
......@@ -67,7 +91,7 @@ class SizeBinFractioning(SubsamplingMethod):
@property
def label(self) -> str:
    """
    Label of this subsampling method.
    :return: The label string.
    """
    # The label does not contain the word 'Random': a case-insensitive substring match against
    # the pattern 'random' would otherwise also select this method.
    return 'SizeBin Subsampling'
def apply_subsampling_method(self) -> list:
subParticlesPerBin: list = self._get_subParticles_per_bin(self.particleContainer.particles)
......
......@@ -2,7 +2,7 @@ import matplotlib.pyplot as plt
import time
from evaluation import TotalResults, SampleResult
from input_output import get_pkls_from_directory, get_attributes_from_foldername
from input_output import get_pkls_from_directory, get_attributes_from_foldername, save_results, load_results
"""
IMPORTANT!!!
......@@ -10,29 +10,33 @@ SET GEPARD TO EVALUATION BRANCH (WITHOUT THE TILING STUFF), OTHERWISE SOME OF TH
"""
# Build the results from scratch: every sample path listed by get_pkls_from_directory is added as a
# sample and receives the attributes that get_attributes_from_foldername yields for its folder.
results: TotalResults = TotalResults()
pklsInFolders = get_pkls_from_directory(r'C:\Users\xbrjos\Desktop\temp MP\NewDatasets')
for folder in pklsInFolders.keys():
for samplePath in pklsInFolders[folder]:
newSampleResult: SampleResult = results.add_sample(samplePath)
for attr in get_attributes_from_foldername(folder):
newSampleResult.set_attribute(attr)
# Measure how long updating all results takes.
t0 = time.time()
results.update_all()
print('updating all took', time.time()-t0, 'seconds')
# Unfiltered error-vs-fraction data (no attribute or method filter given).
errorPerFraction: dict = results.get_error_vs_fraction_data()
# Commented-out copy of the computation above, additionally saving the results to 'results1.res'.
# results: TotalResults = TotalResults()
# pklsInFolders = get_pkls_from_directory(r'C:\Users\xbrjos\Desktop\temp MP\NewDatasets')
#
# for folder in pklsInFolders.keys():
# for samplePath in pklsInFolders[folder]:
# newSampleResult: SampleResult = results.add_sample(samplePath)
# for attr in get_attributes_from_foldername(folder):
# newSampleResult.set_attribute(attr)
#
# t0 = time.time()
# results.update_all()
# print('updating all took', time.time()-t0, 'seconds')
#
# save_results('results1.res', results)
# Presumably reads back the results stored by save_results, so nothing has to be recomputed - TODO confirm.
results: TotalResults = load_results('results1.res')
# Restrict the data to methods matching the patterns 'spiral' or 'cross'.
errorPerFraction: dict = results.get_error_vs_fraction_data(methods=['spiral', 'cross'])
plt.clf()
# One curve per method label: keys of the inner dict are the x values, its values the y values.
for methodLabel in errorPerFraction.keys():
fractions: list = list(errorPerFraction[methodLabel].keys())
errors: list = list(errorPerFraction[methodLabel].values())
plt.plot(fractions, errors, label=methodLabel)
# NOTE(review): the title says 'Box' while the method filter above uses 'cross' - confirm the wording.
plt.title('Spiral or Box Layouts')
plt.xscale('log')
plt.xlabel('measured fraction')
plt.ylabel('mpCountError')
plt.legend()
plt.show()
print('done')
\ No newline at end of file
......@@ -95,7 +95,9 @@ class TestTotalResults(unittest.TestCase):
def test_get_error_vs_fraction_data(self):
firstSample: SampleResult = self.totalResults.add_sample('sample1.pkl')
firstSample.set_attribute('to be used')
secondSample: SampleResult = self.totalResults.add_sample('sample2.pkl')
secondSample.set_attribute('not to be used')
firstMethod: meth.RandomSampling = meth.RandomSampling(None, 0.1)
firstResult: SubsamplingResult = SubsamplingResult(firstMethod)
......@@ -141,9 +143,30 @@ class TestTotalResults(unittest.TestCase):
self.assertEqual(list(res.keys()), [0.1, 0.2])
self.assertAlmostEqual(res[0.1], 0.6) # i.e., mean([0.4, 0.8])
self.assertAlmostEqual(res[0.2], 0.5)
# if i == 3:
# self.assertEqual(list(res.keys()), [0.1, 0.2])
# self.assertAlmostEqual(res[0.1], )
filteredResultDict: dict = self.totalResults.get_error_vs_fraction_data(attributes=['to be used'])
self.assertEqual(list(filteredResultDict.keys()), [firstMethod.label, secondMethod.label, thirdMethod.label])
for i in range(3):
res: dict = list(filteredResultDict.values())[i]
if i == 0:
self.assertEqual(list(res.keys()), [0.1])
self.assertAlmostEqual(res[0.1], 0.8)
if i == 1:
self.assertEqual(list(res.keys()), [0.1])
self.assertAlmostEqual(res[0.1], 0.6)
if i == 2:
self.assertEqual(list(res.keys()), [0.1, 0.2])
self.assertAlmostEqual(res[0.1], 0.4) # only the result from the first sample is used, as filtered..
self.assertAlmostEqual(res[0.2], 0.5)
filteredResultDict: dict = self.totalResults.get_error_vs_fraction_data(methods=['cross'])
self.assertEqual(list(filteredResultDict.keys()), [secondMethod.label, thirdMethod.label])
filteredResultDict: dict = self.totalResults.get_error_vs_fraction_data(methods=['Cross'])
self.assertEqual(list(filteredResultDict.keys()), [secondMethod.label, thirdMethod.label])
filteredResultDict: dict = self.totalResults.get_error_vs_fraction_data(methods=['random'])
self.assertEqual(list(filteredResultDict.keys()), [firstMethod.label])
class TestSampleResult(unittest.TestCase):
......@@ -212,6 +235,8 @@ class TestSampleResult(unittest.TestCase):
def test_attributes(self):
self.sampleResult.set_attribute('soil')
self.assertTrue(self.sampleResult.has_attribute('soil'))
self.assertTrue(self.sampleResult.has_attribute('Soil')) # we want to be case insensitive
self.assertTrue(self.sampleResult.has_attribute('SOIL'))
self.sampleResult.set_attribute('soil') # the attribute is already there and shall not be added again
self.assertEqual(len(self.sampleResult.attributes), 1)
......@@ -220,6 +245,13 @@ class TestSampleResult(unittest.TestCase):
self.assertEqual(len(self.sampleResult.attributes), 2)
self.assertTrue(self.sampleResult.has_attribute('10µmFilter'))
self.assertTrue(self.sampleResult.has_any_attribute(['soil', 'water']))
self.assertTrue(self.sampleResult.has_any_attribute(['soil', 'water', '10µmFilter']))
self.assertTrue(self.sampleResult.has_any_attribute(['water', '10µmFilter']))
self.assertFalse(self.sampleResult.has_any_attribute(['water', 'sediment']))
self.assertFalse(self.sampleResult.has_any_attribute(['beach']))
class TestSubsamplingResult(unittest.TestCase):
def setUp(self):
self.subsamplingResult: SubsamplingResult = SubsamplingResult(meth.RandomSampling(None, 0.1))
......
......@@ -67,7 +67,7 @@ class TestSizeBinFractioning(unittest.TestCase):
class TestMethodEquality(unittest.TestCase):
def test_methodEquality(self):
def test_methodEquality_and_patterns(self):
method1_1: RandomSampling = RandomSampling(None, 0.1)
method1_2: RandomSampling = RandomSampling(None, 0.2)
......@@ -103,3 +103,45 @@ class TestMethodEquality(unittest.TestCase):
self.assertTrue(method1.equals(method2))
else:
self.assertFalse(method1.equals(method2))
randomPatterns: list = ['random', 'ranDOm']
sizeBinPatterns: list = ['size', 'Size', 'sizeBin']
crossBoxPatterns: list = ['cross', 'crossLayout']
spiralBoxPatterns: list = ['spiral', 'spiralLayout']
antiPatterns: list = ['bin', 'box', 'crossBox', 'layout'] # pattern 'layout' is ambiguous...
for randomMeth in [method1_1, method1_2]:
for pos in randomPatterns:
self.assertTrue(randomMeth.matches_pattern(pos))
negPatterns = sizeBinPatterns + crossBoxPatterns + spiralBoxPatterns + antiPatterns
for neg in negPatterns:
self.assertFalse(randomMeth.matches_pattern(neg))
self.assertTrue(randomMeth.matches_any_pattern(randomPatterns + negPatterns))
self.assertFalse(randomMeth.matches_any_pattern(negPatterns))
for sizeBinMeth in [method2_1, method2_2]:
for pos in sizeBinPatterns:
self.assertTrue(sizeBinMeth.matches_pattern(pos))
negPatterns = randomPatterns + crossBoxPatterns + spiralBoxPatterns + antiPatterns
for neg in negPatterns:
self.assertFalse(sizeBinMeth.matches_pattern(neg))
self.assertTrue(sizeBinMeth.matches_any_pattern(sizeBinPatterns + negPatterns))
self.assertFalse(sizeBinMeth.matches_any_pattern(negPatterns))
for crossBoxMethod in [method3_1_1, method3_1_2, method3_2_1, method3_2_2]:
for pos in crossBoxPatterns:
self.assertTrue(crossBoxMethod.matches_pattern(pos))
negPatterns = randomPatterns + sizeBinPatterns + spiralBoxPatterns + antiPatterns
for neg in negPatterns:
self.assertFalse(crossBoxMethod.matches_pattern(neg))
self.assertTrue(crossBoxMethod.matches_any_pattern(crossBoxPatterns + negPatterns))
self.assertFalse(crossBoxMethod.matches_any_pattern(negPatterns))
for spiralBoxMethod in [method4_1_1, method4_1_2, method4_2_1, method4_2_2]:
for pos in spiralBoxPatterns:
self.assertTrue(spiralBoxMethod.matches_pattern(pos))
negPatterns = randomPatterns + sizeBinPatterns + crossBoxPatterns + antiPatterns
for neg in negPatterns:
self.assertFalse(spiralBoxMethod.matches_pattern(neg))
self.assertTrue(spiralBoxMethod.matches_any_pattern(spiralBoxPatterns + negPatterns))
self.assertFalse(spiralBoxMethod.matches_any_pattern(negPatterns))
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment