Commit c509aa9c authored by Josef Brandt

Filter results for sample attributes and/or methods

parent 5e54cb68
...@@ -4,3 +4,5 @@ ...@@ -4,3 +4,5 @@
__pycache__/ __pycache__/
*.png *.png
*.res
...@@ -9,6 +9,7 @@ import pickle ...@@ -9,6 +9,7 @@ import pickle
import sys import sys
import os import os
import numpy as np import numpy as np
import matplotlib.pyplot as plt
from helpers import ParticleBinSorter from helpers import ParticleBinSorter
import methods as meth import methods as meth
...@@ -60,17 +61,23 @@ class TotalResults(object): ...@@ -60,17 +61,23 @@ class TotalResults(object):
sample.update_result_with_method(curMethod) sample.update_result_with_method(curMethod)
print(f'processed {index+1} of {len(self.sampleResults)} samples') print(f'processed {index+1} of {len(self.sampleResults)} samples')
def get_error_vs_fraction_data(self) -> dict: def get_error_vs_fraction_data(self, attributes: list = [], methods: list = []) -> dict:
""" """
Returns Dict: Key: Method Label, Value: (Dict: Key:Measured Fraction, Value: averaged MPCountError over all samples) Returns Dict: Key: Method Label, Value: (Dict: Key:Measured Fraction, Value: averaged MPCountError over all samples)
:param attributes: A list of attributes that should be used for filtering the samples. Only samples with an
attribute from within that list are considered.
:return: :return:
""" """
result: dict = {} result: dict = {}
for sample in self.sampleResults: for sample in self.sampleResults:
sample: SampleResult = sample
if attributes == [] or sample.has_any_attribute(attributes):
for res in sample.results: for res in sample.results:
res: SubsamplingResult = res res: SubsamplingResult = res
label: str = res.method.label method: meth.SubsamplingMethod = res.method
frac: float = res.method.fraction if methods == [] or method.matches_any_pattern(methods):
label: str = method.label
frac: float = method.fraction
error: float = res.mpCountError error: float = res.mpCountError
if label not in result.keys(): if label not in result.keys():
...@@ -151,8 +158,17 @@ class SampleResult(object): ...@@ -151,8 +158,17 @@ class SampleResult(object):
self.attributes.append(newAttribute) self.attributes.append(newAttribute)
print(f'sample {self.filepath} has now attribute {newAttribute}') print(f'sample {self.filepath} has now attribute {newAttribute}')
def has_any_attribute(self, listOfAttributes: list) -> bool:
    """
    Tests whether the sample has at least one of the given attributes.
    :param listOfAttributes: Attribute names to look for; each one is checked via has_attribute.
    :return: True if any of the attributes is present, False otherwise (also for an empty list).
    """
    # any() stops at the first hit, just like the former explicit loop with break.
    return any(self.has_attribute(attr) for attr in listOfAttributes)
def has_attribute(self, attribute: str) -> bool: def has_attribute(self, attribute: str) -> bool:
return attribute in self.attributes attributes: list = [attr.lower() for attr in self.attributes]
return attribute.lower() in attributes
def _remove_result_of_method(self, method: meth.SubsamplingMethod) -> None: def _remove_result_of_method(self, method: meth.SubsamplingMethod) -> None:
""" """
......
...@@ -41,6 +41,30 @@ class SubsamplingMethod(object): ...@@ -41,6 +41,30 @@ class SubsamplingMethod(object):
""" """
raise NotImplementedError raise NotImplementedError
def matches_any_pattern(self, patternList: list) -> bool:
    """
    Tests whether at least one of the given patterns matches this method.
    :param patternList: Patterns (strings) that are each tested via matches_pattern.
    :return: True if any pattern matches, False otherwise (also for an empty list).
    """
    # any() stops at the first matching pattern, just like the former explicit loop with break.
    return any(self.matches_pattern(pattern) for pattern in patternList)
def matches_pattern(self, pattern: str) -> bool:
    """
    Tests whether the method's label contains the given pattern. The comparison is case-insensitive.
    Strings of at least 4 characters are required! The pattern 'layout' is rejected in any casing.
    :param pattern: The string to test against
    :return matchesThePattern: True if the pattern is accepted and occurs in the label, else False
    """
    loweredPattern: str = pattern.lower()
    # The 'layout' exclusion has to use the lowered pattern: the label test below ignores case, so
    # 'Layout' or 'LAYOUT' must be rejected just like 'layout'.
    if len(pattern) <= 3 or loweredPattern == 'layout':
        return False
    return loweredPattern in self.label.lower()
class RandomSampling(SubsamplingMethod): class RandomSampling(SubsamplingMethod):
@property @property
...@@ -67,7 +91,7 @@ class SizeBinFractioning(SubsamplingMethod): ...@@ -67,7 +91,7 @@ class SizeBinFractioning(SubsamplingMethod):
@property @property
def label(self) -> str: def label(self) -> str:
return 'SizeBin Random Subsampling' return 'SizeBin Subsampling'
def apply_subsampling_method(self) -> list: def apply_subsampling_method(self) -> list:
subParticlesPerBin: list = self._get_subParticles_per_bin(self.particleContainer.particles) subParticlesPerBin: list = self._get_subParticles_per_bin(self.particleContainer.particles)
......
...@@ -2,7 +2,7 @@ import matplotlib.pyplot as plt ...@@ -2,7 +2,7 @@ import matplotlib.pyplot as plt
import time import time
from evaluation import TotalResults, SampleResult from evaluation import TotalResults, SampleResult
from input_output import get_pkls_from_directory, get_attributes_from_foldername from input_output import get_pkls_from_directory, get_attributes_from_foldername, save_results, load_results
""" """
IMPORTANT!!! IMPORTANT!!!
...@@ -10,29 +10,33 @@ SET GEPARD TO EVALUATION BRANCH (WITHOUT THE TILING STUFF), OTHERWISE SOME OF TH ...@@ -10,29 +10,33 @@ SET GEPARD TO EVALUATION BRANCH (WITHOUT THE TILING STUFF), OTHERWISE SOME OF TH
""" """
results: TotalResults = TotalResults() # results: TotalResults = TotalResults()
pklsInFolders = get_pkls_from_directory(r'C:\Users\xbrjos\Desktop\temp MP\NewDatasets') # pklsInFolders = get_pkls_from_directory(r'C:\Users\xbrjos\Desktop\temp MP\NewDatasets')
#
for folder in pklsInFolders.keys(): # for folder in pklsInFolders.keys():
for samplePath in pklsInFolders[folder]: # for samplePath in pklsInFolders[folder]:
newSampleResult: SampleResult = results.add_sample(samplePath) # newSampleResult: SampleResult = results.add_sample(samplePath)
for attr in get_attributes_from_foldername(folder): # for attr in get_attributes_from_foldername(folder):
newSampleResult.set_attribute(attr) # newSampleResult.set_attribute(attr)
#
t0 = time.time() # t0 = time.time()
results.update_all() # results.update_all()
print('updating all took', time.time()-t0, 'seconds') # print('updating all took', time.time()-t0, 'seconds')
#
errorPerFraction: dict = results.get_error_vs_fraction_data() # save_results('results1.res', results)
results: TotalResults = load_results('results1.res')
errorPerFraction: dict = results.get_error_vs_fraction_data(methods=['spiral', 'cross'])
plt.clf() plt.clf()
for methodLabel in errorPerFraction.keys(): for methodLabel in errorPerFraction.keys():
fractions: list = list(errorPerFraction[methodLabel].keys()) fractions: list = list(errorPerFraction[methodLabel].keys())
errors: list = list(errorPerFraction[methodLabel].values()) errors: list = list(errorPerFraction[methodLabel].values())
plt.plot(fractions, errors, label=methodLabel) plt.plot(fractions, errors, label=methodLabel)
plt.title('Spiral or Box Layouts')
plt.xscale('log') plt.xscale('log')
plt.xlabel('measured fraction') plt.xlabel('measured fraction')
plt.ylabel('mpCountError') plt.ylabel('mpCountError')
plt.legend() plt.legend()
plt.show() plt.show()
print('done')
\ No newline at end of file
...@@ -95,7 +95,9 @@ class TestTotalResults(unittest.TestCase): ...@@ -95,7 +95,9 @@ class TestTotalResults(unittest.TestCase):
def test_get_error_vs_fraction_data(self): def test_get_error_vs_fraction_data(self):
firstSample: SampleResult = self.totalResults.add_sample('sample1.pkl') firstSample: SampleResult = self.totalResults.add_sample('sample1.pkl')
firstSample.set_attribute('to be used')
secondSample: SampleResult = self.totalResults.add_sample('sample2.pkl') secondSample: SampleResult = self.totalResults.add_sample('sample2.pkl')
secondSample.set_attribute('not to be used')
firstMethod: meth.RandomSampling = meth.RandomSampling(None, 0.1) firstMethod: meth.RandomSampling = meth.RandomSampling(None, 0.1)
firstResult: SubsamplingResult = SubsamplingResult(firstMethod) firstResult: SubsamplingResult = SubsamplingResult(firstMethod)
...@@ -141,9 +143,30 @@ class TestTotalResults(unittest.TestCase): ...@@ -141,9 +143,30 @@ class TestTotalResults(unittest.TestCase):
self.assertEqual(list(res.keys()), [0.1, 0.2]) self.assertEqual(list(res.keys()), [0.1, 0.2])
self.assertAlmostEqual(res[0.1], 0.6) # i.e., mean([0.4, 0.8]) self.assertAlmostEqual(res[0.1], 0.6) # i.e., mean([0.4, 0.8])
self.assertAlmostEqual(res[0.2], 0.5) self.assertAlmostEqual(res[0.2], 0.5)
# if i == 3:
# self.assertEqual(list(res.keys()), [0.1, 0.2]) filteredResultDict: dict = self.totalResults.get_error_vs_fraction_data(attributes=['to be used'])
# self.assertAlmostEqual(res[0.1], ) self.assertEqual(list(filteredResultDict.keys()), [firstMethod.label, secondMethod.label, thirdMethod.label])
for i in range(3):
res: dict = list(filteredResultDict.values())[i]
if i == 0:
self.assertEqual(list(res.keys()), [0.1])
self.assertAlmostEqual(res[0.1], 0.8)
if i == 1:
self.assertEqual(list(res.keys()), [0.1])
self.assertAlmostEqual(res[0.1], 0.6)
if i == 2:
self.assertEqual(list(res.keys()), [0.1, 0.2])
self.assertAlmostEqual(res[0.1], 0.4) # only the result from the first sample is used, as filtered..
self.assertAlmostEqual(res[0.2], 0.5)
filteredResultDict: dict = self.totalResults.get_error_vs_fraction_data(methods=['cross'])
self.assertEqual(list(filteredResultDict.keys()), [secondMethod.label, thirdMethod.label])
filteredResultDict: dict = self.totalResults.get_error_vs_fraction_data(methods=['Cross'])
self.assertEqual(list(filteredResultDict.keys()), [secondMethod.label, thirdMethod.label])
filteredResultDict: dict = self.totalResults.get_error_vs_fraction_data(methods=['random'])
self.assertEqual(list(filteredResultDict.keys()), [firstMethod.label])
class TestSampleResult(unittest.TestCase): class TestSampleResult(unittest.TestCase):
...@@ -212,6 +235,8 @@ class TestSampleResult(unittest.TestCase): ...@@ -212,6 +235,8 @@ class TestSampleResult(unittest.TestCase):
def test_attributes(self): def test_attributes(self):
self.sampleResult.set_attribute('soil') self.sampleResult.set_attribute('soil')
self.assertTrue(self.sampleResult.has_attribute('soil')) self.assertTrue(self.sampleResult.has_attribute('soil'))
self.assertTrue(self.sampleResult.has_attribute('Soil')) # we want to be case insensitive
self.assertTrue(self.sampleResult.has_attribute('SOIL'))
self.sampleResult.set_attribute('soil') # the attribute is already there and shall not be added again self.sampleResult.set_attribute('soil') # the attribute is already there and shall not be added again
self.assertEqual(len(self.sampleResult.attributes), 1) self.assertEqual(len(self.sampleResult.attributes), 1)
...@@ -220,6 +245,13 @@ class TestSampleResult(unittest.TestCase): ...@@ -220,6 +245,13 @@ class TestSampleResult(unittest.TestCase):
self.assertEqual(len(self.sampleResult.attributes), 2) self.assertEqual(len(self.sampleResult.attributes), 2)
self.assertTrue(self.sampleResult.has_attribute('10µmFilter')) self.assertTrue(self.sampleResult.has_attribute('10µmFilter'))
self.assertTrue(self.sampleResult.has_any_attribute(['soil', 'water']))
self.assertTrue(self.sampleResult.has_any_attribute(['soil', 'water', '10µmFilter']))
self.assertTrue(self.sampleResult.has_any_attribute(['water', '10µmFilter']))
self.assertFalse(self.sampleResult.has_any_attribute(['water', 'sediment']))
self.assertFalse(self.sampleResult.has_any_attribute(['beach']))
class TestSubsamplingResult(unittest.TestCase): class TestSubsamplingResult(unittest.TestCase):
def setUp(self): def setUp(self):
self.subsamplingResult: SubsamplingResult = SubsamplingResult(meth.RandomSampling(None, 0.1)) self.subsamplingResult: SubsamplingResult = SubsamplingResult(meth.RandomSampling(None, 0.1))
......
...@@ -67,7 +67,7 @@ class TestSizeBinFractioning(unittest.TestCase): ...@@ -67,7 +67,7 @@ class TestSizeBinFractioning(unittest.TestCase):
class TestMethodEquality(unittest.TestCase): class TestMethodEquality(unittest.TestCase):
def test_methodEquality(self): def test_methodEquality_and_patterns(self):
method1_1: RandomSampling = RandomSampling(None, 0.1) method1_1: RandomSampling = RandomSampling(None, 0.1)
method1_2: RandomSampling = RandomSampling(None, 0.2) method1_2: RandomSampling = RandomSampling(None, 0.2)
...@@ -103,3 +103,45 @@ class TestMethodEquality(unittest.TestCase): ...@@ -103,3 +103,45 @@ class TestMethodEquality(unittest.TestCase):
self.assertTrue(method1.equals(method2)) self.assertTrue(method1.equals(method2))
else: else:
self.assertFalse(method1.equals(method2)) self.assertFalse(method1.equals(method2))
randomPatterns: list = ['random', 'ranDOm']
sizeBinPatterns: list = ['size', 'Size', 'sizeBin']
crossBoxPatterns: list = ['cross', 'crossLayout']
spiralBoxPatterns: list = ['spiral', 'spiralLayout']
antiPatterns: list = ['bin', 'box', 'crossBox', 'layout'] # pattern 'layout' is ambiguous...
for randomMeth in [method1_1, method1_2]:
for pos in randomPatterns:
self.assertTrue(randomMeth.matches_pattern(pos))
negPatterns = sizeBinPatterns + crossBoxPatterns + spiralBoxPatterns + antiPatterns
for neg in negPatterns:
self.assertFalse(randomMeth.matches_pattern(neg))
self.assertTrue(randomMeth.matches_any_pattern(randomPatterns + negPatterns))
self.assertFalse(randomMeth.matches_any_pattern(negPatterns))
for sizeBinMeth in [method2_1, method2_2]:
for pos in sizeBinPatterns:
self.assertTrue(sizeBinMeth.matches_pattern(pos))
negPatterns = randomPatterns + crossBoxPatterns + spiralBoxPatterns + antiPatterns
for neg in negPatterns:
self.assertFalse(sizeBinMeth.matches_pattern(neg))
self.assertTrue(sizeBinMeth.matches_any_pattern(sizeBinPatterns + negPatterns))
self.assertFalse(sizeBinMeth.matches_any_pattern(negPatterns))
for crossBoxMethod in [method3_1_1, method3_1_2, method3_2_1, method3_2_2]:
for pos in crossBoxPatterns:
self.assertTrue(crossBoxMethod.matches_pattern(pos))
negPatterns = randomPatterns + sizeBinPatterns + spiralBoxPatterns + antiPatterns
for neg in negPatterns:
self.assertFalse(crossBoxMethod.matches_pattern(neg))
self.assertTrue(crossBoxMethod.matches_any_pattern(crossBoxPatterns + negPatterns))
self.assertFalse(crossBoxMethod.matches_any_pattern(negPatterns))
for spiralBoxMethod in [method4_1_1, method4_1_2, method4_2_1, method4_2_2]:
for pos in spiralBoxPatterns:
self.assertTrue(spiralBoxMethod.matches_pattern(pos))
negPatterns = randomPatterns + sizeBinPatterns + crossBoxPatterns + antiPatterns
for neg in negPatterns:
self.assertFalse(spiralBoxMethod.matches_pattern(neg))
self.assertTrue(spiralBoxMethod.matches_any_pattern(spiralBoxPatterns + negPatterns))
self.assertFalse(spiralBoxMethod.matches_any_pattern(negPatterns))
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment