Commit 39031448 authored by Josef Brandt's avatar Josef Brandt

Loading datasets, folder structure is used for setting attributes

parent 229dd4ac
......@@ -28,17 +28,21 @@ class TotalResults(object):
super(TotalResults, self).__init__()
self.sampleResults: list = []
def add_sample(self, filePath: str):
    """
    Adds a new SampleResult for the given file, if it is a .pkl file and if no sample with the
    same name is already present.
    :param filePath: path to the .pkl file of the sample
    :return: the newly added SampleResult, or None if nothing was added
    """
    # Same suffix test as used in get_pkls_from_directory. Comparing only the text after the last
    # dot would also accept a file that is literally called 'pkl'.
    if not filePath.endswith('.pkl'):
        return None

    sampleName: str = get_name_from_directory(filePath)
    if any(res.sampleName == sampleName for res in self.sampleResults):
        return None

    newResult = SampleResult(filePath)
    self.sampleResults.append(newResult)
    return newResult
def update_all(self) -> None:
"""
......@@ -53,6 +57,9 @@ class TotalResults(object):
print(f'updating {sample.sampleName} with {curMethod.label} at fraction {fraction}')
sample.update_result_with_method(curMethod)
def get_error_vs_fraction_data(self) -> dict:
    """
    Placeholder implementation: currently an empty dictionary is returned.
    :return: empty dict
    """
    emptyData: dict = dict()
    return emptyData
def _get_methods_for_fraction(self, dataset: dataset.DataSet, fraction: float) -> list:
"""
:param fraction: The desired fraction to measure
......@@ -77,6 +84,7 @@ class SampleResult(object):
self.filepath: str = filepath
self.dataset: dataset.DataSet = None
self.results: list = []
self.attributes: list = []
@property
def sampleName(self) -> str:
......@@ -84,6 +92,7 @@ class SampleResult(object):
def load_dataset(self) -> None:
    """
    Loads the dataset found at self.filepath with dataset.loadData and stores it in self.dataset.
    An AssertionError is raised if loading yields None.
    :return:
    """
    loadedData = dataset.loadData(self.filepath)
    self.dataset = loadedData
    assert loadedData is not None
def update_result_with_method(self, method: meth.SubsamplingMethod, force: bool = False) -> None:
"""
......@@ -105,6 +114,19 @@ class SampleResult(object):
self.results.append(newResult)
newResult.update()
def set_attribute(self, newAttribute: str) -> None:
    """
    Registers an attribute for the sample. An attribute that the sample already has
    is not added a second time.
    :param newAttribute: the attribute to add
    :return:
    """
    if self.has_attribute(newAttribute):
        return

    self.attributes.append(newAttribute)
    print(f'sample {self.filepath} has now attribute {newAttribute}')
def has_attribute(self, attribute: str) -> bool:
    """
    Tells whether the given attribute is contained in the sample's attributes.
    :param attribute: the attribute to look for
    :return: True if the attribute is present
    """
    isPresent: bool = attribute in self.attributes
    return isPresent
def _remove_result_of_method(self, method: meth.SubsamplingMethod) -> None:
    """
    Removes the specified result from the list.
    Every result whose method is reported as equal by method.equals() is removed.
    :param method: the method whose result shall be removed
    :return:
    """
    # The list is rebuilt instead of calling remove() inside a loop over the very same list:
    # removing while iterating makes the iterator skip the element following each removed one.
    # Slice assignment keeps the identity of the self.results list object.
    self.results[:] = [result for result in self.results if not method.equals(result.method)]
def _result_is_already_present(self, method: meth.SubsamplingMethod) -> bool:
......@@ -155,6 +177,7 @@ class SubsamplingResult(object):
fraction: float = self.method.fraction
self.mpCountError = self._get_mp_count_error(origParticles, subParticles, fraction)
print(f'{self.origParticleCount} particles, thereof {self.subSampledParticleCount} measured, error: {self.mpCountError}')
self.mpCountErrorPerBin = self._get_mp_count_error_per_bin(origParticles, subParticles, fraction)
def _get_mp_count_error_per_bin(self, allParticles: list, subParticles: list, fractionMeasured: float) -> tuple:
......
......@@ -55,6 +55,9 @@ class BoxSelectionSubsamplingMethod(SubsamplingMethod):
newTopLefts.append((topLeft[0] + self.offset[0], topLeft[1] + self.offset[1]))
return newTopLefts
def equals(self, otherMethod) -> bool:
    """
    Comparison is not implemented at this level; calling it raises NotImplementedError.
    :param otherMethod: the method to compare with
    :return:
    """
    raise NotImplementedError
class BoxSelectionCreator(object):
def __init__(self, dataset: dataset.DataSet):
......@@ -161,6 +164,13 @@ class CrossBoxSubSampling(BoxSelectionSubsamplingMethod):
maxFraction: float = totalBoxArea / self.filterArea
return maxFraction
def equals(self, otherMethod) -> bool:
    """
    Two methods are regarded as equal if they are of exactly the same type and agree in
    fraction and numBoxesAcross.
    :param otherMethod: the method to compare with
    :return: True if both methods are configured identically
    """
    if not (type(otherMethod) == type(self) and otherMethod.fraction == self.fraction):
        return False
    return bool(otherMethod.numBoxesAcross == self.numBoxesAcross)
def _get_horizontal_box_starts(self, boxSize: float) -> list:
"""
Returns a list of width-values at which the individual boxes start
......@@ -237,6 +247,13 @@ class SpiralBoxSubsampling(BoxSelectionSubsamplingMethod):
topLefts = self._move_and_scale_toplefts(topLefts)
return self._apply_offset_to_toplefts(topLefts)
def equals(self, otherMethod) -> bool:
    """
    Two methods are regarded as equal if they are of exactly the same type and agree in
    fraction and numBoxes.
    :param otherMethod: the method to compare with
    :return: True if both methods are configured identically
    """
    if not (type(otherMethod) == type(self) and otherMethod.fraction == self.fraction):
        return False
    return bool(otherMethod.numBoxes == self.numBoxes)
def _move_and_scale_toplefts(self, topLefts: list) -> list:
"""
The spiral approximation leads to boxes that are outside the filter size limits.
......
import os
def get_pkls_from_directory(dirPath: str) -> dict:
    """
    Takes a directory and finds all pkl files in its direct subfolders. The result is returned in a
    dictionary, where each subfolder is present as a key and the actual pkl paths in a list as values.
    Subfolders having 'ignore' in their name are skipped. Files lying directly in dirPath are not
    considered.
    :param dirPath: the directory whose direct subfolders are searched
    :return: dict with subfolder name as key and list of .pkl file paths as value; empty if dirPath
             has no subfolders or cannot be read
    """
    resultDict: dict = {}
    # Only the first item produced by os.walk (the one for dirPath itself) is needed. Fetching it
    # with next() avoids traversing the entire tree, and the default covers the case that os.walk
    # yields nothing at all (e.g. dirPath does not exist).
    subFolders: list = next(os.walk(dirPath), (dirPath, [], []))[1]
    for subFolder in subFolders:
        if 'ignore' in subFolder:
            continue
        subFolderPath: str = os.path.join(dirPath, subFolder)
        resultDict[subFolder] = [os.path.join(subFolderPath, fileName)
                                 for fileName in os.listdir(subFolderPath)
                                 if fileName.endswith('.pkl')]
    return resultDict
def get_attributes_from_foldername(foldername: str) -> list:
    """
    Splits a folder name at its commas into the individual attribute names.
    Surrounding whitespace is stripped from each name. Empty names, as caused by a trailing
    or a doubled comma, are dropped.
    :param foldername: name of the folder, e.g. 'Sediment, Strand'
    :return: list of attribute names, e.g. ['Sediment', 'Strand']
    """
    strippedNames = (name.strip() for name in foldername.split(','))
    return [name for name in strippedNames if name]
\ No newline at end of file
......@@ -33,6 +33,14 @@ class SubsamplingMethod(object):
"""
raise NotImplementedError
def equals(self, otherMethod) -> bool:
    """
    Tells whether the provided method is configured identically to this instance.
    Not implemented at this level; calling it raises NotImplementedError.
    :param otherMethod: the method to compare with
    :return isEqual:
    """
    raise NotImplementedError
class RandomSampling(SubsamplingMethod):
@property
......@@ -47,6 +55,9 @@ class RandomSampling(SubsamplingMethod):
def _get_number_of_random_particles(self, numTotalParticles):
return np.int(np.ceil(numTotalParticles * self.fraction))
def equals(self, otherMethod) -> bool:
    """
    Two methods are regarded as equal if they are of exactly the same type and have the same fraction.
    :param otherMethod: the method to compare with
    :return: True if both methods are configured identically
    """
    if not (type(otherMethod) == type(self)):
        return False
    return otherMethod.fraction == self.fraction
class SizeBinFractioning(SubsamplingMethod):
......@@ -79,3 +90,6 @@ class SizeBinFractioning(SubsamplingMethod):
subParticlesPerBin.append(subParticlesInBin)
return subParticlesPerBin
def equals(self, otherMethod) -> bool:
    """
    Two methods are regarded as equal if they are of exactly the same type and have the same fraction.
    :param otherMethod: the method to compare with
    :return: True if both methods are configured identically
    """
    if not (type(otherMethod) == type(self)):
        return False
    return otherMethod.fraction == self.fraction
......@@ -9,27 +9,25 @@ import gepardevaluation
from methods import RandomSampling, SizeBinFractioning
from geometricMethods import BoxSelectionCreator
from helpers import ParticleBinSorter
from evaluation import TotalResults
from evaluation import TotalResults, SampleResult
from input_output import get_pkls_from_directory, get_attributes_from_foldername
"""
IMPORTANT!!!
SET GEPARD TO EVALUATION BRANCH (WITHOUT THE TILING STUFF), OTHERWISE SOME OF THE LEGACY CONVERTS MIGHT FAIL..
"""
workingFiles: list = []
workingFiles.append(r'C:\Users\xbrjos\Desktop\temp MP\190313_Soil_5_A_50_5_1_50_1\190313_Soil_5_A_50_5_1_50_1.pkl')
workingFiles.append(r'C:\Users\xbrjos\Desktop\temp MP\181018_Microcatch-St.6_50um\181018_Microcatch-St.6_50um.pkl')
workingFiles.append(r'C:\Users\xbrjos\Desktop\temp MP\190326_MCII_WWTP_SB_50_2\190326_MCII_WWTP_SB_50_2.pkl')
workingFiles.append(r'C:\Users\xbrjos\Desktop\temp MP\190326_MCII_WWTP_SB_50_1\190326_MCII_WWTP_SB_50_1.pkl')
workingFiles.append(r'C:\Users\xbrjos\Desktop\temp MP\190201_BSB_Stroomi_ds2_R1_R2_50\190201_BSB_Stroomi_ds2_R1_R2_50.pkl')
# These do not work, due to no ramanscansortindex??
# fname: str = r'C:\Users\xbrjos\Desktop\temp MP\KWS_CT_3_ds1_all_10_2\KWS_CT_3_ds1_all_10_2.pkl'
results: TotalResults = TotalResults()
pklsInFolders = get_pkls_from_directory(r'C:\Users\xbrjos\Desktop\temp MP\NewDatasets')
for folder, samplePaths in pklsInFolders.items():
    # all samples of a folder share the attributes encoded in the folder name
    folderAttributes: list = get_attributes_from_foldername(folder)
    for samplePath in samplePaths:
        newSampleResult: SampleResult = results.add_sample(samplePath)
        if newSampleResult is None:
            # add_sample returns None if nothing was added (e.g. the sample name is already
            # present), so there is no new sample to set attributes on.
            continue
        for attr in folderAttributes:
            newSampleResult.set_attribute(attr)
for index, fname in enumerate(workingFiles):
results.add_sample(fname)
t0 = time.time()
results.update_all()
print('updating all took', time.time()-t0, 'seconds')
......@@ -23,17 +23,21 @@ class TestTotalResults(unittest.TestCase):
self.totalResults = TotalResults()
def test_add_sample(self):
    # add_sample hands back the new SampleResult, or None if nothing was added.
    # Each path is passed exactly once per step, otherwise the checked call would
    # already find the sample present.
    newResult: SampleResult = self.totalResults.add_sample('fakePath/fakeFolder/fakeFile.pkl')
    self.assertEqual(len(self.totalResults.sampleResults), 1)
    self.assertEqual(type(newResult), SampleResult)

    newResult = self.totalResults.add_sample('fakePath/fakeFolder/fakeFile.pkl')  # the same file should not be added again
    self.assertEqual(len(self.totalResults.sampleResults), 1)
    self.assertTrue(newResult is None)

    newResult = self.totalResults.add_sample('fakePath/fakeFolder/fakeFile2.pkl')  # another should be added, though
    self.assertEqual(len(self.totalResults.sampleResults), 2)
    self.assertEqual(type(newResult), SampleResult)

    newResult = self.totalResults.add_sample('fakePath/fakeFolder/fakeFile2.txt')  # invalid extension, not added...
    self.assertEqual(len(self.totalResults.sampleResults), 2)
    self.assertTrue(newResult is None)
def test_get_methods_for_fraction(self):
def containsMethod(listOfMethods: list, template: meth.SubsamplingMethod) -> bool:
......@@ -89,27 +93,47 @@ class TestTotalResults(unittest.TestCase):
self.assertFalse(containsMethod(methods, gmeth.CrossBoxSubSampling(dset, desiredFraction)))
self.assertFalse(containsMethod(methods, gmeth.SpiralBoxSubsampling(dset, desiredFraction)))
def test_get_error_vs_fraction_data(self):
    # Placeholder without assertions: get_error_vs_fraction_data itself only returns
    # an empty dict so far.
    pass
class TestSampleResult(unittest.TestCase):
def setUp(self) -> None:
    # Fixture: one RandomSampling result followed by three SpiralBoxSubsampling results.
    # The order matters, as the tests access the results by index.
    self.sampleResult: SampleResult = SampleResult('fakePath/fakeFile.pkl')
    self.sampleResult.dataset = gepard.dataset.DataSet('fakePath/fakeFile.pkl')
    self.sampleResult.results.append(SubsamplingResult(meth.RandomSampling(None, 0.1)))

    # (fraction, numBoxes) of the spiral box methods
    for fraction, numBoxes in ((0.1, 10), (0.1, 15), (0.3, 10)):
        newMethod = gmeth.SpiralBoxSubsampling(None, fraction)
        newMethod.numBoxes = numBoxes
        self.sampleResult.results.append(SubsamplingResult(newMethod))
def test_sampleResults_added_correctly(self):
    # assertEqual is used throughout: assertTrue(a, b) would take b as the failure message only
    # and pass for every truthy a.
    method: meth.SubsamplingMethod = self.sampleResult.results[0].method
    self.assertEqual(type(method), meth.RandomSampling)
    self.assertEqual(method.fraction, 0.1)

    # expected (fraction, numBoxes) of the spiral box results at index 1, 2 and 3
    expectedSpiralConfigs: list = [(0.1, 10), (0.1, 15), (0.3, 10)]
    for index, (fraction, numBoxes) in enumerate(expectedSpiralConfigs, start=1):
        method = self.sampleResult.results[index].method
        self.assertEqual(type(method), gmeth.SpiralBoxSubsampling)
        self.assertEqual(method.fraction, fraction)
        self.assertEqual(method.numBoxes, numBoxes)
def test_result_is_already_present(self):
newMethod: meth.SubsamplingMethod = meth.RandomSampling(None, 0.1)
......@@ -126,15 +150,26 @@ class TestSampleResult(unittest.TestCase):
self.assertFalse(self.sampleResult._result_is_already_present(newMethod))
def test_remove_result_of_method(self):
    # Expected lengths are expressed relative to the initial number of results, so they do not
    # depend on how many results setUp creates.
    numOrigResults = len(self.sampleResult.results)
    self.sampleResult._remove_result_of_method(meth.RandomSampling(None, 0.1))
    self.assertEqual(len(self.sampleResult.results), numOrigResults-1)

    self.sampleResult._remove_result_of_method(gmeth.SpiralBoxSubsampling(None, 0.1))
    self.assertEqual(len(self.sampleResult.results), numOrigResults-2)

    self.sampleResult._remove_result_of_method(gmeth.SpiralBoxSubsampling(None, 0.2))  # this one is not present...
    self.assertEqual(len(self.sampleResult.results), numOrigResults-2)
def test_attributes(self):
    sample: SampleResult = self.sampleResult

    sample.set_attribute('soil')
    self.assertTrue(sample.has_attribute('soil'))

    # setting an attribute that is already there must not add it a second time
    sample.set_attribute('soil')
    self.assertEqual(len(sample.attributes), 1)

    sample.set_attribute('10µmFilter')
    self.assertEqual(len(sample.attributes), 2)
    self.assertTrue(sample.has_attribute('10µmFilter'))
class TestSubsamplingResult(unittest.TestCase):
def setUp(self):
......
import os
import shutil
import tempfile
import unittest

from input_output import get_pkls_from_directory, get_attributes_from_foldername
class TestIO(unittest.TestCase):
    """
    Tests for get_pkls_from_directory and get_attributes_from_foldername.
    Each test runs against a small throw-away directory tree that is built in setUp.
    """
    folders: list = ['air', 'water', 'sediment', 'ignore']
    samples: list = ['sample1', 'sample2', 'sample3']
    extensions: list = ['.pkl', '.txt', '.xlsx']

    def setUp(self) -> None:
        # The fixture lives in a fresh temporary directory instead of the current working
        # directory: nothing is left behind there, already existing folders of the same name
        # cannot make os.mkdir fail, and unrelated subfolders cannot distort the folder count.
        self.path: str = tempfile.mkdtemp()

        # a pkl file directly in the top level directory, it must not show up in the results
        self.ignoreFile: str = os.path.join(self.path, 'ignored.pkl')
        with open(self.ignoreFile, 'w') as fp:
            fp.write('empty')

        for folder in self.folders:
            folderPath: str = os.path.join(self.path, folder)
            os.mkdir(folderPath)
            for sample in self.samples:
                for ext in self.extensions:
                    fname: str = os.path.join(folderPath, sample+ext)
                    with open(fname, 'w') as fp:
                        fp.write('empty')

    def tearDown(self) -> None:
        # removes the temporary directory including the ignore file and all folders
        shutil.rmtree(self.path)

    def test_read_pkls_from_dir(self):
        pklsInFolder: dict = get_pkls_from_directory(self.path)
        # the ignore folder is to be skipped
        self.assertEqual(len(pklsInFolder.keys()), len(self.folders)-1)

        ignoredFileFound: bool = False
        wrongFileTypeFound: bool = False
        ignoredFolderFound: bool = False
        for folder in pklsInFolder.keys():
            self.assertTrue(folder in self.folders)
            self.assertEqual(len(pklsInFolder[folder]), len(self.samples))
            # The folder to be skipped is named 'ignore'. Searching for 'ignored' could never
            # match it, which would make this check pass unconditionally.
            if folder.find('ignore') != -1:
                ignoredFolderFound = True

            for samplePath in pklsInFolder[folder]:
                self.assertTrue(os.path.exists(samplePath))
                basename: str = os.path.basename(samplePath)
                samplename, extension = basename.split('.')
                self.assertTrue(samplename in self.samples)
                self.assertTrue(basename.endswith('.pkl'))
                if samplename.find('ignored') != -1:
                    ignoredFileFound = True
                if extension != 'pkl':
                    wrongFileTypeFound = True

        self.assertFalse(ignoredFolderFound)
        self.assertFalse(ignoredFileFound)
        self.assertFalse(wrongFileTypeFound)

    def test_get_attributes_from_foldername(self):
        folderName: str = 'Slush'
        attributes: list = get_attributes_from_foldername(folderName)
        self.assertEqual(attributes, ['Slush'])

        folderName = 'Sediment, Strand'
        attributes = get_attributes_from_foldername(folderName)
        self.assertEqual(attributes, ['Sediment', 'Strand'])

        folderName = 'Water, Wasser, Liquid'
        attributes = get_attributes_from_foldername(folderName)
        self.assertEqual(attributes, ['Water', 'Wasser', 'Liquid'])
......@@ -14,6 +14,7 @@ import gepard
from gepard.analysis.particleContainer import ParticleContainer
from gepard.analysis.particleAndMeasurement import Particle
from methods import RandomSampling, SizeBinFractioning
import geometricMethods as gmeth
from helpers import ParticleBinSorter
......@@ -63,3 +64,42 @@ class TestSizeBinFractioning(unittest.TestCase):
subParticlesPerBin: list = self.sizeBinFrac._get_subParticles_per_bin(self.particles)
for subParticles in subParticlesPerBin:
self.assertEqual(len(subParticles), numParticlesPerBinExpected)
class TestMethodEquality(unittest.TestCase):
    def test_methodEquality(self):
        # Builds methods that pairwise differ in type, fraction or box count.
        # Each of them must be equal to itself and to none of the others.
        fractions: tuple = (0.1, 0.2)

        methods: list = [RandomSampling(None, fraction) for fraction in fractions]
        methods += [SizeBinFractioning(None, fraction) for fraction in fractions]

        for fraction in fractions:
            for numBoxesAcross in (3, 5):
                crossBoxMethod: gmeth.CrossBoxSubSampling = gmeth.CrossBoxSubSampling(None, fraction)
                crossBoxMethod.numBoxesAcross = numBoxesAcross
                methods.append(crossBoxMethod)

        for fraction in fractions:
            for numBoxes in (5, 10):
                spiralMethod: gmeth.SpiralBoxSubsampling = gmeth.SpiralBoxSubsampling(None, fraction)
                spiralMethod.numBoxes = numBoxes
                methods.append(spiralMethod)

        for index1, method1 in enumerate(methods):
            for index2, method2 in enumerate(methods):
                if index1 != index2:
                    self.assertFalse(method1.equals(method2))
                else:
                    self.assertTrue(method1.equals(method2))
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment