Commit afa9eac4 authored by Josef Brandt's avatar Josef Brandt

Multi-Processing in update_all function

parent b02245b5
......@@ -9,6 +9,8 @@ import pickle
import os
import numpy as np
import matplotlib.pyplot as plt
import concurrent.futures
# from multiprocessing import Pool, Process, Event, Queue
import sys
sys.path.append("C://Users//xbrjos//Desktop//Python")
from gepard import dataset
......@@ -24,13 +26,36 @@ def get_name_from_directory(dirPath: str) -> str:
return str(os.path.basename(dirPath).split('.')[0])
class TotalResults(object):
# methods: list = [meth.RandomSampling, meth.SizeBinFractioning, gmeth.CrossBoxSubSampling,
# gmeth.SpiralBoxSubsampling, cmeth.ChemometricSubsampling]
# measuredFractions: list = [0.01, 0.05, 0.1, 0.15, 0.2, 0.5, 0.75, 0.9]
# measuredFractions: list = [0.1, 0.15, 0.2, 0.5, 0.75, 0.9]
measuredFractions: list = [0.1, 0.3, 0.5, 0.9]
def get_methods_to_test(dataset: dataset.DataSet, fractions: list = None) -> list:
    """
    Creates the subsampling method objects that shall be tested on the given dataset.
    :param dataset: The dataset to test. Its particleContainer is handed to the particle based methods,
                    the dataset itself is handed to the BoxSelectionCreator.
    :param fractions: The desired fractions to measure. If None or empty, [0.05, 0.1, 0.3] is used.
    :return: list of the created measurement Objects, grouped by fraction in the given order
    """
    # None replaces the former mutable default ([]). An explicitly passed empty list
    # still selects the default fractions, so existing callers behave as before.
    if fractions is None or len(fractions) == 0:
        fractions = [0.05, 0.1, 0.3]

    methods: list = []
    particleContainer = dataset.particleContainer

    for fraction in fractions:
        methods.append(meth.RandomSampling(particleContainer, fraction))
        methods.append(meth.SizeBinFractioning(particleContainer, fraction))
        # The box creator decides which cross/spiral box layouts it returns for this fraction;
        # both calls may contribute zero or more methods.
        boxCreator: gmeth.BoxSelectionCreator = gmeth.BoxSelectionCreator(dataset)
        methods += boxCreator.get_crossBoxSubsamplers_for_fraction(fraction)
        methods += boxCreator.get_spiralBoxSubsamplers_for_fraction(fraction)
        methods.append(cmeth.ChemometricSubsampling(particleContainer, fraction))

    return methods
def update_sample(sample, force: bool, index: int):
    """
    Loads the dataset of the given sample and updates its results with the methods
    obtained from get_methods_to_test (called with its default fractions).
    :param sample: The sample result object to update
    :param force: Passed on to sample.update_result_with_methods
    :param index: Identifier of the call, returned unchanged so the caller can match results to inputs
    :return: tuple (sample, index)
    """
    sample.load_dataset()
    sample.update_result_with_methods(get_methods_to_test(sample.dataset), force)
    return sample, index
class TotalResults(object):
def __init__(self):
super(TotalResults, self).__init__()
self.sampleResults: list = []
......@@ -57,15 +82,36 @@ class TotalResults(object):
:param force: Whether to force an update of an already existing method.
:return:
"""
for index, sample in enumerate(self.sampleResults):
sample.load_dataset()
possibleMethods: list = []
for fraction in self.measuredFractions:
for method in self._get_methods_for_fraction(sample.dataset, fraction):
possibleMethods.append(method)
sample.update_result_with_methods(possibleMethods, force=force)
print(f'processed {index+1} of {len(self.sampleResults)} samples')
print('about to update all')
#############################################################
# p = Pool(processes=len(forceList))
# results = p.map(update_sample, indices)
# p.close()
# print(results)
#############################################################
forceList: list = [force]*len(self.sampleResults)
indices: list = list(np.arange(len(self.sampleResults)))
###########################################################
with concurrent.futures.ProcessPoolExecutor() as executor:
results = executor.map(update_sample, self.sampleResults, forceList, indices)
# results = executor.map(update_sample, indices)
for index, res in enumerate(results):
updatedSample, processid = res
print(f'returned from process {processid}, iteration index {index}')
self.sampleResults[index] = updatedSample
# for index, sample in enumerate(self.sampleResults):
# sample.load_dataset()
# possibleMethods: list = []
# for fraction in self.measuredFractions:
# for method in self._get_methods_for_fraction(sample.dataset, fraction):
# possibleMethods.append(method)
#
# sample.update_result_with_methods(possibleMethods, force=force)
# print(f'processed {index+1} of {len(self.sampleResults)} samples')
def get_error_vs_fraction_data(self, attributes: list = [], methods: list = []) -> dict:
"""
......@@ -100,20 +146,6 @@ class TotalResults(object):
return result
def _get_methods_for_fraction(self, dataset: dataset.DataSet, fraction: float) -> list:
    """
    Creates the subsampling method objects for one single fraction.
    :param dataset: The dataset to test. Its particleContainer is handed to the particle based methods,
                    the dataset itself is handed to the BoxSelectionCreator.
    :param fraction: The desired fraction to measure
    :return: list of the created measurement Objects
    """
    container = dataset.particleContainer

    collected: list = []
    collected.append(meth.RandomSampling(container, fraction))
    collected.append(meth.SizeBinFractioning(container, fraction))

    # The box creator decides which cross/spiral box layouts it returns for this fraction.
    creator: gmeth.BoxSelectionCreator = gmeth.BoxSelectionCreator(dataset)
    collected.extend(creator.get_crossBoxSubsamplers_for_fraction(fraction))
    collected.extend(creator.get_spiralBoxSubsamplers_for_fraction(fraction))

    collected.append(cmeth.ChemometricSubsampling(container, fraction))
    return collected
class SubsamplingResult(object):
"""
......@@ -190,11 +222,12 @@ class SubsamplingResult(object):
return numMPParticles
class SampleResult(object):
"""
An object that actually stores all generated results per sample and can update and report on them.
"""
def __init__(self, filepath: str, numVariations: int = 10):
def __init__(self, filepath: str, numVariations: int = 3):
super(SampleResult, self).__init__()
self.filepath: str = filepath
self.dataset: dataset.DataSet = None
......@@ -213,7 +246,7 @@ class SampleResult(object):
def update_result_with_methods(self, methods: list, force: bool = False) -> list:
"""
Updates result with the given method (contains desiredFraction already)
:param method: The SubsamplingMethod Object
:param methods: List of the SubsamplingMethod Objects to use
:param force: Whether to force an update. If False, the result is not updated, if it is already present.
:return: list of updated methods
"""
......@@ -221,8 +254,7 @@ class SampleResult(object):
self.load_dataset()
updatedMethods: list = []
particleVariations: ParticleVariations = ParticleVariations(self.dataset.particleContainer,
numVariations=self.numVariations)
particleVariations: ParticleVariations = ParticleVariations(self.dataset, numVariations=self.numVariations)
needsToBeUpdated: dict = {method: False for method in methods}
......@@ -247,8 +279,8 @@ class SampleResult(object):
result.add_result(method.particleContainer.particles, subParticles)
if method not in updatedMethods:
updatedMethods.append(method)
print(f'updated {self.sampleName} with {method.label} at fraction {method.fraction}, '
f'iteration {index+1}')
# print(f'updated {self.sampleName} with {method.label} at fraction {method.fraction}, '
# f'iteration {index+1}')
return updatedMethods
......@@ -297,7 +329,4 @@ class SampleResult(object):
return requestedResult
# def _get_result_of_method(self, method: meth.SubsamplingMethod) -> SubsamplingResult:
# return None
# return None
\ No newline at end of file
......@@ -9,58 +9,58 @@ IMPORTANT!!!
SET GEPARD TO EVALUATION BRANCH (WITHOUT THE TILING STUFF), OTHERWISE SOME OF THE LEGACY CONVERTS MIGHT FAIL..
"""
if __name__ == '__main__':
results: TotalResults = TotalResults()
pklsInFolders = get_pkls_from_directory(r'C:\Users\xbrjos\Desktop\temp MP\NewDatasets')
# results: TotalResults = TotalResults()
# pklsInFolders = get_pkls_from_directory(r'C:\Users\xbrjos\Desktop\temp MP\NewDatasets')
#
# for folder in pklsInFolders.keys():
# for samplePath in pklsInFolders[folder]:
# newSampleResult: SampleResult = results.add_sample(samplePath)
# for attr in get_attributes_from_foldername(folder):
# newSampleResult.set_attribute(attr)
#
# t0 = time.time()
# results.update_all()
# print('updating all took', time.time()-t0, 'seconds')
#
# save_results('results1.res', results)
results: TotalResults = load_results('results1.res')
# results.update_all(force=True)
# save_results('results1.res', results)
for folder in pklsInFolders.keys():
for samplePath in pklsInFolders[folder]:
newSampleResult: SampleResult = results.add_sample(samplePath)
for attr in get_attributes_from_foldername(folder):
newSampleResult.set_attribute(attr)
plt.clf()
errorPerFraction: dict = results.get_error_vs_fraction_data(attributes=['air', 'water'],
methods=[])
t0 = time.time()
results.update_all()
print('updating all took', time.time()-t0, 'seconds')
plt.subplot(121)
for methodLabel in errorPerFraction.keys():
fractions: list = list(errorPerFraction[methodLabel].keys())
errors: list = list(errorPerFraction[methodLabel].values())
plt.plot(fractions, errors)
plt.scatter(fractions, errors, label=methodLabel)
save_results('results1.res', results)
# results: TotalResults = load_results('results1.res')
# results.update_all(force=True)
# save_results('results1.res', results)
plt.title('Air/Water sample', fontSize=15)
plt.xscale('log')
plt.xlabel('measured fraction', fontsize=12)
plt.ylabel('mpCountError (%)', fontsize=12)
plt.ylim([0, 100])
plt.legend()
plt.clf()
errorPerFraction: dict = results.get_error_vs_fraction_data(attributes=['air', 'water'],
methods=[])
errorPerFraction: dict = results.get_error_vs_fraction_data(attributes=['sediment', 'soil', 'beach', 'slush'],
methods=[])
plt.subplot(122)
for methodLabel in errorPerFraction.keys():
fractions: list = list(errorPerFraction[methodLabel].keys())
errors: list = list(errorPerFraction[methodLabel].values())
plt.plot(fractions, errors)
plt.scatter(fractions, errors, label=methodLabel)
plt.subplot(121)
for methodLabel in errorPerFraction.keys():
fractions: list = list(errorPerFraction[methodLabel].keys())
errors: list = list(errorPerFraction[methodLabel].values())
plt.plot(fractions, errors)
plt.scatter(fractions, errors, label=methodLabel)
plt.title('Sediment/Beach/Slush sample', fontSize=15)
plt.xscale('log')
plt.xlabel('measured fraction', fontsize=12)
plt.ylabel('mpCountError (%)', fontsize=12)
plt.ylim([0, 100])
plt.legend()
plt.title('Air/Water sample', fontSize=15)
plt.xscale('log')
plt.xlabel('measured fraction', fontsize=12)
plt.ylabel('mpCountError (%)', fontsize=12)
plt.ylim([0, 100])
plt.legend()
plt.show()
errorPerFraction: dict = results.get_error_vs_fraction_data(attributes=['sediment', 'soil', 'beach', 'slush'],
methods=[])
plt.subplot(122)
for methodLabel in errorPerFraction.keys():
fractions: list = list(errorPerFraction[methodLabel].keys())
errors: list = list(errorPerFraction[methodLabel].values())
plt.plot(fractions, errors)
plt.scatter(fractions, errors, label=methodLabel)
plt.title('Sediment/Beach/Slush sample', fontSize=15)
plt.xscale('log')
plt.xlabel('measured fraction', fontsize=12)
plt.ylabel('mpCountError (%)', fontsize=12)
plt.ylim([0, 100])
plt.legend()
plt.show()
......@@ -15,7 +15,7 @@ import gepard
from gepard.analysis.particleContainer import ParticleContainer
from gepard.analysis.particleAndMeasurement import Particle, Measurement
from evaluation import TotalResults, SampleResult, SubsamplingResult
from evaluation import TotalResults, SampleResult, SubsamplingResult, get_methods_to_test
import methods as meth
import geometricMethods as gmeth
from helpers_for_test import get_default_ParticleContainer
......@@ -42,65 +42,65 @@ class TestTotalResults(unittest.TestCase):
self.assertEqual(len(self.totalResults.sampleResults), 2)
self.assertTrue(newResult is None)
def test_get_methods_for_fraction(self):
    """
    Checks number and types of the methods returned by TotalResults._get_methods_for_fraction
    for the fractions 0.1, 0.5 and 0.9 on a fake dataset.
    """
    def containsMethod(listOfMethods: list, template: meth.SubsamplingMethod) -> bool:
        # A method matches the template if it has exactly the same type and the same fraction.
        return any(type(candidate) == type(template) and candidate.fraction == template.fraction
                   for candidate in listOfMethods)

    dset: gepard.dataset.DataSet = gepard.dataset.DataSet('fakepath')

    imgdim = 10
    halfDim = imgdim / 2
    dset.imagescanMode = 'df'
    dset.imagedim_df = [imgdim, imgdim]
    dset.pixelscale_df = 1.0
    minX, maxX, minY, maxY = 0, 1000, 0, 1000
    dset.maxdim = minX + halfDim, maxY - halfDim, maxX - halfDim, minY + halfDim

    # Always expected, independent of the fraction:
    possibleRandomMethods = 2
    possibleChemometricMethods = 1

    # (fraction, expected number of cross box methods, expected number of spiral box methods)
    expectations = [(0.1, 2, 3),
                    (0.5, 1, 0),
                    (0.9, 0, 0)]

    for desiredFraction, possibleCrossBoxMethods, possibleSpiralBoxMethods in expectations:
        methods = self.totalResults._get_methods_for_fraction(dset, desiredFraction)
        totalPossible = possibleCrossBoxMethods + possibleRandomMethods + \
                        possibleSpiralBoxMethods + possibleChemometricMethods
        self.assertEqual(len(methods), totalPossible)

        self.assertTrue(containsMethod(methods, meth.RandomSampling(dset, desiredFraction)))
        self.assertTrue(containsMethod(methods, meth.SizeBinFractioning(dset, desiredFraction)))

        hasCrossBox = containsMethod(methods, gmeth.CrossBoxSubSampling(dset, desiredFraction))
        self.assertEqual(hasCrossBox, possibleCrossBoxMethods > 0)

        hasSpiralBox = containsMethod(methods, gmeth.SpiralBoxSubsampling(dset, desiredFraction))
        self.assertEqual(hasSpiralBox, possibleSpiralBoxMethods > 0)
# def test_get_methods_for_fraction(self):
# def containsMethod(listOfMethods: list, template: meth.SubsamplingMethod) -> bool:
# contains: bool = False
# for method in listOfMethods:
# if type(method) == type(template) and method.fraction == template.fraction:
# contains = True
# break
# return contains
#
# dset: gepard.dataset.DataSet = gepard.dataset.DataSet('fakepath')
#
# imgdim = 10
# dset.imagescanMode = 'df'
# dset.imagedim_df = [imgdim, imgdim]
# dset.pixelscale_df = 1.0
# minX, maxX, minY, maxY = 0, 1000, 0, 1000
# dset.maxdim = minX + imgdim / 2, maxY - imgdim / 2, maxX - imgdim / 2, minY + imgdim / 2
#
# desiredFraction = 0.1
# methods = self.totalResults._get_methods_for_fraction(dset, desiredFraction)
# possibleRandomMethods = 2
# possibleCrossBoxMethods = 2
# possibleSpiralBoxMethods = 3
# possibleChemometricMethods = 1
# totalPossible = possibleCrossBoxMethods + possibleRandomMethods + \
# possibleSpiralBoxMethods + possibleChemometricMethods
# self.assertEqual(len(methods), totalPossible)
# self.assertTrue(containsMethod(methods, meth.RandomSampling(dset, desiredFraction)))
# self.assertTrue(containsMethod(methods, meth.SizeBinFractioning(dset, desiredFraction)))
# self.assertTrue(containsMethod(methods, gmeth.CrossBoxSubSampling(dset, desiredFraction)))
# self.assertTrue(containsMethod(methods, gmeth.SpiralBoxSubsampling(dset, desiredFraction)))
#
# desiredFraction = 0.5
# methods = self.totalResults._get_methods_for_fraction(dset, desiredFraction)
# possibleRandomMethods = 2
# possibleCrossBoxMethods = 1
# possibleSpiralBoxMethods = 0
# possibleChemometricMethods = 1
# totalPossible = possibleCrossBoxMethods + possibleRandomMethods + \
# possibleSpiralBoxMethods + possibleChemometricMethods
# self.assertEqual(len(methods), totalPossible)
# self.assertTrue(containsMethod(methods, meth.RandomSampling(dset, desiredFraction)))
# self.assertTrue(containsMethod(methods, meth.SizeBinFractioning(dset, desiredFraction)))
# self.assertTrue(containsMethod(methods, gmeth.CrossBoxSubSampling(dset, desiredFraction)))
# self.assertFalse(containsMethod(methods, gmeth.SpiralBoxSubsampling(dset, desiredFraction)))
#
# desiredFraction = 0.9
# methods = self.totalResults._get_methods_for_fraction(dset, desiredFraction)
# possibleRandomMethods = 2
# possibleCrossBoxMethods = 0
# possibleSpiralBoxMethods = 0
# possibleChemometricMethods = 1
# totalPossible = possibleCrossBoxMethods + possibleRandomMethods + \
# possibleSpiralBoxMethods + possibleChemometricMethods
# self.assertEqual(len(methods), totalPossible)
# self.assertTrue(containsMethod(methods, meth.RandomSampling(dset, desiredFraction)))
# self.assertTrue(containsMethod(methods, meth.SizeBinFractioning(dset, desiredFraction)))
# self.assertFalse(containsMethod(methods, gmeth.CrossBoxSubSampling(dset, desiredFraction)))
# self.assertFalse(containsMethod(methods, gmeth.SpiralBoxSubsampling(dset, desiredFraction)))
def test_get_error_vs_fraction_data(self):
firstSample: SampleResult = self.totalResults.add_sample('sample1.pkl')
......@@ -274,6 +274,84 @@ class TestSampleResult(unittest.TestCase):
self.assertFalse(self.sampleResult.has_any_attribute(['water', 'sediment']))
self.assertFalse(self.sampleResult.has_any_attribute(['beach']))
def test_get_methods_to_test(self):
    """
    Checks number and types of the methods returned by get_methods_to_test, first for the
    single fractions 0.1, 0.5 and 0.9 and then for the combined fractions [0.1, 0.5].
    """
    def containsMethod(listOfMethods: list, template: meth.SubsamplingMethod) -> bool:
        # A method matches the template if it has exactly the same type and the same fraction.
        return any(type(candidate) == type(template) and candidate.fraction == template.fraction
                   for candidate in listOfMethods)

    dset: gepard.dataset.DataSet = gepard.dataset.DataSet('fakepath')

    imgdim = 10
    halfDim = imgdim / 2
    dset.imagescanMode = 'df'
    dset.imagedim_df = [imgdim, imgdim]
    dset.pixelscale_df = 1.0
    minX, maxX, minY, maxY = 0, 1000, 0, 1000
    dset.maxdim = minX + halfDim, maxY - halfDim, maxX - halfDim, minY + halfDim

    # --- single fractions ---
    # Always expected per fraction, independent of its value:
    possibleRandomMethods = 2
    possibleChemometricMethods = 1

    # (fraction, expected number of cross box methods, expected number of spiral box methods)
    expectations = [(0.1, 2, 3),
                    (0.5, 1, 0),
                    (0.9, 0, 0)]

    for desiredFraction, possibleCrossBoxMethods, possibleSpiralBoxMethods in expectations:
        methods = get_methods_to_test(dset, [desiredFraction])
        totalPossible = possibleCrossBoxMethods + possibleRandomMethods + \
                        possibleSpiralBoxMethods + possibleChemometricMethods
        self.assertEqual(len(methods), totalPossible)

        self.assertTrue(containsMethod(methods, meth.RandomSampling(dset, desiredFraction)))
        self.assertTrue(containsMethod(methods, meth.SizeBinFractioning(dset, desiredFraction)))

        hasCrossBox = containsMethod(methods, gmeth.CrossBoxSubSampling(dset, desiredFraction))
        self.assertEqual(hasCrossBox, possibleCrossBoxMethods > 0)

        hasSpiralBox = containsMethod(methods, gmeth.SpiralBoxSubsampling(dset, desiredFraction))
        self.assertEqual(hasSpiralBox, possibleSpiralBoxMethods > 0)

    # --- several fractions at once: the expected counts are the sums of the single cases ---
    desiredFractions = [0.1, 0.5]
    methods = get_methods_to_test(dset, desiredFractions)
    possibleRandomMethods = 4
    possibleCrossBoxMethods = 3
    possibleSpiralBoxMethods = 3
    possibleChemometricMethods = 2
    totalPossible = possibleCrossBoxMethods + possibleRandomMethods + \
                    possibleSpiralBoxMethods + possibleChemometricMethods
    self.assertEqual(len(methods), totalPossible)

    for desiredFraction in desiredFractions:
        self.assertTrue(containsMethod(methods, meth.RandomSampling(dset, desiredFraction)))
        self.assertTrue(containsMethod(methods, meth.SizeBinFractioning(dset, desiredFraction)))
        self.assertTrue(containsMethod(methods, gmeth.CrossBoxSubSampling(dset, desiredFraction)))

        # Spiral box methods are only expected for the fraction 0.1.
        hasSpiralBox = containsMethod(methods, gmeth.SpiralBoxSubsampling(dset, desiredFraction))
        self.assertEqual(hasSpiralBox, desiredFraction == 0.1)
def test_update_result_with_methods(self):
particleContainer = get_default_ParticleContainer()
for numVariations in [1, 5, 20]:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment