Commit aec15820 authored by Josef Brandt

Merge branch 'Development'

parents b02245b5 68561321
......@@ -183,7 +183,7 @@ class ChemometricSubsampling(SubsamplingMethod):
if not abs(totalPointsAdded - numPointsToSelect) <= 1:
print('error')
# assert abs(totalPointsAdded - numPointsToSelect) <= 1
assert abs(totalPointsAdded - numPointsToSelect) <= 1
for clusterIndex in pointsPerCluster.keys():
assert 0 <= pointsPerCluster[clusterIndex] <= len(labels[labels == clusterIndex])
return pointsPerCluster
......
import numpy as np
cimport numpy as np
cimport numpy.random
cimport cython
# np.float was nothing but an alias of the builtin float and has been removed from NumPy (1.24),
# so the builtin is referenced directly. The resulting dtype is unchanged.
DTYPE = float
ctypedef np.int32_t INT32_t
cdef get_random_topleft(double maxDist, double maxAngle, double radius, double boxSize):
    # Draws one random box position and returns its top left corner as int32 array [x, y].
    # The box center is placed at polar coordinates (rndDist, rndAngle) around the point (radius, radius),
    # the top left corner lies half a boxSize to the left/top of that center.
    cdef double rndDist, rndAngle, xPos, yPos
    cdef np.ndarray[INT32_t, ndim=1] topLeft = np.empty(2, dtype=np.int32)

    # The distance is drawn before the angle. Keep that order, it determines the result for a given seed.
    rndDist = np.random.rand() * maxDist
    rndAngle = np.random.rand() * maxAngle

    xPos = rndDist*np.cos(rndAngle) + radius - boxSize/2
    yPos = rndDist*np.sin(rndAngle) + radius - boxSize/2

    topLeft[0] = np.int32(np.round(xPos))
    topLeft[1] = np.int32(np.round(yPos))
    return topLeft
def get_random_topLefts(int numBoxes, double boxSize, double radius, double maxAngle, int seed=1337, int maxTries=50):
    """
    Tries to find random, non-overlapping positions for the given number of square boxes.
    Box centers are drawn within maxDist (radius minus half the box diagonal) around the point (radius, radius).
    Note: The global numpy random state is re-seeded with the given seed.
    :param numBoxes: Number of boxes to place
    :param boxSize: Edge length of each box
    :param radius: Radius used for placing the boxes; (radius, radius) is the center of the placement area
    :param maxAngle: Upper limit of the random angle that is passed on to get_random_topleft
    :param seed: Seed for np.random
    :param maxTries: Maximum number of complete restarts before giving up
    :return: Tuple (validSolutionFound, topLefts). topLefts is an int32 array of shape (numBoxes, 2) from the
    last attempt. If validSolutionFound is False, rows of boxes that could not be placed are uninitialized.
    """
    cdef np.ndarray[INT32_t, ndim=2] topLefts
    cdef np.ndarray[INT32_t, ndim=1] newTopLeft
    cdef double maxDist
    cdef int outerCounter, counter, i, j, diffX, diffY, successfullyAdded
    cdef bint validSolutionFound, boxOverlaps

    np.random.seed(seed)
    maxDist = radius - np.sqrt((boxSize/2)**2 + (boxSize/2)**2)
    outerCounter = 0
    validSolutionFound = False

    while not validSolutionFound and outerCounter < maxTries:
        successfullyAdded = 0
        topLefts = np.empty((numBoxes, 2), dtype=np.int32)
        for i in range(numBoxes):
            if i == 0:
                topLefts[0, :] = get_random_topleft(maxDist, maxAngle, radius, boxSize)
                successfullyAdded += 1
            else:
                counter = 0
                while counter < 50:  # at most 50 placement attempts per box
                    newTopLeft = get_random_topleft(maxDist, maxAngle, radius, boxSize)
                    boxOverlaps = False
                    for j in range(i):
                        # Plain integer arithmetic on the typed buffers. The former detour over np.float
                        # (an alias that was removed from NumPy) yielded the very same integral values.
                        diffX = abs(newTopLeft[0] - topLefts[j, 0])
                        diffY = abs(newTopLeft[1] - topLefts[j, 1])
                        if diffX < boxSize and diffY < boxSize:
                            boxOverlaps = True
                            break

                    if boxOverlaps:
                        counter += 1
                    else:
                        topLefts[i, :] = newTopLeft
                        successfullyAdded += 1
                        break

        if successfullyAdded == numBoxes:
            validSolutionFound = True
        else:
            outerCounter += 1

    return validSolutionFound, topLefts
\ No newline at end of file
......@@ -9,10 +9,18 @@ if len(sys.argv) == 1:
sys.argv.append("build_ext")
sys.argv.append("--inplace")
ext = Extension("rotateContour", ["rotateContour.pyx"], extra_compile_args=['-O3'],)
# ext = Extension("rotateContour", ["rotateContour.pyx"], extra_compile_args=['-O3'],)
# setup(
# name="rotate contour around reference point",
# ext_modules=cythonize([ext], annotate=True), # accepts a glob pattern
# include_dirs=[np.get_include()]
# )
# ext = Extension("getRandomTopLefts", ["getRandomTopLefts.pyx"], extra_compile_args=['-O3'],)
setup(
name="rotate contour around reference point",
ext_modules=cythonize([ext], annotate=True), # accepts a glob pattern
name="get a given number of random topLefts",
ext_modules=cythonize("randoms.pyx", annotate=True), # accepts a glob pattern
include_dirs=[np.get_include()]
)
\ No newline at end of file
)
......@@ -2,23 +2,28 @@ import copy
import numpy as np
import sys
sys.path.append("C://Users//xbrjos//Desktop//Python")
from gepard import dataset
from gepard.analysis.particleContainer import ParticleContainer
from cythonModules import rotateContour
from helpers import get_filterDimensions_from_dataset, get_center_from_filter_dimensions, convert_length_to_pixels
class ParticleVariations(object):
def __init__(self, particleContainer: ParticleContainer, numVariations: int = 10) -> None:
def __init__(self, dataset: dataset.DataSet, numVariations: int = 10) -> None:
super(ParticleVariations, self).__init__()
self.origParticleContainer = particleContainer
self.dataset: dataset.DataSet = dataset
self.origParticleContainer: ParticleContainer = self.dataset.particleContainer
self.numVariations = numVariations
def get_particleContainer_variations(self) -> ParticleContainer:
if self.numVariations > 0:
offset, diameter, [width, height] = get_filterDimensions_from_dataset(self.dataset)
diameter: float = convert_length_to_pixels(self.dataset, diameter)
offset: tuple = convert_length_to_pixels(self.dataset, offset[0]), \
convert_length_to_pixels(self.dataset, offset[1])
center: np.ndarray = get_center_from_filter_dimensions(offset, diameter)
partContainer: ParticleContainer = self.origParticleContainer
contours: list = partContainer.getParticleContours()
center: tuple = round(np.mean(contours[:][0][0])),\
round(np.mean(contours[:][0][1]))
center: np.ndarray = np.array(center, dtype=np.int32)
angles = self._get_angles()
for i in range(self.numVariations):
if i > 0:
......
......@@ -9,9 +9,11 @@ import pickle
import os
import numpy as np
import matplotlib.pyplot as plt
import concurrent.futures
import sys
sys.path.append("C://Users//xbrjos//Desktop//Python")
from gepard import dataset
from gepard.analysis.particleAndMeasurement import Particle
from helpers import ParticleBinSorter
import methods as meth
......@@ -24,13 +26,51 @@ def get_name_from_directory(dirPath: str) -> str:
return str(os.path.basename(dirPath).split('.')[0])
class TotalResults(object):
# methods: list = [meth.RandomSampling, meth.SizeBinFractioning, gmeth.CrossBoxSubSampling,
# gmeth.SpiralBoxSubsampling, cmeth.ChemometricSubsampling]
# measuredFractions: list = [0.01, 0.05, 0.1, 0.15, 0.2, 0.5, 0.75, 0.9]
# measuredFractions: list = [0.1, 0.15, 0.2, 0.5, 0.75, 0.9]
measuredFractions: list = [0.1, 0.3, 0.5, 0.9]
def get_methods_to_test(dataset: dataset.DataSet, fractions: list = None) -> list:
    """
    Creates the subsampling method objects that shall be tested on the given dataset.
    :param dataset: The dataset to create the methods for; its particleContainer is handed to the methods
    :param fractions: The desired fractions to measure. If None or empty, a default set of fractions is used.
    :return: list of measurement Objects that are applicable
    """
    # None (instead of a shared, mutable list) is the default; an empty list is still accepted as "use defaults".
    if fractions is None or len(fractions) == 0:
        fractions = [0.02, 0.05, 0.1, 0.25, 0.5, 0.7, 0.9]

    methods: list = []
    particleContainer = dataset.particleContainer

    for fraction in fractions:
        methods.append(meth.RandomSampling(particleContainer, fraction))
        methods.append(meth.SizeBinFractioning(particleContainer, fraction))
        boxCreator: gmeth.BoxSelectionCreator = gmeth.BoxSelectionCreator(dataset)
        methods += boxCreator.get_crossBoxSubsamplers_for_fraction(fraction)
        methods += boxCreator.get_spiralBoxSubsamplers_for_fraction(fraction)
        methods += boxCreator.get_randomBoxSubsamplers_for_fraction(fraction)
        methods += boxCreator.get_randomQuarterBoxSubsamplers_for_fraction(fraction)
        # NOTE: chemometric subsampling is currently disabled:
        # methods.append(cmeth.ChemometricSubsampling(particleContainer, fraction))

    return methods
def update_sample(sample, force: bool, index: int):
    """
    Loads the dataset of the sample and updates its results with all methods from get_methods_to_test.
    :param sample: The sample result object to update
    :param force: Passed on to sample.update_result_with_methods
    :param index: Returned unchanged, so the caller can relate the result to its input
    :return: Tuple (sample, index)
    """
    sample.load_dataset()
    sample.update_result_with_methods(get_methods_to_test(sample.dataset), force)
    return sample, index
def is_MP_particle(particle: Particle) -> bool:
    """
    Checks if the particle's assignment contains any of the MP name patterns (case-insensitive substring match).
    :param particle: The particle to check
    :return: True if at least one pattern is found in the assignment
    """
    # TODO: UPDATE PATTERNS -> ARE THESE REASONABLE???
    mpPatterns = ['poly', 'rubber', 'pb', 'pr', 'pg', 'py', 'pv']
    lowerCaseAssignment = particle.getParticleAssignment().lower()
    return any(pattern in lowerCaseAssignment for pattern in mpPatterns)
class TotalResults(object):
def __init__(self):
super(TotalResults, self).__init__()
self.sampleResults: list = []
......@@ -57,22 +97,28 @@ class TotalResults(object):
:param force: Wether to force an update of an already existing method.
:return:
"""
for index, sample in enumerate(self.sampleResults):
sample.load_dataset()
possibleMethods: list = []
for fraction in self.measuredFractions:
for method in self._get_methods_for_fraction(sample.dataset, fraction):
possibleMethods.append(method)
forceList: list = [force]*len(self.sampleResults)
indices: list = list(np.arange(len(self.sampleResults)))
numSamples: int = len(forceList)
numWorkers: int = 4 # in case of quadcore processor that seams reasonable??
chunksize: int = int(round(numSamples / numWorkers * 0.7)) # we want to have slightly more chunks than workers
print(f'multiprocessing with {numSamples} samples and chunksize of {chunksize}')
with concurrent.futures.ProcessPoolExecutor() as executor:
results = executor.map(update_sample, self.sampleResults, forceList, indices, chunksize=chunksize)
sample.update_result_with_methods(possibleMethods, force=force)
print(f'processed {index+1} of {len(self.sampleResults)} samples')
for index, res in enumerate(results):
updatedSample, processid = res
print(f'returned from process {processid}, iteration index {index}')
self.sampleResults[index] = updatedSample
def get_error_vs_fraction_data(self, attributes: list = [], methods: list = []) -> dict:
"""
Returns Dict: Key: Method Label, Value: (Dict: Key:Measured Fraction, Value: averaged MPCountError over all samples)
:param attributes: A list of attributes that should be used for filtering the samples. Only samples with an
attribute from within that list are considered.
:return:
:return: Dict: Key: Method Label,
Value: {Dict: Key:Measured Fraction, Value: Tuple (averaged MPCountError, StDev MPCountError) over all samples}
"""
result: dict = {}
for sample in self.sampleResults:
......@@ -85,49 +131,40 @@ class TotalResults(object):
label: str = method.label
frac: float = method.fraction
error: float = res.mpCountError
stdev: float = res.mpCountErrorStDev
if label not in result.keys():
result[label] = {frac: [error]}
result[label] = {frac: [(error, stdev)]}
elif frac not in result[label].keys():
result[label][frac] = [error]
result[label][frac] = [(error, stdev)]
else:
result[label][frac].append(error)
result[label][frac].append((error, stdev))
for method in result.keys():
methodRes: dict = result[method]
for fraction in methodRes.keys():
methodRes[fraction] = np.mean(methodRes[fraction])
meanError = np.mean([i[0] for i in methodRes[fraction]])
meanStd = np.mean([i[1] for i in methodRes[fraction]])
methodRes[fraction] = (meanError, meanStd)
return result
def _get_methods_for_fraction(self, dataset: dataset.DataSet, fraction: float) -> list:
    """
    Creates all subsampling method objects for one fraction.
    :param dataset: The dataset to create the methods for
    :param fraction: The desired fraction to measure
    :return: list of measurement Objects that are applicable
    """
    particleContainer = dataset.particleContainer

    methods: list = []
    methods.append(meth.RandomSampling(particleContainer, fraction))
    methods.append(meth.SizeBinFractioning(particleContainer, fraction))

    boxCreator = gmeth.BoxSelectionCreator(dataset)
    methods.extend(boxCreator.get_crossBoxSubsamplers_for_fraction(fraction))
    methods.extend(boxCreator.get_spiralBoxSubsamplers_for_fraction(fraction))

    methods.append(cmeth.ChemometricSubsampling(particleContainer, fraction))
    return methods
class SubsamplingResult(object):
"""
Stores all interesting results from a subsampling experiment
"""
# TODO: UPDATE PATTERNS -> ARE THESE REASONABLE???
mpPatterns = ['poly', 'rubber', 'pb', 'pr', 'pg', 'py', 'pv']
# # # TODO: UPDATE PATTERNS -> ARE THESE REASONABLE???
# mpPatterns = ['poly', 'rubber', 'pb', 'pr', 'pg', 'py', 'pv']
def __init__(self, subsamplingMethod: meth.SubsamplingMethod):
super(SubsamplingResult, self).__init__()
self.method: meth.SubsamplingMethod = subsamplingMethod
self.mpCountErrors: list = []
# self.origParticleCount: int = None
# self.subSampledParticleCount: int = None
self.origParticleCount: int = 0
self.subSampledParticleCount: int = 0
self.origMPCount: int = 0
self.estimMPCounts: list = []
# self.mpCountErrorPerBin: tuple = None
@property
......@@ -137,12 +174,24 @@ class SubsamplingResult(object):
error = float(np.mean(self.mpCountErrors))
return error
@property
def mpCountErrorStDev(self) -> float:
    """
    Standard deviation of all mpCountErrors collected so far.
    :return: The standard deviation, or 0.0 if no error was recorded yet
    """
    if len(self.mpCountErrors) == 0:
        return 0.0
    return float(np.std(self.mpCountErrors))
@property
def estimMPCount(self) -> float:
    """
    Mean of all MP count estimates collected so far.
    :return: The mean of self.estimMPCounts as plain float
    """
    meanEstimate = np.mean(self.estimMPCounts)
    return float(meanEstimate)
def reset_results(self) -> None:
    """
    Discards the collected results by replacing mpCountErrors and estimMPCounts with new, empty lists.
    :return:
    """
    self.estimMPCounts = list()
    self.mpCountErrors = list()
def add_result(self, origParticles: list, subParticles: list) -> None:
"""
......@@ -151,7 +200,9 @@ class SubsamplingResult(object):
:param subParticles:
:return:
"""
self.mpCountErrors.append(self._get_mp_count_error(origParticles, subParticles, self.method.fraction))
error: float = self._get_mp_count_error(origParticles, subParticles, self.method.fraction)
self.origParticleCount = len(origParticles)
self.mpCountErrors.append(error)
def _get_mp_count_error_per_bin(self, allParticles: list, subParticles: list, fractionMeasured: float) -> tuple:
binSorter = ParticleBinSorter()
......@@ -164,7 +215,9 @@ class SubsamplingResult(object):
def _get_mp_count_error(self, allParticles: list, subParticles: list, fractionMeasured: float) -> float:
numMPOrig = self._get_number_of_MP_particles(allParticles)
self.origMPCount = numMPOrig
numMPEstimate = self._get_number_of_MP_particles(subParticles) / fractionMeasured
self.estimMPCounts.append(numMPEstimate)
if numMPOrig != 0:
mpCountError = self._get_error_from_values(numMPOrig, numMPEstimate)
......@@ -182,19 +235,16 @@ class SubsamplingResult(object):
def _get_number_of_MP_particles(self, particleList: list) -> int:
numMPParticles = 0
for particle in particleList:
assignment = particle.getParticleAssignment()
for pattern in self.mpPatterns:
if assignment.lower().find(pattern) != -1:
numMPParticles += 1
break
if is_MP_particle(particle):
numMPParticles += 1
return numMPParticles
class SampleResult(object):
"""
An object the actually stores all generated results per sample and can update and report on them.
An object the stores all generated results per sample and can update and report on them.
"""
def __init__(self, filepath: str, numVariations: int = 10):
def __init__(self, filepath: str, numVariations: int = 5):
super(SampleResult, self).__init__()
self.filepath: str = filepath
self.dataset: dataset.DataSet = None
......@@ -213,7 +263,7 @@ class SampleResult(object):
def update_result_with_methods(self, methods: list, force: bool = False) -> list:
"""
Updates result with the given method (contains desiredFraction already)
:param method: The SubsamplingMethod Object
:param methods: List of the SubsamplingMethod Objects to use
:param force: Wether to force an update. If False, the result is not updated, if it is already present.
:return: list of updated methods
"""
......@@ -221,8 +271,7 @@ class SampleResult(object):
self.load_dataset()
updatedMethods: list = []
particleVariations: ParticleVariations = ParticleVariations(self.dataset.particleContainer,
numVariations=self.numVariations)
particleVariations: ParticleVariations = ParticleVariations(self.dataset, numVariations=self.numVariations)
needsToBeUpdated: dict = {method: False for method in methods}
......@@ -247,8 +296,8 @@ class SampleResult(object):
result.add_result(method.particleContainer.particles, subParticles)
if method not in updatedMethods:
updatedMethods.append(method)
print(f'updated {self.sampleName} with {method.label} at fraction {method.fraction}, '
f'iteration {index+1}')
# print(f'updated {self.sampleName} with {method.label} at fraction {method.fraction}, '
# f'iteration {index+1}')
return updatedMethods
......@@ -295,9 +344,3 @@ class SampleResult(object):
requestedResult = result
break
return requestedResult
# def _get_result_of_method(self, method: meth.SubsamplingMethod) -> SubsamplingResult:
# return None
import numpy as np
from itertools import combinations
from methods import SubsamplingMethod
from copy import deepcopy
import sys
sys.path.append("C://Users//xbrjos//Desktop//Python")
from gepard import dataset
import helpers
from cythonModules import randoms
def box_overlaps_other_box(topLeft1: list, topLeft2: list, boxSize: float) -> bool:
    """
    Tells whether two equally sized, square boxes overlap.
    :param topLeft1: (x, y) of the top left corner of the first box
    :param topLeft2: (x, y) of the top left corner of the second box
    :param boxSize: Edge length of both boxes
    :return: True if the corners are closer than boxSize in both x and y
    """
    closeInX = abs(topLeft1[0] - topLeft2[0]) < boxSize
    # "and" short-circuits: the y coordinates are only looked at if the boxes already intersect in x
    return closeInX and abs(topLeft1[1] - topLeft2[1]) < boxSize
class BoxSelectionSubsamplingMethod(SubsamplingMethod):
possibleBoxNumbers: list = [7, 10, 15]
def __init__(self, *args):
super(BoxSelectionSubsamplingMethod, self).__init__(*args)
self.filterDiameter: float = 500
self.offset: tuple = (0, 0)
self.numBoxes: int = 1
self.maxFractions: dict = {}
@property
def label(self) -> str:
......@@ -22,6 +38,16 @@ class BoxSelectionSubsamplingMethod(SubsamplingMethod):
def filterArea(self) -> float:
    """
    Area of a circle with the diameter self.filterDiameter.
    :return: The area, in squared units of filterDiameter
    """
    radius: float = self.filterDiameter / 2
    return np.pi * radius ** 2
@property
def boxSize(self) -> float:
    """
    Edge length of a single square box. The measured area (filterArea * fraction) is divided evenly
    among numBoxes boxes.
    :return: Square root of the area of one box
    """
    areaPerBox: float = self.filterArea * self.fraction / self.numBoxes
    return areaPerBox ** 0.5
@property
def noBoxOverlap(self) -> bool:
    """
    Checks the current box layout (as returned by get_topLeft_of_boxes) for overlaps.
    :return: True if none of the boxes overlap
    """
    topLefts = self.get_topLeft_of_boxes()
    boxesOverlap = self._boxes_are_overlapping(topLefts)
    return not boxesOverlap
def apply_subsampling_method(self) -> list:
def distanceToCnt(topleft: tuple):
return abs(topleft[0] - cntStart[0]) + abs(topleft[1] - cntStart[1])
......@@ -37,7 +63,6 @@ class BoxSelectionSubsamplingMethod(SubsamplingMethod):
for topLeftXY in sortedTopLefts:
if helpers.box_overlaps_contour(topLeftXY, boxWidthHeight, particle.contour):
subParticles.append(particle)
break
return subParticles
......@@ -55,8 +80,48 @@ class BoxSelectionSubsamplingMethod(SubsamplingMethod):
newTopLefts.append((topLeft[0] + self.offset[0], topLeft[1] + self.offset[1]))
return newTopLefts
def equals(self, otherMethod) -> bool:
    """
    Not implemented here, calling it always raises NotImplementedError.
    NOTE(review): presumably subclasses are expected to override this - confirm.
    :param otherMethod: The method object to compare with
    :raises NotImplementedError: always
    """
    raise NotImplementedError
def _boxes_are_overlapping(self, topLefts: list) -> bool:
"""
Calculates if there is any overlap of the boxes
:return:
"""
overlaps: bool = False
boxSize = self.boxSize
for topLeft1, topLeft2 in combinations(topLefts, 2):
if box_overlaps_other_box(topLeft1, topLeft2, boxSize):
overlaps = True
break
return overlaps
def _get_max_distance_of_box_to_center(self, boxCenter: tuple, center: tuple = (0, 0)) -> float:
"""
Calculates the maximal distance of all point in a box to the given center
:param boxCenter:
:param center:
:return:
"""
center = np.array(center)
boxSize = self.boxSize
coords: np.ndarray = np.array([[boxCenter[0] - 0.5 * boxSize, boxCenter[1] - 0.5 * boxSize],
[boxCenter[0] + 0.5 * boxSize, boxCenter[1] - 0.5 * boxSize],
[boxCenter[0] - 0.5 * boxSize, boxCenter[1] + 0.5 * boxSize],
[boxCenter[0] + 0.5 * boxSize, boxCenter[1] + 0.5 * boxSize]])
distances: np.ndarray = np.linalg.norm(coords - center, axis=1)
return np.max(distances)
def get_maximum_achievable_fraction(self) -> float:
    """
    Looks up the maximum achievable fraction for the current numBoxes in the maxFractions cache.
    An empty cache is filled by update_max_fractions first; a still missing entry is calculated and stored.
    :return: The cached maximum fraction for self.numBoxes
    """
    if not self.maxFractions:
        self.update_max_fractions()

    if self.numBoxes not in self.maxFractions:
        self.maxFractions[self.numBoxes] = determine_max_achievable_frac(self, self.numBoxes)

    return self.maxFractions[self.numBoxes]
def update_max_fractions(self) -> None:
    """
    (Re-)calculates the maxFractions entry of every box number in possibleBoxNumbers.
    :return:
    """
    # entries are written one by one, as the helper gets passed self and might read the cache
    for numberOfBoxes in self.possibleBoxNumbers:
        maxFraction = determine_max_achievable_frac(self, numberOfBoxes)
        self.maxFractions[numberOfBoxes] = maxFraction
class BoxSelectionCreator(object):
......@@ -71,10 +136,7 @@ class BoxSelectionCreator(object):
:return list of CrossBoxSubsamplers:
"""
crossBoxSubsamplers = []
offset, diameter, widthHeight = helpers.get_filterDimensions_from_dataset(self.dataset)
diameter = helpers.convert_length_to_pixels(self.dataset, diameter)
offset = helpers.convert_length_to_pixels(self.dataset, offset[0]), \
helpers.convert_length_to_pixels(self.dataset, offset[1])
diameter, offset = self._get_diameter_and_offset()
for numBoxesAcross in [3, 5]:
newBoxSelector: CrossBoxSubSampling = CrossBoxSubSampling(self.dataset.particleContainer, desiredFraction)
......@@ -82,8 +144,7 @@ class BoxSelectionCreator(object):
newBoxSelector.offset = offset
newBoxSelector.numBoxesAcross = numBoxesAcross
maxFraction: float = newBoxSelector.get_maximum_achievable_fraction()
if desiredFraction <= maxFraction:
if newBoxSelector.config_is_valid():
crossBoxSubsamplers.append(newBoxSelector)
return crossBoxSubsamplers
......@@ -95,10 +156,7 @@ class BoxSelectionCreator(object):
:return list of SpiralBoxSelectors:
"""
spiralBoxSubsamplers = []
offset, diameter, widthHeight = helpers.get_filterDimensions_from_dataset(self.dataset)
diameter = helpers.convert_length_to_pixels(self.dataset, diameter)
offset = helpers.convert_length_to_pixels(self.dataset, offset[0]), \
helpers.convert_length_to_pixels(self.dataset, offset[1])
diameter, offset = self._get_diameter_and_offset()
for numBoxes in SpiralBoxSubsampling.possibleBoxNumbers:
newBoxSelector: SpiralBoxSubsampling = SpiralBoxSubsampling(self.dataset.particleContainer, desiredFraction)
......@@ -106,11 +164,52 @@ class BoxSelectionCreator(object):
newBoxSelector.offset = offset
newBoxSelector.numBoxes = numBoxes
if newBoxSelector.noBoxOverlap:
if newBoxSelector.config_is_valid():
spiralBoxSubsamplers.append(newBoxSelector)
return spiralBoxSubsamplers
def get_randomBoxSubsamplers_for_fraction(self, desiredFraction: float) -> list:
    """
    Creates a RandomBoxSampling object for each of its possibleBoxNumbers that yields a valid configuration.
    :param desiredFraction: The desired fraction to measure
    :return: list of RandomBoxSampling objects, configured for the dataset of this creator
    """
    return self._create_valid_box_samplers(RandomBoxSampling, desiredFraction)

def get_randomQuarterBoxSubsamplers_for_fraction(self, desiredFraction: float) -> list:
    """
    Creates a RandomQuarterBoxes object for each of its possibleBoxNumbers that yields a valid configuration.
    :param desiredFraction: The desired fraction to measure
    :return: list of RandomQuarterBoxes objects, configured for the dataset of this creator
    """
    return self._create_valid_box_samplers(RandomQuarterBoxes, desiredFraction)

def _create_valid_box_samplers(self, samplerClass, desiredFraction: float) -> list:
    """
    Shared implementation of the random box sampler creation.
    :param samplerClass: The sampler class to instantiate; it is called as samplerClass(None, desiredFraction)
    :param desiredFraction: The desired fraction to measure
    :return: list with one configured sampler per valid box number
    """
    validSamplers: list = []
    diameter, offset = self._get_diameter_and_offset()

    # A template without particleContainer is validated per box number; only valid configurations are
    # copied and get the actual particleContainer, filter diameter and offset assigned.
    templateSampler = samplerClass(None, desiredFraction)
    templateSampler.update_max_fractions()
    for numBoxes in templateSampler.possibleBoxNumbers:
        templateSampler.numBoxes = numBoxes
        if templateSampler.config_is_valid():
            newSampler = deepcopy(templateSampler)
            newSampler.particleContainer = self.dataset.particleContainer
            newSampler.filterDiameter = diameter
            newSampler.offset = offset
            validSamplers.append(newSampler)

    return validSamplers
def _get_diameter_and_offset(self) -> tuple:
offset, diameter, widthHeight = helpers.get_filterDimensions_from_dataset(self.dataset)