......@@ -20,3 +20,7 @@ cythonModules/build/
chemometrics/Assignments.txt
chemometrics/Data.txt
chemometrics/Assignments_all.txt
chemometrics/Data_all.txt
......@@ -5,7 +5,7 @@ from sklearn.decomposition import PCA
from sklearn.cluster import DBSCAN
# from scipy import spatial
# from itertools import combinations
from random import sample
from random import sample, random
import pickle
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
......@@ -19,7 +19,6 @@ from gepard.helperfunctions import cv2imread_fix
from methods import SubsamplingMethod
from helpers import timingDecorator
def get_pca(data: np.ndarray, numComp: int = 2) -> np.ndarray:
try:
standardizedData = StandardScaler().fit_transform(data.copy())
......@@ -224,7 +223,7 @@ def get_curvature_ft(contour: np.ndarray, angularSegment: float = 20, numHarmoni
tan_before: float = 0
if x - x_before != 0:
tan_before = np.rad2deg(np.arctan((y-y_before) / (x-x_before)))
tanr_before = np.rad2deg(np.arctan((y-y_before) / (x-x_before)))
tan_after: float = 0
if x_after - x != 0:
......@@ -252,12 +251,18 @@ def get_curvature_ft(contour: np.ndarray, angularSegment: float = 20, numHarmoni
class TrainedSubsampling(SubsamplingMethod):
def __init__(self, particleContainer: ParticleContainer, desiredFraction: float,
path: str = r'C:\Users\xbrjos\Desktop\Python\Subsampling\chemometrics\RandomForestClassifier, score 0.79.pkl'):
path: str = r'C:\Users\xbrjos\Desktop\Python\Subsampling\chemometrics\RandomForestClassifier, score 0.79.pkl',
fakeScore: float = 0.8):
super(TrainedSubsampling, self).__init__(particleContainer, desiredFraction)
self.score: float = None
self.clf = None
self.clfPath: str = path
self.fraction = desiredFraction
self.fakeClassifier: bool = True
self.fakeScore: float = fakeScore
self.fractionForExtrapolation: float = 0.0
self.predictedMPIndices: list = []
self._predict_MP_Indices()
def equals(self, otherMethod) -> bool:
isEqual: bool = False
......@@ -266,15 +271,18 @@ class TrainedSubsampling(SubsamplingMethod):
isEqual = True
return isEqual
@property
def label(self) -> str:
return 'Trained Random Sampling'
def get_maximum_achievable_fraction(self) -> float:
return 1.0
label: str = 'Dummy Trained Random Sampling'
if self.fakeClassifier:
label += f' (score {self.fakeScore})'
else:
label += f' (score {self.score})'
return label
def apply_subsampling_method(self) -> list:
def _predict_MP_Indices(self) -> None:
from evaluation import is_MP_particle
if not self.fakeClassifier:
self._load_classifier()
fullimgpath: str = r'C:\Users\xbrjos\Desktop\temp MP\Fullimages'
dsetname: str = self.particleContainer.datasetParent.name
......@@ -283,12 +291,43 @@ class TrainedSubsampling(SubsamplingMethod):
features: np.ndarray = get_particle_featurematrix(self.particleContainer, fullimg)
predictions: np.ndarray = self.clf.predict(features)
indicesToSelect: set = self._get_measure_indices(list(predictions))
else:
self.score = self.fakeScore
particles: list = self.particleContainer.particles
predictions: np.ndarray = np.zeros(len(particles))
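# simulate a classifier of the given score: an MP particle is correctly flagged
# with probability fakeScore, a non-MP particle is falsely flagged as MP with
# probability (1 - fakeScore)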
for index, particle in enumerate(particles):
if is_MP_particle(particle):
if random() <= self.fakeScore:
predictions[index] = 1
else:
if random() > self.fakeScore:
predictions[index] = 1
mpIndices = list(np.where(predictions == 1)[0])
nonMPIndices = list(np.where(predictions == 0)[0])
numNonMPIndices = len(nonMPIndices)
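# linear ramp: take none of the non-MP particles below a desired fraction of 0.05
# and all of them from 0.15 upwards (the expression is clipped to [0, 1])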
fracNonMPToTake: float = float(np.clip(-0.5 + 1/0.1 * self.fraction, 0.0, 1.0))
numNonMPToTake: int = int(round(fracNonMPToTake * numNonMPIndices))
self.predictedMPIndices = mpIndices + sample(nonMPIndices, numNonMPToTake)
def get_maximum_achievable_fraction(self) -> float:
maxFrac: float = 0.10
return maxFrac
def apply_subsampling_method(self) -> list:
numParticlesToSelect = round(len(self.particleContainer.particles) * self.fraction)
if numParticlesToSelect > len(self.predictedMPIndices):
numParticlesToSelect = len(self.predictedMPIndices)
indicesToSelect = sample(self.predictedMPIndices, numParticlesToSelect)
selectedParticles: list = []
for particle in self.particleContainer.particles:
if particle.index in indicesToSelect:
selectedParticles.append(particle)
for index in indicesToSelect:
selectedParticles.append(self.particleContainer.getParticleOfIndex(index))
numOrigParticles = len(self.particleContainer.particles)
numEnhancedParticles = len(self.predictedMPIndices)
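# the subsampling ran only on the MP-enriched subset, so the requested fraction is
# rescaled to the full particle population for the later MP-count extrapolation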
self.fractionForExtrapolation = self.fraction * (numOrigParticles/numEnhancedParticles)
return selectedParticles
def _load_classifier(self) -> None:
......@@ -306,12 +345,12 @@ class TrainedSubsampling(SubsamplingMethod):
mpIndices: list = list(np.where(assignments == 1)[0])
nonMpIndices: list = list(np.where(assignments == 0)[0])
numEstimMPParticles: int = len(mpIndices)
numPredictedMP: int = len(mpIndices)
numParticlesToMeasure = round(len(predictedAssignments) * self.fraction)
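# prefer the predicted MP particles; only fill up with randomly drawn non-MP
# particles when the requested fraction exceeds the number of predicted MP particles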
if numParticlesToMeasure <= numEstimMPParticles:
if numParticlesToMeasure <= numPredictedMP:
indicesToMeasure = set(sample(mpIndices, numParticlesToMeasure))
else:
remainingIndices: int = int(numParticlesToMeasure - numEstimMPParticles)
remainingIndices: int = int(numParticlesToMeasure - numPredictedMP)
indicesToMeasure = set(mpIndices + sample(nonMpIndices, remainingIndices))
assert len(indicesToMeasure) == numParticlesToMeasure
......@@ -324,11 +363,18 @@ class TrainedSubsampling(SubsamplingMethod):
It is used for extrapolating the mpCount of the subsampled particle list.
:return: the fraction to use when extrapolating the MP count of the subsample to the full sample
"""
score: float = self.score
diff: float = 1/self.fraction - 1 # i.e., from 50 % score to 100 % score
factor: float = 1 + (1 - score)/0.5 * diff
return 1 / factor
# return self.fraction
# score: float = self.score
# diff: float = 1/self.fraction - 1 # i.e., from 50 % score to 100 % score
# factor: float = 1 + (1 - score)/0.5 * diff
# pow: float = (1-self.fakeScore)
# theoreticFactor: float = (1/factor**pow)
# print('actual fraction, theor. factor is', self.fraction, theoreticFactor)
# return theoreticFactor
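# for reference: the factor formula above yields factor = 1 at score 1.0 (a
# perfect classifier effectively measures every MP particle) and factor =
# 1/fraction at score 0.5 (random guessing, no benefit over random subsampling);
# e.g. fraction = 0.1, score = 0.8: diff = 9, factor = 1 + 0.4*9 = 4.6, 1/factor ≈ 0.22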
return self.fractionForExtrapolation
# class ChemometricSubsampling(SubsamplingMethod):
......@@ -464,3 +510,27 @@ class TrainedSubsampling(SubsamplingMethod):
# for clusterIndex in pointsPerCluster.keys():
# assert 0 <= pointsPerCluster[clusterIndex] <= len(labels[labels == clusterIndex])
# return pointsPerCluster
if __name__ == '__main__':
import matplotlib.pyplot as plt
fractions: np.ndarray = np.linspace(0.01, 1, 100)
scores: np.ndarray = np.linspace(0.5, 1.0, 5)
plt.clf()
for score in scores:
# if score == 0.5:
# theorFractions = fractions
# a, b, n = 1, 1, 1.5
# data1 = a * fractions**n / (fractions**n + b)
# data1 -= data1.min()
# data1 /= data1.max()
# theorFactors = 0.5 + 0.5*data1
theorFractions = []
for frac in fractions:
diff: float = 1 / frac - 1 # i.e., from 50 % score to 100 % score
factor: float = 1 + (1 - score) / 0.5 * diff
theorFractions.append(1/factor**0.2)
plt.plot(fractions, theorFractions, label=str(score))
plt.legend()
plt.show()
\ No newline at end of file
import sys
sys.path.append("C://Users//xbrjos//Desktop//Python")
from gepard.helperfunctions import cv2imread_fix
from gepard.dataset import loadData
from gepard.dataset import DataSet, loadData
from gepard.analysis.particleContainer import ParticleContainer
import cv2
import numpy as np
......@@ -9,43 +9,73 @@ from scipy import spatial
import os
import matplotlib.pyplot as plt
from helpers import get_filterDimensions_from_dataset, get_center_from_filter_dimensions, convert_length_to_pixels
from graphs import get_distance_point_histogramdata
# from evaluation import is_MP_particle
import evaluation
def get_particle_patchiness(dataset: DataSet, numCells: int = 50, onlyMP=False) -> float:
offset, diameter, [width, height] = get_filterDimensions_from_dataset(dataset)
center: np.ndarray = get_center_from_filter_dimensions(offset, diameter)
width: float = convert_length_to_pixels(dataset, width)
height: float = convert_length_to_pixels(dataset, height)
pixelsPerTile: int = max(int(round(width/numCells)), int(round(height/numCells)))
centerX: int = int(round(convert_length_to_pixels(dataset, center[0] / pixelsPerTile)))
centerY: int = int(round(convert_length_to_pixels(dataset, center[1] / pixelsPerTile)))
radius: int = int(round(convert_length_to_pixels(dataset, diameter / pixelsPerTile * 0.5)))
numRows: int = int(np.ceil(height / pixelsPerTile)) + 1
numCols: int = int(np.ceil(width / pixelsPerTile)) + 1
partCount: int = 0
densityImage: np.ndarray = np.zeros((numRows, numCols))
for particle in dataset.particleContainer.particles:
if (onlyMP and evaluation.is_MP_particle(particle)) or not onlyMP:
particleCenter: tuple = np.mean(particle.contour[:, 0, 0]), np.mean(particle.contour[:, 0, 1])
row: int = int(round(particleCenter[1] / pixelsPerTile))
col: int = int(round(particleCenter[0] / pixelsPerTile))
densityImage[row, col] += 1
partCount += 1
mask: np.ndarray = np.zeros_like(densityImage)
cv2.circle(mask, (centerY, centerX), radius, 1, -1)
relevantData: np.ndarray = densityImage[mask > 0]
mean: np.ndarray = np.round(np.mean(relevantData), 2)
std: np.ndarray = np.round(np.std(relevantData), 2)
ratio: float = round(std/mean, 2)
# plt.imshow(densityImage)
# plt.title(f'MP particle count: {partCount},\ndensity mean: {mean}, density std: {std},\npatchiness = {ratio}')
# plt.axis('off')
# plt.tight_layout()
# plt.show()
# print(f'sample: {dataset.name}, mean: {mean}, std: {std}, ratio = {ratio}')
return ratio
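# A minimal sketch of the same statistic on plain 2D points (the helper name
# _patchiness_of_points is illustrative and not part of the module): patchiness
# is the coefficient of variation (std/mean) of per-tile particle counts.
def _patchiness_of_points(xy: np.ndarray, numCells: int = 50) -> float:
    # bin the points into a numCells x numCells grid and take std/mean of the counts
    counts, _, _ = np.histogram2d(xy[:, 0], xy[:, 1], bins=numCells)
    return float(np.std(counts) / np.mean(counts))
# uniformly scattered points tend towards 1/sqrt(mean count per cell) (Poisson
# noise), clustered points give considerably larger values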
if __name__ == '__main__':
# imgpath: str = r'C:\Users\xbrjos\Desktop\temp MP\Fullimages'
# imgname: str = '181120_MCI_2_ds1+2_all_ kleiner500_10_1.tif'
# imgname: str = '190619_5_PTPH_sld_190321_ds1_50_1_neu.tif'
#
#191213_P190814_TPHZ_ds1_50_1
# img: np.ndarray = cv2imread_fix(os.path.join(imgpath, imgname))
# gray: np.ndarray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
# _, binimg = cv2.threshold(gray, 20, 1, cv2.THRESH_BINARY_INV)
# distmap: np.ndarray = cv2.distanceTransform(binimg, cv2.DIST_L1, 3)
# plt.imshow(distmap, cmap='gray')
paths: list = [r'C:\Users\xbrjos\Desktop\temp MP\NewDatasets\water\181120_MCI_2_ds1+2_all_ kleiner500_10_1.pkl',
r'C:\Users\xbrjos\Desktop\temp MP\NewDatasets\wastewater, slush\190619_5_PTPH_sld_190321_ds1_50_1_neu.pkl']
# paths.append(r'C:\Users\xbrjos\Desktop\temp MP\NewDatasets\water\190306_MCII_1_2_50.pkl')
# paths.append(r'C:\Users\xbrjos\Desktop\temp MP\NewDatasets\water\190222_MCII_1_1_50_1.pkl')
r'C:\Users\xbrjos\Desktop\temp MP\NewDatasets\wastewater, water\191213_P190814_TPHZ_ds1_50_1.pkl',
r'C:\Users\xbrjos\Desktop\temp MP\NewDatasets\Air\191119_RW6_Solling_50_2_neu.pkl']
distances: list = []
allParticles = []
for path in paths:
dset = loadData(path)
particleContainer: ParticleContainer = dset.particleContainer
# particleCenters: list = []
# for particle in particleContainer.particles:
# particleCenters.append([np.mean(particle.contour[:, 0, 0]), np.mean(particle.contour[:, 0, 1])])
#
# closest_particle_distances: np.ndarray = np.zeros(len(particleCenters))
# particleCenters: np.ndarray = np.array(particleCenters)
# print('particle centers done')
# distMat: np.ndarray = spatial.distance_matrix(particleCenters, particleCenters)
# print('distmat computed')
# for i in range(distMat.shape[0]):
# if i == 0:
# closest_particle_distances[i] = np.min(distMat[i, 1:])
# elif i == distMat.shape[0]-1:
# closest_particle_distances[i] = np.min(distMat[i, :-1])
# else:
# closest_particle_distances[i] = np.min([np.min(distMat[i, :i]), np.min(distMat[i, i+1:])])
# distances.append(closest_particle_distances)
# plt.boxplot(distances)
get_particle_patchiness(dset, 50, onlyMP=True)
for particle in dset.particleContainer.particles:
if evaluation.is_MP_particle(particle):
allParticles.append(particle.getParticleAssignment())
print(set(allParticles))
\ No newline at end of file
import numpy as np
cimport numpy as np
cimport cython
from cython cimport boundscheck, wraparound
@boundscheck(False)
@wraparound(False)
cdef bint box_overlaps_contour(unsigned int[:] boxTopLeftXY, unsigned int boxSize, unsigned int[:, :, :] contourData):
cdef bint isOverlapping = False
cdef unsigned int xmin, xmax, width, boxXmin, boxXmax, ymin, ymax, height, boxYmin, boxYmax
xmin = np.min(contourData[:, 0, 0])
xmax = np.max(contourData[:, 0, 0])
width = xmax - xmin
boxXmin = boxTopLeftXY[0]
boxXmax = boxTopLeftXY[0] + boxSize
if xmin > (boxXmin-width/2):
if xmax < (boxXmax+width/2):
ymin = np.min(contourData[:, 0, 1])
ymax = np.max(contourData[:, 0, 1])
height = ymax - ymin
boxYmin = boxTopLeftXY[1]
boxYmax = boxTopLeftXY[1] + boxSize
if ymin > (boxYmin-height/2):
if ymax < (boxYmax+height/2):
isOverlapping = True
return isOverlapping
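# note: the check above pads the box by half of the contour's bounding-box extent
# on each side, so a contour counts as overlapping once its bounding box lies
# within the padded box; a fast approximation, not an exact intersection test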
@boundscheck(False)
@wraparound(False)
def def_get_indices_of_overlapping_particles(contours not None, unsigned int[:, :] topLefts, unsigned int boxSize):
cdef Py_ssize_t i, j
cdef unsigned int counter, numParticles, numTopLefts
numParticles = len(contours)
numTopLefts = topLefts.shape[0]
cdef unsigned int[:] overlappingIndices = np.zeros(numParticles, dtype=np.uint32)
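# worst case: every particle overlaps at least one box, hence a buffer of numParticles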
cdef unsigned int[:, :, :] currentContour
counter = 0
for i in range(numParticles):
currentContour = contours[i]
for j in range(numTopLefts):
if box_overlaps_contour(topLefts[j, :], boxSize, currentContour):
overlappingIndices[counter] = i
counter += 1
break
return overlappingIndices[:counter]
......@@ -30,7 +30,7 @@ def get_random_topLefts(int numBoxes, double boxSize, double radius, double maxA
cdef int outerCounter, counter, x, y, i, j, diffX, diffY, successfullyAdded
cdef bint validSolutionFound, boxOverlaps
srand(42) # setting seed
srand(seed) # setting seed
assert RAND_MAX == 32767 # this value is used by the rand() calls above; for performance reasons it is hard-coded there as a literal
maxDist = radius - np.sqrt((boxSize/2)**2 + (boxSize/2)**2)
outerCounter = 0
......
......@@ -19,8 +19,14 @@ if len(sys.argv) == 1:
# ext = Extension("getRandomTopLefts", ["getRandomTopLefts.pyx"], extra_compile_args=['-O3'],)
# setup(
# name="get a given number of random topLefts",
# ext_modules=cythonize("randoms.pyx", annotate=True), # accepts a glob pattern
# include_dirs=[np.get_include()]
# )
setup(
name="get a given number of random topLefts",
ext_modules=cythonize("randoms.pyx", annotate=True), # accepts a glob pattern
name="checks which particle contours overlap the boxes",
ext_modules=cythonize("particleBoxOverlap.pyx", annotate=True), # accepts a glob pattern
include_dirs=[np.get_include()]
)
\ No newline at end of file
......@@ -8,6 +8,7 @@ Created on Wed Jan 22 13:57:28 2020
# import pickle
import os
import numpy as np
import time
# import matplotlib.pyplot as plt
import concurrent.futures
import operator
......@@ -20,6 +21,7 @@ from helpers import ParticleBinSorter
import methods as meth
import geometricMethods as gmeth
from chemometrics import chemometricMethods as cmeth
from chemometrics.imageOperations import get_particle_patchiness
from datasetOperations import ParticleVariations
......@@ -33,8 +35,9 @@ def get_methods_to_test(dataset: dataset.DataSet, fractions: list = [], maxTries
:return: list of measurement objects (subsampling methods) that are applicable
"""
if len(fractions) == 0:
fractions: list = [0.02, 0.05, 0.1, 0.25, 0.5, 0.7, 0.9]
# fractions: list = [0.1, 0.3, 0.5]
fractions: list = [0.02, 0.03, 0.05, 0.1, 0.25, 0.5, 0.7, 0.9]
# fractions: list = [0.02, 0.06, 0.15, 0.2, 0.5]
# fractions: list = [0.01, 0.1, 0.5, 0.9]
methods: list = []
particleContainer = dataset.particleContainer
......@@ -47,15 +50,19 @@ def get_methods_to_test(dataset: dataset.DataSet, fractions: list = [], maxTries
methods += boxCreator.get_spiralBoxSubsamplers_for_fraction(fraction)
methods += boxCreator.get_randomBoxSubsamplers_for_fraction(fraction, maxTries=maxTries)
methods += boxCreator.get_randomQuarterBoxSubsamplers_for_fraction(fraction, maxTries=maxTries)
# methods.append(cmeth.TrainedSubsampling(particleContainer, fraction))
# methods.append(cmeth.ChemometricSubsampling(particleContainer, fraction))
for fakeScore in [0.6, 0.7, 0.8]:
trainedSampling = cmeth.TrainedSubsampling(particleContainer, fraction, fakeScore=fakeScore)
if trainedSampling.config_is_valid():
methods.append(trainedSampling)
return methods
def update_sample(sample, force: bool, index: int):
sample.load_dataset()
t0 = time.time()
methods: list = get_methods_to_test(sample.dataset)
print('getting methods for sample', sample.dataset.name, 'took', round(time.time()-t0, 2), 'seconds')
sample.update_result_with_methods(methods, force)
return sample, index
......@@ -131,19 +138,38 @@ class TotalResults(object):
self.sampleResults[index] = updatedResult
print(f'done updating {updatedResult.dataset.name} at index {index}')
def get_error_vs_fraction_data(self, attributes: list = [], methods: list = []) -> dict:
def get_error_vs_fraction_data(self, attributes: list = [], methods: list = [], partCount: tuple = (0, np.inf)) -> tuple:
"""
:param attributes: A list of attributes used for filtering the samples. Only samples having an
attribute from that list are considered.
:param methods: A list of method labels to extract; only matching methods are included.
:param partCount: Tuple (min particle count, max particle count); only samples within that range are considered.
:return: Tuple (stats dict, result dict). Result dict: Key: method label,
Value: {Key: measured fraction, Value: tuple (averaged MPCountError, stdev MPCountError) over all samples}
"""
result: dict = {}
allParticles: list = []
numSamples: int = 0
for sample in self.sampleResults:
sample: SampleResult = sample
if attributes == [] or sample.has_any_attribute(attributes):
if partCount[0] == 0 and partCount[1] == np.inf:
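# no particle-count filter requested; use a dummy count so the dataset does not
# have to be loaded just for filtering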
samplePartCount: int = 1
# samplePatchiness: float = 1.0 # doesn't matter in this case
else:
if sample.dataset is None:
sample.load_dataset()
samplePartCount: int = len(sample.dataset.particleContainer.particles)
# samplePatchiness: float = sample.get_patchiness()
# print(sample.sampleName, samplePatchiness)
if partCount[0] <= samplePartCount < partCount[1]:
numSamples += 1
if sample.dataset is None:
sample.load_dataset()
for particle in sample.dataset.particleContainer.particles:
allParticles.append(particle)
for res in sample.results:
res: SubsamplingResult = res
method: meth.SubsamplingMethod = res.method
......@@ -160,6 +186,11 @@ class TotalResults(object):
else:
result[label][frac].append((error, stdev))
numMPParticles: int = get_number_of_MP_particles(allParticles)
stats: dict = {'numSamples': numSamples,
'meanParticleCount': round(len(allParticles) / numSamples),
'meanMPFrac': round(numMPParticles / len(allParticles) * 100, 1)}
for method in result.keys():
methodRes: dict = result[method]
for fraction in methodRes.keys():
......@@ -167,7 +198,7 @@ class TotalResults(object):
meanStd = np.mean([i[1] for i in methodRes[fraction]])
methodRes[fraction] = (meanError, meanStd)
return numSamples, result
return stats, result
class SubsamplingResult(object):
......@@ -262,13 +293,14 @@ class SampleResult(object):
"""
An object the stores all generated results per sample and can update and report on them.
"""
def __init__(self, filepath: str, numVariations: int = 10):
def __init__(self, filepath: str, numVariations: int = 20):
super(SampleResult, self).__init__()
self.filepath: str = filepath
self.dataset: dataset.DataSet = None
self.results: list = []
self.attributes: list = []
self.numVariations: int = numVariations # how often the sample is altered for each method
self.patchiness: float = -1
@property
def sampleName(self) -> str:
......@@ -278,6 +310,13 @@ class SampleResult(object):
self.dataset = dataset.loadData(self.filepath)
assert self.dataset is not None
def get_patchiness(self) -> float:
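# the hasattr check keeps compatibility with SampleResult objects that were
# pickled before the patchiness attribute was introduced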
if not hasattr(self, "patchiness") or self.patchiness == -1:
if self.dataset is None:
self.load_dataset()
self.patchiness = get_particle_patchiness(self.dataset)
return self.patchiness
def update_result_with_methods(self, methods: list, force: bool = False) -> list:
"""
Updates result with the given method (contains desiredFraction already)
......@@ -289,8 +328,8 @@ class SampleResult(object):
self.load_dataset()
updatedMethods: list = []
t0 = time.time()
particleVariations: ParticleVariations = ParticleVariations(self.dataset, numVariations=self.numVariations)
needsToBeUpdated: dict = {method: False for method in methods}
for index, particleContainer in enumerate(particleVariations.get_particleContainer_variations()):
......@@ -312,11 +351,10 @@ class SampleResult(object):
if needsToBeUpdated[method]:
subParticles = method.apply_subsampling_method()
result.add_result(method.particleContainer.particles, subParticles)
if method not in updatedMethods:
updatedMethods.append(method)
# print(f'updated {self.sampleName} with {method.label} at fraction {method.fraction}, '
# f'iteration {index+1}')
# f'iteration {index+1}, took {round(time.time()-t0, 2)}, seconds')
print(f'finished updating sample {self.sampleName}, it took {round(time.time()-t0, 2)} seconds')
return updatedMethods
def set_attribute(self, newAttribute: str) -> None:
......
import time
import numpy as np
from itertools import combinations
from methods import SubsamplingMethod
......@@ -6,7 +7,7 @@ import sys
sys.path.append("C://Users//xbrjos//Desktop//Python")
from gepard import dataset
import helpers
from cythonModules import randoms
from cythonModules import randoms, particleBoxOverlap
def box_overlaps_other_box(topLeft1: list, topLeft2: list, boxSize: float) -> bool:
......@@ -53,9 +54,19 @@ class BoxSelectionSubsamplingMethod(SubsamplingMethod):
return abs(topleft[0] - cntStart[0]) + abs(topleft[1] - cntStart[1])
subParticles: list = []
topLefts: list = self.get_topLeft_of_boxes()
boxWidthHeight: tuple = (self.boxSize, self.boxSize)
topLefts: np.ndarray = np.array(self.get_topLeft_of_boxes())
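# toggle for the compiled particleBoxOverlap implementation; the pure-python
# branch below is the reference behaviour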
cython: bool = False
if cython:
topLefts = np.round(topLefts).astype(np.uint32)
# contours: np.ndarray = np.array(self.particleContainer.getParticleContours())
contours = [cnt.astype(np.uint32) for cnt in self.particleContainer.getParticleContours()]
boxSize: np.uint32 = np.uint32(round(self.boxSize))
indices = particleBoxOverlap.def_get_indices_of_overlapping_particles(contours, topLefts, boxSize)
for index in indices:
subParticles.append(self.particleContainer.getParticleOfIndex(index))
else:
boxWidthHeight: tuple = (self.boxSize, self.boxSize)
for particle in self.particleContainer.particles:
cntStart: tuple = (particle.contour[0, 0, 0], particle.contour[0, 0, 1])
sortedTopLefts = sorted(topLefts, key=distanceToCnt)
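# test the boxes closest to the contour start first, so the loop below can break early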
......@@ -63,6 +74,7 @@ class BoxSelectionSubsamplingMethod(SubsamplingMethod):
for topLeftXY in sortedTopLefts:
if helpers.box_overlaps_contour(topLeftXY, boxWidthHeight, particle.contour):
subParticles.append(particle)
break
return subParticles
......@@ -263,6 +275,7 @@ class CrossBoxSubSampling(BoxSelectionSubsamplingMethod):
numBoxes: int = 2 * self.numBoxesAcross - 1
totalBoxArea: float = numBoxes * (maxBoxSize ** 2)
maxFraction: float = totalBoxArea / self.filterArea
return maxFraction
def equals(self, otherMethod) -> bool:
......@@ -386,7 +399,7 @@ class RandomBoxSampling(BoxSelectionSubsamplingMethod):
@property
def label(self) -> str:
return f'Boxes random layout ({self.numBoxes} boxes)'
return f'Boxes Random layout ({self.numBoxes} boxes)'
def equals(self, otherMethod) -> bool:
equals: bool = False
......@@ -396,7 +409,6 @@ class RandomBoxSampling(BoxSelectionSubsamplingMethod):
return equals
def get_topLeft_of_boxes(self) -> list:
valid, topLefts = randoms.get_random_topLefts(self.numBoxes, self.boxSize,
self.filterDiameter/2, self.__maxAngle,
seed=self.randomSeed, maxTries=self.maxTries)
......@@ -413,7 +425,7 @@ class RandomQuarterBoxes(RandomBoxSampling):
@property
def label(self) -> str:
return f'Boxes random layout (quarter) ({self.numBoxes} boxes)'
return f'Boxes Random layout (quarter) ({self.numBoxes} boxes)'
def determine_max_achievable_frac(method: BoxSelectionSubsamplingMethod, numBoxes: int) -> float:
......
......@@ -172,3 +172,14 @@ def convert_length_to_pixels(dataset: dataset.DataSet, length: float) -> float:
pixelScale: float = (dataset.pixelscale_df if imgMode == 'df' else dataset.pixelscale_bf)
length /= pixelScale
return length
if __name__ == '__main__':
counts = [2600, 14662, 9472, 16533]
mpfracs = [4.7, 0.9, 0.5, 0.5]
errorMargins = [0.2]
sigma = 0.7
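# get_Anger_fraction is assumed to be defined elsewhere in this module; for each
# (particle count, MP fraction, error margin) combination the loop prints the
# estimated number of particles to measure and the resulting measured fraction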
for count, mpFrac in zip(counts, mpfracs):
for margin in errorMargins:
angerFrac = get_Anger_fraction(count, sigma=sigma, mpFraction=mpFrac/100, errorMargin=margin)
print(count, mpFrac, margin, angerFrac, angerFrac / count)
\ No newline at end of file
......@@ -4,11 +4,11 @@ import time
from evaluation import TotalResults, SampleResult
from input_output import get_pkls_from_directory, get_attributes_from_foldername, save_results, load_results
from graphs import get_error_vs_frac_plot, get_distance_hist_plots
from graphs import get_error_vs_frac_plot, get_distance_hist_plots, get_error_vs_mpfrac_plot
"""
IMPORTANT!!!
SET GEPARD TO EVALUATION BRANCH (WITHOUT THE TILING STUFF), OTHERWISE SOME OF THE LEGACY CONVERTS MIGHT FAIL..
SET GEPARD TO DEVELOPMENT BRANCH (WITHOUT THE TILING STUFF), OTHERWISE SOME OF THE LEGACY CONVERSIONS MIGHT FAIL!
"""
if __name__ == '__main__':
......@@ -17,7 +17,7 @@ if __name__ == '__main__':
# counter = 0
# for folder in pklsInFolders.keys():
# for samplePath in pklsInFolders[folder]:
# if counter < 100:
# if counter < 50:
# newSampleResult: SampleResult = results.add_sample(samplePath)
# for attr in get_attributes_from_foldername(folder):
# newSampleResult.set_attribute(attr)
......@@ -27,18 +27,22 @@ if __name__ == '__main__':
# results.update_all(multiprocessing=True)
# print('updating all took', time.time()-t0, 'seconds')
#
# save_results('results2.res', results)
results: TotalResults = load_results('results2.res')
#
plot: Figure = get_error_vs_frac_plot(results,
attributes=[['air'], ['water'], ['sediment', 'soil', 'beach'], ['slush']],
# # attributes=[['$ mpFraction < 0.02'], ['$ mpFraction >= 0.02']],
# # methods=[['random layout (7', 'random layout (1']]*2)
methods=[['random sub', 'cross', 'layout (10']]*4)
# # methods=[['Random Subsampling', 'Sizebin']] * 2)
# # methods=[['layout (5', 'layout (10', 'layout (15', 'cross', 'random subsampling', 'sizebin']] * 2)
# plot.show()
# save_results('results5.res', results)
results: TotalResults = load_results('results4.res')
plot: Figure = get_error_vs_frac_plot(results,
# attributes=[],
# attributes=[['air'], ['water'], ['sediment', 'soil', 'beach'], ['slush']],
partCounts=[(0, 2000), (2000, 5000), (5000, 10000), (10000, 40000)],
# methods=[['random subs', 'dummy']]*2,
# methods=[['random subsampling', 'random']]*2,
methods=[['layout (10', 'crossLayout (3', 'random subsampling', 'quarter) (10']] * 4,
poissonRef=False, fill=True)
plot.show()
#
# plot2: Figure = get_distance_hist_plots(results,
# attributes=[['air'], ['water'], ['sediment', 'soil', 'beach'], ['slush']])
# plot2.show()
# plot3: Figure = get_error_vs_mpfrac_plot(results, attributes=[['air'], ['water'], ['sediment', 'soil', 'beach'], ['slush']])
# plot3.show()
\ No newline at end of file
......@@ -18,6 +18,7 @@ def get_default_DataSet() -> DataSet:
dset.imagedim_df = [10, 10]
dset.pixelscale_df = 1.0
setMaxDim(dset, 10, 0, 10, 0, 10)
dset.particleContainer = get_default_ParticleContainer()
return dset
......
......@@ -127,8 +127,8 @@ class TestTrainedSubsampling(unittest.TestCase):
self.assertEqual(type(self.trainedSampling.label), str)
def test_load_classifier(self):
self.assertTrue(self.trainedSampling.clf is None)
self.assertTrue(self.trainedSampling.score is None)
# self.assertTrue(self.trainedSampling.clf is None)
# self.assertTrue(self.trainedSampling.score is None) # TODO: REIMPLEMENT
self.trainedSampling.clfPath = r'C:\Users\xbrjos\Desktop\Python\Subsampling\chemometrics\RandomForestClassifier, score 0.72.pkl'
self.trainedSampling._load_classifier()
......@@ -160,16 +160,16 @@ class TestTrainedSubsampling(unittest.TestCase):
for index in range(numMPParticles): # all MP Particles should be measured
self.assertTrue(index in indicesToMeasure)
def test_get_theoretic_fraction(self):
for frac in [0.1, 0.3, 0.5, 0.9, 1.0]:
for score in [0.5, 0.7, 1.0]:
self.trainedSampling.fraction = frac
self.trainedSampling.score = score
score: float = self.trainedSampling.score
diff: float = 1 / self.trainedSampling.fraction - 1 # i.e., from 50 % score to 100 % score
factor: float = 1 + (1 - score) / 0.5 * diff
self.assertEqual(self.trainedSampling.get_theoretic_frac(), 1/factor)
# def test_get_theoretic_fraction(self):
# for frac in [0.1, 0.3, 0.5, 0.9, 1.0]:
# for score in [0.5, 0.7, 1.0]:
# self.trainedSampling.fraction = frac
# self.trainedSampling.score = score
#
# score: float = self.trainedSampling.score
# diff: float = 1 / self.trainedSampling.fraction - 1 # i.e., from 50 % score to 100 % score
# factor: float = 1 + (1 - score) / 0.5 * diff
# self.assertEqual(self.trainedSampling.get_theoretic_frac(), 1/factor)
# def test_make_subparticles_match_fraction(self):
......
......@@ -43,8 +43,10 @@ class TestTotalResults(unittest.TestCase):
def test_get_error_vs_fraction_data(self):
firstSample: SampleResult = self.totalResults.add_sample('sample1.pkl')
firstSample.set_attribute('to be used')
firstSample.dataset = get_default_DataSet()
secondSample: SampleResult = self.totalResults.add_sample('sample2.pkl')
secondSample.set_attribute('not to be used')
secondSample.dataset = get_default_DataSet()
firstMethod: meth.RandomSampling = meth.RandomSampling(None, 0.1)
firstResult: SubsamplingResult = SubsamplingResult(firstMethod)
......@@ -76,8 +78,8 @@ class TestTotalResults(unittest.TestCase):
firstSample.results = [firstResult, secondResult, thirdResult, thirdResult3]
secondSample.results = [firstResult, secondResult, thirdResult2, thirdResult3]
numSamples, resultDict = self.totalResults.get_error_vs_fraction_data()
self.assertEqual(numSamples, 2)
stats, resultDict = self.totalResults.get_error_vs_fraction_data()
self.assertEqual(stats['numSamples'], 2)
self.assertEqual(list(resultDict.keys()), [firstMethod.label, secondMethod.label, thirdMethod.label])
for i, key in enumerate(resultDict.keys()):
......@@ -101,8 +103,8 @@ class TestTotalResults(unittest.TestCase):
self.assertAlmostEqual(mean, 50)
self.assertAlmostEqual(stdev, 16.32993161855452)
numSamples, filteredResultDict = self.totalResults.get_error_vs_fraction_data(attributes=['to be used'])
self.assertEqual(numSamples, 1)
stats, filteredResultDict = self.totalResults.get_error_vs_fraction_data(attributes=['to be used'])
self.assertEqual(stats['numSamples'], 1)
self.assertEqual(list(filteredResultDict.keys()), [firstMethod.label, secondMethod.label, thirdMethod.label])
for i, key in enumerate(filteredResultDict.keys()):
......@@ -129,16 +131,16 @@ class TestTotalResults(unittest.TestCase):
self.assertAlmostEqual(mean, 50)
self.assertAlmostEqual(stdev, 16.32993161855452)
numSamples, filteredResultDict = self.totalResults.get_error_vs_fraction_data(methods=['cross'])
self.assertEqual(numSamples, 2)
stats, filteredResultDict = self.totalResults.get_error_vs_fraction_data(methods=['cross'])
self.assertEqual(stats['numSamples'], 2)
self.assertEqual(list(filteredResultDict.keys()), [secondMethod.label, thirdMethod.label])
numSamples, filteredResultDict = self.totalResults.get_error_vs_fraction_data(methods=['Cross'])
self.assertEqual(numSamples, 2)
stats, filteredResultDict = self.totalResults.get_error_vs_fraction_data(methods=['Cross'])
self.assertEqual(stats['numSamples'], 2)
self.assertEqual(list(filteredResultDict.keys()), [secondMethod.label, thirdMethod.label])
numSamples, filteredResultDict = self.totalResults.get_error_vs_fraction_data(methods=['random'])
self.assertEqual(numSamples, 2)
stats, filteredResultDict = self.totalResults.get_error_vs_fraction_data(methods=['random'])
self.assertEqual(stats['numSamples'], 2)
self.assertEqual(list(filteredResultDict.keys()), [firstMethod.label])
......@@ -279,7 +281,7 @@ class TestSampleResult(unittest.TestCase):
possibleSpiralBoxMethods = 3
possibleRandomBoxMethods = 3
possibleQuarterRandomBoxMethods = 3
possibleChemometricMethods = 0
possibleChemometricMethods = 3 # i.e., the dummy classifier methods
totalPossible = possibleCrossBoxMethods + possibleRandomMethods + \
possibleSpiralBoxMethods + possibleChemometricMethods + \
possibleRandomBoxMethods + possibleQuarterRandomBoxMethods
......@@ -330,7 +332,7 @@ class TestSampleResult(unittest.TestCase):
possibleRandomMethods = 4
possibleCrossBoxMethods = 3
possibleSpiralBoxMethods = 3
possibleChemometricMethods = 0
possibleChemometricMethods = 3
possibleRandomBoxMethods = 3
possibleQuarterRandomBoxMethods = 3
totalPossible = possibleCrossBoxMethods + possibleRandomMethods + \
......@@ -369,7 +371,7 @@ class TestSampleResult(unittest.TestCase):
self.assertEqual(len(result.mpCountErrors), 0) # because the added results haven't set any
updatedMethods = self.sampleResult.update_result_with_methods(methods, force=True)
self.assertEqual(len(updatedMethods), 2) # because now we force the update
self.assertEqual(len(updatedMethods), 2*numVariations) # because now we force the update
self.assertTrue(method1 in updatedMethods)
self.assertTrue(method2 in updatedMethods)
......