Commit 386f1214 authored by Josef Brandt

More Graphs, Dataset particle stats

parent 0ddfc91d
......@@ -15,11 +15,11 @@ sys.path.append("C://Users//xbrjos//Desktop//Python")
from gepard.analysis.particleContainer import ParticleContainer
from gepard.analysis import particleAndMeasurement as pm
from gepard.analysis import particleCharacterization as pc
from gepard.helperfunctions import cv2imread_fix
from methods import SubsamplingMethod
from helpers import timingDecorator
def get_pca(data: np.ndarray, numComp: int = 2) -> np.ndarray:
try:
standardizedData = StandardScaler().fit_transform(data.copy())
......@@ -106,7 +106,11 @@ def get_characteristic_vector(particle: pm.Particle, particleImg: np.ndarray = N
# print('error')
# assert len(vector) == 7 + 4, f'wrong feature vector: {vector} with shape: {vector.shape}'
for entry in vector:
assert type(entry) in [float, int, np.int, np.float64], f'{entry} is {type(entry)}'
try:
float(entry)
except ValueError:
print('non-numeric value found')
raise
assert not np.isnan(entry)
return np.array(vector)
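Note on the validation change above: the float() probe is more tolerant than the removed type assert, since it accepts Python ints/floats and any numpy scalar type uniformly. A minimal sketch of the same idea as a reusable helper (the name assert_numeric_vector is hypothetical):

import numpy as np

def assert_numeric_vector(vector) -> None:
    for entry in vector:
        value = float(entry)  # raises ValueError/TypeError for non-numeric entries
        assert not np.isnan(value), f'NaN entry in feature vector: {vector}'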
......@@ -166,9 +170,14 @@ def get_image_feature_vec(particleImg: np.ndarray) -> np.ndarray:
def get_mean_and_stdev(img: np.ndarray) -> np.ndarray:
meanStd: tuple = cv2.meanStdDev(img)
colorMean: np.ndarray = np.array([i[0] for i in meanStd[0]])
colorStd: np.ndarray = np.array([i[0] for i in meanStd[1]])
try:
meanStd: tuple = cv2.meanStdDev(img)
colorMean: np.ndarray = np.array([i[0] for i in meanStd[0]])
colorStd: np.ndarray = np.array([i[0] for i in meanStd[1]])
except cv2.error:  # i.e., one-pixel images...
colorMean: np.ndarray = np.array([128, 128, 128])
colorStd: np.ndarray = np.array([0, 0, 0])
# print('invalid here...:', img, img.shape, colorMean, colorStd, np.vstack((colorMean, colorStd)))
return np.vstack((colorMean, colorStd))
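An alternative to catching cv2.error is to guard on the image shape up front; a minimal sketch, assuming 8-bit BGR input and reusing the gray fallback chosen above (the function name is hypothetical):

import numpy as np
import cv2

def get_mean_and_stdev_guarded(img: np.ndarray) -> np.ndarray:
    # degenerate crops (empty or single-row/column images) get a neutral fallback
    if img is None or img.size == 0 or min(img.shape[:2]) < 2:
        return np.vstack((np.array([128, 128, 128]), np.array([0, 0, 0])))
    mean, std = cv2.meanStdDev(img)
    return np.vstack((mean.flatten(), std.flatten()))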
......@@ -243,7 +252,7 @@ def get_curvature_ft(contour: np.ndarray, angularSegment: float = 20, numHarmoni
class TrainedSubsampling(SubsamplingMethod):
def __init__(self, particleContainer: ParticleContainer, desiredFraction: float,
path: str = r'C:\Users\xbrjos\Desktop\Python\Subsampling\chemometrics\RandomForestClassifier, score 0.72.pkl'):
path: str = r'C:\Users\xbrjos\Desktop\Python\Subsampling\chemometrics\RandomForestClassifier, score 0.79.pkl'):
super(TrainedSubsampling, self).__init__(particleContainer, desiredFraction)
self.score: float = None
self.clf = None
......@@ -267,7 +276,12 @@ class TrainedSubsampling(SubsamplingMethod):
def apply_subsampling_method(self) -> list:
self._load_classifier()
features: np.ndarray = get_particle_featurematrix(self.particleContainer)
fullimgpath: str = r'C:\Users\xbrjos\Desktop\temp MP\Fullimages'
dsetname: str = self.particleContainer.datasetParent.name
imgPath: str = os.path.join(fullimgpath, dsetname + '.tif')
fullimg = cv2imread_fix(imgPath)
features: np.ndarray = get_particle_featurematrix(self.particleContainer, fullimg)
predictions: np.ndarray = self.clf.predict(features)
indicesToSelect: set = self._get_measure_indices(list(predictions))
selectedParticles: list = []
......
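Loading the stitched full image from disk on every apply_subsampling_method call is expensive; a minimal memoization sketch, assuming the cv2imread_fix import above (load_fullimage_cached is a hypothetical helper):

import functools

@functools.lru_cache(maxsize=8)
def load_fullimage_cached(imgPath: str):
    # keeps the last few stitched images in memory, keyed by path
    return cv2imread_fix(imgPath)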
......@@ -71,12 +71,12 @@ if __name__ == '__main__':
counter = 0
for folder in pklsInFolders.keys():
for pklPath in pklsInFolders[folder]:
if counter < 100:
if counter < 50:
dset: dataset.DataSet = dataset.loadData(pklPath)
print('loaded', dset.name)
print('loaded', dset.name, counter)
imgPath: str = os.path.join(fullimgpath, dset.name + '.tif')
fullimg = cv2imread_fix(imgPath)
print('loaded fullimg', imgPath)
print('loaded fullimg', imgPath, counter)
partContainer: ParticleContainer = dset.particleContainer
firstVecLength: int = 0
......@@ -93,18 +93,20 @@ if __name__ == '__main__':
counter += 1
X: np.ndarray = np.array(X)
y: np.ndarray = np.array(y)
X_all: np.ndarray = np.array(X)
y_all: np.ndarray = np.array(y)
MPindices: np.ndarray = np.where(y == 1)[0]
nonMPindices: np.ndarray = np.where(y == 0)[0]
X_all: np.ndarray = SelectKBest(chi2, k=15).fit_transform(abs(X_all), y_all)
MPindices: np.ndarray = np.where(y_all == 1)[0]
nonMPindices: np.ndarray = np.where(y_all == 0)[0]
nonMPindices: list = sample(list(nonMPindices), len(MPindices))
X_MP: list = list(X[MPindices])
y_MP: list = list(y[MPindices])
X_MP: list = list(X_all[MPindices])
y_MP: list = list(y_all[MPindices])
X_nonMP: list = list(X[nonMPindices])
y_nonMP: list = list(y[nonMPindices])
X_nonMP: list = list(X_all[nonMPindices])
y_nonMP: list = list(y_all[nonMPindices])
assert set(y_MP) == {1}
assert set(y_nonMP) == {0}
......@@ -113,13 +115,6 @@ if __name__ == '__main__':
X_equalized: np.ndarray = np.array(X_MP + X_nonMP)
y_equalized: np.ndarray = np.array(y_MP + y_nonMP)
sum = X_MP + X_nonMP
for i in range(len(sum)):
if i== 0:
continue
try:
np.min(np.array(sum[:i]))
except:
print('error')
dset: tuple = (X_equalized, y_equalized)
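The undersampling above (keep all MP particles, draw an equally sized random subset of non-MP particles) can be expressed compactly; a sketch under the same assumptions, with `sample` from the random module as imported in this script (the helper name is hypothetical):

import numpy as np
from random import sample

def undersample_to_balance(X_all: np.ndarray, y_all: np.ndarray) -> tuple:
    mpIdx = np.where(y_all == 1)[0]
    nonMpIdx = sample(list(np.where(y_all == 0)[0]), len(mpIdx))  # 1:1 class ratio
    keepIdx = np.concatenate((mpIdx, nonMpIdx))
    return X_all[keepIdx], y_all[keepIdx]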
......@@ -136,15 +131,31 @@ if __name__ == '__main__':
# y_predicted = clf.predict(X)
# np.savetxt('Data.txt', X)
# np.savetxt('Assignments.txt', y)
np.savetxt('Data.txt', X)
np.savetxt('Assignments.txt', y)
np.savetxt('Data_all.txt', X_all)
np.savetxt('Assignments_all.txt', y_all)
# princComps = get_pca(X.transpose(), numComp=2)
#
# plt.scatter(princComps[:, 0], princComps[:, 1])
# print(X_equalized.shape)
# X: np.ndarray = SelectKBest(chi2, k=5).fit_transform(X, y)
# X: np.ndarray = SelectKBest(chi2, k=15).fit_transform(X, y)
# print(X_equalized.shape)
test_classification_models((X, y))
X = StandardScaler().fit_transform(X)
clf = RandomForestClassifier(n_estimators=1000)
clf.fit(X, y)
score = clf.score(X_all, y_all)
y_predicted = clf.predict(X_all)
errors: dict = {int(k): 0 for k in np.unique(y_all)}
for j in range(len(y_predicted)):
if y_all[j] != y_predicted[j]:
errors[y_all[j]] += 1
print('num MP Particles in set:', len(X_MP))
print(f'randForest with test size {len(y_all)} has score {round(score, 2)}, errors: {errors}')
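The per-class error loop above can also be written with sklearn's confusion_matrix; a minimal equivalent sketch, assuming the same y_all and y_predicted arrays. (Note that scoring on X_all is optimistic, since it contains the training samples; a held-out split would give a fairer estimate.)

import numpy as np
from sklearn.metrics import confusion_matrix

cm = confusion_matrix(y_all, y_predicted)  # rows: true labels, columns: predicted
errorsPerClass = {int(lbl): int(cm[i].sum() - cm[i, i]) for i, lbl in enumerate(np.unique(y_all))}
print('misclassified per class:', errorsPerClass)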
......@@ -8,6 +8,9 @@ import numpy as np
from scipy import spatial
import os
import matplotlib.pyplot as plt
from helpers import get_filterDimensions_from_dataset, get_center_from_filter_dimensions, convert_length_to_pixels
from graphs import get_distance_point_histogramdata
if __name__ == '__main__':
# imgpath: str = r'C:\Users\xbrjos\Desktop\temp MP\Fullimages'
......@@ -27,21 +30,22 @@ if __name__ == '__main__':
for path in paths:
dset = loadData(path)
particleContainer: ParticleContainer = dset.particleContainer
particleCenters: list = []
for particle in particleContainer.particles:
particleCenters.append([np.mean(particle.contour[:, 0, 0]), np.mean(particle.contour[:, 0, 1])])
# particleCenters: list = []
# for particle in particleContainer.particles:
# particleCenters.append([np.mean(particle.contour[:, 0, 0]), np.mean(particle.contour[:, 0, 1])])
#
# closest_particle_distances: np.ndarray = np.zeros(len(particleCenters))
# particleCenters: np.ndarray = np.array(particleCenters)
# print('particle centers done')
closest_particle_distances: np.ndarray = np.zeros(len(particleCenters))
particleCenters: np.ndarray = np.array(particleCenters)
print('particle centers done')
distMat: np.ndarray = spatial.distance_matrix(particleCenters, particleCenters)
print('distmat computed')
for i in range(distMat.shape[0]):
if i == 0:
closest_particle_distances[i] = np.min(distMat[i, 1:])
elif i == distMat.shape[0]-1:
closest_particle_distances[i] = np.min(distMat[i, :-1])
else:
closest_particle_distances[i] = np.min([np.min(distMat[i, :i]), np.min(distMat[i, i+1:])])
distances.append(closest_particle_distances)
plt.boxplot(distances)
# distMat: np.ndarray = spatial.distance_matrix(particleCenters, particleCenters)
# print('distmat computed')
# for i in range(distMat.shape[0]):
# if i == 0:
# closest_particle_distances[i] = np.min(distMat[i, 1:])
# elif i == distMat.shape[0]-1:
# closest_particle_distances[i] = np.min(distMat[i, :-1])
# else:
# closest_particle_distances[i] = np.min([np.min(distMat[i, :i]), np.min(distMat[i, i+1:])])
# distances.append(closest_particle_distances)
# plt.boxplot(distances)
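The commented-out nearest-neighbour search builds the full n x n distance matrix, which is quadratic in memory; a KD-tree yields the same closest-particle distances far more cheaply. A sketch assuming particleCenters as an (n, 2) array and the scipy.spatial import above:

tree = spatial.cKDTree(particleCenters)
dists, _ = tree.query(particleCenters, k=2)  # k=2: the nearest hit of each point is itself
closest_particle_distances = dists[:, 1]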
......@@ -5,11 +5,12 @@ Created on Wed Jan 22 13:57:28 2020
@author: luna
"""
import pickle
# import pickle
import os
import numpy as np
import matplotlib.pyplot as plt
# import matplotlib.pyplot as plt
import concurrent.futures
import operator
import sys
sys.path.append("C://Users//xbrjos//Desktop//Python")
from gepard import dataset
......@@ -32,8 +33,8 @@ def get_methods_to_test(dataset: dataset.DataSet, fractions: list = [], maxTries
:return: list of measurement Objects that are applicable
"""
if len(fractions) == 0:
# fractions: list = [0.02, 0.05, 0.1, 0.25, 0.5, 0.7, 0.9]
fractions: list = [0.1, 0.3, 0.5]
fractions: list = [0.02, 0.05, 0.1, 0.25, 0.5, 0.7, 0.9]
# fractions: list = [0.1, 0.3, 0.5]
methods: list = []
particleContainer = dataset.particleContainer
......@@ -46,7 +47,7 @@ def get_methods_to_test(dataset: dataset.DataSet, fractions: list = [], maxTries
methods += boxCreator.get_spiralBoxSubsamplers_for_fraction(fraction)
methods += boxCreator.get_randomBoxSubsamplers_for_fraction(fraction, maxTries=maxTries)
methods += boxCreator.get_randomQuarterBoxSubsamplers_for_fraction(fraction, maxTries=maxTries)
methods.append(cmeth.TrainedSubsampling(particleContainer, fraction))
# methods.append(cmeth.TrainedSubsampling(particleContainer, fraction))
# methods.append(cmeth.ChemometricSubsampling(particleContainer, fraction))
return methods
......@@ -71,6 +72,14 @@ def is_MP_particle(particle: Particle) -> bool:
return isMP
def get_number_of_MP_particles(particleList: list) -> int:
numMPParticles = 0
for particle in particleList:
if is_MP_particle(particle):
numMPParticles += 1
return numMPParticles
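Equivalent one-liner for the counter above, should it ever be condensed:

def get_number_of_MP_particles(particleList: list) -> int:
    return sum(1 for particle in particleList if is_MP_particle(particle))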
class TotalResults(object):
def __init__(self):
super(TotalResults, self).__init__()
......@@ -102,11 +111,12 @@ class TotalResults(object):
if multiprocessing:
forceList: list = [force]*len(self.sampleResults)
indices: list = list(np.arange(len(self.sampleResults)))
numSamples: int = len(forceList)
numWorkers: int = 4 # in case of quadcore processor that seams reasonable??
chunksize: int = int(round(numSamples / numWorkers * 0.7)) # we want to have slightly more chunks than workers
print(f'multiprocessing with {numSamples} samples and chunksize of {chunksize}')
# numSamples: int = len(forceList)
# numWorkers: int = 4  # in case of a quadcore processor that seems reasonable?
# chunksize: int = int(round(numSamples / numWorkers * 0.7)) # we want to have slightly more chunks than workers
# print(f'multiprocessing with {numSamples} samples and chunksize of {chunksize}')
chunksize = 1
with concurrent.futures.ProcessPoolExecutor() as executor:
results = executor.map(update_sample, self.sampleResults, forceList, indices, chunksize=chunksize)
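Setting chunksize=1 is sensible here: with few, long-running and unevenly sized samples, each free worker should pull the next sample immediately, whereas larger chunks only pay off for many cheap tasks. Since 1 is also the default for ProcessPoolExecutor.map, the call above could equivalently drop the argument; a sketch mirroring the method body:

with concurrent.futures.ProcessPoolExecutor() as executor:
    # chunksize defaults to 1 for ProcessPoolExecutor.map
    results = executor.map(update_sample, self.sampleResults, forceList, indices)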
......@@ -129,9 +139,11 @@ class TotalResults(object):
Value: {Dict: Key:Measured Fraction, Value: Tuple (averaged MPCountError, StDev MPCountError) over all samples}
"""
result: dict = {}
numSamples: int = 0
for sample in self.sampleResults:
sample: SampleResult = sample
if attributes == [] or sample.has_any_attribute(attributes):
numSamples += 1
for res in sample.results:
res: SubsamplingResult = res
method: meth.SubsamplingMethod = res.method
......@@ -155,7 +167,7 @@ class TotalResults(object):
meanStd = np.mean([i[1] for i in methodRes[fraction]])
methodRes[fraction] = (meanError, meanStd)
return result
return numSamples, result
class SubsamplingResult(object):
......@@ -227,9 +239,9 @@ class SubsamplingResult(object):
return binSorter.bins, mpCountErrorsPerBin
def _get_mp_count_error(self, allParticles: list, subParticles: list, fractionMeasured: float) -> float:
numMPOrig = self._get_number_of_MP_particles(allParticles)
numMPOrig = get_number_of_MP_particles(allParticles)
self.origMPCount = numMPOrig
numMPEstimate = self._get_number_of_MP_particles(subParticles) / fractionMeasured
numMPEstimate = get_number_of_MP_particles(subParticles) / fractionMeasured
self.estimMPCounts.append(numMPEstimate)
if numMPOrig != 0:
......@@ -245,19 +257,12 @@ class SubsamplingResult(object):
assert (exact != 0)
return abs(exact - estimate) / exact * 100
def _get_number_of_MP_particles(self, particleList: list) -> int:
numMPParticles = 0
for particle in particleList:
if is_MP_particle(particle):
numMPParticles += 1
return numMPParticles
class SampleResult(object):
"""
An object that stores all generated results per sample and can update and report on them.
"""
def __init__(self, filepath: str, numVariations: int = 5):
def __init__(self, filepath: str, numVariations: int = 10):
super(SampleResult, self).__init__()
self.filepath: str = filepath
self.dataset: dataset.DataSet = None
......@@ -332,8 +337,38 @@ class SampleResult(object):
return hasAttr
def has_attribute(self, attribute: str) -> bool:
attributes: list = [attr.lower() for attr in self.attributes]
return attribute.lower() in attributes
hasAttr: bool = False
if attribute.find('$') == -1:
attributes: list = [attr.lower() for attr in self.attributes]
hasAttr = attribute.lower() in attributes
else:
operators: dict = {
'<': operator.__lt__,
'<=': operator.__le__,
'==': operator.__eq__,
'!=': operator.__ne__,
'>=': operator.__ge__,
'>': operator.__gt__
}
if self.dataset is None:
self.load_dataset()
valueToCompare: float = 0.0
dsetValue: float = 0.0
for string in operators.keys():
if attribute.find(string) != -1:
valueToCompare = float(attribute.split(string)[1])
if attribute.find('numParticles') != -1:
dsetValue = len(self.dataset.particleContainer.particles)
break
elif attribute.find('mpFraction') != -1:
particles: list = self.dataset.particleContainer.particles
dsetValue = get_number_of_MP_particles(particles) / len(particles)
break
hasAttr = operators[string](dsetValue, valueToCompare)
return hasAttr
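One caveat in the operator lookup above: iterating the dict in insertion order finds '<' before '<=', so an attribute like '$ numParticles <= 1000' splits on the bare '<' and float('= 1000') raises a ValueError. A sketch that tests the two-character operators first (parse_comparison is a hypothetical name):

def parse_comparison(attribute: str) -> tuple:
    # longest operators first, so '<=' and '>=' are not shadowed by '<' and '>'
    for opString in ('<=', '>=', '==', '!=', '<', '>'):
        if opString in attribute:
            key, value = attribute.split(opString)
            return key.strip(' $'), opString, float(value)
    raise ValueError(f'no comparison operator found in: {attribute}')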
def _remove_result_of_method(self, method: meth.SubsamplingMethod) -> None:
"""
......
......@@ -21,7 +21,7 @@ def box_overlaps_other_box(topLeft1: list, topLeft2: list, boxSize: float) -> bo
class BoxSelectionSubsamplingMethod(SubsamplingMethod):
possibleBoxNumbers: list = [7, 10, 15]
possibleBoxNumbers: list = [5, 10, 20]
def __init__(self, *args):
super(BoxSelectionSubsamplingMethod, self).__init__(*args)
......
from matplotlib.figure import Figure
import matplotlib.pyplot as plt
import numpy as np
from evaluation import TotalResults
from evaluation import TotalResults, SampleResult
from helpers import get_filterDimensions_from_dataset, get_center_from_filter_dimensions, convert_length_to_pixels
def get_error_vs_frac_plot(totalResults: TotalResults, attributes: list = [], methods: list = [],
......@@ -26,8 +27,7 @@ def get_error_vs_frac_plot(totalResults: TotalResults, attributes: list = [], me
index = 0
for attrs, meths in zip(attributes, methods):
ax = fig.add_subplot(numRows, numCols, index + 1)
errorPerFraction: dict = totalResults.get_error_vs_fraction_data(attributes=attrs,
methods=meths)
numSamples, errorPerFraction = totalResults.get_error_vs_fraction_data(attributes=attrs, methods=meths)
for methodLabel in errorPerFraction.keys():
errorDict: dict = errorPerFraction[methodLabel]
......@@ -45,12 +45,15 @@ def get_error_vs_frac_plot(totalResults: TotalResults, attributes: list = [], me
title: str = ''
if len(attrs) > 0:
title = ', '.join(attr for attr in attrs)
print('title is', title)
title += f' ({numSamples} samples)'
ax.set_title(title, fontsize=15)
ax.set_xscale('log')
ax.set_xlabel('measured fraction', fontsize=12)
ax.set_ylabel('mpCountError (%)', fontsize=12)
ax.set_xlim([0.9 * min(fractions), 1.05])
minX, maxX = 0.9 * min(fractions), 1.05
ax.hlines([20, 40, 60, 80], minX, maxX, colors='gray', alpha=0.5)
ax.set_xlim([minX, maxX])
ax.set_ylim([0, 100])
ax.legend()
......@@ -61,20 +64,74 @@ def get_error_vs_frac_plot(totalResults: TotalResults, attributes: list = [], me
return fig
def get_distance_hist_plots(totalResults: TotalResults, attributes: list = []) -> Figure:
fig: Figure = plt.figure(figsize=(10, 5))
numRows: int = 1
numCols: int = 1
if len(attributes) == 0:
attributes = [[]]
elif len(attributes) <= 2:
numCols = len(attributes)
else:
numRows = 2
numCols = int(np.ceil(len(attributes) / numRows))
for index, attrs in enumerate(attributes):
ax = fig.add_subplot(numRows, numCols, index + 1)
densities: list = []
for sampleRes in totalResults.sampleResults:
if sampleRes.has_any_attribute(attrs):
if sampleRes.dataset is None:
sampleRes.load_dataset()
dset = sampleRes.dataset
offset, diameter, [width, height] = get_filterDimensions_from_dataset(dset)
center = get_center_from_filter_dimensions(offset, diameter)
center[0] = convert_length_to_pixels(dset, center[0])
center[1] = convert_length_to_pixels(dset, center[1])
histdata = get_distance_point_histogramdata(dset.particleContainer.particles, center)
densities.append(histdata[1])
# ax.plot(histdata[0], histdata[1])
numSamples = len(densities)
title: str = ''
if len(attrs) > 0:
title = ', '.join(attr for attr in attrs)
title += f' ({numSamples} samples)'
ax.set_title(title, fontsize=15)
densities: np.ndarray = np.mean(np.array(densities), axis=0)
ax.plot(histdata[0], densities)
return fig
def get_distance_point_histogramdata(particles: list, center: np.ndarray) -> tuple:
"""
:param particles: list of Particles
:param center: np.array([x, y]) of center point, in px
:return histogramdata: tuple: (center bin dist , particle count)
"""
def get_area_of_circle_ring(innerRadius: float, outerRadius: float) -> float:
area: float = np.pi * (outerRadius + innerRadius) * (outerRadius - innerRadius)
return area
maxUpperLimit: float = 1E4
numBins: int = 11
bins: np.ndarray = np.linspace(0, maxUpperLimit, numBins, endpoint=True)
particleCenters: list = []
for particle in particles:
particleCenters.append([np.mean(particle.contour[:, 0, 0]), np.mean(particle.contour[:, 0, 1])])
distancesToPoints: np.ndarray = np.linalg.norm(particleCenters - center, axis=1)
data, binMaxima = np.histogram(distancesToPoints, bins)
densities: np.ndarray = np.zeros_like(data, dtype=float)
for i in range(len(data)):
densities[i] = float(data[i]) / get_area_of_circle_ring(binMaxima[i], binMaxima[i+1])
densities /= densities.max()
binCenters: list = [np.mean([binMaxima[i], binMaxima[i+1]]) for i in range(len(binMaxima)-1)]
return binCenters, densities
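A quick synthetic check of get_distance_point_histogramdata: particles placed on a ring around the center should peak in the bin covering the ring radius. A sketch with a hypothetical minimal particle stub (only the .contour attribute, shape (n, 1, 2), is accessed):

import numpy as np

class FakeParticle:
    def __init__(self, x: float, y: float):
        self.contour = np.array([[[x, y]]])

center = np.array([0.0, 0.0])
angles = np.linspace(0, 2 * np.pi, 100, endpoint=False)
onRing = [FakeParticle(3500 * np.cos(a), 3500 * np.sin(a)) for a in angles]
binCenters, densities = get_distance_point_histogramdata(onRing, center)
print(binCenters[np.argmax(densities)])  # 3500.0, the centre of the bin covering radius 3500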
# def get_grouped_spectra_plot(groupedSpectra: list, wavenumbers=None) -> matplotlib.figure.Figure:
# if wavenumbers is None:
# wavenumbers = np.arange(len(groupedSpectra[0][0]))
#
# numLabels = len(groupedSpectra)
# numRows = numLabels // 3
# numCols = np.ceil(numLabels / numRows)
# fig: matplotlib.figure.Figure = plt.figure()
#
# for index, specs in enumerate(groupedSpectra):
# ax = fig.add_subplot(numRows, numCols, index + 1)
# for spec in specs:
# ax.plot(wavenumbers, spec)
# ax.set_title(f'{len(specs)} spectra of label {index + 1}')
#
# return fig
\ No newline at end of file
......@@ -4,7 +4,7 @@ import time
from evaluation import TotalResults, SampleResult
from input_output import get_pkls_from_directory, get_attributes_from_foldername, save_results, load_results
from graphs import get_error_vs_frac_plot
from graphs import get_error_vs_frac_plot, get_distance_hist_plots
"""
IMPORTANT!!!
......@@ -12,29 +12,33 @@ SET GEPARD TO EVALUATION BRANCH (WITHOUT THE TILING STUFF), OTHERWISE SOME OF TH
"""
if __name__ == '__main__':
results: TotalResults = TotalResults()
pklsInFolders = get_pkls_from_directory(r'C:\Users\xbrjos\Desktop\temp MP\NewDatasets')
counter = 0
for folder in pklsInFolders.keys():
for samplePath in pklsInFolders[folder]:
if counter < 10:
newSampleResult: SampleResult = results.add_sample(samplePath)
for attr in get_attributes_from_foldername(folder):
newSampleResult.set_attribute(attr)
counter += 1
# results: TotalResults = TotalResults()
# pklsInFolders = get_pkls_from_directory(r'C:\Users\xbrjos\Desktop\temp MP\NewDatasets')
# counter = 0
# for folder in pklsInFolders.keys():
# for samplePath in pklsInFolders[folder]:
# if counter < 100:
# newSampleResult: SampleResult = results.add_sample(samplePath)
# for attr in get_attributes_from_foldername(folder):
# newSampleResult.set_attribute(attr)
# counter += 1
#
# t0 = time.time()
# results.update_all(multiprocessing=True)
# print('updating all took', time.time()-t0, 'seconds')
#
# save_results('results2.res', results)
results: TotalResults = load_results('results2.res')
#
plot: Figure = get_error_vs_frac_plot(results,
attributes=[['air'], ['water'], ['sediment', 'soil', 'beach'], ['slush']],
# # attributes=[['$ mpFraction < 0.02'], ['$ mpFraction >= 0.02']],
# # methods=[['random layout (7', 'random layout (1']]*2)
methods=[['random sub', 'cross', 'layout (10']]*4)
# # methods=[['Random Subsampling', 'Sizebin']] * 2)
# # methods=[['layout (5', 'layout (10', 'layout (15', 'cross', 'random subsampling', 'sizebin']] * 2)
# plot.show()
t0 = time.time()
results.update_all(multiprocessing=False)
print('updating all took', time.time()-t0, 'seconds')
save_results('results_test.res', results)
# results: TotalResults = load_results('results2.res')
plot: Figure = get_error_vs_frac_plot(results, attributes=[[]],
methods=[['random', 'trained']], standarddevs=True)
# plot: Figure = get_error_vs_frac_plot(results, attributes=[['air', 'water'], ['sediment', 'soil', 'beach', 'slush']],
# methods=[['random layout (7', 'random layout (1']]*2)
# methods=[[]]*2)
# methods=[['Random Subsampling', 'Sizebin']] * 2)
# methods=[['layout (7', 'layout (10', 'layout (15', 'cross', 'random subsampling', 'sizebin']] * 2)
plot.show()
# plot2: Figure = get_distance_hist_plots(results,
# attributes=[['air'], ['water'], ['sediment', 'soil', 'beach'], ['slush']])
# plot2.show()
......@@ -10,12 +10,12 @@ import unittest
import sys
sys.path.append("C://Users//xbrjos//Desktop//Python")
import gepard
from gepard.dataset import DataSet
from gepard.analysis.particleContainer import ParticleContainer
from evaluation import TotalResults, SampleResult, SubsamplingResult, get_methods_to_test
from evaluation import TotalResults, SampleResult, SubsamplingResult, get_methods_to_test, get_number_of_MP_particles
import methods as meth
import geometricMethods as gmeth
from chemometrics.chemometricMethods import TrainedSubsampling
from helpers_for_test import get_default_ParticleContainer, get_default_DataSet, get_MP_particles, get_non_MP_particles, get_MP_particle, get_non_MP_particle
......@@ -76,7 +76,8 @@ class TestTotalResults(unittest.TestCase):
firstSample.results = [firstResult, secondResult, thirdResult, thirdResult3]
secondSample.results = [firstResult, secondResult, thirdResult2, thirdResult3]
resultDict: dict = self.totalResults.get_error_vs_fraction_data()
numSamples, resultDict = self.totalResults.get_error_vs_fraction_data()
self.assertEqual(numSamples, 2)
self.assertEqual(list(resultDict.keys()), [firstMethod.label, secondMethod.label, thirdMethod.label])
for i, key in enumerate(resultDict.keys()):
......@@ -100,7 +101,8 @@ class TestTotalResults(unittest.TestCase):
self.assertAlmostEqual(mean, 50)
self.assertAlmostEqual(stdev, 16.32993161855452)
filteredResultDict: dict = self.totalResults.get_error_vs_fraction_data(attributes=['to be used'])
numSamples, filteredResultDict = self.totalResults.get_error_vs_fraction_data(attributes=['to be used'])
self.assertEqual(numSamples, 1)
self.assertEqual(list(filteredResultDict.keys()), [firstMethod.label, secondMethod.label, thirdMethod.label])
for i, key in enumerate(filteredResultDict.keys()):
......@@ -127,13 +129,16 @@ class TestTotalResults(unittest.TestCase):
self.assertAlmostEqual(mean, 50)
self.assertAlmostEqual(stdev, 16.32993161855452)
filteredResultDict: dict = self.totalResults.get_error_vs_fraction_data(methods=['cross'])
numSamples, filteredResultDict = self.totalResults.get_error_vs_fraction_data(methods=['cross'])
self.assertEqual(numSamples, 2)
self.assertEqual(list(filteredResultDict.keys()), [secondMethod.label, thirdMethod.label])
filteredResultDict: dict = self.totalResults.get_error_vs_fraction_data(methods=['Cross'])
numSamples, filteredResultDict = self.totalResults.get_error_vs_fraction_data(methods=['Cross'])
self.assertEqual(numSamples, 2)
self.assertEqual(list(filteredResultDict.keys()), [secondMethod.label, thirdMethod.label])
filteredResultDict: dict = self.totalResults.get_error_vs_fraction_data(methods=['random'])
numSamples, filteredResultDict = self.totalResults.get_error_vs_fraction_data(methods=['random'])
self.assertEqual(numSamples, 2)
self.assertEqual(list(filteredResultDict.keys()), [firstMethod.label])
......@@ -233,6 +238,22 @@ class TestSampleResult(unittest.TestCase):
self.assertFalse(self.sampleResult.has_any_attribute(['water', 'sediment']))
self.assertFalse(self.sampleResult.has_any_attribute(['beach']))
# keywords with $ allow filtering for properties from the dataset itself
self.sampleResult.dataset = get_default_DataSet()
self.sampleResult.dataset.particleContainer.initializeParticles(999)
self.assertTrue(self.sampleResult.has_attribute('$ numParticles < 1000'))
self.assertFalse(self.sampleResult.has_attribute('$ numParticles >= 1000'))
self.sampleResult.dataset.particleContainer.initializeParticles(1000)
self.assertFalse(self.sampleResult.has_attribute('$ numParticles < 1000'))
self.assertTrue(self.sampleResult.has_attribute('$ numParticles >= 1000'))
fifty_percent_mp: list = get_non_MP_particles(100) + get_MP_particles(100)
self.sampleResult.dataset.particleContainer.particles = fifty_percent_mp
self.assertTrue(self.sampleResult.has_attribute('$ mpFraction == 0.5'))
self.assertFalse(self.sampleResult.has_attribute('$ mpFraction > 0.5'))
self.assertFalse(self.sampleResult.has_attribute('$ mpFraction < 0.5'))
def test_get_methods_to_test(self):
def containsMethod(listOfMethods: list, template: meth.SubsamplingMethod) -> bool:
contains: bool = False
......@@ -258,7 +279,7 @@ class TestSampleResult(unittest.TestCase):
possibleSpiralBoxMethods = 3
possibleRandomBoxMethods = 3
possibleQuarterRandomBoxMethods = 3
possibleChemometricMethods = 1
possibleChemometricMethods = 0
totalPossible = possibleCrossBoxMethods + possibleRandomMethods + \
possibleSpiralBoxMethods + possibleChemometricMethods + \
possibleRandomBoxMethods + possibleQuarterRandomBoxMethods
......@@ -277,7 +298,7 @@ class TestSampleResult(unittest.TestCase):
possibleSpiralBoxMethods = 0
possibleRandomBoxMethods = 0
possibleQuarterRandomBoxMethods = 0
possibleChemometricMethods = 1
possibleChemometricMethods = 0
totalPossible = possibleCrossBoxMethods + possibleRandomMethods + \
possibleSpiralBoxMethods + possibleChemometricMethods + \
possibleRandomBoxMethods + possibleQuarterRandomBoxMethods
......@@ -294,7 +315,7 @@ class TestSampleResult(unittest.TestCase):
possibleSpiralBoxMethods = 0
possibleRandomBoxMethods = 0
possibleQuarterRandomBoxMethods = 0
possibleChemometricMethods = 1
possibleChemometricMethods = 0
totalPossible = possibleCrossBoxMethods + possibleRandomMethods + \
possibleSpiralBoxMethods + possibleChemometricMethods + \
possibleRandomBoxMethods + possibleQuarterRandomBoxMethods
......@@ -309,7 +330,7 @@ class TestSampleResult(unittest.TestCase):
possibleRandomMethods = 4
possibleCrossBoxMethods = 3
possibleSpiralBoxMethods = 3
possibleChemometricMethods = 2
possibleChemometricMethods = 0
possibleRandomBoxMethods = 3
possibleQuarterRandomBoxMethods = 3
totalPossible = possibleCrossBoxMethods + possibleRandomMethods + \
......@@ -456,7 +477,7 @@ class TestSubsamplingResult(unittest.TestCase):
allParticles = mpParticles + nonMPparticles
calculatedNumMPParticles = self.subsamplingResult._get_number_of_MP_particles(allParticles)
calculatedNumMPParticles = get_number_of_MP_particles(allParticles)
self.assertEqual(numMPParticles, calculatedNumMPParticles)
def test_get_mp_count_error(self):
......