...
 
Commits (9)
......@@ -20,3 +20,7 @@ cythonModules/build/
chemometrics/Assignments.txt
chemometrics/Data.txt
chemometrics/Assignments_all.txt
chemometrics/Data_all.txt
This diff is collapsed.
......@@ -12,9 +12,12 @@ from sklearn.feature_selection import chi2
import pickle
import time
import sys
import os
sys.path.append("C://Users//xbrjos//Desktop//Python")
from gepard import dataset
from gepard.analysis.particleContainer import ParticleContainer
from gepard.analysis.particleCharacterization import getParticleImageFromFullimage
from gepard.helperfunctions import cv2imread_fix
from input_output import get_pkls_from_directory
from chemometricMethods import get_log_hu_moments, get_color_index, get_pca, get_characteristic_vector
......@@ -61,38 +64,49 @@ if __name__ == '__main__':
recreateNew: bool = True
if recreateNew:
pklsInFolders: dict = get_pkls_from_directory(r'C:\Users\xbrjos\Desktop\temp MP\NewDatasets')
fullimgpath: str = r'C:\Users\xbrjos\Desktop\temp MP\Fullimages'
X: list = []
y: list = []
counter = 0
for folder in pklsInFolders.keys():
for pklPath in pklsInFolders[folder]:
if counter < 100:
if counter < 50:
dset: dataset.DataSet = dataset.loadData(pklPath)
print('loaded', dset.name)
print('loaded', dset.name, counter)
imgPath: str = os.path.join(fullimgpath, dset.name + '.tif')
fullimg = cv2imread_fix(imgPath)
print('loaded fullimg', imgPath, counter)
partContainer: ParticleContainer = dset.particleContainer
for particle in partContainer.particles:
features: np.ndarray = get_characteristic_vector(particle)
# features: list = [abs(i) for i in get_log_hu_moments(particle.contour)]
# features.append(get_color_index(particle.color))
firstVecLength: int = 0
for index, particle in enumerate(partContainer.particles):
partImg: np.ndarray = getParticleImageFromFullimage(particle.contour, fullimg)
features: np.ndarray = get_characteristic_vector(particle, partImg)
if index == 0:
firstVecLength = features.shape[0]
else:
assert features.shape[0] == firstVecLength
X.append(features)
y.append(int(is_MP_particle(particle)))
counter += 1
X: np.ndarray = np.array(X)
y: np.ndarray = np.array(y)
X_all: np.ndarray = np.array(X)
y_all: np.ndarray = np.array(y)
MPindices: np.ndarray = np.where(y == 1)[0]
nonMPindices: np.ndarray = np.where(y == 0)[0]
X_all: np.ndarray = SelectKBest(chi2, k=15).fit_transform(abs(X_all), y_all)
MPindices: np.ndarray = np.where(y_all == 1)[0]
nonMPindices: np.ndarray = np.where(y_all == 0)[0]
nonMPindices: list = sample(list(nonMPindices), len(MPindices))
X_MP: list = list(X[MPindices])
y_MP: list = list(y[MPindices])
X_MP: list = list(X_all[MPindices])
y_MP: list = list(y_all[MPindices])
X_nonMP: list = list(X[nonMPindices])
y_nonMP: list = list(y[nonMPindices])
X_nonMP: list = list(X_all[nonMPindices])
y_nonMP: list = list(y_all[nonMPindices])
assert set(y_MP) == {1}
assert set(y_nonMP) == {0}
......@@ -100,6 +114,7 @@ if __name__ == '__main__':
X_equalized: np.ndarray = np.array(X_MP + X_nonMP)
y_equalized: np.ndarray = np.array(y_MP + y_nonMP)
sum = X_MP + X_nonMP
dset: tuple = (X_equalized, y_equalized)
......@@ -110,15 +125,37 @@ if __name__ == '__main__':
dset: tuple = pickle.load(fp)
X, y = dset
# np.savetxt('Data.txt', X)
# np.savetxt('Assignments.txt', y)
# with open(r'C:\Users\xbrjos\Desktop\Python\Subsampling\chemometrics\RandomForestClassifier, score 0.72.pkl', "rb") as fp:
# clf: RandomForestClassifier = pickle.load(fp)
# y_predicted = clf.predict(X)
np.savetxt('Data.txt', X)
np.savetxt('Assignments.txt', y)
np.savetxt('Data_all.txt', X_all)
np.savetxt('Assignments_all.txt', y_all)
# princComps = get_pca(X.transpose(), numComp=2)
#
# plt.scatter(princComps[:, 0], princComps[:, 1])
# print(X_equalized.shape)
# X: np.ndarray = SelectKBest(chi2, k=5).fit_transform(X, y)
# X: np.ndarray = SelectKBest(chi2, k=15).fit_transform(X, y)
# print(X_equalized.shape)
test_classification_models((X, y))
X = StandardScaler().fit_transform(X)
clf = RandomForestClassifier(n_estimators=1000)
clf.fit(X, y)
score = clf.score(X_all, y_all)
y_predicted = clf.predict(X_all)
errors: dict = {int(k): 0 for k in np.unique(y_all)}
for j in range(len(y_predicted)):
if y_all[j] != y_predicted[j]:
errors[y_all[j]] += 1
print('num MP Particles in set:', len(X_MP))
print(f'randForest with test size {len(y_all)} has score {round(score, 2)}, errors: {errors}')
import sys
sys.path.append("C://Users//xbrjos//Desktop//Python")
from gepard.helperfunctions import cv2imread_fix
from gepard.dataset import DataSet,loadData
from gepard.analysis.particleContainer import ParticleContainer
import cv2
import numpy as np
from scipy import spatial
import os
import matplotlib.pyplot as plt
from helpers import get_filterDimensions_from_dataset, get_center_from_filter_dimensions, convert_length_to_pixels
# from evaluation import is_MP_particle
import evaluation
def get_particle_patchiness(dataset: DataSet, numCells: int = 50, onlyMP=False) -> float:
    """Quantify how unevenly particles are distributed over the filter area.

    The filter is rasterized into a grid of square tiles; particles are counted
    per tile and the patchiness is the coefficient of variation (std/mean) of
    the per-tile counts, restricted to tiles inside the circular filter region.

    :param dataset: GEPARD dataset to analyse
    :param numCells: approximate number of tiles along the longer filter axis
    :param onlyMP: if True, only particles classified as microplastic are counted
    :return: std/mean of per-tile particle counts, rounded to two digits
    """
    offset, diameter, [width, height] = get_filterDimensions_from_dataset(dataset)
    center: np.ndarray = get_center_from_filter_dimensions(offset, diameter)
    width: float = convert_length_to_pixels(dataset, width)
    height: float = convert_length_to_pixels(dataset, height)
    # tile edge length (pixels), derived from the longer image dimension
    pixelsPerTile: int = max(int(round(width/numCells)), int(round(height/numCells)))
    # NOTE(review): center/diameter are divided by pixelsPerTile *before* the unit
    # conversion — correct only if convert_length_to_pixels is linear; confirm.
    centerX: int = int(round(convert_length_to_pixels(dataset, center[0] / pixelsPerTile)))
    centerY: int = int(round(convert_length_to_pixels(dataset, center[1] / pixelsPerTile)))
    radius: int = int(round(convert_length_to_pixels(dataset, diameter / pixelsPerTile * 0.5)))
    numRows: int = int(np.ceil(height / pixelsPerTile)) + 1
    numCols: int = int(np.ceil(width / pixelsPerTile)) + 1
    partCount: int = 0
    densityImage: np.ndarray = np.zeros((numRows, numCols))
    for particle in dataset.particleContainer.particles:
        if (onlyMP and evaluation.is_MP_particle(particle)) or not onlyMP:
            # assumes OpenCV contour layout (numPoints, 1, 2) with x at [..., 0]
            # and y at [..., 1] — TODO confirm against gepard's contour format
            particleCenter: tuple = np.mean(particle.contour[:, 0, 0]), np.mean(particle.contour[:, 0, 1])
            row: int = int(round(particleCenter[1] / pixelsPerTile))
            col: int = int(round(particleCenter[0] / pixelsPerTile))
            densityImage[row, col] += 1
            partCount += 1
    # mask out tiles that lie outside the circular filter area
    mask: np.ndarray = np.zeros_like(densityImage)
    # NOTE(review): cv2.circle expects the center as (x, y); passing
    # (centerY, centerX) looks swapped relative to the row/col layout — confirm.
    cv2.circle(mask, (centerY, centerX), radius, 1, -1)
    relevantData: np.ndarray = densityImage[mask > 0]
    mean: np.ndarray = np.round(np.mean(relevantData), 2)
    std: np.ndarray = np.round(np.std(relevantData), 2)
    ratio: float = round(std/mean, 2)
    # plt.imshow(densityImage)
    # plt.title(f'MP particle count: {partCount},\ndensity mean: {mean}, density std: {std},\npatchiness = {ratio}')
    # plt.axis('off')
    # plt.tight_layout()
    # plt.show()
    # print(f'sample: {dataset.name}, mean: {mean}, std: {std}, ratio = {ratio}')
    return ratio
if __name__ == '__main__':
    # --- earlier distance-map experiments on full images, kept for reference ---
    # imgpath: str = r'C:\Users\xbrjos\Desktop\temp MP\Fullimages'
    # imgname: str = '181120_MCI_2_ds1+2_all_ kleiner500_10_1.tif'
    # imgname: str = '190619_5_PTPH_sld_190321_ds1_50_1_neu.tif'
    #191213_P190814_TPHZ_ds1_50_1
    # img: np.ndarray = cv2imread_fix(os.path.join(imgpath, imgname))
    # gray: np.ndarray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    # _, binimg = cv2.threshold(gray, 20, 1, cv2.THRESH_BINARY_INV)
    # distmap: np.ndarray = cv2.distanceTransform(binimg, cv2.DIST_L1, 3)
    # plt.imshow(distmap, cmap='gray')

    # hard-coded evaluation samples (one water, one wastewater/water, one air dataset)
    paths: list = [r'C:\Users\xbrjos\Desktop\temp MP\NewDatasets\water\181120_MCI_2_ds1+2_all_ kleiner500_10_1.pkl',
                   r'C:\Users\xbrjos\Desktop\temp MP\NewDatasets\wastewater, water\191213_P190814_TPHZ_ds1_50_1.pkl',
                   r'C:\Users\xbrjos\Desktop\temp MP\NewDatasets\Air\191119_RW6_Solling_50_2_neu.pkl']

    distances: list = []  # NOTE(review): never filled below — leftover from a removed analysis?
    allParticles = []     # collects polymer assignments of all MP particles across samples
    for path in paths:
        dset = loadData(path)
        # return value is discarded; the call is kept for its computation side effects
        get_particle_patchiness(dset, 50, onlyMP=True)
        for particle in dset.particleContainer.particles:
            if evaluation.is_MP_particle(particle):
                allParticles.append(particle.getParticleAssignment())
    # NOTE(review): original indentation was lost in the diff; the print is placed
    # after the loop as a final summary of distinct MP assignments — confirm scope.
    print(set(allParticles))
\ No newline at end of file
import numpy as np
cimport numpy as np
cimport cython
from cython cimport boundscheck, wraparound
@boundscheck(False)
@wraparound(False)
cdef bint box_overlaps_contour(unsigned int[:] boxTopLeftXY, unsigned int boxSize, unsigned int[:, :, :] contourData):
    """
    Tests whether the bounding rect of a contour overlaps a square box
    (with a half-extent tolerance in each direction).

    :param boxTopLeftXY: (x, y) of the box's top-left corner
    :param boxSize: edge length of the square box
    :param contourData: OpenCV-style contour of shape (numPoints, 1, 2),
        x at index [..., 0] and y at index [..., 1]
    :return: True if the contour overlaps the box
    """
    cdef bint isOverlapping = False
    cdef unsigned int xmin, xmax, width, boxXmin, boxXmax, ymin, ymax, height, boxYmin, boxYmax
    # FIX: x values live at index 0 of the last axis; the original read the
    # y column ([:, 0, 1]) for BOTH the x- and y-extent.
    xmin = np.min(contourData[:, 0, 0])
    xmax = np.max(contourData[:, 0, 0])
    width = xmax - xmin
    boxXmin = boxTopLeftXY[0]
    boxXmax = boxTopLeftXY[0] + boxSize
    if xmin > (boxXmin-width/2):
        if xmax < (boxXmax+width/2):
            ymin = np.min(contourData[:, 0, 1])
            ymax = np.max(contourData[:, 0, 1])
            height = ymax - ymin
            boxYmin = boxTopLeftXY[1]
            boxYmax = boxTopLeftXY[1] + boxSize
            if ymin > (boxYmin-height/2):
                # FIX: y tolerance must use height, not width
                if ymax < (boxYmax+height/2):
                    isOverlapping = True
    return isOverlapping
@boundscheck(False)
@wraparound(False)
def def_get_indices_of_overlapping_particles(contours not None, unsigned int[:, :] topLefts, unsigned int boxSize):
    """
    Returns the indices of all contours that overlap at least one of the given boxes.

    :param contours: sequence of OpenCV-style contours, each assignable to an
        unsigned int[:, :, :] memoryview (shape (numPoints, 1, 2))
    :param topLefts: (numBoxes, 2) array with the top-left coordinates of each box
    :param boxSize: edge length of the (square) boxes
    :return: memoryview slice holding the indices of the overlapping contours
    """
    cdef Py_ssize_t i, j
    cdef unsigned int counter, numParticles, numTopLefts
    numParticles = len(contours)
    numTopLefts = topLefts.shape[0]
    # worst case: every particle overlaps some box, hence numParticles entries
    cdef unsigned int[:] overlappingIndices = np.zeros(numParticles, dtype=np.uint32)
    cdef unsigned int[:, :, :] currentContour
    counter = 0
    for i in range(numParticles):
        currentContour = contours[i]
        for j in range(numTopLefts):
            if box_overlaps_contour(topLefts[j, :], boxSize, currentContour):
                overlappingIndices[counter] = i
                counter += 1
                break  # one overlapping box suffices; continue with the next particle
    return overlappingIndices[:counter]
......@@ -3,33 +3,35 @@ cimport numpy as np
cimport numpy.random
cimport cython
from libc.math cimport sin, cos, round, abs
from libc.stdlib cimport rand, srand, RAND_MAX
DTYPE = np.float
ctypedef np.int32_t INT32_t
cdef get_random_topleft(double maxDist, double maxAngle, double radius, double boxSize):
cdef double angle, dist, x, y
cdef np.ndarray[INT32_t, ndim=1] newTopLeft
dist = np.random.rand() * maxDist
angle = np.random.rand() * maxAngle
newTopLeft = np.empty(2, dtype=np.int32)
x = dist*np.cos(angle) + radius - boxSize/2
y = dist*np.sin(angle) + radius - boxSize/2
newTopLeft[0] = np.int32(np.round(x))
newTopLeft[1] = np.int32(np.round(y))
cdef int newTopLeft[2]
dist = rand() / 32767.0 * maxDist
angle = rand() / 32767.0 * maxAngle
x = dist*cos(angle) + radius - boxSize/2
y = dist*sin(angle) + radius - boxSize/2
newTopLeft[0] = int(round(x))
newTopLeft[1] = int(round(y))
return newTopLeft
def get_random_topLefts(int numBoxes, double boxSize, double radius, double maxAngle, int seed=1337, int maxTries=50):
cdef np.ndarray[INT32_t, ndim=2] topLefts
cdef np.ndarray[INT32_t, ndim=1] newTopLeft
cdef int newTopLeft[2]
cdef double maxDist
cdef int outerCounter, counter, x, y, i, j, diffX, diffY, successfullyAdded
cdef bint validSolutionFound, boxOverlaps
np.random.seed(seed)
srand(seed) # setting seed
assert RAND_MAX == 32767 # this value is used in the random-function above. For performance-reasons, it is directly typed in there as a number
maxDist = radius - np.sqrt((boxSize/2)**2 + (boxSize/2)**2)
outerCounter = 0
validSolutionFound = False
......@@ -47,8 +49,8 @@ def get_random_topLefts(int numBoxes, double boxSize, double radius, double maxA
newTopLeft = get_random_topleft(maxDist, maxAngle, radius, boxSize)
boxOverlaps = False
for j in range(i):
diffX = abs(np.float(newTopLeft[0] - np.float(topLefts[j, 0])))
diffY = abs(np.float(newTopLeft[1] - np.float(topLefts[j, 1])))
diffX = abs(newTopLeft[0] - topLefts[j, 0])
diffY = abs(newTopLeft[1] - topLefts[j, 1])
if diffX < boxSize and diffY < boxSize:
boxOverlaps = True
......
......@@ -19,8 +19,14 @@ if len(sys.argv) == 1:
# ext = Extension("getRandomTopLefts", ["getRandomTopLefts.pyx"], extra_compile_args=['-O3'],)
# setup(
# name="get a given number of random topLefts",
# ext_modules=cythonize("randoms.pyx", annotate=True), # accepts a glob pattern
# include_dirs=[np.get_include()]
# )
setup(
name="get a given number of random topLefts",
ext_modules=cythonize("randoms.pyx", annotate=True), # accepts a glob pattern
name="checks which particle contours overlap the boxes",
ext_modules=cythonize("particleBoxOverlap.pyx", annotate=True), # accepts a glob pattern
include_dirs=[np.get_include()]
)
)
\ No newline at end of file
This diff is collapsed.
import time
import numpy as np
from itertools import combinations
from methods import SubsamplingMethod
......@@ -6,7 +7,7 @@ import sys
sys.path.append("C://Users//xbrjos//Desktop//Python")
from gepard import dataset
import helpers
from cythonModules import randoms
from cythonModules import randoms, particleBoxOverlap
def box_overlaps_other_box(topLeft1: list, topLeft2: list, boxSize: float) -> bool:
......@@ -21,7 +22,7 @@ def box_overlaps_other_box(topLeft1: list, topLeft2: list, boxSize: float) -> bo
class BoxSelectionSubsamplingMethod(SubsamplingMethod):
possibleBoxNumbers: list = [7, 10, 15]
possibleBoxNumbers: list = [5, 10, 20]
def __init__(self, *args):
super(BoxSelectionSubsamplingMethod, self).__init__(*args)
......@@ -53,16 +54,27 @@ class BoxSelectionSubsamplingMethod(SubsamplingMethod):
return abs(topleft[0] - cntStart[0]) + abs(topleft[1] - cntStart[1])
subParticles: list = []
topLefts: list = self.get_topLeft_of_boxes()
boxWidthHeight: tuple = (self.boxSize, self.boxSize)
topLefts: np.ndarray = np.array(self.get_topLeft_of_boxes())
cython: bool = False
if cython:
topLefts = np.round(topLefts).astype(np.uint32)
# contours: np.ndarray = np.array(self.particleContainer.getParticleContours())
contours = [cnt.astype(np.uint32) for cnt in self.particleContainer.getParticleContours()]
boxSize: np.uint32 = np.uint32(round(self.boxSize))
indices = particleBoxOverlap.def_get_indices_of_overlapping_particles(contours, topLefts, boxSize)
for index in indices:
subParticles.append(self.particleContainer.getParticleOfIndex(index))
for particle in self.particleContainer.particles:
cntStart: tuple = (particle.contour[0, 0, 0], particle.contour[0, 0, 1])
sortedTopLefts = sorted(topLefts, key=distanceToCnt)
else:
boxWidthHeight: tuple = (self.boxSize, self.boxSize)
for particle in self.particleContainer.particles:
cntStart: tuple = (particle.contour[0, 0, 0], particle.contour[0, 0, 1])
sortedTopLefts = sorted(topLefts, key=distanceToCnt)
for topLeftXY in sortedTopLefts:
if helpers.box_overlaps_contour(topLeftXY, boxWidthHeight, particle.contour):
subParticles.append(particle)
for topLeftXY in sortedTopLefts:
if helpers.box_overlaps_contour(topLeftXY, boxWidthHeight, particle.contour):
subParticles.append(particle)
break
return subParticles
......@@ -263,6 +275,7 @@ class CrossBoxSubSampling(BoxSelectionSubsamplingMethod):
numBoxes: int = 2 * self.numBoxesAcross - 1
totalBoxArea: float = numBoxes * (maxBoxSize ** 2)
maxFraction: float = totalBoxArea / self.filterArea
return maxFraction
def equals(self, otherMethod) -> bool:
......@@ -386,7 +399,7 @@ class RandomBoxSampling(BoxSelectionSubsamplingMethod):
@property
def label(self) -> str:
return f'Boxes random layout ({self.numBoxes} boxes)'
return f'Boxes Random layout ({self.numBoxes} boxes)'
def equals(self, otherMethod) -> bool:
equals: bool = False
......@@ -396,57 +409,13 @@ class RandomBoxSampling(BoxSelectionSubsamplingMethod):
return equals
def get_topLeft_of_boxes(self) -> list:
#
# valid, topLefts = randoms.get_random_topLefts(self.numBoxes, self.boxSize,
# self.filterDiameter/2, self.__maxAngle,
# seed=self.randomSeed, maxTries=self.maxTries)
#
# if not valid:
# raise AttributeError
#
# topLefts: list = [[topLefts[i, 0], topLefts[i, 1]] for i in range(topLefts.shape[0])]
#
def get_random_topleft() -> list:
angle = np.random.rand() * self.__maxAngle
dist = np.random.rand() * maxDist
x: float = dist * np.cos(angle) + radius - boxSize / 2
y: float = dist * np.sin(angle) + radius - boxSize / 2
return [x, y]
np.random.seed(self.randomSeed)
topLefts: list = []
boxSize: float = self.boxSize
radius: float = self.filterDiameter / 2
maxDist: float = radius - np.sqrt((boxSize / 2) ** 2 + (boxSize / 2) ** 2)
outerCounter: int = 0
validSolutionFound: bool = False
while not validSolutionFound and outerCounter < self.maxTries:
topLefts = []
for i in range(self.numBoxes):
if i == 0:
topLefts.append(get_random_topleft())
else:
counter: int = 0
while counter < 50:
newTopLeft: list = get_random_topleft()
for topLeft2 in topLefts:
if box_overlaps_other_box(newTopLeft, topLeft2, boxSize):
break
else: # i.e., if no break occurred
topLefts.append(newTopLeft)
break
counter += 1
if len(topLefts) == self.numBoxes:
validSolutionFound = True
else:
outerCounter += 1
if not validSolutionFound:
valid, topLefts = randoms.get_random_topLefts(self.numBoxes, self.boxSize,
self.filterDiameter/2, self.__maxAngle,
seed=self.randomSeed, maxTries=self.maxTries)
if not valid:
raise AttributeError
topLefts: list = [[topLefts[i, 0], topLefts[i, 1]] for i in range(topLefts.shape[0])]
return topLefts
......@@ -456,7 +425,7 @@ class RandomQuarterBoxes(RandomBoxSampling):
@property
def label(self) -> str:
return f'Boxes random layout (quarter) ({self.numBoxes} boxes)'
return f'Boxes Random layout (quarter) ({self.numBoxes} boxes)'
def determine_max_achievable_frac(method: BoxSelectionSubsamplingMethod, numBoxes: int) -> float:
......
This diff is collapsed.
......@@ -172,3 +172,14 @@ def convert_length_to_pixels(dataset: dataset.DataSet, length: float) -> float:
pixelScale: float = (dataset.pixelscale_df if imgMode == 'df' else dataset.pixelscale_bf)
length /= pixelScale
return length
if __name__ == '__main__':
    # Quick manual check: required subsample sizes (Anger formula) for some
    # representative particle counts and MP fractions.
    counts = [2600, 14662, 9472, 16533]
    mpfracs = [4.7, 0.9, 0.5, 0.5]  # MP fraction in percent
    errorMargins = [0.2]
    sigma = 0.7
    for particleCount, mpPercent in zip(counts, mpfracs):
        for errMargin in errorMargins:
            requiredCount = get_Anger_fraction(particleCount, sigma=sigma,
                                               mpFraction=mpPercent/100, errorMargin=errMargin)
            print(particleCount, mpPercent, errMargin, requiredCount, requiredCount / particleCount)
\ No newline at end of file
......@@ -2,7 +2,7 @@ import os
import pickle
from evaluation import TotalResults
from helpers import timingDecorator
from chemometrics.chemometricMethods import TrainedSubsampling
def load_results(fname: str) -> TotalResults:
res: TotalResults = None
......@@ -17,6 +17,10 @@ def save_results(fname: str, result: TotalResults) -> None:
for sampleRes in result.sampleResults:
storedDsets[sampleRes.sampleName] = sampleRes.dataset
sampleRes.dataset = None
for subsamplingRes in sampleRes.results:
subsamplingRes.method.particleContainer = None
if type(subsamplingRes.method) == TrainedSubsampling:
subsamplingRes.method.clf = None
with open(fname, "wb") as fp:
pickle.dump(result, fp, protocol=-1)
......
......@@ -18,6 +18,15 @@ class SubsamplingMethod(object):
self.particleContainer = particleConatainer
self.fraction: float = desiredFraction
# @property
# def fraction(self) -> float:
# """
# The TrainedSubsampling, e.g., changes its fraction depending on the quality of its training.
# All "regular" methods just return the desired Fraction.
# :return:
# """
# return self.desiredFraction
@property
def label(self) -> str:
"""
......
pyqt5~=5.14.1
numpy~=1.18.1
pytest-qt~=3.3.0
opencv-python~=4.2.0.32
matplotlib~=3.1.3
scikit-learn~=0.22.1
scipy~=1.4.1
setuptools~=46.1.1
cython~=0.29.14
\ No newline at end of file
......@@ -4,35 +4,45 @@ import time
from evaluation import TotalResults, SampleResult
from input_output import get_pkls_from_directory, get_attributes_from_foldername, save_results, load_results
from graphs import get_error_vs_frac_plot
from graphs import get_error_vs_frac_plot, get_distance_hist_plots, get_error_vs_mpfrac_plot
"""
IMPORTANT!!!
SET GEPARD TO EVALUATION BRANCH (WITHOUT THE TILING STUFF), OTHERWISE SOME OF THE LEGACY CONVERTS MIGHT FAIL..
SET GEPARD TO DEVELOPMENT BRANCH (WITHOUT THE TILING STUFF), OTHERWISE SOME OF THE LEGACY CONVERTS MIGHT FAIL..
"""
if __name__ == '__main__':
# results: TotalResults = TotalResults()
# pklsInFolders = get_pkls_from_directory(r'C:\Users\xbrjos\Desktop\temp MP\NewDatasets')
#
# counter = 0
# for folder in pklsInFolders.keys():
# for samplePath in pklsInFolders[folder]:
# newSampleResult: SampleResult = results.add_sample(samplePath)
# for attr in get_attributes_from_foldername(folder):
# newSampleResult.set_attribute(attr)
# if counter < 50:
# newSampleResult: SampleResult = results.add_sample(samplePath)
# for attr in get_attributes_from_foldername(folder):
# newSampleResult.set_attribute(attr)
# counter += 1
#
# t0 = time.time()
# results.update_all()
# results.update_all(multiprocessing=True)
# print('updating all took', time.time()-t0, 'seconds')
#
# save_results('results2_without_rot.res', results)
results: TotalResults = load_results('results2.res')
# save_results('results5.res', results)
plot: Figure = get_error_vs_frac_plot(results, attributes=[[]],
methods=[['random subs', 'sizebin', '5 boxes', '15']], standarddevs=False)
# plot: Figure = get_error_vs_frac_plot(results, attributes=[['air', 'water'], ['sediment', 'soil', 'beach', 'slush']],
# methods=[['random layout (7', 'random layout (1']]*2)
# methods=[[]]*2)
# methods=[['Random Subsampling', 'Sizebin']] * 2)
# methods=[['layout (7', 'layout (10', 'layout (15', 'cross', 'random subsampling', 'sizebin']] * 2)
results: TotalResults = load_results('results4.res')
plot: Figure = get_error_vs_frac_plot(results,
# attributes=[],
# attributes=[['air'], ['water'], ['sediment', 'soil', 'beach'], ['slush']],
partCounts=[(0, 2000), (2000, 5000), (5000, 10000), (10000, 40000)],
# methods=[['random subs', 'dummy']]*2,
# methods=[['random subsampling', 'random']]*2,
methods=[['layout (10', 'crossLayout (3', 'random subsampling', 'quarter) (10']] * 4,
poissonRef=False, fill=True)
plot.show()
#
# plot2: Figure = get_distance_hist_plots(results,
# attributes=[['air'], ['water'], ['sediment', 'soil', 'beach'], ['slush']])
# plot2.show()
# plot3: Figure = get_error_vs_mpfrac_plot(results, attributes=[['air'], ['water'], ['sediment', 'soil', 'beach'], ['slush']])
# plot3.show()
\ No newline at end of file
import numpy as np
import random
import sys
sys.path.append("C://Users//xbrjos//Desktop//Python")
import gepard
from gepard.dataset import DataSet
from gepard.analysis.particleContainer import ParticleContainer
from gepard.analysis.particleAndMeasurement import Particle, Measurement
def setMaxDim(dataset: DataSet, imgSize: float, minX: float, maxX: float, minY: float, maxY: float) -> None:
......@@ -16,6 +18,7 @@ def get_default_DataSet() -> DataSet:
dset.imagedim_df = [10, 10]
dset.pixelscale_df = 1.0
setMaxDim(dset, 10, 0, 10, 0, 10)
dset.particleContainer = get_default_ParticleContainer()
return dset
......@@ -34,3 +37,39 @@ def get_default_ParticleContainer() -> ParticleContainer:
return particleContainer
def get_MP_particles(numParticles) -> list:
    """Return a list of *numParticles* dummy MP particles."""
    return [get_MP_particle() for _ in range(numParticles)]
def get_non_MP_particles(numParticles) -> list:
    """Return a list of *numParticles* dummy non-MP particles."""
    return [get_non_MP_particle() for _ in range(numParticles)]
def get_MP_particle() -> Particle:
    """Create a dummy particle carrying a microplastic polymer assignment.

    NOTE(review): random.seed(...) is called on every invocation, so each call
    picks the same polymer — presumably intended for test reproducibility;
    confirm, otherwise move the seeding to module level.
    """
    random.seed(15203018)
    polymerNames = ['Poly (methyl methacrylate',
                    'Polyethylene',
                    'Silicone rubber',
                    'PB15',
                    'PY13',
                    'PR20']
    # pick one polymer name (random.sample returns a 1-element list)
    polymName = random.sample(polymerNames, 1)[0]
    newParticle: Particle = Particle()
    newMeas = Measurement()
    newMeas.setAssignment(polymName)
    newParticle.addMeasurement(newMeas)
    return newParticle
def get_non_MP_particle() -> Particle:
    """Create a dummy particle without any polymer assignment, i.e. non-MP."""
    particle: Particle = Particle()
    particle.addMeasurement(Measurement())
    return particle
......@@ -16,11 +16,38 @@ from gepard.analysis.particleContainer import ParticleContainer
from gepard import dataset
from chemometrics import chemometricMethods as cmeth
from helpers_for_test import get_default_ParticleContainer
from helpers_for_test import get_default_ParticleContainer, get_non_MP_particles, get_MP_particles
from evaluation import SubsamplingResult
class TestParticleFeatures(unittest.TestCase):
def test_image_features(self):
    """The image feature vector of a plain-colored image has the expected length."""
    redImg: np.ndarray = np.zeros((100, 100, 3), dtype=np.uint8)
    redImg[:, :, 0] = 255  # we just have a plain red image
    featureVec: np.ndarray = cmeth.get_image_feature_vec(redImg)
    self.assertEqual(featureVec.shape[0], 6)
def test_get_mean_color_and_stdev(self):
    """Channel-wise mean and standard deviation are computed correctly."""
    testImg: np.ndarray = np.zeros((100, 100, 3), dtype=np.uint8)
    testImg[:, :, 0] = 255  # plain red image

    result = cmeth.get_mean_and_stdev(testImg)
    self.assertTrue(np.array_equal(result[0], np.array([255, 0, 0])))
    self.assertTrue(np.array_equal(result[1], np.array([0, 0, 0])))

    testImg[:, :, 1] = 255  # add full green -> yellow image
    result = cmeth.get_mean_and_stdev(testImg)
    self.assertTrue(np.array_equal(result[0], np.array([255, 255, 0])))
    self.assertTrue(np.array_equal(result[1], np.array([0, 0, 0])))

    testImg[:50, :50, 0] = 128  # darken one quarter of the red channel
    expectedMeanRed: float = np.mean([128, 255, 255, 255])
    expectedStdRed: float = np.std([128, 255, 255, 255])
    result = cmeth.get_mean_and_stdev(testImg)
    self.assertTrue(np.array_equal(result[0], np.array([expectedMeanRed, 255, 0])))
    self.assertTrue(np.array_equal(result[1], np.array([expectedStdRed, 0, 0])))
def test_get_contour_moments(self):
imgs = []
imgs: list = []
imgA: np.ndarray = np.zeros((200, 200), dtype=np.uint8)
cv2.putText(imgA, 'A', (25, 175), fontFace=cv2.FONT_HERSHEY_SIMPLEX,
fontScale=7, color=1, thickness=5)
......@@ -49,6 +76,25 @@ class TestParticleFeatures(unittest.TestCase):
diff: np.ndarray = moments[i, :] - np.mean(moments[i, :])
self.assertFalse(np.any(diff > 0.1))
def test_get_curvature_ft(self):
    """The curvature Fourier descriptor has as many entries as requested harmonics."""

    def extract_first_contour(binaryImg: np.ndarray) -> np.ndarray:
        foundContours, _hierarchy = cv2.findContours(binaryImg, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_NONE)
        return foundContours[0]

    collectedDFTs: list = []
    for elementShape in [cv2.MORPH_CROSS, cv2.MORPH_ELLIPSE, cv2.MORPH_RECT]:
        for elementSize in [50, 500]:
            border = round(elementSize / 10)
            canvas: np.ndarray = np.zeros((elementSize + 2 * border, elementSize + 2 * border))
            canvas[border:elementSize + border, border:elementSize + border] = \
                cv2.getStructuringElement(elementShape, (elementSize, elementSize))
            contour: np.ndarray = extract_first_contour(canvas.astype(np.uint8))
            for numHarmonics in [2, 5, 15]:
                descriptor: np.ndarray = cmeth.get_curvature_ft(contour, numHarmonics=numHarmonics)
                collectedDFTs.append(descriptor)
                self.assertEqual(descriptor.shape[0], numHarmonics)
def test_get_color_hash(self):
for color in ['red', 'green', 'violet', 'blue', 'Blue', 'non-determinable', None]:
for numDigits in [4, 6, 8]:
......@@ -68,6 +114,8 @@ class TestParticleFeatures(unittest.TestCase):
particleContainer: ParticleContainer = get_default_ParticleContainer()
features: np.ndarray = cmeth.get_particle_featurematrix(particleContainer)
self.assertEqual(features.shape[0], len(particleContainer.particles))
for entry in features[0, :]:
self.assertTrue(type(entry) in [float, np.float64])
class TestTrainedSubsampling(unittest.TestCase):
......@@ -79,8 +127,8 @@ class TestTrainedSubsampling(unittest.TestCase):
self.assertEqual(type(self.trainedSampling.label), str)
def test_load_classifier(self):
self.assertTrue(self.trainedSampling.clf is None)
self.assertTrue(self.trainedSampling.score is None)
# self.assertTrue(self.trainedSampling.clf is None)
# self.assertTrue(self.trainedSampling.score is None) # TODO: REIMPLEMENT
self.trainedSampling.clfPath = r'C:\Users\xbrjos\Desktop\Python\Subsampling\chemometrics\RandomForestClassifier, score 0.72.pkl'
self.trainedSampling._load_classifier()
......@@ -93,8 +141,6 @@ class TestTrainedSubsampling(unittest.TestCase):
self.assertEqual(self.trainedSampling.score, 0.7)
def test_get_measure_indices(self):
import time
t0 = time.time()
for mpFrac in [0.001, 0.01, 0.05]:
for numMPParticles in [1, 10, 100]:
numNonMP: int = int(numMPParticles * 1/mpFrac) - numMPParticles
......@@ -114,6 +160,40 @@ class TestTrainedSubsampling(unittest.TestCase):
for index in range(numMPParticles): # all MP Particles should be measured
self.assertTrue(index in indicesToMeasure)
# def test_get_theoretic_fraction(self):
# for frac in [0.1, 0.3, 0.5, 0.9, 1.0]:
# for score in [0.5, 0.7, 1.0]:
# self.trainedSampling.fraction = frac
# self.trainedSampling.score = score
#
# score: float = self.trainedSampling.score
# diff: float = 1 / self.trainedSampling.fraction - 1 # i.e., from 50 % score to 100 % score
# factor: float = 1 + (1 - score) / 0.5 * diff
# self.assertEqual(self.trainedSampling.get_theoretic_frac(), 1/factor)
# def test_make_subparticles_match_fraction(self):
# self.trainedSampling.desiredFraction = 0.5
# result: SubsamplingResult = SubsamplingResult(self.trainedSampling)
#
# allParticles: list = get_MP_particles(10) + get_non_MP_particles(990)
# subParticles: list = get_MP_particles(10) + get_non_MP_particles(490) # half of particles but ALL mp particles
# self.trainedSampling.particleContainer.particles = allParticles + subParticles
#
# self.trainedSampling.score = 1.0 # i.e., perfect prediction
# # modSubParticles: list = self.trainedSampling._make_subparticles_match_fraction(subParticles)
# result.add_result(subParticles, allParticles)
# self.assertEqual(result.mpCountError, 0)
#
# self.trainedSampling.score = 0.5 # i.e., completely random, no prediction quality
# # modSubParticles: list = self.trainedSampling._make_subparticles_match_fraction(subParticles)
# result.add_result(subParticles, allParticles)
# self.assertEqual(result.mpCountError, 100)
# class TestChemometricSubsampling(unittest.TestCase):
# def setUp(self) -> None:
......
This diff is collapsed.