Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in / Register
Toggle navigation
S
Subsampling
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Service Desk
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Operations
Operations
Incidents
Environments
Packages & Registries
Packages & Registries
Package Registry
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Josef Brandt
Subsampling
Commits
7ca2dd08
Commit
7ca2dd08
authored
Apr 24, 2020
by
Josef Brandt
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
First Stage Trained Subsampling
parent
ec57b131
Changes
9
Hide whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
272 additions
and
98 deletions
+272
-98
chemometrics/chemometricMethods.py
chemometrics/chemometricMethods.py
+52
-6
chemometrics/classification.py
chemometrics/classification.py
+8
-2
evaluation.py
evaluation.py
+33
-20
input_output.py
input_output.py
+5
-1
methods.py
methods.py
+9
-0
subsampling.py
subsampling.py
+18
-16
tests/helpers_for_test.py
tests/helpers_for_test.py
+38
-0
tests/test_chemometricMethods.py
tests/test_chemometricMethods.py
+37
-3
tests/test_evaluation.py
tests/test_evaluation.py
+72
-50
No files found.
chemometrics/chemometricMethods.py
View file @
7ca2dd08
...
...
@@ -14,7 +14,9 @@ import sys
sys
.
path
.
append
(
"C://Users//xbrjos//Desktop//Python"
)
from
gepard.analysis.particleContainer
import
ParticleContainer
from
gepard.analysis
import
particleAndMeasurement
as
pm
from
gepard.analysis
import
particleCharacterization
as
pc
from
methods
import
SubsamplingMethod
from
helpers
import
timingDecorator
...
...
@@ -95,9 +97,9 @@ def get_solidity(contour: np.ndarray) -> float:
hull
:
np
.
ndarray
=
cv2
.
convexHull
(
contour
)
hull_area
:
float
=
cv2
.
contourArea
(
hull
)
if
area
==
0
or
hull_area
==
0
:
raise
ValueError
solidity
:
float
=
area
/
hull_area
solidity
:
float
=
0
else
:
solidity
:
float
=
area
/
hull_area
return
solidity
...
...
@@ -112,10 +114,11 @@ def get_aspect_ratio(contour: np.ndarray) -> float:
if
short
>
long
:
long
,
short
=
short
,
long
if
short
==
0.0
:
raise
InvalidParticleError
aspectRatio
:
float
=
1.0
if
short
>
0.0
:
aspectRatio
=
long
/
short
return
long
/
short
return
aspectRatio
def
get_extent
(
contour
:
np
.
ndarray
)
->
float
:
...
...
@@ -163,11 +166,42 @@ class TrainedSubsampling(SubsamplingMethod):
self
.
score
:
float
=
None
self
.
clf
=
None
self
.
clfPath
:
str
=
path
self
.
fraction
=
desiredFraction
# @property
# def fraction(self) -> float:
# return self.desiredFraction/2
def equals(self, otherMethod) -> bool:
    """
    Compares this method with another subsampling method.
    The other method only counts as equal if it is exactly of type TrainedSubsampling, has the same
    fraction and the same score, and references the very same classifier object (identity check).
    :param otherMethod: the method object to compare against
    :return: True if all of the above conditions hold, otherwise False
    """
    # Conditions are chained in the original order, so the later attribute lookups are only
    # performed when the type and fraction checks have already passed.
    if type(otherMethod) == TrainedSubsampling and otherMethod.fraction == self.fraction \
            and otherMethod.score == self.score and otherMethod.clf is self.clf:
        return True
    return False
@property
def label(self) -> str:
    """
    :return: the fixed label string of this subsampling method
    """
    methodLabel: str = 'Trained Random Sampling'
    return methodLabel
def get_maximum_achievable_fraction(self) -> float:
    """
    :return: the maximum fraction this method can achieve, which is always 1.0 here
    """
    maxFraction: float = 1.0
    return maxFraction
def apply_subsampling_method(self) -> list:
    """
    Loads the classifier, lets it predict on the feature matrix of the particle container and
    keeps those particles whose index is contained in the result of _get_measure_indices.
    :return: list of the selected particles, in the order of particleContainer.particles
    """
    self._load_classifier()
    featureMatrix: np.ndarray = get_particle_featurematrix(self.particleContainer)
    # The predictions are handed over as a plain list, as in the original call.
    measureIndices: set = self._get_measure_indices(list(self.clf.predict(featureMatrix)))
    return [particle for particle in self.particleContainer.particles
            if particle.index in measureIndices]
# def _make_subparticles_match_fraction(self, subParticles: list) -> list:
# return subParticles
def
_load_classifier
(
self
)
->
None
:
assert
os
.
path
.
exists
(
self
.
clfPath
)
fname
:
str
=
self
.
clfPath
...
...
@@ -195,6 +229,18 @@ class TrainedSubsampling(SubsamplingMethod):
return
indicesToMeasure
def get_theoretic_frac(self) -> float:
    """
    Fraction that additionally takes the score of the trained model into account.
    With a score of 0.5 the result equals self.fraction, with a score of 1.0 it equals 1.0.
    It is used for extrapolating the mpCount of the subsampled particle list.
    :return: the score-corrected fraction
    """
    # Difference between the inverse fraction and 1, i.e. the span covered from 50 % to 100 % score.
    inverseFractionSpan: float = 1 / self.fraction - 1
    # Same arithmetic order as before, so the floating point result stays identical.
    scaling: float = 1 + (1 - self.score) / 0.5 * inverseFractionSpan
    return 1 / scaling
# return self.fraction
# class ChemometricSubsampling(SubsamplingMethod):
# # def __init__(self, particleContainer: ParticleContainer, desiredFraction: float):
...
...
chemometrics/classification.py
View file @
7ca2dd08
...
...
@@ -58,7 +58,7 @@ def test_classification_models(dataset: tuple) -> None:
if
__name__
==
'__main__'
:
recreateNew
:
bool
=
Tru
e
recreateNew
:
bool
=
Fals
e
if
recreateNew
:
pklsInFolders
:
dict
=
get_pkls_from_directory
(
r
'C:\Users\xbrjos\Desktop\temp MP\NewDatasets'
)
...
...
@@ -110,6 +110,12 @@ if __name__ == '__main__':
dset
:
tuple
=
pickle
.
load
(
fp
)
X
,
y
=
dset
with
open
(
r
'C:\Users\xbrjos\Desktop\Python\Subsampling\chemometrics\RandomForestClassifier, score 0.72.pkl'
,
"rb"
)
as
fp
:
clf
:
RandomForestClassifier
=
pickle
.
load
(
fp
)
y_predicted
=
clf
.
predict
(
X
)
# np.savetxt('Data.txt', X)
# np.savetxt('Assignments.txt', y)
# princComps = get_pca(X.transpose(), numComp=2)
...
...
@@ -121,4 +127,4 @@ if __name__ == '__main__':
# print(X_equalized.shape)
test_classification_models
((
X
,
y
))
#
test_classification_models((X, y))
evaluation.py
View file @
7ca2dd08
...
...
@@ -32,8 +32,8 @@ def get_methods_to_test(dataset: dataset.DataSet, fractions: list = [], maxTries
:return: list of measurement Objects that are applicable
"""
if
len
(
fractions
)
==
0
:
fractions
:
list
=
[
0.02
,
0.05
,
0.1
,
0.25
,
0.5
,
0.7
,
0.9
]
# fractions: list = [0.02, 0.1, 0.5, 0.9
]
#
fractions: list = [0.02, 0.05, 0.1, 0.25, 0.5, 0.7, 0.9]
fractions
:
list
=
[
0.1
,
0.3
,
0.5
]
methods
:
list
=
[]
particleContainer
=
dataset
.
particleContainer
...
...
@@ -46,6 +46,7 @@ def get_methods_to_test(dataset: dataset.DataSet, fractions: list = [], maxTries
methods
+=
boxCreator
.
get_spiralBoxSubsamplers_for_fraction
(
fraction
)
methods
+=
boxCreator
.
get_randomBoxSubsamplers_for_fraction
(
fraction
,
maxTries
=
maxTries
)
methods
+=
boxCreator
.
get_randomQuarterBoxSubsamplers_for_fraction
(
fraction
,
maxTries
=
maxTries
)
methods
.
append
(
cmeth
.
TrainedSubsampling
(
particleContainer
,
fraction
))
# methods.append(cmeth.ChemometricSubsampling(particleContainer, fraction))
return
methods
...
...
@@ -91,27 +92,34 @@ class TotalResults(object):
return
newResult
def update_all(self, force: bool = False, multiprocessing: bool = True) -> None:
    """
    Updates all samples with all methods and all fractions
    :param force: Whether to force an update of an already existing method.
    :param multiprocessing: Whether to spawn multiple processes for computation
    :return:
    """
    numSamples: int = len(self.sampleResults)
    if multiprocessing:
        forceList: list = [force] * numSamples
        indices: list = list(range(numSamples))
        numWorkers: int = 4  # in case of quadcore processor that seems reasonable??
        # We want to have slightly more chunks than workers. The chunksize must never drop below 1,
        # otherwise executor.map raises a ValueError (this happened for fewer than three samples).
        chunksize: int = max(1, int(round(numSamples / numWorkers * 0.7)))
        print(f'multiprocessing with {numSamples} samples and chunksize of {chunksize}')

        with concurrent.futures.ProcessPoolExecutor() as executor:
            results = executor.map(update_sample, self.sampleResults, forceList, indices, chunksize=chunksize)

            # executor.map yields the results in the order of the inputs, so the enumeration index
            # matches the position of the sample in self.sampleResults.
            for index, res in enumerate(results):
                updatedSample, processid = res
                print(f'returned from process {processid}, iteration index {index}')
                self.sampleResults[index] = updatedSample
    else:
        for index, sampleResult in enumerate(self.sampleResults):
            # Respect the caller's force flag (it was hard-coded to True before).
            updatedResult, _ = update_sample(sampleResult, force, index)
            self.sampleResults[index] = updatedResult
            print(f'done updating {updatedResult.dataset.name} at index {index}')
def
get_error_vs_fraction_data
(
self
,
attributes
:
list
=
[],
methods
:
list
=
[])
->
dict
:
"""
...
...
@@ -200,7 +208,12 @@ class SubsamplingResult(object):
:param subParticles:
:return:
"""
error
:
float
=
self
.
_get_mp_count_error
(
origParticles
,
subParticles
,
self
.
method
.
fraction
)
if
type
(
self
.
method
)
==
cmeth
.
TrainedSubsampling
:
fraction
=
self
.
method
.
get_theoretic_frac
()
else
:
fraction
=
self
.
method
.
fraction
error
:
float
=
self
.
_get_mp_count_error
(
origParticles
,
subParticles
,
fraction
)
self
.
origParticleCount
=
len
(
origParticles
)
self
.
mpCountErrors
.
append
(
error
)
...
...
input_output.py
View file @
7ca2dd08
...
...
@@ -2,7 +2,7 @@ import os
import
pickle
from
evaluation
import
TotalResults
from
helpers
import
timingDecorator
from
chemometrics.chemometricMethods
import
TrainedSubsampling
def
load_results
(
fname
:
str
)
->
TotalResults
:
res
:
TotalResults
=
None
...
...
@@ -17,6 +17,10 @@ def save_results(fname: str, result: TotalResults) -> None:
for
sampleRes
in
result
.
sampleResults
:
storedDsets
[
sampleRes
.
sampleName
]
=
sampleRes
.
dataset
sampleRes
.
dataset
=
None
for
subsamplingRes
in
sampleRes
.
results
:
subsamplingRes
.
method
.
particleContainer
=
None
if
type
(
subsamplingRes
.
method
)
==
TrainedSubsampling
:
subsamplingRes
.
method
.
clf
=
None
with
open
(
fname
,
"wb"
)
as
fp
:
pickle
.
dump
(
result
,
fp
,
protocol
=-
1
)
...
...
methods.py
View file @
7ca2dd08
...
...
@@ -18,6 +18,15 @@ class SubsamplingMethod(object):
self
.
particleContainer
=
particleConatainer
self
.
fraction
:
float
=
desiredFraction
# @property
# def fraction(self) -> float:
# """
# The TrainedSubsampling, e.g., changes its fraction depending on the quality of its training.
# All "regular" methods just return the desired Fraction.
# :return:
# """
# return self.desiredFraction
@
property
def
label
(
self
)
->
str
:
"""
...
...
subsampling.py
View file @
7ca2dd08
...
...
@@ -12,24 +12,26 @@ SET GEPARD TO EVALUATION BRANCH (WITHOUT THE TILING STUFF), OTHERWISE SOME OF TH
"""
if
__name__
==
'__main__'
:
# results: TotalResults = TotalResults()
# pklsInFolders = get_pkls_from_directory(r'C:\Users\xbrjos\Desktop\temp MP\NewDatasets')
#
# for folder in pklsInFolders.keys():
# for samplePath in pklsInFolders[folder]:
# newSampleResult: SampleResult = results.add_sample(samplePath)
# for attr in get_attributes_from_foldername(folder):
# newSampleResult.set_attribute(attr)
#
# t0 = time.time()
# results.update_all()
# print('updating all took', time.time()-t0, 'seconds')
#
# save_results('results2_without_rot.res', results)
results
:
TotalResults
=
load_results
(
'results2.res'
)
results
:
TotalResults
=
TotalResults
()
pklsInFolders
=
get_pkls_from_directory
(
r
'C:\Users\xbrjos\Desktop\temp MP\NewDatasets'
)
counter
=
0
for
folder
in
pklsInFolders
.
keys
():
for
samplePath
in
pklsInFolders
[
folder
]:
if
counter
<
10
:
newSampleResult
:
SampleResult
=
results
.
add_sample
(
samplePath
)
for
attr
in
get_attributes_from_foldername
(
folder
):
newSampleResult
.
set_attribute
(
attr
)
counter
+=
1
t0
=
time
.
time
()
results
.
update_all
(
multiprocessing
=
False
)
print
(
'updating all took'
,
time
.
time
()
-
t0
,
'seconds'
)
save_results
(
'results_test.res'
,
results
)
# results: TotalResults = load_results('results2.res')
plot
:
Figure
=
get_error_vs_frac_plot
(
results
,
attributes
=
[[]],
methods
=
[[
'random
subs'
,
'sizebin'
,
'5 boxes'
,
'15'
]],
standarddevs
=
Fals
e
)
methods
=
[[
'random
'
,
'trained'
]],
standarddevs
=
Tru
e
)
# plot: Figure = get_error_vs_frac_plot(results, attributes=[['air', 'water'], ['sediment', 'soil', 'beach', 'slush']],
# methods=[['random layout (7', 'random layout (1']]*2)
# methods=[[]]*2)
...
...
tests/helpers_for_test.py
View file @
7ca2dd08
import
numpy
as
np
import
random
import
sys
sys
.
path
.
append
(
"C://Users//xbrjos//Desktop//Python"
)
import
gepard
from
gepard.dataset
import
DataSet
from
gepard.analysis.particleContainer
import
ParticleContainer
from
gepard.analysis.particleAndMeasurement
import
Particle
,
Measurement
def
setMaxDim
(
dataset
:
DataSet
,
imgSize
:
float
,
minX
:
float
,
maxX
:
float
,
minY
:
float
,
maxY
:
float
)
->
None
:
...
...
@@ -34,3 +36,39 @@ def get_default_ParticleContainer() -> ParticleContainer:
return
particleContainer
def get_MP_particles(numParticles) -> list:
    """
    :param numParticles: number of particles to create
    :return: list of numParticles particles, each one created by get_MP_particle
    """
    return [get_MP_particle() for _ in range(numParticles)]
def get_non_MP_particles(numParticles) -> list:
    """
    :param numParticles: number of particles to create
    :return: list of numParticles particles, each one created by get_non_MP_particle
    """
    return [get_non_MP_particle() for _ in range(numParticles)]
def get_MP_particle() -> Particle:
    """
    Creates a particle with a single measurement whose assignment is one of the listed names.
    The random module is re-seeded with a fixed seed on every call, so the drawn name is the same
    for each created particle.
    :return: the new particle
    """
    candidateNames = ['Poly (methyl methacrylate',
                      'Polyethylene',
                      'Silicone rubber',
                      'PB15',
                      'PY13',
                      'PR20']
    random.seed(15203018)
    drawnName = random.sample(candidateNames, 1)[0]

    mpParticle: Particle = Particle()
    measurement = Measurement()
    measurement.setAssignment(drawnName)
    mpParticle.addMeasurement(measurement)
    return mpParticle
def get_non_MP_particle() -> Particle:
    """
    Creates a particle with a single default-constructed measurement (no assignment is set here).
    :return: the new particle
    """
    particle: Particle = Particle()
    defaultMeasurement = Measurement()
    particle.addMeasurement(defaultMeasurement)
    return particle
tests/test_chemometricMethods.py
View file @
7ca2dd08
...
...
@@ -16,7 +16,9 @@ from gepard.analysis.particleContainer import ParticleContainer
from
gepard
import
dataset
from
chemometrics
import
chemometricMethods
as
cmeth
from
helpers_for_test
import
get_default_ParticleContainer
from
helpers_for_test
import
get_default_ParticleContainer
,
get_non_MP_particles
,
get_MP_particles
from
evaluation
import
SubsamplingResult
class
TestParticleFeatures
(
unittest
.
TestCase
):
def
test_get_contour_moments
(
self
):
...
...
@@ -93,8 +95,6 @@ class TestTrainedSubsampling(unittest.TestCase):
self
.
assertEqual
(
self
.
trainedSampling
.
score
,
0.7
)
def
test_get_measure_indices
(
self
):
import
time
t0
=
time
.
time
()
for
mpFrac
in
[
0.001
,
0.01
,
0.05
]:
for
numMPParticles
in
[
1
,
10
,
100
]:
numNonMP
:
int
=
int
(
numMPParticles
*
1
/
mpFrac
)
-
numMPParticles
...
...
@@ -114,6 +114,40 @@ class TestTrainedSubsampling(unittest.TestCase):
for
index
in
range
(
numMPParticles
):
# all MP Particles should be measured
self
.
assertTrue
(
index
in
indicesToMeasure
)
def test_get_theoretic_fraction(self):
    """
    Sets several fraction/score combinations on the trained sampling object and compares
    get_theoretic_frac with the value obtained from the same formula computed here.
    """
    sampler = self.trainedSampling
    for frac in [0.1, 0.3, 0.5, 0.9, 1.0]:
        for score in [0.5, 0.7, 1.0]:
            sampler.fraction = frac
            sampler.score = score

            # Values are read back from the object, as the method under test does.
            currentScore: float = sampler.score
            diff: float = 1 / sampler.fraction - 1  # i.e., from 50 % score to 100 % score
            factor: float = 1 + (1 - currentScore) / 0.5 * diff
            expectedFraction: float = 1 / factor
            self.assertEqual(sampler.get_theoretic_frac(), expectedFraction)
# def test_make_subparticles_match_fraction(self):
# self.trainedSampling.desiredFraction = 0.5
# result: SubsamplingResult = SubsamplingResult(self.trainedSampling)
#
# allParticles: list = get_MP_particles(10) + get_non_MP_particles(990)
# subParticles: list = get_MP_particles(10) + get_non_MP_particles(490) # half of particles but ALL mp particles
# self.trainedSampling.particleContainer.particles = allParticles + subParticles
#
# self.trainedSampling.score = 1.0 # i.e., perfect prediction
# # modSubParticles: list = self.trainedSampling._make_subparticles_match_fraction(subParticles)
# result.add_result(subParticles, allParticles)
# self.assertEqual(result.mpCountError, 0)
#
# self.trainedSampling.score = 0.5 # i.e., completely random, no prediction quality
# # modSubParticles: list = self.trainedSampling._make_subparticles_match_fraction(subParticles)
# result.add_result(subParticles, allParticles)
# self.assertEqual(result.mpCountError, 100)
# class TestChemometricSubsampling(unittest.TestCase):
# def setUp(self) -> None:
...
...
tests/test_evaluation.py
View file @
7ca2dd08
...
...
@@ -7,18 +7,16 @@ Created on Wed Jan 22 13:58:25 2020
"""
import
unittest
import
random
import
numpy
as
np
import
sys
sys
.
path
.
append
(
"C://Users//xbrjos//Desktop//Python"
)
import
gepard
from
gepard.analysis.particleContainer
import
ParticleContainer
from
gepard.analysis.particleAndMeasurement
import
Particle
,
Measurement
from
evaluation
import
TotalResults
,
SampleResult
,
SubsamplingResult
,
get_methods_to_test
import
methods
as
meth
import
geometricMethods
as
gmeth
from
helpers_for_test
import
get_default_ParticleContainer
,
get_default_DataSet
from
chemometrics.chemometricMethods
import
TrainedSubsampling
from
helpers_for_test
import
get_default_ParticleContainer
,
get_default_DataSet
,
get_MP_particles
,
get_non_MP_particles
,
get_MP_particle
,
get_non_MP_particle
class
TestTotalResults
(
unittest
.
TestCase
):
...
...
@@ -260,7 +258,7 @@ class TestSampleResult(unittest.TestCase):
possibleSpiralBoxMethods
=
3
possibleRandomBoxMethods
=
3
possibleQuarterRandomBoxMethods
=
3
possibleChemometricMethods
=
0
possibleChemometricMethods
=
1
totalPossible
=
possibleCrossBoxMethods
+
possibleRandomMethods
+
\
possibleSpiralBoxMethods
+
possibleChemometricMethods
+
\
possibleRandomBoxMethods
+
possibleQuarterRandomBoxMethods
...
...
@@ -279,7 +277,7 @@ class TestSampleResult(unittest.TestCase):
possibleSpiralBoxMethods
=
0
possibleRandomBoxMethods
=
0
possibleQuarterRandomBoxMethods
=
0
possibleChemometricMethods
=
0
possibleChemometricMethods
=
1
totalPossible
=
possibleCrossBoxMethods
+
possibleRandomMethods
+
\
possibleSpiralBoxMethods
+
possibleChemometricMethods
+
\
possibleRandomBoxMethods
+
possibleQuarterRandomBoxMethods
...
...
@@ -296,7 +294,7 @@ class TestSampleResult(unittest.TestCase):
possibleSpiralBoxMethods
=
0
possibleRandomBoxMethods
=
0
possibleQuarterRandomBoxMethods
=
0
possibleChemometricMethods
=
0
possibleChemometricMethods
=
1
totalPossible
=
possibleCrossBoxMethods
+
possibleRandomMethods
+
\
possibleSpiralBoxMethods
+
possibleChemometricMethods
+
\
possibleRandomBoxMethods
+
possibleQuarterRandomBoxMethods
...
...
@@ -311,7 +309,7 @@ class TestSampleResult(unittest.TestCase):
possibleRandomMethods
=
4
possibleCrossBoxMethods
=
3
possibleSpiralBoxMethods
=
3
possibleChemometricMethods
=
0
possibleChemometricMethods
=
2
possibleRandomBoxMethods
=
3
possibleQuarterRandomBoxMethods
=
3
totalPossible
=
possibleCrossBoxMethods
+
possibleRandomMethods
+
\
...
...
@@ -372,8 +370,8 @@ class TestSubsamplingResult(unittest.TestCase):
def
test_add_result
(
self
):
self
.
assertEqual
(
len
(
self
.
subsamplingResult
.
mpCountErrors
),
0
)
origParticles
:
list
=
self
.
_
get_MP_particles
(
100
)
subParticles
:
list
=
self
.
_
get_MP_particles
(
15
)
# at fraction of 0.1, 10 particles would be expected
origParticles
:
list
=
get_MP_particles
(
100
)
subParticles
:
list
=
get_MP_particles
(
15
)
# at fraction of 0.1, 10 particles would be expected
self
.
subsamplingResult
.
add_result
(
origParticles
,
subParticles
)
self
.
assertEqual
(
len
(
self
.
subsamplingResult
.
mpCountErrors
),
1
)
self
.
assertEqual
(
self
.
subsamplingResult
.
mpCountErrors
[
0
],
50
)
...
...
@@ -381,7 +379,7 @@ class TestSubsamplingResult(unittest.TestCase):
self
.
assertEqual
(
self
.
subsamplingResult
.
estimMPCounts
,
[
150
])
self
.
assertAlmostEqual
(
self
.
subsamplingResult
.
estimMPCount
,
150
)
subParticles
=
self
.
_
get_MP_particles
(
10
)
# at fraction of 0.1, 10 particles would be expected
subParticles
=
get_MP_particles
(
10
)
# at fraction of 0.1, 10 particles would be expected
self
.
subsamplingResult
.
add_result
(
origParticles
,
subParticles
)
self
.
assertEqual
(
len
(
self
.
subsamplingResult
.
mpCountErrors
),
2
)
self
.
assertEqual
(
self
.
subsamplingResult
.
mpCountErrors
[
0
],
50
)
...
...
@@ -390,6 +388,20 @@ class TestSubsamplingResult(unittest.TestCase):
self
.
assertEqual
(
self
.
subsamplingResult
.
estimMPCounts
,
[
150
,
100
])
self
.
assertEqual
(
self
.
subsamplingResult
.
estimMPCount
,
125
)
self
.
subsamplingResult
.
mpCountErrors
=
[]
self
.
subsamplingResult
.
estimMPCounts
=
[]
# TODO: REIMPLEMENT, WHEN trained sampling IS WORKING
# trainedSampling: TrainedSubsampling = TrainedSubsampling(get_default_ParticleContainer, 0.1)
# trainedSampling.score = 0.5
# self.subsamplingResult.method = trainedSampling
# self.subsamplingResult.add_result(origParticles, subParticles)
# self.assertEqual(self.subsamplingResult.mpCountErrors[0], 0)
#
# trainedSampling.score = 1.0
# self.subsamplingResult.add_result(origParticles, subParticles)
# self.assertEqual(self.subsamplingResult.mpCountErrors[1], 50)
def
test_reset_results
(
self
):
self
.
subsamplingResult
.
mpCountErrors
=
[
10
,
30
,
20
]
self
.
subsamplingResult
.
estimMPCounts
=
[
2
,
5
,
3
]
...
...
@@ -403,12 +415,12 @@ class TestSubsamplingResult(unittest.TestCase):
subParticles
=
[]
for
particleSize
in
particleSizes
:
for
_
in
range
(
numParticlesPerSizeFull
):
mpParticle
=
self
.
_
get_MP_particle
()
mpParticle
=
get_MP_particle
()
mpParticle
.
longSize
=
mpParticle
.
shortSize
=
particleSize
allParticles
.
append
(
mpParticle
)
for
_
in
range
(
numParticlesPerSizeSub
):
mpParticle
=
self
.
_
get_MP_particle
()
mpParticle
=
get_MP_particle
()
mpParticle
.
longSize
=
mpParticle
.
shortSize
=
particleSize
subParticles
.
append
(
mpParticle
)
...
...
@@ -437,10 +449,10 @@ class TestSubsamplingResult(unittest.TestCase):
self
.
assertEqual
(
binError
,
0
)
def
test_get_number_of_MP_particles
(
self
):
mpParticles
=
self
.
_
get_MP_particles
(
5
)
mpParticles
=
get_MP_particles
(
5
)
numMPParticles
=
len
(
mpParticles
)
nonMPparticles
=
self
.
_
get_non_MP_particles
(
50
)
nonMPparticles
=
get_non_MP_particles
(
50
)
allParticles
=
mpParticles
+
nonMPparticles
...
...
@@ -448,16 +460,16 @@ class TestSubsamplingResult(unittest.TestCase):
self
.
assertEqual
(
numMPParticles
,
calculatedNumMPParticles
)
def
test_get_mp_count_error
(
self
):
mpParticles1
=
self
.
_
get_MP_particles
(
20
)
nonMPparticles1
=
self
.
_
get_non_MP_particles
(
20
)
mpParticles1
=
get_MP_particles
(
20
)
nonMPparticles1
=
get_non_MP_particles
(
20
)
origParticles
=
mpParticles1
+
nonMPparticles1
mpParticles2
=
self
.
_
get_MP_particles
(
30
)
mpParticles2
=
get_MP_particles
(
30
)
estimateParticles
=
mpParticles2
+
nonMPparticles1
mpCountError
=
self
.
subsamplingResult
.
_get_mp_count_error
(
origParticles
,
estimateParticles
,
1.0
)
self
.
assertEqual
(
mpCountError
,
50
)
mpParticles2
=
self
.
_
get_MP_particles
(
20
)
mpParticles2
=
get_MP_particles
(
20
)
estimateParticles
=
mpParticles2
+
nonMPparticles1
mpCountError
=
self
.
subsamplingResult
.
_get_mp_count_error
(
origParticles
,
estimateParticles
,
1.0
)
self
.
assertEqual
(
mpCountError
,
0
)
...
...
@@ -492,35 +504,45 @@ class TestSubsamplingResult(unittest.TestCase):
exact
,
estimate
=
100
,
150
error
=
self
.
subsamplingResult
.
_get_error_from_values
(
exact
,
estimate
)
self
.
assertEqual
(
error
,
50
)
exact
,
estimate
=
100
,
200
error
=
self
.
subsamplingResult
.
_get_error_from_values
(
exact
,
estimate
)
self
.
assertEqual
(
error
,
100
)
exact
,
estimate
=
50
,
100
error
=
self
.
subsamplingResult
.
_get_error_from_values
(
exact
,
estimate
)
self
.
assertEqual
(
error
,
100
)
def
_get_MP_particles
(
self
,
numParticles
)
->
list
:
mpParticles
=
[]
for
_
in
range
(
numParticles
):
mpParticles
.
append
(
self
.
_get_MP_particle
())
return
mpParticles
def
_get_non_MP_particles
(
self
,
numParticles
)
->
list
:
nonMPParticles
=
[]
for
_
in
range
(
numParticles
):
nonMPParticles
.
append
(
self
.
_get_non_MP_particle
())
return
nonMPParticles
def
_get_MP_particle
(
self
)
->
Particle
:
random
.
seed
(
15203018
)
polymerNames
=
[
'Poly (methyl methacrylate'
,
'Polyethylene'
,
'Silicone rubber'
,
'PB15'
,
'PY13'
,
'PR20'
]
polymName
=
random
.
sample
(
polymerNames
,
1
)[
0
]
newParticle
:
Particle
=
Particle
()
newMeas
=
Measurement
()
newMeas
.
setAssignment
(
polymName
)
newParticle
.
addMeasurement
(
newMeas
)
return
newParticle
def
_get_non_MP_particle
(
self
)
->
Particle
:
newParticle
:
Particle
=
Particle
()
newParticle
.
addMeasurement
(
Measurement
())
return
newParticle
#
def _get_MP_particles(self, numParticles) -> list:
#
mpParticles = []
#
for _ in range(numParticles):
#
mpParticles.append(self._get_MP_particle())
#
return mpParticles
#
#
def _get_non_MP_particles(self, numParticles) -> list:
#
nonMPParticles = []
#
for _ in range(numParticles):
#
nonMPParticles.append(self._get_non_MP_particle())
#
return nonMPParticles
#
#
def _get_MP_particle(self) -> Particle:
#
random.seed(15203018)
#
polymerNames = ['Poly (methyl methacrylate',
#
'Polyethylene',
#
'Silicone rubber',
#
'PB15',
#
'PY13',
#
'PR20']
#
polymName = random.sample(polymerNames, 1)[0]
#
newParticle: Particle = Particle()
#
newMeas = Measurement()
#
newMeas.setAssignment(polymName)