Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
What's new
7
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in / Register
Toggle navigation
Open sidebar
Josef Brandt
Subsampling
Commits
c509aa9c
Commit
c509aa9c
authored
Mar 16, 2020
by
Josef Brandt
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Filter results for sample attributes and/or methods
parent
5e54cb68
Changes
6
Show whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
155 additions
and
35 deletions
+155
-35
.gitignore
.gitignore
+2
-0
evaluation.py
evaluation.py
+30
-14
methods.py
methods.py
+25
-1
subsampling.py
subsampling.py
+20
-16
tests/test_evaluation.py
tests/test_evaluation.py
+35
-3
tests/test_methods.py
tests/test_methods.py
+43
-1
No files found.
.gitignore
View file @
c509aa9c
...
...
@@ -4,3 +4,5 @@
__pycache__/
*.png
*.res
evaluation.py
View file @
c509aa9c
...
...
@@ -9,6 +9,7 @@ import pickle
import
sys
import
os
import
numpy
as
np
import
matplotlib.pyplot
as
plt
from
helpers
import
ParticleBinSorter
import
methods
as
meth
...
...
@@ -60,17 +61,23 @@ class TotalResults(object):
sample
.
update_result_with_method
(
curMethod
)
print
(
f
'processed
{
index
+
1
}
of
{
len
(
self
.
sampleResults
)
}
samples'
)
def
get_error_vs_fraction_data
(
self
)
->
dict
:
def
get_error_vs_fraction_data
(
self
,
attributes
:
list
=
[],
methods
:
list
=
[]
)
->
dict
:
"""
Returns Dict: Key: Method Label, Value: (Dict: Key:Measured Fraction, Value: averaged MPCountError over all samples)
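:param attributes: A list of attributes that should be used for filtering the samples. Only samples with an
attribute from within that list are considered. An empty list disables the attribute filter.
:param methods: A list of patterns that should be used for filtering the methods. Only results whose method
matches at least one of the patterns are considered. An empty list disables the method filter.
:return: The nested dictionary described above.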
"""
result
:
dict
=
{}
for
sample
in
self
.
sampleResults
:
sample
:
SampleResult
=
sample
if
attributes
==
[]
or
sample
.
has_any_attribute
(
attributes
):
for
res
in
sample
.
results
:
res
:
SubsamplingResult
=
res
label
:
str
=
res
.
method
.
label
frac
:
float
=
res
.
method
.
fraction
method
:
meth
.
SubsamplingMethod
=
res
.
method
if
methods
==
[]
or
method
.
matches_any_pattern
(
methods
):
label
:
str
=
method
.
label
frac
:
float
=
method
.
fraction
error
:
float
=
res
.
mpCountError
if
label
not
in
result
.
keys
():
...
...
@@ -151,8 +158,17 @@ class SampleResult(object):
self
.
attributes
.
append
(
newAttribute
)
print
(
f
'sample
{
self
.
filepath
}
has now attribute
{
newAttribute
}
'
)
def has_any_attribute(self, listOfAttributes: list) -> bool:
    """
    Tests, whether the sample has at least one of the given attributes.
    :param listOfAttributes: The attribute names to look for; each one is checked with has_attribute.
    :return: True if at least one attribute is present, False otherwise (also for an empty list).
    """
    # any() stops at the first hit, just like the former loop with break.
    return any(self.has_attribute(attr) for attr in listOfAttributes)
def has_attribute(self, attribute: str) -> bool:
    """
    Tests, whether the sample has the given attribute. The comparison is case insensitive.
    :param attribute: The attribute name to look for.
    :return: True if self.attributes contains the attribute (ignoring case), False otherwise.
    """
    # Both sides are lowered, so 'soil', 'Soil' and 'SOIL' are treated as the same attribute.
    wanted: str = attribute.lower()
    return any(wanted == present.lower() for present in self.attributes)
def
_remove_result_of_method
(
self
,
method
:
meth
.
SubsamplingMethod
)
->
None
:
"""
...
...
methods.py
View file @
c509aa9c
...
...
@@ -41,6 +41,30 @@ class SubsamplingMethod(object):
"""
raise
NotImplementedError
def matches_any_pattern(self, patternList: list) -> bool:
    """
    Tests, whether at least one of the given patterns matches this method.
    :param patternList: The pattern strings to test; each one is checked with matches_pattern.
    :return: True if at least one pattern matches, False otherwise (also for an empty list).
    """
    # any() stops at the first matching pattern, just like the former loop with break.
    return any(self.matches_pattern(pattern) for pattern in patternList)
def matches_pattern(self, pattern: str) -> bool:
    """
    Tests, whether the method matches a given pattern, i.e., whether the pattern occurs in the method's label.
    The comparison is case insensitive. Strings of at least 4 characters are required!
    :param pattern: The string to test against
    :return matchesThePattern: True if the pattern is accepted and found in self.label, False otherwise.
    """
    needle: str = pattern.lower()
    # Patterns of 3 or fewer characters never match. 'layout' is rejected as well; it is compared in
    # lower case so that 'Layout' or 'LAYOUT' cannot bypass the exclusion while the search itself ignores case.
    if len(needle) <= 3 or needle == 'layout':
        return False
    return needle in self.label.lower()
class
RandomSampling
(
SubsamplingMethod
):
@
property
...
...
@@ -67,7 +91,7 @@ class SizeBinFractioning(SubsamplingMethod):
@property
def label(self) -> str:
    """
    The human readable name of this subsampling method.
    :return: The label string.
    """
    # The label intentionally does not contain the word 'Random', so that the pattern 'random'
    # does not select this method as well.
    return 'SizeBin Subsampling'
def
apply_subsampling_method
(
self
)
->
list
:
subParticlesPerBin
:
list
=
self
.
_get_subParticles_per_bin
(
self
.
particleContainer
.
particles
)
...
...
subsampling.py
View file @
c509aa9c
...
...
@@ -2,7 +2,7 @@ import matplotlib.pyplot as plt
import
time
from
evaluation
import
TotalResults
,
SampleResult
from
input_output
import
get_pkls_from_directory
,
get_attributes_from_foldername
from
input_output
import
get_pkls_from_directory
,
get_attributes_from_foldername
,
save_results
,
load_results
"""
IMPORTANT!!!
...
...
@@ -10,29 +10,33 @@ SET GEPARD TO EVALUATION BRANCH (WITHOUT THE TILING STUFF), OTHERWISE SOME OF TH
"""
results
:
TotalResults
=
TotalResults
()
pklsInFolders
=
get_pkls_from_directory
(
r
'C:\Users\xbrjos\Desktop\temp MP\NewDatasets'
)
for
folder
in
pklsInFolders
.
keys
():
for
samplePath
in
pklsInFolders
[
folder
]:
newSampleResult
:
SampleResult
=
results
.
add_sample
(
samplePath
)
for
attr
in
get_attributes_from_foldername
(
folder
):
newSampleResult
.
set_attribute
(
attr
)
t0
=
time
.
time
()
results
.
update_all
()
print
(
'updating all took'
,
time
.
time
()
-
t0
,
'seconds'
)
errorPerFraction
:
dict
=
results
.
get_error_vs_fraction_data
()
# results: TotalResults = TotalResults()
# pklsInFolders = get_pkls_from_directory(r'C:\Users\xbrjos\Desktop\temp MP\NewDatasets')
#
# for folder in pklsInFolders.keys():
# for samplePath in pklsInFolders[folder]:
# newSampleResult: SampleResult = results.add_sample(samplePath)
# for attr in get_attributes_from_foldername(folder):
# newSampleResult.set_attribute(attr)
#
# t0 = time.time()
# results.update_all()
# print('updating all took', time.time()-t0, 'seconds')
#
# save_results('results1.res', results)
# Load the previously saved results instead of re-evaluating all samples.
results: TotalResults = load_results('results1.res')

# Only consider methods whose label matches one of the given patterns.
errorPerFraction: dict = results.get_error_vs_fraction_data(methods=['spiral', 'cross'])

plt.clf()
# One curve per method label: measured fraction on x, error on y.
for methodLabel, errorByFraction in errorPerFraction.items():
    fractions: list = list(errorByFraction.keys())
    errors: list = list(errorByFraction.values())
    plt.plot(fractions, errors, label=methodLabel)

plt.title('Spiral or Box Layouts')
plt.xscale('log')
plt.xlabel('measured fraction')
plt.ylabel('mpCountError')
plt.legend()
plt.show()

print('done')
\ No newline at end of file
tests/test_evaluation.py
View file @
c509aa9c
...
...
@@ -95,7 +95,9 @@ class TestTotalResults(unittest.TestCase):
def
test_get_error_vs_fraction_data
(
self
):
firstSample
:
SampleResult
=
self
.
totalResults
.
add_sample
(
'sample1.pkl'
)
firstSample
.
set_attribute
(
'to be used'
)
secondSample
:
SampleResult
=
self
.
totalResults
.
add_sample
(
'sample2.pkl'
)
secondSample
.
set_attribute
(
'not to be used'
)
firstMethod
:
meth
.
RandomSampling
=
meth
.
RandomSampling
(
None
,
0.1
)
firstResult
:
SubsamplingResult
=
SubsamplingResult
(
firstMethod
)
...
...
@@ -141,9 +143,30 @@ class TestTotalResults(unittest.TestCase):
self
.
assertEqual
(
list
(
res
.
keys
()),
[
0.1
,
0.2
])
self
.
assertAlmostEqual
(
res
[
0.1
],
0.6
)
# i.e., mean([0.4, 0.8])
self
.
assertAlmostEqual
(
res
[
0.2
],
0.5
)
# if i == 3:
# self.assertEqual(list(res.keys()), [0.1, 0.2])
# self.assertAlmostEqual(res[0.1], )
filteredResultDict
:
dict
=
self
.
totalResults
.
get_error_vs_fraction_data
(
attributes
=
[
'to be used'
])
self
.
assertEqual
(
list
(
filteredResultDict
.
keys
()),
[
firstMethod
.
label
,
secondMethod
.
label
,
thirdMethod
.
label
])
for
i
in
range
(
3
):
res
:
dict
=
list
(
filteredResultDict
.
values
())[
i
]
if
i
==
0
:
self
.
assertEqual
(
list
(
res
.
keys
()),
[
0.1
])
self
.
assertAlmostEqual
(
res
[
0.1
],
0.8
)
if
i
==
1
:
self
.
assertEqual
(
list
(
res
.
keys
()),
[
0.1
])
self
.
assertAlmostEqual
(
res
[
0.1
],
0.6
)
if
i
==
2
:
self
.
assertEqual
(
list
(
res
.
keys
()),
[
0.1
,
0.2
])
self
.
assertAlmostEqual
(
res
[
0.1
],
0.4
)
# only the result from the first sample is used, as filtered..
self
.
assertAlmostEqual
(
res
[
0.2
],
0.5
)
filteredResultDict
:
dict
=
self
.
totalResults
.
get_error_vs_fraction_data
(
methods
=
[
'cross'
])
self
.
assertEqual
(
list
(
filteredResultDict
.
keys
()),
[
secondMethod
.
label
,
thirdMethod
.
label
])
filteredResultDict
:
dict
=
self
.
totalResults
.
get_error_vs_fraction_data
(
methods
=
[
'Cross'
])
self
.
assertEqual
(
list
(
filteredResultDict
.
keys
()),
[
secondMethod
.
label
,
thirdMethod
.
label
])
filteredResultDict
:
dict
=
self
.
totalResults
.
get_error_vs_fraction_data
(
methods
=
[
'random'
])
self
.
assertEqual
(
list
(
filteredResultDict
.
keys
()),
[
firstMethod
.
label
])
class
TestSampleResult
(
unittest
.
TestCase
):
...
...
@@ -212,6 +235,8 @@ class TestSampleResult(unittest.TestCase):
def
test_attributes
(
self
):
self
.
sampleResult
.
set_attribute
(
'soil'
)
self
.
assertTrue
(
self
.
sampleResult
.
has_attribute
(
'soil'
))
self
.
assertTrue
(
self
.
sampleResult
.
has_attribute
(
'Soil'
))
# we want to be case insensitive
self
.
assertTrue
(
self
.
sampleResult
.
has_attribute
(
'SOIL'
))
self
.
sampleResult
.
set_attribute
(
'soil'
)
# the attribute is already there and shall not be added again
self
.
assertEqual
(
len
(
self
.
sampleResult
.
attributes
),
1
)
...
...
@@ -220,6 +245,13 @@ class TestSampleResult(unittest.TestCase):
self
.
assertEqual
(
len
(
self
.
sampleResult
.
attributes
),
2
)
self
.
assertTrue
(
self
.
sampleResult
.
has_attribute
(
'10µmFilter'
))
self
.
assertTrue
(
self
.
sampleResult
.
has_any_attribute
([
'soil'
,
'water'
]))
self
.
assertTrue
(
self
.
sampleResult
.
has_any_attribute
([
'soil'
,
'water'
,
'10µmFilter'
]))
self
.
assertTrue
(
self
.
sampleResult
.
has_any_attribute
([
'water'
,
'10µmFilter'
]))
self
.
assertFalse
(
self
.
sampleResult
.
has_any_attribute
([
'water'
,
'sediment'
]))
self
.
assertFalse
(
self
.
sampleResult
.
has_any_attribute
([
'beach'
]))
class
TestSubsamplingResult
(
unittest
.
TestCase
):
def
setUp
(
self
):
self
.
subsamplingResult
:
SubsamplingResult
=
SubsamplingResult
(
meth
.
RandomSampling
(
None
,
0.1
))
...
...
tests/test_methods.py
View file @
c509aa9c
...
...
@@ -67,7 +67,7 @@ class TestSizeBinFractioning(unittest.TestCase):
class
TestMethodEquality
(
unittest
.
TestCase
):
def
test_methodEquality
(
self
):
def
test_methodEquality
_and_patterns
(
self
):
method1_1
:
RandomSampling
=
RandomSampling
(
None
,
0.1
)
method1_2
:
RandomSampling
=
RandomSampling
(
None
,
0.2
)
...
...
@@ -103,3 +103,45 @@ class TestMethodEquality(unittest.TestCase):
self
.
assertTrue
(
method1
.
equals
(
method2
))
else
:
self
.
assertFalse
(
method1
.
equals
(
method2
))
randomPatterns
:
list
=
[
'random'
,
'ranDOm'
]
sizeBinPatterns
:
list
=
[
'size'
,
'Size'
,
'sizeBin'
]
crossBoxPatterns
:
list
=
[
'cross'
,
'crossLayout'
]
spiralBoxPatterns
:
list
=
[
'spiral'
,
'spiralLayout'
]
antiPatterns
:
list
=
[
'bin'
,
'box'
,
'crossBox'
,
'layout'
]
# pattern 'layout' is ambiguous...
for
randomMeth
in
[
method1_1
,
method1_2
]:
for
pos
in
randomPatterns
:
self
.
assertTrue
(
randomMeth
.
matches_pattern
(
pos
))
negPatterns
=
sizeBinPatterns
+
crossBoxPatterns
+
spiralBoxPatterns
+
antiPatterns
for
neg
in
negPatterns
:
self
.
assertFalse
(
randomMeth
.
matches_pattern
(
neg
))
self
.
assertTrue
(
randomMeth
.
matches_any_pattern
(
randomPatterns
+
negPatterns
))
self
.
assertFalse
(
randomMeth
.
matches_any_pattern
(
negPatterns
))
for
sizeBinMeth
in
[
method2_1
,
method2_2
]:
for
pos
in
sizeBinPatterns
:
self
.
assertTrue
(
sizeBinMeth
.
matches_pattern
(
pos
))
negPatterns
=
randomPatterns
+
crossBoxPatterns
+
spiralBoxPatterns
+
antiPatterns
for
neg
in
negPatterns
:
self
.
assertFalse
(
sizeBinMeth
.
matches_pattern
(
neg
))
self
.
assertTrue
(
sizeBinMeth
.
matches_any_pattern
(
sizeBinPatterns
+
negPatterns
))
self
.
assertFalse
(
sizeBinMeth
.
matches_any_pattern
(
negPatterns
))
for
crossBoxMethod
in
[
method3_1_1
,
method3_1_2
,
method3_2_1
,
method3_2_2
]:
for
pos
in
crossBoxPatterns
:
self
.
assertTrue
(
crossBoxMethod
.
matches_pattern
(
pos
))
negPatterns
=
randomPatterns
+
sizeBinPatterns
+
spiralBoxPatterns
+
antiPatterns
for
neg
in
negPatterns
:
self
.
assertFalse
(
crossBoxMethod
.
matches_pattern
(
neg
))
self
.
assertTrue
(
crossBoxMethod
.
matches_any_pattern
(
crossBoxPatterns
+
negPatterns
))
self
.
assertFalse
(
crossBoxMethod
.
matches_any_pattern
(
negPatterns
))
for
spiralBoxMethod
in
[
method4_1_1
,
method4_1_2
,
method4_2_1
,
method4_2_2
]:
for
pos
in
spiralBoxPatterns
:
self
.
assertTrue
(
spiralBoxMethod
.
matches_pattern
(
pos
))
negPatterns
=
randomPatterns
+
sizeBinPatterns
+
crossBoxPatterns
+
antiPatterns
for
neg
in
negPatterns
:
self
.
assertFalse
(
spiralBoxMethod
.
matches_pattern
(
neg
))
self
.
assertTrue
(
spiralBoxMethod
.
matches_any_pattern
(
spiralBoxPatterns
+
negPatterns
))
self
.
assertFalse
(
spiralBoxMethod
.
matches_any_pattern
(
negPatterns
))
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment