Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in / Register
Toggle navigation
S
Subsampling
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Service Desk
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Operations
Operations
Incidents
Environments
Packages & Registries
Packages & Registries
Package Registry
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Josef Brandt
Subsampling
Commits
5d22e3c7
Commit
5d22e3c7
authored
Mar 19, 2020
by
Josef Brandt
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Converted KennardStone to Cython
parent
5dec089f
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
82 additions
and
48 deletions
+82
-48
.gitignore
.gitignore
+8
-0
chemometricMethods.py
chemometricMethods.py
+15
-43
cythonModules/kennardStone.pyx
cythonModules/kennardStone.pyx
+36
-0
cythonModules/setuptKennardStone.py
cythonModules/setuptKennardStone.py
+22
-0
tests/test_chemometricMethods.py
tests/test_chemometricMethods.py
+1
-5
No files found.
.gitignore
View file @
5d22e3c7
...
...
@@ -6,3 +6,11 @@ __pycache__/
*.png
*.res
cythonModules/build/
*.c
*.pyd
*.html
chemometricMethods.py
View file @
5d22e3c7
...
...
@@ -10,6 +10,7 @@ sys.path.append("C://Users//xbrjos//Desktop//Python")
from
gepard.analysis.particleContainer
import
ParticleContainer
from
gepard.analysis
import
particleAndMeasurement
as
pm
from
methods
import
SubsamplingMethod
from
cythonModules.kennardStone
import
find_furthest_indices
def
get_pca
(
data
:
np
.
ndarray
,
numComp
:
int
=
2
)
->
np
.
ndarray
:
...
...
@@ -27,7 +28,15 @@ class ChemometricSubsampling(SubsamplingMethod):
return
'Chemometric Selection'
def apply_subsampling_method(self) -> list:
    """
    Select a subset of the particles using Kennard-Stone sampling.

    The feature matrix from ``_get_particle_featurematrix`` is handed to a
    ``KennardStone`` instance together with ``self.fraction``; the particles of
    ``self.particleContainer`` whose ``index`` is among the sampled indices are returned.

    :return: list of the selected particle objects
    """
    vectors: np.ndarray = self._get_particle_featurematrix()
    kennardStone: KennardStone = KennardStone(vectors, self.fraction)
    # Plain ints in a set: constant-time membership tests, independent of the integer
    # type the sampler returns.
    # NOTE(review): assumes particle.index corresponds to the particle's row in the
    # feature matrix - confirm against _get_particle_featurematrix.
    indices: set = {int(ind) for ind in kennardStone.get_sampled_indices()}

    # The filter has to test against the sampled indices. Testing against the result
    # list itself (which starts out empty) can never match and selects nothing.
    selectedParticles: list = [particle for particle in self.particleContainer.particles
                               if particle.index in indices]
    return selectedParticles
def
_get_particle_featurematrix
(
self
)
->
np
.
ndarray
:
vectors
:
list
=
[]
...
...
@@ -65,59 +74,22 @@ class KennardStone(object):
self
.
fraction
:
float
=
desiredFraction
def get_sampled_indices(self) -> list:
    """
    Pick ``round(self.data.shape[0] * self.fraction)`` row indices of ``self.data``.
    Adapted from https://github.com/karoka/Kennard-Stone-Algorithm/blob/master/kenStone.py

    The pair of rows with the largest entry in the pairwise distance matrix seeds the
    selection; all further indices are determined by ``find_furthest_indices``.

    :return: list of the selected row indices (checked to be free of duplicates)
    :raises ValueError: if fewer than two indices would be selected
    """
    t0 = time.time()
    numIndices: int = round(self.data.shape[0] * self.fraction)
    if numIndices < 2:
        raise ValueError(f'Not enough indices to generate (min = 2), requested: {numIndices}')

    distMat = spatial.distance_matrix(self.data, self.data)
    # Position of the largest distance -> the two start points.
    i, j = np.unravel_index(distMat.argmax(), distMat.shape)
    # The selection itself is done only once, in the compiled helper. Running an
    # additional pure-Python selection loop beforehand would just burn time, as its
    # result would be overwritten here.
    selectedIndices = find_furthest_indices(distMat, int(numIndices), int(i), int(j))

    # Internal sanity check: no index may be selected twice.
    assert len(np.unique(list(selectedIndices))) == len(selectedIndices)
    print('selecting indices time:', np.round(time.time()-t0, 2), 'seconds')
    return list(selectedIndices)
# def get_sampled_indices(self) -> list:
# t0 = time.time()
# numIndices: int = round(self.data.shape[0] * self.fraction)
# if numIndices < 2:
# raise ValueError(f'Not enough indices to generate (min = 2), requested: {numIndices}')
# else:
# startInd = self._get_start_indices()
# selectedPoints = np.zeros((numIndices, 2))
# selectedPoints[0, :] = self.data[startInd[0], :]
# selectedPoints[1, :] = self.data[startInd[1], :]
#
# if numIndices > 2:
# data: np.ndarray = np.delete(self.data, startInd, 0)
# for i in range(numIndices-2):
# newIndex: int = self._get_point_furthest_from_other_points(selectedPoints[:i+2, :], data)
# selectedPoints[i+2, :] = data[newIndex, :]
# data = np.delete(data, newIndex, 0)
#
# selectedIndices = []
# assert numIndices == selectedPoints.shape[0]
# for i in range(numIndices):
# newInd = np.where(self.data == selectedPoints[i])[0][0]
# selectedIndices.append(newInd)
#
# assert len(np.unique(selectedIndices)) == len(selectedIndices)
# print('selecting indices time:', np.round(time.time()-t0, 2), 'seconds')
# return selectedIndices
def
_get_start_indices
(
self
)
->
list
:
"""
...
...
cythonModules/kennardStone.pyx
0 → 100644
View file @
5d22e3c7
import
numpy
as
np
cimport
numpy
as
np
cimport
cython
# Recommended by the Cython docs whenever numpy is cimported; initialises the NumPy C-API.
np.import_array()

# Distance values are handled as C doubles. np.float64 is used instead of the plain
# `np.float` alias, which NumPy deprecated in 1.20 and removed in 1.24.
DTYPE = np.float64
ctypedef np.float64_t DTYPE_t
ctypedef np.int32_t INT32_t
ctypedef np.int64_t INT64_t
def find_furthest_indices(np.ndarray[DTYPE_t, ndim=2] distMat, int numIndices, int index0, int index1):
    """
    Kennard-Stone style selection on a precomputed distance matrix.

    Starting from the two seed points, repeatedly adds the not yet selected point whose
    distance to its nearest already selected point is largest, until numIndices points
    are selected. Ties are resolved in favour of the lowest index.

    :param distMat: square matrix; distMat[a, b] is the distance between points a and b
    :param numIndices: total number of indices to return, including the two seeds
    :param index0: first seed index
    :param index1: second seed index
    :return: int32 array of length numIndices: both seeds, then the points in selection order
    :raises ValueError: if distMat is not square, numIndices is not within
                        [2, number of points], or the seeds are out of range or identical
    """
    cdef Py_ssize_t numPoints = distMat.shape[0]
    cdef Py_ssize_t i, j, bestInd
    cdef double dist, bestDist
    cdef np.ndarray[INT32_t, ndim=1] selectedIndices
    cdef np.ndarray[DTYPE_t, ndim=1] minDists
    cdef np.ndarray[np.uint8_t, ndim=1] isSelected

    if distMat.shape[1] != numPoints:
        raise ValueError('distMat has to be a square matrix')
    if numIndices < 2 or numIndices > numPoints:
        raise ValueError(f'numIndices has to be within [2, {numPoints}], got {numIndices}')
    if index0 < 0 or index0 >= numPoints or index1 < 0 or index1 >= numPoints:
        raise ValueError('start indices are out of range of distMat')
    if index0 == index1:
        raise ValueError('the two start indices have to differ')

    selectedIndices = np.empty(numIndices, dtype=np.int32)
    selectedIndices[0] = index0
    selectedIndices[1] = index1

    # Every point of the matrix is a candidate, not only the first numIndices ones.
    isSelected = np.zeros(numPoints, dtype=np.uint8)
    isSelected[index0] = 1
    isSelected[index1] = 1

    # minDists[j]: distance of point j to its nearest selected point. Both seeds are
    # taken into account from the start; the array is updated after every selection.
    minDists = np.minimum(distMat[:, index0], distMat[:, index1])

    for i in range(2, numIndices):
        bestInd = -1
        bestDist = -1.0  # below any valid distance, so coinciding points can still be picked
        for j in range(numPoints):
            if isSelected[j]:
                continue
            if minDists[j] > bestDist:
                bestDist = minDists[j]
                bestInd = j

        selectedIndices[i] = bestInd
        isSelected[bestInd] = 1

        # Fold the newly selected point into the running nearest-selected distances.
        for j in range(numPoints):
            dist = distMat[j, bestInd]
            if dist < minDists[j]:
                minDists[j] = dist

    return selectedIndices
\ No newline at end of file
cythonModules/setuptKennardStone.py
0 → 100644
View file @
5d22e3c7
# Build script for the kennardStone Cython extension.
# setuptools is preferred; distutils is only the fallback when it is not installed.
try:
    from setuptools import Extension, setup
except ImportError:
    from distutils.core import setup
    from distutils.extension import Extension
from Cython.Build import cythonize
import numpy as np
import sys

# Called without any command line arguments: default to building the extension in place.
if len(sys.argv) == 1:
    sys.argv.extend(["build_ext", "--inplace"])

kennardStoneExtension = Extension(
    "kennardStone",
    sources=["kennardStone.pyx"],
    extra_compile_args=["-O3"],
)

setup(
    name="kennardStone algorithm",
    ext_modules=cythonize([kennardStoneExtension], annotate=True),
    # The NumPy headers are required for compiling the `cimport numpy` code.
    include_dirs=[np.get_include()],
)
\ No newline at end of file
tests/test_chemometricMethods.py
View file @
5d22e3c7
...
...
@@ -53,17 +53,13 @@ class TestKennardStone(unittest.TestCase):
self
.
kennardStone
:
cmeth
.
KennardStone
=
cmeth
.
KennardStone
(
np
.
array
([]),
0.1
)
def
test_get_sampled_indices
(
self
):
numDataSets
:
int
=
4
00
numDataSets
:
int
=
10
00
self
.
kennardStone
.
data
=
np
.
random
.
rand
(
numDataSets
,
2
)
self
.
kennardStone
.
fraction
=
0.1
selectedIndices
=
self
.
kennardStone
.
get_sampled_indices
()
self
.
assertEqual
(
len
(
selectedIndices
),
numDataSets
*
self
.
kennardStone
.
fraction
)
self
.
assertEqual
(
len
(
np
.
unique
(
selectedIndices
)),
numDataSets
*
self
.
kennardStone
.
fraction
)
plt
.
scatter
(
self
.
kennardStone
.
data
[:,
0
],
self
.
kennardStone
.
data
[:,
1
])
plt
.
scatter
(
self
.
kennardStone
.
data
[
selectedIndices
,
0
],
self
.
kennardStone
.
data
[
selectedIndices
,
1
])
plt
.
show
()
self
.
kennardStone
.
fraction
=
0.1
numDataSets
=
2
self
.
kennardStone
.
data
=
np
.
random
.
rand
(
numDataSets
,
2
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment