evaluation.py 9.63 KB
Newer Older
1 2 3 4 5 6 7
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Jan 22 13:57:28 2020

@author: luna
"""
8
import pickle
Josef Brandt's avatar
Josef Brandt committed
9
import sys
10
import os
11 12
import numpy as np

13
from helpers import ParticleBinSorter
Josef Brandt's avatar
Josef Brandt committed
14 15 16 17
import methods as meth
import geometricMethods as gmeth
sys.path.append("C://Users//xbrjos//Desktop//Python")
from gepard import dataset
18

Josef Brandt's avatar
Josef Brandt committed
19

Josef Brandt's avatar
Josef Brandt committed
20 21 22 23 24 25 26
def get_name_from_directory(dirPath: str) -> str:
    """
    Derives a name from a path: the last path component, cut at its first dot.
    :param dirPath: Path to a file or directory
    :return: Everything of the last path component that precedes the first '.'
    """
    lastComponent = os.path.basename(dirPath)
    # partition yields the entire string as first element if there is no dot at all
    nameBeforeFirstDot, _, _ = lastComponent.partition('.')
    return nameBeforeFirstDot


class TotalResults(object):
    """
    Holds the SampleResult objects of all added samples and evaluates them together.
    """
    # Classes of the subsampling methods
    methods: list = [meth.RandomSampling, meth.SizeBinFractioning, gmeth.CrossBoxSubSampling,
                     gmeth.SpiralBoxSubsampling]
    # Fractions for which update_all runs the subsampling methods (attribute name kept as is for compatibility)
    measuredFreactions: list = [0.05, 0.1, 0.15, 0.2, 0.3, 0.5, 0.9]

    def __init__(self):
        super(TotalResults, self).__init__()
        self.sampleResults: list = []

    def add_sample(self, filePath: str):
        """
        Creates and stores a SampleResult for the given file. Nothing is added, if a sample of the same name
        is already present or if the file extension is not 'pkl'.
        :param filePath: Path to the sample file
        :return: The newly created SampleResult, or None if nothing was added
        """
        sampleName: str = get_name_from_directory(filePath)
        if any(present.sampleName == sampleName for present in self.sampleResults):
            return None

        extension: str = os.path.basename(filePath).rsplit('.', 1)[-1]
        if extension != 'pkl':
            return None

        addedResult: SampleResult = SampleResult(filePath)
        self.sampleResults.append(addedResult)
        return addedResult

    def update_all(self) -> None:
        """
        Loads the dataset of each sample and updates it with every applicable method at every fraction.
        Progress is printed after each sample.
        :return:
        """
        for numProcessed, sample in enumerate(self.sampleResults, start=1):
            sample.load_dataset()
            for fraction in self.measuredFreactions:
                for curMethod in self._get_methods_for_fraction(sample.dataset, fraction):
                    sample.update_result_with_method(curMethod)
            print(f'processed {numProcessed} of {len(self.sampleResults)} samples')

    def get_error_vs_fraction_data(self) -> dict:
        """
        Groups the mpCountError of all results of all samples by method label and measured fraction
        and averages each group.
        :return: Dict: Key: Method Label, Value: (Dict: Key: Measured Fraction, Value: mean mpCountError)
        """
        collectedErrors: dict = {}
        for sample in self.sampleResults:
            for res in sample.results:
                usedMethod = res.method
                errorsOfMethod: dict = collectedErrors.setdefault(usedMethod.label, {})
                errorsOfMethod.setdefault(usedMethod.fraction, []).append(res.mpCountError)

        return {label: {frac: np.mean(errors) for frac, errors in errorsPerFraction.items()}
                for label, errorsPerFraction in collectedErrors.items()}

    def _get_methods_for_fraction(self, dataset: dataset.DataSet, fraction: float) -> list:
        """
        Creates the method objects for measuring the given fraction of the given dataset.
        :param dataset: The dataset, whose particleContainer the methods operate on
        :param fraction: The desired fraction to measure
        :return: list of measurement Objects that are applicable
        """
        container = dataset.particleContainer
        applicableMethods: list = [meth.RandomSampling(container, fraction),
                                   meth.SizeBinFractioning(container, fraction)]

        # The box based methods are provided by the BoxSelectionCreator
        boxCreator: gmeth.BoxSelectionCreator = gmeth.BoxSelectionCreator(dataset)
        applicableMethods.extend(boxCreator.get_crossBoxSubsamplers_for_fraction(fraction))
        applicableMethods.extend(boxCreator.get_spiralBoxSubsamplers_for_fraction(fraction))
        return applicableMethods


class SampleResult(object):
    """
    An object that actually stores all generated results per sample and can update and report on them.
    """
    def __init__(self, filepath: str):
        """
        :param filepath: Path of the sample file. The dataset is not loaded here, see load_dataset.
        """
        super(SampleResult, self).__init__()
        self.filepath: str = filepath
        self.dataset: dataset.DataSet = None  # stays None until load_dataset was called
        self.results: list = []  # the SubsamplingResult objects of this sample
        self.attributes: list = []  # attribute strings, managed by set_attribute / has_attribute

    @property
    def sampleName(self) -> str:
        """
        :return: The sample name, as derived from the filepath by get_name_from_directory
        """
        return get_name_from_directory(self.filepath)

    def load_dataset(self) -> None:
        """
        Loads the dataset from self.filepath and stores it in self.dataset.
        :return:
        """
        self.dataset = dataset.loadData(self.filepath)
        assert self.dataset is not None, f'no dataset could be loaded from {self.filepath}'

    def update_result_with_method(self, method: meth.SubsamplingMethod, force: bool = False) -> None:
        """
        Updates result with the given method (contains desiredFraction already)
        :param method: The SubsamplingMethod Object
        :param force: Whether to force an update. If False, the result is not updated, if it is already present.
        :return:
        """
        if self._result_is_already_present(method) and not force:
            return

        if force:
            self._remove_result_of_method(method)

        if self.dataset is None:
            self.load_dataset()

        method.particleContainer = self.dataset.particleContainer
        newResult: SubsamplingResult = SubsamplingResult(method)
        # Update first and store afterwards: if the update raises, no half-initialized result remains in the
        # list, which would otherwise be regarded as "already present" in subsequent calls.
        newResult.update()
        self.results.append(newResult)

    def set_attribute(self, newAttribute: str) -> None:
        """
        Adds a new attribute to the sample, if it does not contain the attribute already
        :param newAttribute: The attribute to add
        :return:
        """
        if not self.has_attribute(newAttribute):
            self.attributes.append(newAttribute)
            print(f'sample {self.filepath} has now attribute {newAttribute}')

    def has_attribute(self, attribute: str) -> bool:
        """
        :param attribute: The attribute to look for
        :return: True, if the attribute was already set for this sample
        """
        return attribute in self.attributes

    def _remove_result_of_method(self, method: meth.SubsamplingMethod) -> None:
        """
        Removes all results from the list, whose method equals the specified method
        :param method: The method object, whose results shall be removed
        :return:
        """
        # Build the filtered list instead of removing during iteration: removing while iterating
        # skips the element following each removed one. The slice assignment keeps the list object itself.
        self.results[:] = [result for result in self.results if not method.equals(result.method)]

    def _result_is_already_present(self, method: meth.SubsamplingMethod) -> bool:
        """
        Checks, if a result with the given method (method type AND measured fraction) is already present.
        :param method: The method object, specifying the subsampling method and the measured fraction
        :return: True, if method.equals() holds for the method of any stored result
        """
        return any(method.equals(result.method) for result in self.results)


class SubsamplingResult(object):
    """
    Stores all interesting results from a subsampling experiment
    """
    def __init__(self, subsamplingMethod: meth.SubsamplingMethod):
        """
        :param subsamplingMethod: The method object to take the results from. Its fraction is stored as self.fraction
        """
        super(SubsamplingResult, self).__init__()
        self.method: meth.SubsamplingMethod = subsamplingMethod
        self.fraction = self.method.fraction
        # All of the following stay None until update() was called
        self.origParticleCount: int = None
        self.subSampledParticleCount: int = None
        self.mpCountError: float = None
        self.mpCountErrorPerBin: tuple = None

    def update(self) -> None:
        """
        Updates all results from the method: Applies the subsampling method and stores the particle counts
        and the resulting MP count errors (overall and per bin).
        :return:
        """
        assert self.method.particleContainer is not None
        origParticles: list = self.method.particleContainer.particles
        self.origParticleCount = len(origParticles)
        subParticles: list = self.method.apply_subsampling_method()
        self.subSampledParticleCount = len(subParticles)
        fraction: float = self.method.fraction

        self.mpCountError = self._get_mp_count_error(origParticles, subParticles, fraction)
        self.mpCountErrorPerBin = self._get_mp_count_error_per_bin(origParticles, subParticles, fraction)

    def _get_mp_count_error_per_bin(self, allParticles: list, subParticles: list, fractionMeasured: float) -> tuple:
        """
        Sorts both particle lists into the bins of a ParticleBinSorter and calculates the MP count error per bin.
        :param allParticles: All particles of the sample
        :param subParticles: The particles of the subsample
        :param fractionMeasured: The measured fraction, used for extrapolating the subsample counts
        :return: tuple (bins of the ParticleBinSorter, list of MP count errors per bin)
        """
        binSorter = ParticleBinSorter()
        allParticlesInBins = binSorter.sort_particles_into_bins(allParticles)
        subParticlesInBins = binSorter.sort_particles_into_bins(subParticles)
        mpCountErrorsPerBin = [self._get_mp_count_error(allParticleBin, subParticleBin, fractionMeasured)
                               for allParticleBin, subParticleBin in zip(allParticlesInBins, subParticlesInBins)]
        return binSorter.bins, mpCountErrorsPerBin

    def _get_mp_count_error(self, allParticles: list, subParticles: list, fractionMeasured: float) -> float:
        """
        Calculates the relative error of the MP count that is estimated from the subsample.
        The estimate is the number of MP particles in the subsample, divided by the measured fraction.
        :param allParticles: All particles of the sample
        :param subParticles: The particles of the subsample
        :param fractionMeasured: The measured fraction
        :return: abs(exact - estimate) / exact, or 0.0 if neither list contains MP particles
        :raises ValueError: If the subsample contains MP particles, whereas allParticles contains none
        """
        numMPOrig = self._get_number_of_MP_particles(allParticles)
        numMPEstimate = self._get_number_of_MP_particles(subParticles) / fractionMeasured

        if numMPOrig != 0:
            return self._get_error_from_values(numMPOrig, numMPEstimate)

        if numMPEstimate == 0:
            return 0.0

        # >0 particles in subsample, whereas none in entire sample. This cannot be!
        raise ValueError(f'MP count of {numMPEstimate} was estimated from the subsample, '
                         f'whereas the entire sample contains no MP particles.')

    def _get_error_from_values(self, exact: float, estimate: float) -> float:
        """
        :param exact: The exact value, must not be 0
        :param estimate: The estimated value
        :return: The deviation of the estimate from the exact value, relative to the exact value
        """
        assert(exact != 0)
        return abs(exact - estimate) / exact

    def _get_number_of_MP_particles(self, particleList: list) -> int:
        """
        Counts the particles, whose assignment contains any of the MP patterns (case insensitive).
        :param particleList: List of particles, each providing getParticleAssignment()
        :return: Number of particles with at least one pattern in their assignment
        """
        mpPatterns = ('poly', 'rubber', 'pb', 'pr', 'pg', 'py', 'pv')
        numMPParticles = 0

        for particle in particleList:
            # lower-case the assignment only once per particle, not once per pattern
            assignment = particle.getParticleAssignment().lower()
            if any(pattern in assignment for pattern in mpPatterns):
                numMPParticles += 1

        return numMPParticles