# -*- coding: utf-8 -*-
"""
GEPARD - Gepard-Enabled PARticle Detection
Copyright (C) 2018  Lars Bittrich and Josef Brandt, Leibniz-Institut für 
Polymerforschung Dresden e. V. <bittrich-lars@ipfdd.de>    

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program, see COPYING.  
If not, see <https://www.gnu.org/licenses/>.
"""
import os
import numpy as np
import operator
from dataset import loadData, recursiveDictCompare


def readDataStats(fname):
    """Load the dataset stored at *fname* and return a fully initialized
    DataStats object (results formatted and histogram data created)."""
    stats = DataStats(loadData(fname))
    stats.update()
    stats.loadParticleData()
    # re-apply the HQI thresholds that were stored with the dataset
    params = stats.dataset.resultParams
    stats.formatResults(params['minHQI'], params['compHQI'])
    stats.createHistogramData()
    return stats

class DataStats(object):
    """Aggregates spectra, assignment results and particle statistics of a
    dataset for analysis and reporting."""
    def __init__(self, dataset):
        self.dataset = dataset

        # raw spectra-level results
        self.spectraResults = None      # entire list of all spectra assignments
        self.additiveResults = None     # entire list of all additives
        self.particleResults = None     # final assignment for each particle

        # post-processed copies (low-HQI entries replaced by 'unknown')
        self.currentPolymers = None     # polymers after HQI thresholding
        self.currentAdditives = None    # additives after HQI thresholding

        self.spectra = None             # acquired spectra
        self.indices = None             # which spectra indices belong to which substance

        self.particles2spectra = None   # mapping particle -> list of spectra indices
        self.manualPolymers = {}
        self.manualAdditives = {}
        
    def resetResults(self, spectraResults, additiveResults, hqis, addhqis):
        """Overwrite the raw spectra-level results in one call.

        spectraResults: polymer assignment per spectrum
        additiveResults: additive assignment per spectrum (or None)
        hqis, addhqis: hit-quality indices matching the two lists above
        """
        self.spectraResults = spectraResults
        self.additiveResults = additiveResults
        self.hqis = hqis
        self.addhqis = addhqis
        
    def update(self):
        print('updating data from', self.dataset.name)
        self.spectraResults = self.dataset.results['polymers']
        self.additiveResults = self.dataset.results['additives']
        self.hqis = self.dataset.results['hqis']
        self.addhqis = self.dataset.results['additive_hqis']
                
        self.colorSeed = self.dataset.colorSeed
        if type(self.colorSeed) != str:
            self.colorSeed = 'default'
        
        #load Spectra
        if self.dataset.spectraPath is None:
            fname = os.path.join(self.dataset.path, self.dataset.name + '_000_Spec.Data 1.txt')
        else:
            fname = self.dataset.spectraPath
        return self.loadSpectra(fname)
        
    def loadSpectra(self, fname):
        import time
        t0 = time.time()
        specfilename = self.dataset.fname.split('.pkl')[0] + '_spectra.npy'
        specs = None
        if os.path.exists(specfilename):
            specs = np.load(specfilename)
        else:
            try:
                specs = np.loadtxt(fname)
                #if spectra are already in correct format (WITec, first column: wavenumbers, other columns, intensities), 
                #we take them, otherwise we have to convert from Renishaw export format...
                if not len(np.unique(specs[:, 0])) == len(specs[:, 0]): #--> only unique numbers -> this is the wavenumber column, we have the witec format
                    #Renishaw Convert  
                    #columns 0 and 1 are x and y coordinates. We dont need them...
                    startWavenumber = specs[0, 2]
                    startIndices = np.where(specs[:, 2] == startWavenumber)[0]
                    
                    spectra = np.zeros((startIndices[1], len(startIndices)+1))   #create array with shape (numWavenumbers, numSpectra+1) (first column holds wavenumbers)
                    spectra[:, 0] = specs[startIndices[0]:startIndices[1], 2]
                    for i in range(len(startIndices)-1):
                        spectra[:, i+1] = specs[startIndices[i]:startIndices[i+1], 3]
                    #aaand the last spectrum:
                    spectra[:, -1] = specs[startIndices[-1]:, 3]
                    specs = np.flip(spectra, 0)    #Renishaw goes from highest to lowest wavenumber, out of whatever reason...
                    
                #write spectra to binary file, that makes reloading them in future significantly faster
                np.save(specfilename, specs)
                print('loading specs:', time.time()-t0)     
                self.dataset.spectraPath = fname
            except:
                pass
        self.spectra = specs
        return specs
    
    def loadParticleData(self):
        self.particles2spectra = self.dataset.particles2spectra
        
        sortindices = self.dataset.ramanscansortindex
        if self.particles2spectra is None:
            print('creating default particles2spectra list')
            #no assignment found, so we assume one measurement per particle and use ramanscansortindex for assignment
            self.particles2spectra = [[int(np.where(sortindices == i)[0])] for i in range(len(sortindices))]
        
        #check, if dataset already contains results. Otherwise load them...
        return not (self.spectraResults is None or (len(self.spectraResults) != len(sortindices)))

    def invalidateSpectra(self):
        self.spectraResults = ['empty']*(self.spectra.shape[1]-1)
        self.hqis = [100]*(self.spectra.shape[1]-1)
            
    def formatResults(self, hqi, compHqi):
        if self.spectraResults is not None:
            del self.currentPolymers, self.currentAdditives
            
            #convert to arrays (makes indexing easier...)
            self.currentPolymers, self.hqis = np.array(self.spectraResults), np.array(self.hqis)
            
            if self.additiveResults is not None:
                self.currentAdditives, self.addhqis = np.array(self.additiveResults), np.array(self.addhqis)
                self.compHqiSpinBox.setDisabled(False)
            else:
                self.currentAdditives = None
            
            #set poor HQI results to unknown
            self.currentPolymers[self.hqis < hqi] = 'unknown'
            
            if self.currentAdditives is not None:
                self.currentAdditives[self.addhqis < compHqi] = 'unknown'
            
    def getUniquePolymers(self):
        if self.currentPolymers is None:
            return None
        return self.uniquePolymers
    
    def getParticleStats(self):
        particlestats = np.array(self.dataset.particlestats)
        pixelscale = self.dataset.getPixelScale()
        #convert to mikrometer scale
        particlestats[:,:5] *= pixelscale
        particlestats[:,4] *= pixelscale   #again for the area...
        return particlestats
    
    def createHistogramData(self):
163
        particlestats = self.getParticleStats()
164
        self.uniquePolymers = np.unique(self.currentPolymers)
165
        self.particleResults = [None]*len(particlestats)
166 167
        self.typehistogram = {i: 0 for i in self.uniquePolymers}
        
168
        if len(self.particles2spectra) != len(particlestats):
169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209
            return False
        
        for particleID, specList in enumerate(self.particles2spectra):
            assignment = self.currentPolymers[specList[0]]   #we take the first result as particle result. Hence, all spectra per particle have to have the same result
            self.particleResults[particleID] = assignment
            self.typehistogram[assignment] += 1
            
        self.particleResults = np.array(self.particleResults)
        
        ##sort typehistogram, it will be converted into a list!!
        self.typehistogram = sorted(self.typehistogram.items(), key = operator.itemgetter(1), reverse = True)
        
        self.uniquePolymers = [i[0] for i in self.typehistogram]
        
        self.indices = []      #what particles belong to which polymer type?
        for polymer in self.uniquePolymers:
            self.indices.append(list(np.where(self.particleResults == polymer)[0]))
            

        ###generate additive array for each type in typehistogram:
        if self.currentAdditives is None:
            self.sorted_additives = None
        else:
            self.sorted_additives = []
            
            for polymer in self.typehistogram:  #get additives of each polymer type
                self.sorted_additives.append(self.currentAdditives[np.where(self.currentPolymers == polymer[0])])
            for i in range(len(self.sorted_additives)):         #sort out 'none' entries
                nonentries = np.where(self.sorted_additives[i] == 'none')
                self.sorted_additives[i] = np.delete(self.sorted_additives[i], nonentries)
        return True
        
    def saveAnalysisResults(self, minHQI, compHQI):
        """Write results and threshold parameters back to the dataset, save
        it and verify the save by re-reading; returns the comparison result."""
        self.dataset.results = dict(polymers=self.spectraResults,
                                    hqis=self.hqis,
                                    additives=self.additiveResults,
                                    additive_hqis=self.addhqis)
        self.dataset.resultParams = dict(minHQI=minHQI, compHQI=compHQI)
        self.dataset.save()

        # round-trip check: re-read the saved file and compare to this instance
        isValid = self.testRead()
        print('saved dataset; Valid:', isValid)
        return isValid
        
        
    def testRead(self):
        """Re-read the saved dataset from disk and compare its contents
        against this instance; returns the result of the comparison."""
        reloaded = readDataStats(self.dataset.fname)
        return recursiveDictCompare(self.__dict__, reloaded.__dict__)