subsampling.py 3.11 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76
import numpy as np
import matplotlib.pyplot as plt
import time
import sys
sys.path.append("C://Users//xbrjos//Desktop//Python")
from gepard import dataset

from methods import IvlevaSubsampling, RandomSampling, SizeBinFractioning
from helpers import ParticleBinSorter
from evaluation import ResultComparer

# Path of the dataset file handed to dataset.loadData below. Only one `fname`
# assignment is meant to be active; the commented-out lines are alternative
# datasets, and their trailing notes record known problems when loading them.
fname: str = r'C:\Users\xbrjos\Desktop\temp MP\190313_Soil_5_A_50_5_1_50_1\190313_Soil_5_A_50_5_1_50_1.pkl'
# fname: str = r'C:\Users\xbrjos\Desktop\temp MP\190326_MCII_WWTP_SB_50_2\190326_MCII_WWTP_SB_50_2.pkl'
# fname: str = r'C:\Users\xbrjos\Desktop\temp MP\190326_MCII_WWTP_SB_50_1\190326_MCII_WWTP_SB_50_1.pkl'
# fname: str = r'C:\Users\xbrjos\Desktop\temp MP\KWS_CT_3_ds1_all_10_2\KWS_CT_3_ds1_all_10_2.pkl'  #legacy convert not working..
# fname: str = r'C:\Users\xbrjos\Desktop\temp MP\190201_BSB_Stroomi_ds2_R1_R2_50\190201_BSB_Stroomi_ds2_R1_R2_50.pkl'  #zvalues image missing, legacy convert fails..


# Load the dataset from disk.
dset = dataset.loadData(fname)
print('loaded dataset')

# Particle container of the dataset and the complete particle list it holds.
# The complete list is the reference every subsample is compared against.
pc = dset.particleContainer
origParticles = pc.particles

# Number of MP particles in the complete list, as counted by ResultComparer
# ("MP" presumably stands for microplastic -- confirm).
resultComparer = ResultComparer()
numOrigMP = resultComparer._get_number_of_MP_particles(origParticles)
print(f'orig particles: {len(origParticles)}, of which are mp: {numOrigMP}')
# Disabled alternative: subsampling with the IvlevaSubsampling method.
# ivlevaSampling = IvlevaSubsampling(pc)
# ivlevaFraction, ivlevaParticles = ivlevaSampling.apply_subsampling_method()

# Repeated-subsampling evaluation: for every measured fraction the subsampling
# method is applied numIterations times and the resulting MP count errors are
# averaged, both in total and per size bin.
t0 = time.time()  # start time, used for the runtime report after the loop
fractions = np.arange(0.05, .55, 0.05)
errors = []     # per fraction: mean total error, rounded, in %
binErrors = []  # per fraction: list of mean per-bin errors, rounded, in %
numIterations = 1000

for fraction in fractions:
    print('random sampling, fraction:', fraction)
    # Alternative method:
    # sampler = RandomSampling(pc, desiredFraction=fraction)
    sampler = SizeBinFractioning(pc, fraction)

    totalErrorPerTrial = []
    binErrorsPerTrial = []
    for _ in range(numIterations):
        measuredFraction, subParticles = sampler.apply_subsampling_method()
        totalErrorPerTrial.append(
            resultComparer._get_mp_count_error(origParticles, subParticles, measuredFraction)
        )
        # `bins` is overwritten on every trial; the value of the last trial
        # is the one used after the loop.
        bins, trialBinErrors = resultComparer._get_mp_count_error_per_bin(
            origParticles, subParticles, measuredFraction
        )
        binErrorsPerTrial.append(trialBinErrors)

    # Errors come back as fractions; scale to percent before rounding.
    errors.append(round(np.mean(totalErrorPerTrial) * 100))

    # One averaged value per size bin; there is one more bin than there are
    # entries in `bins`.
    binErrors.append([
        round(np.mean([trial[binIndex] for trial in binErrorsPerTrial]) * 100)
        for binIndex in range(len(bins) + 1)
    ])

# Report how long the sampling loop took (t0 was taken right before it).
print('random sampling took', np.round(time.time()-t0, 2), 'seconds')

# x positions for the per-bin curves: a leading 0 followed by the entries of
# `bins`, giving one position per per-bin error value.
binLowerLimits = [0, *bins]

# Left panel: average total error as a function of the measured fraction.
plt.subplot(121)
plt.plot(fractions, errors)
# plt.title(f'Random Sampling, averaged from {numIterations} trials, orig particle count: {len(origParticles)}')
plt.xlabel('Fraction measured')
plt.ylabel('Average error in MP particle count (%)')

# Right panel: average error per size bin, one curve per measured fraction.
plt.subplot(122)
for fracMeas, curBinErrors in zip(fractions, binErrors):
    # Two decimals are required in the label: the fractions advance in steps
    # of 0.05, so rounding to one decimal gives several curves the same
    # legend entry.
    plt.plot(binLowerLimits, curBinErrors, label=f'{fracMeas:.2f}')
# plt.title('Error in MP count (%) per size bin')
plt.xlabel('particle size')
plt.ylabel('Average error in MP particle count (%)')
plt.legend()
plt.show()


# sizeBinSampling = SizeBinFractioning(pc)
# sizeBinParticles = sizeBinSampling.apply_subsampling_method()