# subsampling.py
import numpy as np
import matplotlib.pyplot as plt
import time
import sys
sys.path.append("C://Users//xbrjos//Desktop//Python")
from gepard import dataset

from methods import IvlevaSubsampling, RandomSampling, SizeBinFractioning
from helpers import ParticleBinSorter
from evaluation import ResultComparer

# Path of the dataset file (.pkl) that is handed to dataset.loadData below.
# The commented-out assignments are alternative samples; enable exactly one.
fname: str = r'C:\Users\xbrjos\Desktop\temp MP\190313_Soil_5_A_50_5_1_50_1\190313_Soil_5_A_50_5_1_50_1.pkl'
# fname: str = r'C:\Users\xbrjos\Desktop\temp MP\190326_MCII_WWTP_SB_50_2\190326_MCII_WWTP_SB_50_2.pkl'
# fname: str = r'C:\Users\xbrjos\Desktop\temp MP\190326_MCII_WWTP_SB_50_1\190326_MCII_WWTP_SB_50_1.pkl'
# fname: str = r'C:\Users\xbrjos\Desktop\temp MP\KWS_CT_3_ds1_all_10_2\KWS_CT_3_ds1_all_10_2.pkl'  #legacy convert not working..
# fname: str = r'C:\Users\xbrjos\Desktop\temp MP\190201_BSB_Stroomi_ds2_R1_R2_50\190201_BSB_Stroomi_ds2_R1_R2_50.pkl'  #zvalues image missing, legacy convert fails..


# Load the dataset through gepard's loader.
dset = dataset.loadData(fname)
print('loaded dataset')

# The particle container is what the subsampling methods are constructed with;
# its full particle list serves as the reference for the error calculations.
pc = dset.particleContainer
origParticles = pc.particles

resultComparer = ResultComparer()
# NOTE(review): this calls an underscore-prefixed (private) ResultComparer
# method; going by the print below it yields the number of MP particles among
# the original particles - confirm against evaluation.py.
numOrigMP = resultComparer._get_number_of_MP_particles(origParticles)
print(f'orig particles: {len(origParticles)}, of which are mp: {numOrigMP}')
# Disabled alternative: run the Ivleva subsampling method on the container.
# ivlevaSampling = IvlevaSubsampling(pc)
# ivlevaFraction, ivlevaParticles = ivlevaSampling.apply_subsampling_method()

# Repeated-trial evaluation of a subsampling method: for every measured
# fraction the subsampling is applied numIterations times, and the mean error
# of the MP particle count is recorded, both overall and per size bin.
t0 = time.time()  # start time, used afterwards to report the loop's runtime
# Measured fractions: from 0.05 in steps of 0.05, stopping before 0.55.
fractions = np.arange(0.05, .55, 0.05)
errors = []  # one entry per fraction: mean overall error, in percent
binErrors = []  # one entry per fraction: list of mean errors per bin, in percent
numIterations = 1000

for fraction in fractions:
    print('random sampling, fraction:', fraction)
    # Disabled alternative method:
    # randomSampling = RandomSampling(pc, desiredFraction=fraction)
    randomSampling = SizeBinFractioning(pc, fraction)
    iterErrors = []
    binIterErrors = []
    for _ in range(numIterations):
        randomFraction, randomParticles = randomSampling.apply_subsampling_method()
        iterErrors.append(resultComparer._get_mp_count_error(origParticles, randomParticles, randomFraction))
        # `bins` is re-assigned on every iteration; the plotting code after
        # the loop reads the value left over from the final iteration.
        bins, errorsPerBin = resultComparer._get_mp_count_error_per_bin(origParticles, randomParticles, randomFraction)
        binIterErrors.append(errorsPerBin)

    errors.append(round(np.mean(iterErrors)*100))  # from fraction to %
    # Column-wise mean: one row per iteration, one column per size bin. This
    # averages every bin over all iterations in a single pass.
    meanErrorsPerBin = np.mean(binIterErrors, axis=0)
    binErrors.append([round(binError * 100) for binError in meanErrorsPerBin])

# Report how long the sampling loop took (t0 is set right before it).
print('random sampling took', np.round(time.time()-t0, 2), 'seconds')

# x positions for the per-bin curves: a leading 0 followed by the bin limits
# returned by the comparer. Built without mutating `bins`, and works for any
# sequence type, not only lists.
binLowerLimits = [0, *bins]

# Left panel: overall mean error as a function of the measured fraction.
plt.subplot(121)
plt.plot(fractions, errors)
# plt.title(f'Random Sampling, averaged from {numIterations} trials, orig particle count: {len(origParticles)}')
plt.xlabel('Fraction measured')
plt.ylabel('Average error in MP particle count (%)')

# Right panel: one curve per measured fraction, showing the mean error per bin.
plt.subplot(122)
for fracMeas, curBinErrors in zip(fractions, binErrors):
    # Two decimals are required: the fractions are spaced 0.05 apart, so
    # rounding to one decimal would give several curves the same legend label.
    plt.plot(binLowerLimits, curBinErrors, label=f'{fracMeas:.2f}')
# plt.title('Error in MP count (%) per size bin')
plt.xlabel('particle size')
plt.ylabel('Average error in MP particle count (%)')
plt.legend()
plt.show()


# sizeBinSampling = SizeBinFractioning(pc)
# sizeBinParticles = sizeBinSampling.apply_subsampling_method()