Commit cda2aeb1 authored by Josef Brandt's avatar Josef Brandt

include anonymized datasets

parent 6430025b
......@@ -273,7 +273,7 @@ class TrainedSubsampling(SubsamplingMethod):
@property
def label(self) -> str:
label: str = 'Dummy Trained Random Sampling'
label: str = 'Dummy Trained Sampling'
if self.fakeClassifier:
label += f' (score {self.fakeScore})'
else:
......
......@@ -17,7 +17,7 @@ def get_error_vs_frac_plot(totalResults: TotalResults, attributes: list = [], me
elif len(methods) == 0 and len(attributes) != 0:
methods = [[]]*len(attributes)
if len(partCounts) == 0:
patchiness = [[]]*len(attributes)
partCounts = [[]]*len(attributes)
assert len(attributes) == len(methods)
numRows: int = 1
......@@ -86,9 +86,6 @@ def get_error_vs_frac_plot(totalResults: TotalResults, attributes: list = [], me
title: str = ''
if len(attrs) > 0:
for i in range(len(attrs)):
if attrs[i] == 'slush':
attrs[i] = 'sludge'
title = ', '.join(attr for attr in attrs)
elif pcounts != []:
......@@ -97,19 +94,20 @@ def get_error_vs_frac_plot(totalResults: TotalResults, attributes: list = [], me
meanNumMP: int = int(round(meanParticleCount * meanMPFrac/100, 0))
title += f' ({numSamples} filters)\nAverage: {meanParticleCount} particles, {meanMPFrac} % MP, {meanNumMP} MP particles'
ax.set_title(title, fontSize=13)
ax.set_title(title, fontSize=15)
ax.set_xscale('log')
ax.xaxis.set_major_formatter(ScalarFormatter())
# ax.xaxis.set_major_locator(FixedLocator([0.02, 0.05, 0.1, 0.2, 0.5, 1.0]))
ax.xaxis.set_major_locator(FixedLocator([2, 5, 10, 20, 50, 100]))
ax.set_xlabel('measured fraction (%)', fontsize=12)
ax.set_ylabel('subsampling-error (%)', fontsize=12)
ax.set_xlabel('measured fraction (%)', fontsize=13)
ax.set_ylabel('subsampling-error (%)', fontsize=13)
ax.tick_params(axis='both', which='both', labelsize=13)
minX, maxX = 0.9 * min(fractions), 105
ax.hlines([20, 40, 60, 80], minX, maxX, colors='gray', alpha=0.5)
ax.set_xlim([minX, maxX])
ax.set_ylim([0, 100])
ax.legend()
ax.legend(fontsize=13)
index += 1
......@@ -129,10 +127,10 @@ def get_distance_hist_plots(totalResults: TotalResults, attributes: list = []) -
numRows = 2
numCols = np.ceil(len(attributes) / numRows)
onlyMP: bool = True
ax = fig.add_subplot()
onlyMP: bool = False
# ax = fig.add_subplot()
for index, attrs in enumerate(attributes):
# ax = fig.add_subplot(numRows, numCols, index + 1)
ax = fig.add_subplot(numRows, numCols, index + 1)
allParticles: list = []
densities: list = []
particleCounts: list = []
......@@ -154,67 +152,58 @@ def get_distance_hist_plots(totalResults: TotalResults, attributes: list = []) -
else:
particleCounts.append(len(dset.particleContainer.particles))
# for particle in dset.particleContainer.particles:
# allParticles.append(particle)
#
# offset, diameter, [width, height] = get_filterDimensions_from_dataset(dset)
# center = get_center_from_filter_dimensions(offset, diameter)
# center[0] = convert_length_to_pixels(dset, center[0])
# center[1] = convert_length_to_pixels(dset, center[1])
for particle in dset.particleContainer.particles:
allParticles.append(particle)
offset, diameter, [width, height] = get_filterDimensions_from_dataset(dset)
center = get_center_from_filter_dimensions(offset, diameter)
center[0] = convert_length_to_pixels(dset, center[0])
center[1] = convert_length_to_pixels(dset, center[1])
# histdata = get_distance_point_histogramdata(dset.particleContainer.particles, center)
# densities.append(histdata[1])
histdata = get_distance_point_histogramdata(dset.particleContainer.particles, center)
densities.append(histdata[1])
# ax.plot(histdata[0], histdata[1])
for i in range(len(attrs)):
if attrs[i] == 'slush':
attrs[i] = 'sludge'
ax.scatter(particleCounts, pathinesses, label=', '.join(attr for attr in attrs))
# numSamples = len(densities)
# partCounts: list = [len(i) for i in allParticles]
# meanParticleCount: float = round(len(allParticles) / numSamples)
# meanParticleCount: float = round(np.mean(partCounts))
# stdParticleCount: float = round(np.std(partCounts))
# mpFracs: list = [get_number_of_MP_particles(i)/len(i) for i in allParticles]
# meanMPFrac: float = round(np.mean(mpFracs) * 100, 1)
# stdMPFrac: float = round(np.std(mpFracs) * 100, 1)
# numMPParticles: float = get_number_of_MP_particles(allParticles)
# meanMPFrac: float = round(numMPParticles / len(allParticles) * 100, 1)
# meanPatchiness: float = round(np.mean(pathinesses), 2)
# title: str = ''
# if len(attrs) > 0:
# title = ', '.join(attr for attr in attrs)
# title += f'\n({numSamples} filters, avg. {meanParticleCount} particles, {meanMPFrac} % MP,'
# title += f'\navg. Particle Patchiness {meanPatchiness})'
# ax.set_title(title, fontSize=13)
# densities: np.ndarray = np.mean(np.array(densities), axis=0)
# densities /= densities.max()
# distances = np.array(histdata[0], dtype=np.float) * dset.pixelscale_df
# ax.plot(distances / 1000, densities)
# ax.set_xlabel('distance from filter center (mm)', fontsize=12)
# ax.set_xlim([0, 6])
# ax.set_ylabel('normalized particle density', fontsize=12)
# ax.set_ylim([0.0, 1.05])
ax.legend(fontsize=15)
ax.set_xscale('log')
if not onlyMP:
ax.set_xticks([1000, 5000, 10000, 50000, 100000])
ax.set_xlabel('Particle Count', fontsize=15)
else:
ax.set_xticks([10, 50, 100, 500])
ax.set_xlabel('MP Particle Count', fontsize=15)
ax.xaxis.set_major_formatter(ScalarFormatter())
ax.set_ylabel('Patchiness', fontsize=15)
for tick in ax.xaxis.get_major_ticks():
tick.label.set_fontsize(15)
for tick in ax.yaxis.get_major_ticks():
tick.label.set_fontsize(15)
# ax.scatter(particleCounts, pathinesses, label=', '.join(attr for attr in attrs))
# ax.set_title('All Particles', fontsize=17)
numSamples = len(densities)
meanParticleCount: float = round(len(allParticles) / numSamples)
numMPParticles: float = get_number_of_MP_particles(allParticles)
meanMPFrac: float = round(numMPParticles / len(allParticles) * 100, 1)
meanPatchiness: float = round(np.mean(pathinesses), 2)
title: str = ''
if len(attrs) > 0:
title = ', '.join(attr for attr in attrs)
title += f'\n({numSamples} filters, avg. {meanParticleCount} particles, {meanMPFrac} % MP,'
title += f'\navg. Particle Patchiness {meanPatchiness})'
ax.set_title(title, fontSize=13)
densities: np.ndarray = np.mean(np.array(densities), axis=0)
densities /= densities.max()
distances = np.array(histdata[0], dtype=np.float) * dset.pixelscale_df
ax.plot(distances / 1000, densities)
ax.set_xlabel('distance from filter center (mm)', fontsize=12)
ax.set_xlim([0, 6])
ax.set_ylabel('normalized particle count', fontsize=12)
ax.set_ylim([0.0, 1.05])
# ax.legend(fontsize=15)
# ax.set_xscale('log')
# if not onlyMP:
# ax.set_xticks([1000, 5000, 10000, 50000, 100000])
# ax.set_xlabel('Particle Count', fontsize=15)
# else:
# ax.set_xticks([10, 50, 100, 500])
# ax.set_xlabel('MP Particle Count', fontsize=15)
# ax.xaxis.set_major_formatter(ScalarFormatter())
# ax.set_ylabel('Patchiness', fontsize=15)
# for tick in ax.xaxis.get_major_ticks():
# tick.label.set_fontsize(15)
# for tick in ax.yaxis.get_major_ticks():
# tick.label.set_fontsize(15)
fig.tight_layout()
return fig
......@@ -241,58 +230,36 @@ def get_error_vs_mpfrac_plot(totalResults: TotalResults, attributes: list = [])
totalParticleCount: int = len(particles)
numMPParticles: float = get_number_of_MP_particles(particles)
mpfrac: float = numMPParticles / totalParticleCount * 100
if mpfrac > 0:
fracsMeasured: np.ndarray = np.unique([result.method.fraction for result in sampleRes.results])
for particlesMeasured in [1000, 3000, 7000]:
if particlesMeasured <= totalParticleCount:
fracMeasured: float = particlesMeasured/totalParticleCount
indexOfFracToEvaluate = np.argmin(np.abs(fracsMeasured - fracMeasured))
fracToEvaluate: float = fracsMeasured[indexOfFracToEvaluate]
allErrorsOfThisFrac: list = []
for result in sampleRes.results:
if result.method.label.find('Random Subsampling') != -1 and result.method.fraction == fracToEvaluate:
allErrorsOfThisFrac.append(result.mpCountError)
fracsMeasured: np.ndarray = np.unique([result.method.fraction for result in sampleRes.results])
fracsToPlot: dict = {0.03: 0.03,
0.04: 0.03,
0.05: 0.03,
0.06: 0.1,
0.1: 0.1,
0.2: 0.1,
0.25: 0.1,
0.3: 0.5,
0.5: 0.5,
0.7: 0.8,
0.9: 0.8
}
numParticlesMeasured: list = [1000, 2500, 5000]
usedSamples: list = []
for particlesMeasured in numParticlesMeasured:
if particlesMeasured <= totalParticleCount:
fracMeasured: float = particlesMeasured/totalParticleCount
indexOfFracToEvaluate = np.argmin(np.abs(fracsMeasured - fracMeasured))
fracToEvaluate: float = fracsMeasured[indexOfFracToEvaluate]
if particlesMeasured not in dataWithNumbers:
dataWithNumbers[particlesMeasured] = [(mpfrac, np.mean(allErrorsOfThisFrac))]
else:
dataWithNumbers[particlesMeasured].append((mpfrac, np.mean(allErrorsOfThisFrac)))
for fracToEvaluate in [0.1, 0.5, 0.9]:
allErrorsOfThisFrac: list = []
for result in sampleRes.results:
if result.method.label.find('Random Subsampling') != -1 and result.method.fraction == fracToEvaluate:
allErrorsOfThisFrac.append(result.mpCountError)
if mpfrac != 0.0:
if particlesMeasured not in dataWithNumbers:
dataWithNumbers[particlesMeasured] = [(mpfrac, np.mean(allErrorsOfThisFrac))]
else:
dataWithNumbers[particlesMeasured].append((mpfrac, np.mean(allErrorsOfThisFrac)))
fracToEvaluate = fracsToPlot[fracToEvaluate]
if mpfrac > 0.0:
if fracToEvaluate not in dataWithFractions.keys():
dataWithFractions[fracToEvaluate] = [(mpfrac, np.mean(allErrorsOfThisFrac))]
else:
dataWithFractions[fracToEvaluate].append((mpfrac, np.mean(allErrorsOfThisFrac)))
# fracsToProcess: list = [0.03, 0.1, 0.5, 0.8]
# for fracToEvaluate in fracsToProcess:
# allErrorsOfThisFrac: list = []
# for result in sampleRes.results:
# if result.method.label.find('Random Subsampling') != -1 and result.method.fraction == fracToEvaluate:
# allErrorsOfThisFrac.append(result.mpCountError)
#
# if mpfrac != 0.0:
# if fracToEvaluate not in dataWithFractions.keys():
# dataWithFractions[fracToEvaluate] = [(mpfrac, np.mean(allErrorsOfThisFrac))]
# else:
# dataWithFractions[fracToEvaluate].append((mpfrac, np.mean(allErrorsOfThisFrac)))
for frac in sorted(dataWithFractions.keys()):
mpfracs: np.ndarray = np.array([i[0] for i in dataWithFractions[frac]])
errors: np.ndarray = np.array([i[1] for i in dataWithFractions[frac]])
......@@ -302,11 +269,7 @@ def get_error_vs_mpfrac_plot(totalResults: TotalResults, attributes: list = [])
ax1.plot(np.sort(mpfracs), errors)
else:
x_for_fit = np.log10(mpfracs)
try:
params, _ = optimize.curve_fit(quadratic_fit, x_for_fit, errors)
except ValueError:
print('break')
continue
params, _ = optimize.curve_fit(quadratic_fit, x_for_fit, errors)
ax1.plot(np.sort(mpfracs), quadratic_fit(np.sort(x_for_fit), params[0], params[1], params[2]))
for numParticles in sorted(dataWithNumbers.keys()):
......@@ -324,7 +287,7 @@ def get_error_vs_mpfrac_plot(totalResults: TotalResults, attributes: list = [])
axis.set_xlim([0.08, 15])
axis.set_xscale('log')
axis.set_ylabel('subsampling error (%)', fontsize=15)
axis.set_ylim([0, 120])
axis.set_ylim([0, 100])
axis.xaxis.set_major_formatter(ScalarFormatter())
axis.hlines([20], 0.08, 15, colors='gray', alpha=0.5)
axis.text(2.5, 22, 'recommended limit', fontsize=14, alpha=0.5)
......@@ -369,7 +332,8 @@ def get_distance_point_histogramdata(particles: list, center: np.ndarray) -> tup
data, binMaxima = np.histogram(distancesToPoints, bins)
densities: np.ndarray = np.zeros_like(data, dtype=np.float)
for i in range(len(data)):
densities[i] = float(data[i]) / get_area_of_circle_ring(binMaxima[i], binMaxima[i+1])
# densities[i] = float(data[i]) / get_area_of_circle_ring(binMaxima[i], binMaxima[i+1])
densities[i] = float(data[i])
binCenters: list = [np.mean([binMaxima[i], binMaxima[i+1]]) for i in range(len(binMaxima)-1)]
return binCenters, densities
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment