From 3f0e8c31bcf4eb9c1e9346a4275d535f2101b65c Mon Sep 17 00:00:00 2001 From: HombreLaser Date: Mon, 23 Oct 2023 17:53:59 -0600 Subject: Improve dataset class --- dataset.py | 33 +++++++++++++++++++++------------ neural_network.py | 6 +----- 2 files changed, 22 insertions(+), 17 deletions(-) diff --git a/dataset.py b/dataset.py index 78d4691..e81e7ec 100644 --- a/dataset.py +++ b/dataset.py @@ -1,26 +1,35 @@ import numpy as np from PIL import Image from pathlib import Path +from alphabet import CYRILLIC_ALPHABET +import random """Class to interface the training and testing data.""" +DATASET_SIZE=15480 class Dataset: def __init__(self) -> None: self.data_path = Path('./data') + self.already_used = set() - """Convert the dataset to a 2 dimension array.""" - def data(self): - for dir in self.data_path.iterdir(): - if not dir.is_dir(): - continue + """ + Yield a random sample of the dataset with each call. + """ + def data(self, batch_size=DATASET_SIZE): + for i in range(batch_size): + random_letter = random.choice(CYRILLIC_ALPHABET) + images = list((self.data_path/random_letter).glob('*.png')) + file_to_yield = random.choice(images).name - for file in dir.glob('*.png'): - image = Image.open(str(file)) - image_array = self._img_to_array(image) - # Return the image's pixel values as an array alongside - # the character that it represents. - yield (dir.name, image_array) + if file_to_yield in self.already_used: + continue + + self.already_used.add(file_to_yield) + image = Image.open(str(self.data_path/random_letter/file_to_yield)) + image_array = self._img_to_array(image) + + yield (random_letter, image_array) """ Get an image from the dataset. @@ -37,7 +46,7 @@ class Dataset: Grab the image in RGB, add a white background, and return it as a black and white array. """ - def _img_to_array(self, image: Image): + def _img_to_array(self, image): fill_color = (255, 255, 255) # White background. background = Image.new(image.mode[:-1], image.size, fill_color) background.paste(image, image.split()[-1]) diff --git a/neural_network.py b/neural_network.py index 6c44ddb..2163173 100644 --- a/neural_network.py +++ b/neural_network.py @@ -2,11 +2,7 @@ import numpy as np import math from scipy.special import expit from secrets import token_hex - -CYRILLIC_ALPHABET = ['I', 'А', 'Б', 'В', 'Г', 'Д', 'Е', 'Ë', 'Ж', 'З', - 'И', 'Й', 'К', 'Л', 'М', 'Н', 'О', 'П', 'Р', 'С', - 'Т', 'У', 'Ф', 'Х', 'Ц', 'Ч', 'Ш', 'Щ', 'Ъ', 'Ы', - 'Ь', 'Э', 'Ю', 'Я'] +from alphabet import CYRILLIC_ALPHABET """The neural network class.""" -- cgit v1.2.3