"""Class to interface the training and testing data."""

import random
from pathlib import Path

import numpy as np
from PIL import Image

from alphabet import CYRILLIC_ALPHABET

DATASET_SIZE = 15480


class Dataset:
    """Random access to the letter images stored under ``./data``.

    The code expects one sub-directory per entry of ``CYRILLIC_ALPHABET``
    inside ``data_path``, each holding ``*.png`` files.
    """

    def __init__(self) -> None:
        # Root directory of the dataset, relative to the working directory.
        self.data_path = Path('./data')
        # (letter, file name) pairs already yielded by ``data``; shared by
        # every ``data`` call made on this instance.
        self.already_used = set()

    def data(self, batch_size=DATASET_SIZE):
        """Yield random, not yet used samples of the dataset.

        ``batch_size`` random draws are made. A draw that hits an image
        already recorded in ``self.already_used`` is skipped rather than
        retried, so fewer than ``batch_size`` samples may be yielded.

        Args:
            batch_size: Number of random draws to perform.

        Yields:
            ``(letter, image_array)`` tuples, where ``letter`` is the entry
            picked from ``CYRILLIC_ALPHABET`` and ``image_array`` is the
            result of ``_img_to_array`` for the picked file.

        Raises:
            IndexError: If the directory of the picked letter contains no
                ``*.png`` file (raised by ``random.choice``).
        """
        # Directory listings are cached for the lifetime of this generator
        # so each letter directory is scanned once, not once per draw.
        listings = {}
        for _ in range(batch_size):
            random_letter = random.choice(CYRILLIC_ALPHABET)
            images = listings.get(random_letter)
            if images is None:
                images = list((self.data_path / random_letter).glob('*.png'))
                listings[random_letter] = images
            image_path = random.choice(images)

            # Key on letter *and* file name: the bare file name alone would
            # make equally named files of different letters shadow each
            # other.
            key = (random_letter, image_path.name)
            if key in self.already_used:
                continue
            self.already_used.add(key)

            # Build the array before yielding so the file handle is closed
            # while the generator is suspended.
            with Image.open(str(image_path)) as image:
                image_array = self._img_to_array(image)
            yield (random_letter, image_array)

    def get_image(self, path: str):
        """Get an image from the dataset.

        Args:
            path: Location of the image file relative to ``self.data_path``.

        Returns:
            The array produced by ``_img_to_array`` for that image.
        """
        with Image.open(f"{self.data_path}/{path}") as image:
            return self._img_to_array(image)

    def get_random_sample(self):
        """Placeholder; not implemented yet and currently returns ``None``."""
        pass

    def _img_to_array(self, image):
        """Flatten ``image`` onto a white background and binarise it.

        The image is composited over a white RGB canvas using its alpha
        channel as the mask, then converted to Pillow's 1-bit mode and
        returned as a NumPy array.

        Args:
            image: A Pillow image. Images that are not RGBA are converted to
                RGBA first, so files without an alpha channel work as well.

        Returns:
            ``np.asarray`` of the 1-bit (mode ``'1'``) composited image.
        """
        fill_color = (255, 255, 255)  # White background.
        # Guarantee an alpha band to use as the paste mask; RGBA inputs are
        # used unchanged.
        rgba = image if image.mode == 'RGBA' else image.convert('RGBA')
        background = Image.new('RGB', rgba.size, fill_color)
        background.paste(rgba, mask=rgba.split()[-1])
        return np.asarray(background.convert(mode='1'))