diff options
Diffstat (limited to 'dataset.py')
-rw-r--r-- | dataset.py | 45 |
1 file changed, 45 insertions, 0 deletions
# Reconstructed from: diff --git a/dataset.py b/dataset.py
# (new file mode 100644, index 0000000..78d4691)
"""Class to interface the training and testing data."""

import numpy as np
from PIL import Image
from pathlib import Path


class Dataset:
    """Read images from the ``./data`` directory as black-and-white arrays.

    ``data()`` walks every sub-directory of ``data_path`` and treats the
    sub-directory name as the character its PNG files represent.
    """

    def __init__(self) -> None:
        # Root of the dataset, relative to the current working directory.
        self.data_path = Path('./data')

    def data(self):
        """Convert the dataset to 2-dimensional arrays, one image at a time.

        Yields:
            ``(name, image_array)`` tuples, where ``name`` is the name of
            the sub-directory the PNG was found in (the character that the
            image represents) and ``image_array`` is the image's pixel
            values as returned by ``_img_to_array``.
        """
        for label_dir in self.data_path.iterdir():
            # Only sub-directories hold samples; skip stray files.
            if not label_dir.is_dir():
                continue

            for file in label_dir.glob('*.png'):
                # Close the file handle as soon as the pixels are extracted,
                # so iterating a large dataset does not exhaust descriptors.
                with Image.open(file) as image:
                    image_array = self._img_to_array(image)

                # Return the image's pixel values as an array alongside
                # the character that it represents.
                yield (label_dir.name, image_array)

    def get_image(self, path: str):
        """Get a single image from the dataset.

        Args:
            path: Location of the image file, relative to ``data_path``.

        Returns:
            The image's pixel values as returned by ``_img_to_array``.
        """
        with Image.open(f"{self.data_path}/{path}") as image:
            return self._img_to_array(image)

    def get_random_sample(self):
        """Placeholder; not implemented yet and currently returns ``None``."""
        pass

    def _img_to_array(self, image: Image.Image) -> np.ndarray:
        """Flatten the image onto a white background and binarise it.

        The image is first normalised to RGBA so that inputs without an
        alpha band (e.g. ``RGB``, ``L`` or ``P``) are handled as well:
        deriving the background mode by slicing ``image.mode`` and using
        the last band as mask only works for images that end in an alpha
        band.

        Args:
            image: The source image, in any mode Pillow can convert to RGBA.

        Returns:
            The pixel values of the 1-bit (black and white) image as a
            NumPy array.
        """
        rgba = image.convert('RGBA')
        fill_color = (255, 255, 255)  # White background.
        background = Image.new('RGB', rgba.size, fill_color)
        # The alpha channel is the paste mask: transparent pixels keep the
        # white background, opaque pixels take the image's colour.
        background.paste(rgba, mask=rgba.getchannel('A'))

        return np.asarray(background.convert(mode='1'))