summaryrefslogtreecommitdiff
path: root/dataset.py
blob: 78d4691761c6dc359e422623baad4e628ec36b70 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
import numpy as np
from PIL import Image
from pathlib import Path

"""Class to interface the training and testing data."""


class Dataset:
    """Interface to the training and testing images stored on disk.

    The data root is expected to contain one sub-directory per label
    (the character the images represent), each holding ``*.png`` files.
    """

    def __init__(self, data_path="./data") -> None:
        """Create a dataset rooted at *data_path*.

        Args:
            data_path: Directory containing one sub-directory per label.
                Defaults to ``./data``.
        """
        self.data_path = Path(data_path)

    def data(self):
        """Iterate over every PNG image in the dataset.

        Yields:
            ``(label, image_array)`` tuples, where ``label`` is the name of
            the sub-directory the image lives in and ``image_array`` is the
            result of :meth:`_img_to_array` for that image.
        """
        # Sort so the iteration order is reproducible between runs; the
        # order returned by the file system is otherwise arbitrary.
        for label_dir in sorted(self.data_path.iterdir()):
            # Stray files directly under the data root are not labels.
            if not label_dir.is_dir():
                continue

            for file in sorted(label_dir.glob('*.png')):
                # Close the file handle as soon as the pixels are read
                # instead of leaving it to the garbage collector.
                with Image.open(file) as image:
                    image_array = self._img_to_array(image)
                # Return the image's pixel values as an array alongside
                # the character that it represents.
                yield (label_dir.name, image_array)

    def get_image(self, path: str):
        """Load a single image from the dataset.

        Args:
            path: Location of the image relative to the data root.

        Returns:
            The image converted by :meth:`_img_to_array`.
        """
        with Image.open(f"{self.data_path}/{path}") as image:
            return self._img_to_array(image)

    def get_random_sample(self):
        """Placeholder: not implemented yet, currently returns ``None``."""
        pass

    def _img_to_array(self, image: "Image.Image"):
        """Flatten *image* onto a white background and binarise it.

        Args:
            image: A PIL image in any mode, with or without transparency.

        Returns:
            A NumPy array of the image after conversion to PIL mode ``'1'``
            (1-bit black and white).
        """
        fill_color = (255, 255, 255)  # White background.
        # Normalise to RGBA first so there is always an alpha channel to use
        # as the paste mask; slicing the mode string only worked for RGBA
        # input and broke on RGB, L or P images.
        rgba = image.convert('RGBA')
        background = Image.new('RGB', rgba.size, fill_color)
        background.paste(rgba, mask=rgba.split()[-1])

        return np.asarray(background.convert(mode='1'))