ImageFolder¶
- class paddle.vision.datasets.ImageFolder(root, loader=None, extensions=None, transform=None, is_valid_file=None) [source]
-
A generic data loader where the samples are arranged in this way:
root/1.ext root/2.ext root/sub_dir/3.ext
- Parameters
-
root (str) – Root directory path.
loader (Callable, optional) – A function to load a sample given its path. Default: None.
extensions (list[str]|tuple[str], optional) – A list of allowed extensions. extensions and is_valid_file must not both be passed. If this value is not set, the default is to use (‘.jpg’, ‘.jpeg’, ‘.png’, ‘.ppm’, ‘.bmp’, ‘.pgm’, ‘.tif’, ‘.tiff’, ‘.webp’). Default: None.
transform (Callable, optional) – A function/transform that takes in a sample and returns a transformed version. Default: None.
is_valid_file (Callable, optional) – A function that takes the path of a file and checks whether the file is a valid file. extensions and is_valid_file must not both be passed. Default: None.
- Returns
-
Dataset. An instance of ImageFolder.
- samples
-
List of sample paths.
- Type
-
list[str]
Example
import shutil
import tempfile
from pathlib import Path

import cv2
import numpy as np

import paddle.vision.transforms as T
from paddle.vision.datasets import ImageFolder


def make_fake_file(img_path: str):
    """Create a placeholder file at ``img_path`` based on its extension.

    ``.jpg``/``.png``/``.jpeg`` paths get a random 32x32 RGB image written
    with OpenCV; ``.txt`` paths get a short text file. Any other extension
    is silently skipped.
    """
    if img_path.endswith((".jpg", ".png", ".jpeg")):
        fake_img = np.random.randint(0, 256, (32, 32, 3), dtype=np.uint8)
        cv2.imwrite(img_path, fake_img)
    elif img_path.endswith(".txt"):
        with open(img_path, "w") as f:
            f.write("This is a fake file.")


def make_directory(root, directory_hierarchy, file_maker=make_fake_file):
    """Materialize ``directory_hierarchy`` on disk underneath ``root``.

    Args:
        root: Directory to populate; created (with parents) if missing.
        directory_hierarchy: A list whose items are either file names (str)
            or dicts mapping a sub-directory name to its own nested list.
        file_maker: Callable invoked with the full path (as ``str``) of
            every file entry.
    """
    root = Path(root)
    root.mkdir(parents=True, exist_ok=True)
    for subpath in directory_hierarchy:
        if isinstance(subpath, str):
            file_maker(str(root / subpath))
        else:
            # Forward ``file_maker`` so a custom maker is also used for
            # nested directories, and handle every key of the dict rather
            # than only the first one.
            for dirname, children in subpath.items():
                make_directory(root / dirname, children, file_maker)


directory_hierarchy = [
    "abc.jpg",
    "def.png",
    {"ghi": [
        "jkl.jpeg",
        {"mno": [
            "pqr.jpg"]}]},
    "this_will_be_ignored.txt",
]

# You can replace this with any directory to explore the structure
# of generated data. e.g. fake_data_dir = "./temp_dir"
fake_data_dir = tempfile.mkdtemp()
make_directory(fake_data_dir, directory_hierarchy)

image_folder_1 = ImageFolder(fake_data_dir)
print(image_folder_1.samples)
# With fake_data_dir = "./temp_dir" this prints (the prefix is the
# temporary directory path otherwise):
# ['./temp_dir/abc.jpg', './temp_dir/def.png',
#  './temp_dir/ghi/jkl.jpeg', './temp_dir/ghi/mno/pqr.jpg']
print(len(image_folder_1))
# 4

# Index-based access: every sample is a 1-tuple holding the loaded image.
for i in range(len(image_folder_1)):
    (img,) = image_folder_1[i]
    # do something with img
    print(type(img), img.size)
    # <class 'PIL.Image.Image'> (32, 32)

transform = T.Compose(
    [
        T.Resize(64),
        T.ToTensor(),
        T.Normalize(
            mean=[0.5, 0.5, 0.5],
            std=[0.5, 0.5, 0.5],
            to_rgb=True,
        ),
    ]
)

image_folder_2 = ImageFolder(
    fake_data_dir,
    loader=lambda x: cv2.imread(x),  # load image with OpenCV
    extensions=(".jpg",),  # only load *.jpg files
    transform=transform,  # apply transform to every image
)

print(image_folder_2.samples)
# With fake_data_dir = "./temp_dir":
# ['./temp_dir/abc.jpg', './temp_dir/ghi/mno/pqr.jpg']
print(len(image_folder_2))
# 2

for (img,) in iter(image_folder_2):
    # do something with img
    print(type(img), img.shape)
    # <class 'paddle.Tensor'> [3, 64, 64]

# Remove the generated fake data.
shutil.rmtree(fake_data_dir)