ImageFolder¶
- class paddle.vision.datasets. ImageFolder ( root, loader=None, extensions=None, transform=None, is_valid_file=None ) [source]
-
A generic data loader where the samples are arranged in this way:
root/1.ext
root/2.ext
root/sub_dir/3.ext
- Parameters
-
root (str) – Root directory path.
loader (Callable, optional) – A function to load a sample given its path. Default: None.
extensions (list[str]|tuple[str], optional) – A list of allowed extensions. `extensions` and `is_valid_file` should not both be passed. If this value is not set, the default is to use ('.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.pgm', '.tif', '.tiff', '.webp'). Default: None.
transform (Callable, optional) – A function/transform that takes in a sample and returns a transformed version. Default: None.
is_valid_file (Callable, optional) – A function that takes the path of a file and checks whether it is a valid file. `extensions` and `is_valid_file` should not both be passed. Default: None.
- Returns
-
Dataset. An instance of ImageFolder.
- samples
-
List of sample paths.
- Type
-
list[str]
Example
>>> import shutil >>> import tempfile >>> import cv2 >>> import numpy as np >>> import paddle.vision.transforms as T >>> from pathlib import Path >>> from paddle.vision.datasets import ImageFolder >>> def make_fake_file(img_path: str): ... if img_path.endswith((".jpg", ".png", ".jpeg")): ... fake_img = np.random.randint(0, 256, (32, 32, 3), dtype=np.uint8) ... cv2.imwrite(img_path, fake_img) ... elif img_path.endswith(".txt"): ... with open(img_path, "w") as f: ... f.write("This is a fake file.") >>> def make_directory(root, directory_hierarchy, file_maker=make_fake_file): ... root = Path(root) ... root.mkdir(parents=True, exist_ok=True) ... for subpath in directory_hierarchy: ... if isinstance(subpath, str): ... filepath = root / subpath ... file_maker(str(filepath)) ... else: ... dirname = list(subpath.keys())[0] ... make_directory(root / dirname, subpath[dirname]) >>> directory_hierarchy = [ ... "abc.jpg", ... "def.png", ... {"ghi": [ ... "jkl.jpeg", ... {"mno": [ ... "pqr.jpg"]}]}, ... "this_will_be_ignored.txt", ... ] >>> # You can replace this with any directory to explore the structure >>> # of generated data. e.g. fake_data_dir = "./temp_dir" >>> fake_data_dir = tempfile.mkdtemp() >>> make_directory(fake_data_dir, directory_hierarchy) >>> image_folder_1 = ImageFolder(fake_data_dir) >>> print(image_folder_1.samples) >>> ['./temp_dir/abc.jpg', './temp_dir/def.png', './temp_dir/ghi/jkl.jpeg', './temp_dir/ghi/mno/pqr.jpg'] >>> >>> print(len(image_folder_1)) 4 >>> for i in range(len(image_folder_1)): ... (img,) = image_folder_1[i] ... # do something with img ... print(type(img), img.size) ... # <class 'PIL.Image.Image'> (32, 32) >>> transform = T.Compose( ... [ ... T.Resize(64), ... T.ToTensor(), ... T.Normalize( ... mean=[0.5, 0.5, 0.5], ... std=[0.5, 0.5, 0.5], ... to_rgb=True, ... ), ... ] ... ) >>> image_folder_2 = ImageFolder( ... fake_data_dir, ... loader=lambda x: cv2.imread(x), # load image with OpenCV ... 
extensions=(".jpg",), # only load *.jpg files ... transform=transform, # apply transform to every image ... ) >>> print(image_folder_2.samples) >>> ['./temp_dir/abc.jpg', './temp_dir/ghi/mno/pqr.jpg'] >>> >>> print(len(image_folder_2)) 2 >>> for (img,) in iter(image_folder_2): ... # do something with img ... print(type(img), img.shape) ... # <class 'paddle.Tensor'> [3, 64, 64] >>> shutil.rmtree(fake_data_dir)