ImageFolder¶
- class paddle.vision.datasets.ImageFolder(root, loader=None, extensions=None, transform=None, is_valid_file=None) [源代码] ¶
一种通用的数据加载方式,数据需要以如下的格式存放:
root/1.ext
root/2.ext
root/sub_dir/3.ext
参数¶
root (str) - 根目录路径。
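loader (Callable,可选) - 可以加载数据路径的一个函数,如果该值没有设定,默认使用当前图像后端(见 paddle.vision.get_image_backend)对应的加载函数:后端为 pil(默认)时使用 PIL 加载并返回 PIL.Image.Image,后端为 cv2 时使用 cv2.imread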
。默认值为 None。
extensions (list[str]|tuple[str],可选) - 允许的数据后缀列表,extensions 和 is_valid_file 不可以同时设置。如果该值没有设定,默认为 ('.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.pgm', '.tif', '.tiff', '.webp')。默认值为 None。
transform (Callable,可选) - 图片数据的预处理,若为 None 即为不做预处理。默认值为 None。
is_valid_file (Callable,可选) - 根据每条数据的路径来判断是否合法的一个函数。extensions 和 is_valid_file 不可以同时设置。默认值为 None。
属性¶
samples (list[str]) - 样本路径列表。
代码示例¶
>>> import shutil
>>> import tempfile
>>> import cv2
>>> import numpy as np
>>> import paddle.vision.transforms as T
>>> from pathlib import Path
>>> from paddle.vision.datasets import ImageFolder
>>> def make_fake_file(img_path: str):
... if img_path.endswith((".jpg", ".png", ".jpeg")):
... fake_img = np.random.randint(0, 256, (32, 32, 3), dtype=np.uint8)
... cv2.imwrite(img_path, fake_img)
... elif img_path.endswith(".txt"):
... with open(img_path, "w") as f:
... f.write("This is a fake file.")
>>> def make_directory(root, directory_hierarchy, file_maker=make_fake_file):
... root = Path(root)
... root.mkdir(parents=True, exist_ok=True)
... for subpath in directory_hierarchy:
... if isinstance(subpath, str):
... filepath = root / subpath
... file_maker(str(filepath))
... else:
... dirname = list(subpath.keys())[0]
... make_directory(root / dirname, subpath[dirname])
>>> directory_hierarchy = [
... "abc.jpg",
... "def.png",
... {"ghi": [
... "jkl.jpeg",
... {"mno": [
... "pqr.jpg"]}]},
... "this_will_be_ignored.txt",
... ]
>>> # You can replace this with any directory to explore the structure
>>> # of generated data. e.g. fake_data_dir = "./temp_dir"
>>> fake_data_dir = tempfile.mkdtemp()
>>> make_directory(fake_data_dir, directory_hierarchy)
>>> image_folder_1 = ImageFolder(fake_data_dir)
>>> print(image_folder_1.samples)
['./temp_dir/abc.jpg', './temp_dir/def.png',
'./temp_dir/ghi/jkl.jpeg', './temp_dir/ghi/mno/pqr.jpg']
>>> print(len(image_folder_1))
4
>>> for i in range(len(image_folder_1)):
... (img,) = image_folder_1[i]
... # do something with img
... print(type(img), img.size)
... # <class 'PIL.Image.Image'> (32, 32)
>>> transform = T.Compose(
... [
... T.Resize(64),
... T.ToTensor(),
... T.Normalize(
... mean=[0.5, 0.5, 0.5],
... std=[0.5, 0.5, 0.5],
... to_rgb=True,
... ),
... ]
... )
>>> image_folder_2 = ImageFolder(
... fake_data_dir,
... loader=lambda x: cv2.imread(x), # load image with OpenCV
... extensions=(".jpg",), # only load *.jpg files
... transform=transform, # apply transform to every image
... )
>>> print(image_folder_2.samples)
['./temp_dir/abc.jpg', './temp_dir/ghi/mno/pqr.jpg']
>>> print(len(image_folder_2))
2
>>> for (img,) in iter(image_folder_2):
... # do something with img
... print(type(img), img.shape)
... # <class 'paddle.Tensor'> [3, 64, 64]
>>> shutil.rmtree(fake_data_dir)