ImageFolder

class paddle.vision.datasets.ImageFolder(root, loader=None, extensions=None, transform=None, is_valid_file=None) [源代码]

一种通用的数据加载方式,数据需要以如下的格式存放:

root/1.ext
root/2.ext
root/sub_dir/3.ext

参数

  • root (str) - 根目录路径。

  • loader (Callable,可选) - 可以加载数据路径的一个函数。如果该值没有设定,则根据当前图像后端(paddle.vision.get_image_backend())选择默认加载方式:后端为 'cv2' 时使用 cv2.imread,否则使用 PIL 加载并返回 PIL.Image.Image。默认值为 None。

  • extensions (list[str]|tuple[str],可选) - 允许的数据后缀列表,extensions 和 is_valid_file 不可以同时设置。如果该值没有设定,默认为 ('.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.pgm', '.tif', '.tiff', '.webp')。默认值为 None。

  • transform (Callable,可选) - 图片数据的预处理,若为 None 即为不做预处理。默认值为 None。

  • is_valid_file (Callable,可选) - 根据每条数据的路径来判断是否合法的一个函数。extensions 和 is_valid_file 不可以同时设置。默认值为 None。

返回

Dataset,ImageFolder 实例。

属性

  • samples (list[str]) - 样本路径列表。

代码示例

import shutil
import tempfile
import cv2
import numpy as np
import paddle.vision.transforms as T
from pathlib import Path
from paddle.vision.datasets import ImageFolder


def make_fake_file(img_path: str):
    """Write a placeholder file at ``img_path``, chosen by its suffix.

    * ``.txt``: a text file containing one fixed sentence.
    * ``.jpg`` / ``.png`` / ``.jpeg``: a 32x32, 3-channel image of random
      ``uint8`` values, written with ``cv2.imwrite``.
    * any other suffix: nothing is created.

    Args:
        img_path: Destination path of the file to generate.
    """
    if img_path.endswith(".txt"):
        with open(img_path, "w") as handle:
            handle.write("This is a fake file.")
        return

    is_image = img_path.endswith((".jpg", ".png", ".jpeg"))
    if not is_image:
        # Unknown suffix: deliberately leave the filesystem untouched.
        return

    pixels = np.random.randint(0, 256, (32, 32, 3), dtype=np.uint8)
    cv2.imwrite(img_path, pixels)

def make_directory(root, directory_hierarchy, file_maker=make_fake_file):
    """Create the files and sub-directories described by ``directory_hierarchy``.

    Args:
        root: Directory to populate (``str`` or ``Path``). It is created,
            together with any missing parents, if it does not exist yet.
        directory_hierarchy: Iterable whose items are either a file name
            (``str``) or a ``dict`` mapping a sub-directory name to a nested
            hierarchy of the same form.
        file_maker: Callable invoked with the full path (as ``str``) of every
            file entry; it is responsible for actually writing the file.
    """
    root = Path(root)
    root.mkdir(parents=True, exist_ok=True)
    for entry in directory_hierarchy:
        if isinstance(entry, str):
            file_maker(str(root / entry))
            continue
        # Walk every key (not just the first) so one dict may describe
        # several sibling sub-directories, and forward ``file_maker`` so a
        # custom factory is honoured at every nesting level rather than
        # silently falling back to the default inside sub-directories.
        for dirname, children in entry.items():
            make_directory(root / dirname, children, file_maker)

# Layout of the fake dataset: a plain string is a file name, a dict maps a
# sub-directory name to its own nested layout.
directory_hierarchy = [
    "abc.jpg",
    "def.png",
    {"ghi": [
        "jkl.jpeg",
        {"mno": [
            "pqr.jpg"]}]},
    "this_will_be_ignored.txt",
]

# You can replace this with any directory to explore the structure
# of generated data. e.g. fake_data_dir = "./temp_dir"
# NOTE: the directory is deleted again at the end of this example.
fake_data_dir = tempfile.mkdtemp()
try:
    make_directory(fake_data_dir, directory_hierarchy)

    # 1) Default settings: no custom loader, extensions or transform.
    image_folder_1 = ImageFolder(fake_data_dir)
    print(image_folder_1.samples)
    # ['./temp_dir/abc.jpg', './temp_dir/def.png',
    #  './temp_dir/ghi/jkl.jpeg', './temp_dir/ghi/mno/pqr.jpg']
    print(len(image_folder_1))
    # 4

    # Index-based access: every item is a 1-tuple holding the loaded image.
    for i in range(len(image_folder_1)):
        (img,) = image_folder_1[i]
        # do something with img
        print(type(img), img.size)
        # <class 'PIL.Image.Image'> (32, 32)

    # 2) Custom loader, restricted extensions and a preprocessing pipeline.
    transform = T.Compose(
        [
            T.Resize(64),
            T.ToTensor(),
            T.Normalize(
                mean=[0.5, 0.5, 0.5],
                std=[0.5, 0.5, 0.5],
                to_rgb=True,
            ),
        ]
    )

    image_folder_2 = ImageFolder(
        fake_data_dir,
        loader=cv2.imread,  # load image with OpenCV
        extensions=(".jpg",),  # only load *.jpg files
        transform=transform,  # apply transform to every image
    )

    print(image_folder_2.samples)
    # ['./temp_dir/abc.jpg', './temp_dir/ghi/mno/pqr.jpg']
    print(len(image_folder_2))
    # 2

    # Direct iteration over the dataset also yields 1-tuples.
    for (img,) in image_folder_2:
        # do something with img
        print(type(img), img.shape)
        # <class 'paddle.Tensor'> [3, 64, 64]
finally:
    # Remove the generated data even if one of the steps above raised, so
    # the temporary directory is never left behind.
    shutil.rmtree(fake_data_dir)