PyReader¶
- class paddle.fluid.reader. PyReader ( feed_list=None, capacity=None, use_double_buffer=True, iterable=True, return_list=False ) [source]
-
Create a reader object for data feeding in Python. Data would be prefetched using Python thread and be pushed into a queue asynchronously. Data in the queue would be extracted automatically when Executor.run(…) is called.
- Parameters
-
feed_list (list(Variable)|tuple(Variable)) – feed variable list. The variables should be created by
fluid.layers.data()
.capacity (int) – capacity of the queue maintained in PyReader. The unit is batch number. Set larger capacity if your reader is fast.
use_double_buffer (bool) – whether to use double_buffer_reader. If use_double_buffer=True, PyReader would prefetch next batch data asynchronously, so it would speed up data feeding and occupies a little more CPU or GPU memory, i.e., the memory of one batch input data.
iterable (bool) – whether the created PyReader is iterable.
return_list (bool) – whether the return value on each device is presented as a list. It is only valid when iterable=True. If return_list=False, the return value on each device would be a dict of str -> LoDTensor, where the key of the dict is the name of each fed variables. If return_list=True, the return value on each device would be a list(LoDTensor). It is recommended to use return_list=False in static graph mode and use return_list=True in dygraph mode.
- Returns
-
the created reader object.
- Return type:
-
reader(Reader)
Examples
If iterable = False, the created PyReader object is almost the same as
fluid.layers.py_reader()
. Operators would be inserted into the program. User should callstart()
before each epoch and catchfluid.core.EOFException
thrown byExecutor.run()
when epoch ends. Once the exception is caught, user should callreset()
to reset the reader manually.
import paddle import paddle.fluid as fluid import numpy as np EPOCH_NUM = 3 ITER_NUM = 5 BATCH_SIZE = 3 def network(image, label): # User-defined network, here is an example of softmax regression. predict = fluid.layers.fc(input=image, size=10, act='softmax') return fluid.layers.cross_entropy(input=predict, label=label) def reader_creator_random_image_and_label(height, width): def reader(): for i in range(ITER_NUM): fake_image = np.random.uniform(low=0, high=255, size=[height, width]) fake_label = np.ones([1]) yield fake_image, fake_label return reader image = fluid.data(name='image', shape=[None, 784, 784], dtype='float32') label = fluid.data(name='label', shape=[None, 1], dtype='int64') reader = fluid.io.PyReader(feed_list=[image, label], capacity=4, iterable=False) user_defined_reader = reader_creator_random_image_and_label(784, 784) reader.decorate_sample_list_generator( paddle.batch(user_defined_reader, batch_size=BATCH_SIZE)) loss = network(image, label) executor = fluid.Executor(fluid.CPUPlace()) executor.run(fluid.default_startup_program()) for i in range(EPOCH_NUM): reader.start() while True: try: executor.run(feed=None) except fluid.core.EOFException: reader.reset() break
If iterable=True, the created PyReader object is decoupled with the program. No operator would be inserted into the program. In this case, the created reader is a Python generator, which is iterable. User should feed the data yielded from PyReader object into
Executor.run(feed=...)
.
import paddle import paddle.fluid as fluid import numpy as np EPOCH_NUM = 3 ITER_NUM = 5 BATCH_SIZE = 10 def network(image, label): # User-defined network, here is an example of softmax regression. predict = fluid.layers.fc(input=image, size=10, act='softmax') return fluid.layers.cross_entropy(input=predict, label=label) def reader_creator_random_image(height, width): def reader(): for i in range(ITER_NUM): fake_image = np.random.uniform(low=0, high=255, size=[height, width]) fake_label = np.ones([1]) yield fake_image, fake_label return reader image = fluid.data(name='image', shape=[None, 784, 784], dtype='float32') label = fluid.data(name='label', shape=[None, 1], dtype='int64') reader = fluid.io.PyReader(feed_list=[image, label], capacity=4, iterable=True, return_list=False) user_defined_reader = reader_creator_random_image(784, 784) reader.decorate_sample_list_generator( paddle.batch(user_defined_reader, batch_size=BATCH_SIZE), fluid.core.CPUPlace()) loss = network(image, label) executor = fluid.Executor(fluid.CPUPlace()) executor.run(fluid.default_startup_program()) for _ in range(EPOCH_NUM): for data in reader(): executor.run(feed=data, fetch_list=[loss])
If return_list=True, the return values would be presented as list instead of dict. This is usually used in dygraph mode.
import paddle import paddle.fluid as fluid import numpy as np ITER_NUM = 5 BATCH_SIZE = 10 def reader_creator_random_image(height, width): def reader(): for i in range(ITER_NUM): yield np.random.uniform(low=0, high=255, size=[height, width]), \ np.random.random_integers(low=0, high=9, size=[1]) return reader place = fluid.CPUPlace() with fluid.dygraph.guard(place): py_reader = fluid.io.PyReader(capacity=2, return_list=True) user_defined_reader = reader_creator_random_image(784, 784) py_reader.decorate_sample_list_generator( paddle.batch(user_defined_reader, batch_size=BATCH_SIZE), place) for image, label in py_reader(): relu = fluid.layers.relu(image)
-
start
(
)
start¶
-
Start the data feeding thread. Can only call when the reader object is not iterable.
Example
import paddle import paddle.fluid as fluid import numpy as np BATCH_SIZE = 10 def generator(): for i in range(5): yield np.random.uniform(low=0, high=255, size=[784, 784]), image = fluid.data(name='image', shape=[None, 784, 784], dtype='float32') reader = fluid.io.PyReader(feed_list=[image], capacity=4, iterable=False) reader.decorate_sample_list_generator( paddle.batch(generator, batch_size=BATCH_SIZE)) executor = fluid.Executor(fluid.CPUPlace()) executor.run(fluid.default_startup_program()) for i in range(3): reader.start() while True: try: executor.run(feed=None) except fluid.core.EOFException: reader.reset() break
-
next
(
)
next¶
-
Get the next item in the DataLoader object. This method should not be called by users directly. It is used for implementing iterator protocol of Python 2.x inside PaddlePaddle framework.
-
reset
(
)
reset¶
-
Reset the reader object when
fluid.core.EOFException
raises. Can only call when the reader object is not iterable.Example
import paddle import paddle.fluid as fluid import numpy as np BATCH_SIZE = 10 def generator(): for i in range(5): yield np.random.uniform(low=0, high=255, size=[784, 784]), image = fluid.data(name='image', shape=[None, 784, 784], dtype='float32') reader = fluid.io.PyReader(feed_list=[image], capacity=4, iterable=False) reader.decorate_sample_list_generator( paddle.batch(generator, batch_size=BATCH_SIZE)) executor = fluid.Executor(fluid.CPUPlace()) executor.run(fluid.default_startup_program()) for i in range(3): reader.start() while True: try: executor.run(feed=None) except fluid.core.EOFException: reader.reset() break
-
decorate_sample_generator
(
sample_generator,
batch_size,
drop_last=True,
places=None
)
decorate_sample_generator¶
-
Set the data source of the PyReader object.
The provided
sample_generator
should be a Python generator, which yields list(numpy.ndarray)-typed data of each sample.places
must be set when the PyReader object is iterable.If all inputs have no lods, this method is faster than
decorate_sample_list_generator(paddle.batch(sample_generator, ...))
.- Parameters
-
sample_generator (generator) – Python generator that yields list(numpy.ndarray)-typed sample data.
batch_size (int) – batch size. Must be larger than 0.
drop_last (bool) – Whether to drop the last batch when sample number is less than batch_size.
places (None|list(CUDAPlace)|list(CPUPlace)) – place list. Must be provided when PyReader is iterable.
Example
import paddle.fluid as fluid import numpy as np EPOCH_NUM = 3 ITER_NUM = 15 BATCH_SIZE = 3 def network(image, label): # User-defined network, here is an example of softmax regression. predict = fluid.layers.fc(input=image, size=10, act='softmax') return fluid.layers.cross_entropy(input=predict, label=label) def random_image_and_label_generator(height, width): def generator(): for i in range(ITER_NUM): fake_image = np.random.uniform(low=0, high=255, size=[height, width]) fake_label = np.array([1]) yield fake_image, fake_label return generator image = fluid.data(name='image', shape=[None, 784, 784], dtype='float32') label = fluid.data(name='label', shape=[None, 1], dtype='int64') reader = fluid.io.PyReader(feed_list=[image, label], capacity=4, iterable=True) user_defined_generator = random_image_and_label_generator(784, 784) reader.decorate_sample_generator(user_defined_generator, batch_size=BATCH_SIZE, places=[fluid.CPUPlace()]) loss = network(image, label) executor = fluid.Executor(fluid.CPUPlace()) executor.run(fluid.default_startup_program()) for _ in range(EPOCH_NUM): for data in reader(): executor.run(feed=data, fetch_list=[loss])
-
decorate_sample_list_generator
(
reader,
places=None
)
decorate_sample_list_generator¶
-
Set the data source of the PyReader object.
The provided
reader
should be a Python generator, which yields list(numpy.ndarray) typed batched data.places
must be set when the PyReader object is iterable.- Parameters
-
reader (generator) – Python generator that yields list(numpy.ndarray)-typed batched data.
places (None|list(CUDAPlace)|list(CPUPlace)) – place list. Must be provided when PyReader is iterable.
Example
import paddle import paddle.fluid as fluid import numpy as np EPOCH_NUM = 3 ITER_NUM = 15 BATCH_SIZE = 3 def network(image, label): # User-defined network, here is an example of softmax regression. predict = fluid.layers.fc(input=image, size=10, act='softmax') return fluid.layers.cross_entropy(input=predict, label=label) def random_image_and_label_generator(height, width): def generator(): for i in range(ITER_NUM): fake_image = np.random.uniform(low=0, high=255, size=[height, width]) fake_label = np.ones([1]) yield fake_image, fake_label return generator image = fluid.data(name='image', shape=[None, 784, 784], dtype='float32') label = fluid.data(name='label', shape=[None, 1], dtype='int64') reader = fluid.io.PyReader(feed_list=[image, label], capacity=4, iterable=True) user_defined_generator = random_image_and_label_generator(784, 784) reader.decorate_sample_list_generator( paddle.batch(user_defined_generator, batch_size=BATCH_SIZE), fluid.core.CPUPlace()) loss = network(image, label) executor = fluid.Executor(fluid.core.CPUPlace()) executor.run(fluid.default_startup_program()) for _ in range(EPOCH_NUM): for data in reader(): executor.run(feed=data, fetch_list=[loss])
-
decorate_batch_generator
(
reader,
places=None
)
decorate_batch_generator¶
-
Set the data source of the PyReader object.
The provided
reader
should be a Python generator, which yields numpy.ndarray-typed or LoDTensor-typed batched data.places
must be set when the PyReader object is iterable.- Parameters
-
reader (generator) – Python generator that yields LoDTensor-typed batched data.
places (None|list(CUDAPlace)|list(CPUPlace)) – place list. Must be provided when PyReader is iterable.
Example
import paddle.fluid as fluid import numpy as np EPOCH_NUM = 3 ITER_NUM = 15 BATCH_SIZE = 3 def network(image, label): # User-defined network, here is an example of softmax regression. predict = fluid.layers.fc(input=image, size=10, act='softmax') return fluid.layers.cross_entropy(input=predict, label=label) def random_image_and_label_generator(height, width): def generator(): for i in range(ITER_NUM): batch_image = np.random.uniform(low=0, high=255, size=[BATCH_SIZE, height, width]) batch_label = np.ones([BATCH_SIZE, 1]) batch_image = batch_image.astype('float32') batch_label = batch_label.astype('int64') yield batch_image, batch_label return generator image = fluid.data(name='image', shape=[None, 784, 784], dtype='float32') label = fluid.data(name='label', shape=[None, 1], dtype='int64') reader = fluid.io.PyReader(feed_list=[image, label], capacity=4, iterable=True) user_defined_generator = random_image_and_label_generator(784, 784) reader.decorate_batch_generator(user_defined_generator, fluid.CPUPlace()) loss = network(image, label) executor = fluid.Executor(fluid.CPUPlace()) executor.run(fluid.default_startup_program()) for _ in range(EPOCH_NUM): for data in reader(): executor.run(feed=data, fetch_list=[loss])