The DataParameter section of caffe.proto
message DataParameter {
  // DB type used for the input data.
  enum DB {
    LEVELDB = 0;  // use LevelDB
    LMDB = 1;     // use LMDB
  }
  // Specify the data source: the path to the source data.
  optional string source = 1;
  // Specify the batch size: the number of images contained in one batch.
  optional uint32 batch_size = 4;
  // The rand_skip variable is for the data layer to skip a few data points
  // to avoid all asynchronous sgd clients to start at the same point. The skip
  // point would be set as rand_skip * rand(0,1). Note that rand_skip should not
  // be larger than the number of keys in the database.
  // DEPRECATED. Each solver accesses a different subset of the database.
  // Randomly skip some images so that SGD clients do not start from the same point. Deprecated.
  optional uint32 rand_skip = 7 [default = 0];
  // DB type used for the input data; defaults to LEVELDB.
  optional DB backend = 8 [default = LEVELDB];
  // DEPRECATED. See TransformationParameter. For data pre-processing, we can do
  // simple scaling and subtracting the data mean, if provided. Note that the
  // mean subtraction is always carried out before scaling.
  optional float scale = 2 [default = 1];
  optional string mean_file = 3;
  // DEPRECATED. See TransformationParameter. Specify if we would like to randomly
  // crop an image.
  optional uint32 crop_size = 5 [default = 0];
  // DEPRECATED. See TransformationParameter. Specify if we want to randomly mirror
  // data.
  optional bool mirror = 6 [default = false];
  // Force the encoded image to have 3 color channels.
  optional bool force_encoded_color = 9 [default = false];
  // Prefetch queue (Increase if data feeding bandwidth varies, within the
  // limit of device memory for GPU training)
  // Number of batches prefetched into host memory ahead of time, hardware permitting; defaults to 4.
  optional uint32 prefetch = 10 [default = 4];
}
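protoc turns this message into a C++ class, so a layer reads these fields through the generated accessors on LayerParameter; the base_data_layer.cpp code below does exactly this with param.data_param().prefetch(). A minimal sketch of setting and reading the fields through that generated API follows; ConfigureToyDataParam, the source path, and the values are purely illustrative.

#include <cstdio>

#include "caffe/proto/caffe.pb.h"

// Sketch only: configure a DataParameter through the generated protobuf API.
void ConfigureToyDataParam(caffe::LayerParameter* layer_param) {
  caffe::DataParameter* data_param = layer_param->mutable_data_param();
  data_param->set_source("examples/toy_train_lmdb");       // illustrative path
  data_param->set_backend(caffe::DataParameter_DB_LMDB);   // override the LEVELDB default
  data_param->set_batch_size(64);                          // images per batch
  // prefetch keeps its default of 4 host-side batches unless set explicitly:
  // data_param->set_prefetch(8);
  std::printf("batch_size=%u prefetch=%u\n",
              data_param->batch_size(), data_param->prefetch());
}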
include/caffe/layers/base_data_layer.hpp
#ifndef CAFFE_DATA_LAYERS_HPP_
#define CAFFE_DATA_LAYERS_HPP_

#include <vector>

#include "caffe/blob.hpp"
#include "caffe/data_transformer.hpp"
#include "caffe/internal_thread.hpp"
#include "caffe/layer.hpp"
#include "caffe/proto/caffe.pb.h"
#include "caffe/util/blocking_queue.hpp"

namespace caffe {

/**
 * @brief Provides base for data layers that feed blobs to the Net.
 *
 * TODO(dox): thorough documentation for Forward and proto params.
 */
// Base data layer, derived from Layer.
template <typename Dtype>
class BaseDataLayer : public Layer<Dtype> {
 public:
  explicit BaseDataLayer(const LayerParameter& param);
  // LayerSetUp: implements common data layer setup functionality, and calls
  // DataLayerSetUp to do special data layer setup for individual layer types.
  // This method may not be overridden except by the BasePrefetchingDataLayer.
  // Common setup first, then DataLayerSetUp for the data-layer-specific configuration.
  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {}
  // Data layers have no bottoms, so reshaping is trivial.
  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {}
  // Backward does nothing for a data layer.
  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {}
  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {}

 protected:
  TransformationParameter transform_param_;  // parameters of the data transformer
  shared_ptr<DataTransformer<Dtype> > data_transformer_;  // data pre-processing transformer
  bool output_labels_;  // whether label data is output
};

// One batch of data, used to hold the output of a data layer.
template <typename Dtype>
class Batch {
 public:
  Blob<Dtype> data_, label_;  // two blobs storing image data and labels
};

// Data layer with prefetching, derived from BaseDataLayer and InternalThread.
template <typename Dtype>
class BasePrefetchingDataLayer :
    public BaseDataLayer<Dtype>, public InternalThread {
 public:
  explicit BasePrefetchingDataLayer(const LayerParameter& param);
  // LayerSetUp: implements common data layer setup functionality, and calls
  // DataLayerSetUp to do special data layer setup for individual layer types.
  // This method may not be overridden.
  void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);

  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);  // forward pass
  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);

 protected:
  virtual void InternalThreadEntry();  // entry point of the internal prefetch thread
  virtual void load_batch(Batch<Dtype>* batch) = 0;  // load one batch; pure virtual

  vector<shared_ptr<Batch<Dtype> > > prefetch_;  // prefetch buffers
  BlockingQueue<Batch<Dtype>*> prefetch_free_;  // queue of free (empty) batches
  BlockingQueue<Batch<Dtype>*> prefetch_full_;  // queue of loaded batches
  Batch<Dtype>* prefetch_current_;

  Blob<Dtype> transformed_data_;  // transformed data
};

}  // namespace caffe

#endif  // CAFFE_DATA_LAYERS_HPP_
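Before reading the .cpp file, it helps to see the producer/consumer pattern that the two BlockingQueue members implement: the prefetch thread pops an empty Batch from the free queue, fills it, and pushes it onto the full queue; the forward pass pops a full Batch and later returns it to the free queue. The following is a stand-alone sketch of that recycling scheme, not Caffe code: SimpleBlockingQueue and FakeBatch are stand-ins for caffe::BlockingQueue and Batch<Dtype>, and the std::thread plays the role of the internal prefetch thread.

#include <condition_variable>
#include <cstdio>
#include <mutex>
#include <queue>
#include <thread>
#include <vector>

template <typename T>
class SimpleBlockingQueue {                  // stand-in for caffe::BlockingQueue
 public:
  void push(const T& t) {
    { std::lock_guard<std::mutex> lock(mutex_); queue_.push(t); }
    cond_.notify_one();
  }
  T pop() {                                  // blocks until an element is available
    std::unique_lock<std::mutex> lock(mutex_);
    cond_.wait(lock, [this] { return !queue_.empty(); });
    T t = queue_.front();
    queue_.pop();
    return t;
  }
 private:
  std::queue<T> queue_;
  std::mutex mutex_;
  std::condition_variable cond_;
};

struct FakeBatch { std::vector<float> data; };  // stand-in for Batch<Dtype>

int main() {
  const int kPrefetch = 4;                   // mirrors the DataParameter.prefetch default
  std::vector<FakeBatch> storage(kPrefetch);
  SimpleBlockingQueue<FakeBatch*> free_q, full_q;
  for (int i = 0; i < kPrefetch; ++i) free_q.push(&storage[i]);  // all batches start free

  // Producer: the "prefetch thread" fills batches and hands them to the consumer.
  std::thread producer([&] {
    for (int n = 0; n < 8; ++n) {
      FakeBatch* b = free_q.pop();           // like prefetch_free_.pop()
      b->data.assign(10, static_cast<float>(n));  // stands in for load_batch()
      full_q.push(b);                        // like prefetch_full_.push(batch)
    }
  });

  // Consumer: the "forward pass" takes loaded batches and recycles them.
  for (int n = 0; n < 8; ++n) {
    FakeBatch* b = full_q.pop();             // like prefetch_full_.pop(...)
    std::printf("consumed batch with value %.0f\n", b->data[0]);
    free_q.push(b);                          // like prefetch_free_.push(prefetch_current_)
  }
  producer.join();
  return 0;
}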
src/caffe/layers/base_data_layer.cpp
#include <boost/thread.hpp>
#include <vector>

#include "caffe/blob.hpp"
#include "caffe/data_transformer.hpp"
#include "caffe/internal_thread.hpp"
#include "caffe/layer.hpp"
#include "caffe/layers/base_data_layer.hpp"
#include "caffe/proto/caffe.pb.h"
#include "caffe/util/blocking_queue.hpp"

namespace caffe {

// Constructor. Initializes the Layer parameters and the data transformer parameters.
template <typename Dtype>
BaseDataLayer<Dtype>::BaseDataLayer(const LayerParameter& param)
    : Layer<Dtype>(param),
      transform_param_(param.transform_param()) {
}

// BaseDataLayer setup.
template <typename Dtype>
void BaseDataLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  if (top.size() == 1) {  // one top blob outputs only data; two output data and label
    output_labels_ = false;
  } else {
    output_labels_ = true;
  }
  // Create the data transformer object.
  data_transformer_.reset(
      new DataTransformer<Dtype>(transform_param_, this->phase_));
  data_transformer_->InitRand();
  // The subclasses should setup the size of bottom and top
  DataLayerSetUp(bottom, top);  // subclasses are responsible for shaping the top blobs
}

// BasePrefetchingDataLayer constructor.
template <typename Dtype>
BasePrefetchingDataLayer<Dtype>::BasePrefetchingDataLayer(
    const LayerParameter& param)
    : BaseDataLayer<Dtype>(param),
      prefetch_(param.data_param().prefetch()),
      prefetch_free_(), prefetch_full_(), prefetch_current_() {
  for (int i = 0; i < prefetch_.size(); ++i) {
    prefetch_[i].reset(new Batch<Dtype>());
    prefetch_free_.push(prefetch_[i].get());  // put every Batch object into the free queue
  }
}

// BasePrefetchingDataLayer setup.
template <typename Dtype>
void BasePrefetchingDataLayer<Dtype>::LayerSetUp(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  BaseDataLayer<Dtype>::LayerSetUp(bottom, top);

  // Before starting the prefetch thread, we make cpu_data and gpu_data
  // calls so that the prefetch thread does not accidentally make simultaneous
  // cudaMalloc calls when the main thread is running. In some GPUs this
  // seems to cause failures if we do not so.
  // In other words, trigger cudaMalloc through the Blob accessors here, so that
  // concurrent cudaMalloc calls from several threads cannot make CUDA API calls fail.
  for (int i = 0; i < prefetch_.size(); ++i) {
    prefetch_[i]->data_.mutable_cpu_data();
    if (this->output_labels_) {
      prefetch_[i]->label_.mutable_cpu_data();
    }
  }
  // GPU
#ifndef CPU_ONLY
  if (Caffe::mode() == Caffe::GPU) {
    for (int i = 0; i < prefetch_.size(); ++i) {
      prefetch_[i]->data_.mutable_gpu_data();
      if (this->output_labels_) {
        prefetch_[i]->label_.mutable_gpu_data();
      }
    }
  }
#endif
  DLOG(INFO) << "Initializing prefetch";
  this->data_transformer_->InitRand();
  StartInternalThread();  // start the internal prefetch thread
  DLOG(INFO) << "Prefetch initialized.";
}

// Internal thread entry point.
template <typename Dtype>
void BasePrefetchingDataLayer<Dtype>::InternalThreadEntry() {
  // Create a non-blocking CUDA stream.
#ifndef CPU_ONLY
  cudaStream_t stream;
  if (Caffe::mode() == Caffe::GPU) {
    CUDA_CHECK(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking));
  }
#endif

  try {
    while (!must_stop()) {  // keep loading batches in a loop
      Batch<Dtype>* batch = prefetch_free_.pop();  // take a free batch
      load_batch(batch);  // fill it with data
#ifndef CPU_ONLY
      if (Caffe::mode() == Caffe::GPU) {
        batch->data_.data().get()->async_gpu_push(stream);
        if (this->output_labels_) {
          batch->label_.data().get()->async_gpu_push(stream);
        }
        CUDA_CHECK(cudaStreamSynchronize(stream));  // wait for the copy to the GPU to finish
      }
#endif
      prefetch_full_.push(batch);  // hand the loaded batch to the full queue
    }
  } catch (boost::thread_interrupted&) {  // on this exception, leave the loop
    // Interrupted exception is expected on shutdown
  }
#ifndef CPU_ONLY
  if (Caffe::mode() == Caffe::GPU) {
    CUDA_CHECK(cudaStreamDestroy(stream));  // destroy the CUDA stream
  }
#endif
}

// Forward pass.
template <typename Dtype>
void BasePrefetchingDataLayer<Dtype>::Forward_cpu(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  if (prefetch_current_) {
    prefetch_free_.push(prefetch_current_);  // recycle the previously consumed batch
  }
  // Take one loaded Batch object from the full queue.
  prefetch_current_ = prefetch_full_.pop("Waiting for data");
  // Reshape to loaded data. Top blob 0 takes the shape of the batch data.
  top[0]->ReshapeLike(prefetch_current_->data_);
  top[0]->set_cpu_data(prefetch_current_->data_.mutable_cpu_data());
  if (this->output_labels_) {  // if label output is required
    // Reshape to loaded labels. Top blob 1 takes the shape of label_.
    top[1]->ReshapeLike(prefetch_current_->label_);
    top[1]->set_cpu_data(prefetch_current_->label_.mutable_cpu_data());
  }
}

#ifdef CPU_ONLY
STUB_GPU_FORWARD(BasePrefetchingDataLayer, Forward);
#endif

INSTANTIATE_CLASS(BaseDataLayer);
INSTANTIATE_CLASS(BasePrefetchingDataLayer);

}  // namespace caffe
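The base class only manages the queues and the prefetch thread; a concrete data layer supplies DataLayerSetUp and load_batch. The following is a hypothetical minimal subclass (ToyDataLayer is not part of Caffe): the fixed 3x32x32 shape and the zero-filled "images" are placeholders, and layer registration plus the type() override are omitted. Caffe's real DataLayer reads shapes and contents from an LMDB/LevelDB cursor instead.

#include <vector>

#include "caffe/layers/base_data_layer.hpp"
#include "caffe/util/math_functions.hpp"

namespace caffe {

template <typename Dtype>
class ToyDataLayer : public BasePrefetchingDataLayer<Dtype> {
 public:
  explicit ToyDataLayer(const LayerParameter& param)
      : BasePrefetchingDataLayer<Dtype>(param) {}

  // Shape the top blobs and the prefetch buffers from the layer parameters.
  virtual void DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
    const int batch_size = this->layer_param_.data_param().batch_size();
    // Assumed fixed 3x32x32 images; a real layer reads the shape from its DB.
    vector<int> data_shape(4);
    data_shape[0] = batch_size; data_shape[1] = 3;
    data_shape[2] = 32;         data_shape[3] = 32;
    top[0]->Reshape(data_shape);
    for (int i = 0; i < this->prefetch_.size(); ++i) {
      this->prefetch_[i]->data_.Reshape(data_shape);
    }
    if (this->output_labels_) {
      vector<int> label_shape(1, batch_size);
      top[1]->Reshape(label_shape);
      for (int i = 0; i < this->prefetch_.size(); ++i) {
        this->prefetch_[i]->label_.Reshape(label_shape);
      }
    }
  }

 protected:
  // Called on the prefetch thread: fill one Batch with data and labels.
  virtual void load_batch(Batch<Dtype>* batch) {
    Dtype* data = batch->data_.mutable_cpu_data();
    caffe_set(batch->data_.count(), Dtype(0), data);  // placeholder "images"
    if (this->output_labels_) {
      Dtype* label = batch->label_.mutable_cpu_data();
      caffe_set(batch->label_.count(), Dtype(0), label);
    }
  }
};

}  // namespace caffe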
Excerpted with reference to 赵永科 (Zhao Yongke), 《21天实战caffe》 (21 Days of Hands-On Caffe).