forked from BVLC/caffe
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathbase_data_layer.cpp
More file actions
128 lines (111 loc) · 3.54 KB
/
Copy pathbase_data_layer.cpp
File metadata and controls
128 lines (111 loc) · 3.54 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
#include <string>
#include <vector>
#include "caffe/data_layers.hpp"
#include "caffe/util/io.hpp"
namespace caffe {
template <typename Dtype>
BaseDataLayer<Dtype>::BaseDataLayer(const LayerParameter& param)
: Layer<Dtype>(param),
transform_param_(param.transform_param()),
data_transformer_(transform_param_) {
}
template <typename Dtype>
void BaseDataLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {
if (top.size() == 1) {
output_labels_ = false;
} else {
output_labels_ = true;
}
// The subclasses should setup the size of bottom and top
DataLayerSetUp(bottom, top);
data_transformer_.InitRand();
}
template <typename Dtype>
BasePrefetchingDataLayer<Dtype>::BasePrefetchingDataLayer(
const LayerParameter& param)
: BaseDataLayer<Dtype>(param),
prefetch_free_(), prefetch_full_(), device_() {
for(int i = 0; i < PREFETCH_COUNT; ++i)
prefetch_free_.push(&prefetch_[i]);
}
template <typename Dtype>
void BasePrefetchingDataLayer<Dtype>::LayerSetUp(
const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
BaseDataLayer<Dtype>::LayerSetUp(bottom, top);
// Before starting the prefetch thread, we make cpu_data and gpu_data
// calls so that the prefetch thread does not accidentally make simultaneous
// cudaMalloc calls when the main thread is running. In some GPUs this
// seems to cause failures if we do not so.
for(int i = 0; i < PREFETCH_COUNT; ++i) {
prefetch_[i].data_.mutable_cpu_data();
if (this->output_labels_) {
prefetch_[i].label_.mutable_cpu_data();
}
}
switch (Caffe::mode()) {
case Caffe::CPU:
device_ = -1;
break;
case Caffe::GPU:
#ifndef CPU_ONLY
for(int i = 0; i < PREFETCH_COUNT; ++i) {
prefetch_[i].data_.mutable_gpu_data();
if (this->output_labels_) {
prefetch_[i].label_.mutable_gpu_data();
}
}
CUDA_CHECK(cudaGetDevice(&device_));
#endif
break;
}
DLOG(INFO) << "Initializing prefetch";
this->phase_ = Caffe::phase();
this->data_transformer_.InitRand();
CHECK(StartInternalThread()) << "Thread execution failed";
DLOG(INFO) << "Prefetch initialized.";
}
template <typename Dtype>
BasePrefetchingDataLayer<Dtype>::~BasePrefetchingDataLayer() {
CHECK(StopInternalThread()) << "Stop thread failed";
}
template <typename Dtype>
void BasePrefetchingDataLayer<Dtype>::InternalThreadEntry() {
#ifndef CPU_ONLY
cudaStream_t stream;
if(device_ >= 0) {
CUDA_CHECK(cudaSetDevice(device_));
cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking);
}
#endif
while(!must_stop()) {
Batch<Dtype>* batch = prefetch_free_.pop();
load_batch(batch);
#ifndef CPU_ONLY
if(device_ >= 0) {
batch->data_.data().get()->async_gpu_push(stream);
cudaStreamSynchronize(stream);
}
#endif
prefetch_full_.push(batch);
}
}
template <typename Dtype>
void BasePrefetchingDataLayer<Dtype>::Forward_cpu(
const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
Batch<Dtype>* batch = prefetch_full_.pop("Data layer prefetch queue empty");
caffe_copy(batch->data_.count(), batch->data_.cpu_data(),
top[0]->mutable_cpu_data());
if (this->output_labels_) {
caffe_copy(batch->label_.count(), batch->label_.cpu_data(),
top[1]->mutable_cpu_data());
}
prefetch_free_.push(batch);
}
#ifdef CPU_ONLY
STUB_GPU_FORWARD(BasePrefetchingDataLayer, Forward);
#endif
INSTANTIATE_CLASS(BaseDataLayer);
INSTANTIATE_CLASS(Batch);
INSTANTIATE_CLASS(BasePrefetchingDataLayer);
} // namespace caffe