Caffe_Code_Analysis/caffe/tools/caffe.cpp at master · lpye/Caffe_Code_Analysis

475 lines (440 loc) · 16.5 KB
#ifdef WITH_PYTHON_LAYER
#include "boost/python.hpp"
namespace bp = boost::python;
#include <gflags/gflags.h>
#include <glog/logging.h>
#include <cstring>
#include <map>
#include <string>
#include <vector>
#include "boost/algorithm/string.hpp"
#include "caffe/caffe.hpp"
#include "caffe/util/signal_handler.h"
using caffe::Blob;
using caffe::Caffe;
using caffe::Net;
using caffe::Layer;
using caffe::Solver;
using caffe::shared_ptr;
using caffe::string;
using caffe::Timer;
using caffe::vector;
using std::ostringstream;
using std::cout;
using std::endl;
 * DEFINE_xxx(name, default_value, instruction);，这样就定义了一个xxx类型名为
 * FLAGS_name的标志，如果用户没有在Command Line中提供其值，那么会默认为default_value，
 * instruction是这个标志含义的说明
DEFINE_string(gpu, "",
    "Optional; run in GPU mode on given device IDs separated by ','."
    "Use '-gpu all' to run on all available GPUs. The effective training "
    "batch size is multiplied by the number of devices.");
DEFINE_string(solver, "",
    "The solver definition protocol buffer text file.");
DEFINE_string(model, "",
    "The model definition protocol buffer text file.");
DEFINE_string(snapshot, "",
    "Optional; the snapshot solver state to resume training.");
DEFINE_string(weights, "",
    "Optional; the pretrained weights to initialize finetuning, "
    "separated by ','. Cannot be set simultaneously with snapshot.");
DEFINE_int32(iterations, 50,
    "The number of iterations to run.");
DEFINE_string(sigint_effect, "stop",
             "Optional; action to take when a SIGINT signal is received: "
              "snapshot, stop or none.");
DEFINE_string(sighup_effect, "snapshot",
             "Optional; action to take when a SIGHUP signal is received: "
             "snapshot, stop or none.");
// A simple registry for caffe commands.
//BrewFunction是一个函数指针类型，指向的是传入参数为空，返回值为int的函数
//也就是train/test/time/device_query这四个函数的类型
typedef int (*BrewFunction)();
typedef std::map<caffe::string, BrewFunction> BrewMap;
//g_brew_map是一个key为string类型，value为BrewFunction类型的一个map类型的全局变量
BrewMap g_brew_map;
 * 以train函数为例子，RegisterBrewFunction(train)这个宏的作用是定义了一个名为
 * __Register_train的类，在定义完这个类之后，定义了一个这个类的变量，会调用构造函数，
 * 这个类的构造函数在前面提到的g_brew_map中添加了key为”train”，value为指向train函数
 * 的指针的一个元素。
 *新建全局变量g_registerer_##func，利用全局变量的构造函数，执行g_brew_map[#func] = &func;
 *把train/test/time/device_query这四个函数的函数指针传递到g_brew_map里
#define RegisterBrewFunction(func) \
namespace { \
class __Registerer_##func { \
 public: /* NOLINT */ \
  __Registerer_##func() { \
    g_brew_map[#func] = &func; \
__Registerer_##func g_registerer_##func; \
static BrewFunction GetBrewFunction(const caffe::string& name) {
  if (g_brew_map.count(name)) {
    LOG(INFO)<<"You choose:"<<name;
    return g_brew_map[name];
    LOG(ERROR) << "Available caffe actions:";
    for (BrewMap::iterator it = g_brew_map.begin();
         it != g_brew_map.end(); ++it) {
      LOG(ERROR) << "\t" << it->first;
    LOG(FATAL) << "Unknown action: " << name;
    return NULL;  // not reachable, just to suppress old compiler warnings.
// Parse GPU ids or use all available devices
static void get_gpus(vector<int>* gpus) {
  if (FLAGS_gpu == "all") {
    int count = 0;
#ifndef CPU_ONLY
    CUDA_CHECK(cudaGetDeviceCount(&count));
    NO_GPU;
    for (int i = 0; i < count; ++i) {
      gpus->push_back(i);
  } else if (FLAGS_gpu.size()) {
    vector<string> strings;
    boost::split(strings, FLAGS_gpu, boost::is_any_of(","));
    for (int i = 0; i < strings.size(); ++i) {
      gpus->push_back(boost::lexical_cast<int>(strings[i]));
    CHECK_EQ(gpus->size(), 0);
// caffe commands to call by
//     caffe <command> <args>
// To add a command, define a function "int command()" and register it with
// RegisterBrewFunction(action);
// Device Query: show diagnostic information for a GPU device.
int device_query() {
  LOG(INFO) << "Querying GPUs " << FLAGS_gpu;
  vector<int> gpus;
  get_gpus(&gpus);
  for (int i = 0; i < gpus.size(); ++i) {
    caffe::Caffe::SetDevice(gpus[i]);
    caffe::Caffe::DeviceQuery();
  return 0;
RegisterBrewFunction(device_query);
// Load the weights from the specified caffemodel(s) into the train and
// test nets.
void CopyLayers(caffe::Solver<float>* solver, const std::string& model_list) {
  std::vector<std::string> model_names;
  boost::split(model_names, model_list, boost::is_any_of(",") );
  for (int i = 0; i < model_names.size(); ++i) {
    LOG(INFO) << "Finetuning from " << model_names[i];
    solver->net()->CopyTrainedLayersFrom(model_names[i]);
    for (int j = 0; j < solver->test_nets().size(); ++j) {
      solver->test_nets()[j]->CopyTrainedLayersFrom(model_names[i]);
// Translate the signal effect the user specified on the command-line to the
// corresponding enumeration.
caffe::SolverAction::Enum GetRequestedAction(
    const std::string& flag_value) {
  if (flag_value == "stop") {
    return caffe::SolverAction::STOP;
  if (flag_value == "snapshot") {
    return caffe::SolverAction::SNAPSHOT;
  if (flag_value == "none") {
    return caffe::SolverAction::NONE;
  LOG(FATAL) << "Invalid signal effect \""<< flag_value << "\" was specified";
// Train / Finetune a model.
int train() {
// glog的CHECK_GT宏（含义为check greater than），检查FLAGS_solver的size是否大于0，
// 如果小于或等于0则输出提示：”Need a solver definition to train”。
  CHECK_GT(FLAGS_solver.size(), 0) << "Need a solver definition to train.";
 * 第二行代码是确保用户没有同时提供snapshot和weights参数，这两个参数都是继续之前的训练
 * 或者进行fine-tuning的，如果同时指明了这两个标志，则不知道到底应该从哪个路径的文件去
 * 读入模型的相关参数更为合适。
  CHECK(!FLAGS_snapshot.size() || !FLAGS_weights.size())
      << "Give a snapshot to resume training or weights to finetune "
      "but not both.";
  //SolverParameter是通过Google Protocol Buffer自动生成的一个类
  caffe::SolverParameter solver_param;
  //从文件中读取solver参数
  caffe::ReadSolverParamsFromTextFileOrDie(FLAGS_solver, &solver_param);
  // If the gpus flag is not provided, allow the mode and device to be set
  // in the solver prototxt.
   * 首先是判断用户在Command Line中是否输入了gpu相关的参数，如果没有(FLAGS_gpu.size()
   * ==0)但是用户在solver的prototxt定义中提供了相关的参数，那就把相关的参数放到
   * FLAGS_gpu中，如果用户仅仅是选择了在solver的prototxt定义中选择了GPU模式，
   * 但是没有指明具体的gpu_id，那么就默认设置为0。
  if (FLAGS_gpu.size() == 0
      && solver_param.solver_mode() == caffe::SolverParameter_SolverMode_GPU) {
      if (solver_param.has_device_id()) {
          FLAGS_gpu = "" +
              boost::lexical_cast<string>(solver_param.device_id());
      } else {  // Set default GPU if unspecified
          FLAGS_gpu = "" + boost::lexical_cast<string>(0);
  vector<int> gpus;
  get_gpus(&gpus);
  if (gpus.size() == 0) {
    LOG(INFO) << "Use CPU.";
    Caffe::set_mode(Caffe::CPU);
    ostringstream s;
    for (int i = 0; i < gpus.size(); ++i) {
      s << (i ? ", " : "") << gpus[i];
    LOG(INFO) << "Using GPUs " << s.str();
#ifndef CPU_ONLY
    cudaDeviceProp device_prop;
    for (int i = 0; i < gpus.size(); ++i) {
      cudaGetDeviceProperties(&device_prop, gpus[i]);
      LOG(INFO) << "GPU " << gpus[i] << ": " << device_prop.name;
    solver_param.set_device_id(gpus[0]);
    Caffe::SetDevice(gpus[0]);
    Caffe::set_mode(Caffe::GPU);
    Caffe::set_solver_count(gpus.size());
  caffe::SignalHandler signal_handler(
        GetRequestedAction(FLAGS_sigint_effect),
        GetRequestedAction(FLAGS_sighup_effect));
   * caffe.cpp中的train函数中通过下面的代码定义了一个指向Solver<float>的shared_ptr。
   * 其中主要是通过调用SolverRegistry这个类的静态成员函数CreateSolver得到一个指向Solver
   * 的指针来构造shared_ptr类型的solver。而且由于C++多态的特性，尽管solver是一个指向基类
   * Solver类型的指针，通过solver这个智能指针来调用各个成员函数会调用到各个子类(SGDSolver等)
   * 的函数。http://alanse7en.github.io/caffedai-ma-jie-xi-4/
  shared_ptr<caffe::Solver<float> > //初始化
      solver(caffe::SolverRegistry<float>::CreateSolver(solver_param));
  solver->SetActionFunction(signal_handler.GetActionFunction());
   * 接下来判断了一下用户是否定义了snapshot或者weights这两个参数中的一个，
   * 如果定义了则需要通过Solver提供的接口从snapshot或者weights文件中去读
   *   取已经训练好的网络的参数
  if (FLAGS_snapshot.size()) {
    LOG(INFO) << "Resuming from " << FLAGS_snapshot;
    solver->Restore(FLAGS_snapshot.c_str());
  } else if (FLAGS_weights.size()) {
    CopyLayers(solver.get(), FLAGS_weights);
  if (gpus.size() > 1) {
    caffe::P2PSync<float> sync(solver, NULL, solver->param());
    sync.Run(gpus);
    LOG(INFO) << "Starting Optimization";
    // 开始优化
    solver->Solve();
  LOG(INFO) << "Optimization Done.";
  return 0;
RegisterBrewFunction(train);
// Test: score a model.
int test() {
  CHECK_GT(FLAGS_model.size(), 0) << "Need a model definition to score.";
  CHECK_GT(FLAGS_weights.size(), 0) << "Need model weights to score.";
  // Set device id and mode
  vector<int> gpus;
  get_gpus(&gpus);
  if (gpus.size() != 0) {
    LOG(INFO) << "Use GPU with device ID " << gpus[0];
#ifndef CPU_ONLY
    cudaDeviceProp device_prop;
    cudaGetDeviceProperties(&device_prop, gpus[0]);
    LOG(INFO) << "GPU device name: " << device_prop.name;
    Caffe::SetDevice(gpus[0]);
    Caffe::set_mode(Caffe::GPU);
    LOG(INFO) << "Use CPU.";
    Caffe::set_mode(Caffe::CPU);
  // Instantiate the caffe net.
  Net<float> caffe_net(FLAGS_model, caffe::TEST);
  caffe_net.CopyTrainedLayersFrom(FLAGS_weights);
  LOG(INFO) << "Running for " << FLAGS_iterations << " iterations.";
  vector<int> test_score_output_id;
  vector<float> test_score;
  float loss = 0;
  for (int i = 0; i < FLAGS_iterations; ++i) {
    float iter_loss;
    const vector<Blob<float>*>& result =
        caffe_net.Forward(&iter_loss);
    loss += iter_loss;
    int idx = 0;
    for (int j = 0; j < result.size(); ++j) {
      const float* result_vec = result[j]->cpu_data();
      for (int k = 0; k < result[j]->count(); ++k, ++idx) {
        const float score = result_vec[k];
        if (i == 0) {
          test_score.push_back(score);
          test_score_output_id.push_back(j);
        } else {
          test_score[idx] += score;
        const std::string& output_name = caffe_net.blob_names()[
            caffe_net.output_blob_indices()[j]];
        LOG(INFO) << "Batch " << i << ", " << output_name << " = " << score;
  loss /= FLAGS_iterations;
  LOG(INFO) << "Loss: " << loss;
  for (int i = 0; i < test_score.size(); ++i) {
    const std::string& output_name = caffe_net.blob_names()[
        caffe_net.output_blob_indices()[test_score_output_id[i]]];
    const float loss_weight = caffe_net.blob_loss_weights()[
        caffe_net.output_blob_indices()[test_score_output_id[i]]];
    std::ostringstream loss_msg_stream;
    const float mean_score = test_score[i] / FLAGS_iterations;
    if (loss_weight) {
      loss_msg_stream << " (* " << loss_weight
                      << " = " << loss_weight * mean_score << " loss)";
    LOG(INFO) << output_name << " = " << mean_score << loss_msg_stream.str();
  return 0;
RegisterBrewFunction(test);
// Time: benchmark the execution time of a model.
int time() {
  CHECK_GT(FLAGS_model.size(), 0) << "Need a model definition to time.";
  // Set device id and mode
  vector<int> gpus;
  get_gpus(&gpus);
  if (gpus.size() != 0) {
    LOG(INFO) << "Use GPU with device ID " << gpus[0];
    Caffe::SetDevice(gpus[0]);
    Caffe::set_mode(Caffe::GPU);
    LOG(INFO) << "Use CPU.";
    Caffe::set_mode(Caffe::CPU);
  // Instantiate the caffe net.
  Net<float> caffe_net(FLAGS_model, caffe::TRAIN);
  // Do a clean forward and backward pass, so that memory allocation are done
  // and future iterations will be more stable.
  LOG(INFO) << "Performing Forward";
  // Note that for the speed benchmark, we will assume that the network does
  // not take any input blobs.
  float initial_loss;
  caffe_net.Forward(&initial_loss);
  LOG(INFO) << "Initial loss: " << initial_loss;
  LOG(INFO) << "Performing Backward";
  caffe_net.Backward();
  const vector<shared_ptr<Layer<float> > >& layers = caffe_net.layers();
  const vector<vector<Blob<float>*> >& bottom_vecs = caffe_net.bottom_vecs();
  const vector<vector<Blob<float>*> >& top_vecs = caffe_net.top_vecs();
  const vector<vector<bool> >& bottom_need_backward =
      caffe_net.bottom_need_backward();
  LOG(INFO) << "*** Benchmark begins ***";
  LOG(INFO) << "Testing for " << FLAGS_iterations << " iterations.";
  Timer total_timer;
  total_timer.Start();
  Timer forward_timer;
  Timer backward_timer;
  Timer timer;
  std::vector<double> forward_time_per_layer(layers.size(), 0.0);
  std::vector<double> backward_time_per_layer(layers.size(), 0.0);
  double forward_time = 0.0;
  double backward_time = 0.0;
  for (int j = 0; j < FLAGS_iterations; ++j) {
    Timer iter_timer;
    iter_timer.Start();
    forward_timer.Start();
    for (int i = 0; i < layers.size(); ++i) {
      timer.Start();
      layers[i]->Forward(bottom_vecs[i], top_vecs[i]);
      forward_time_per_layer[i] += timer.MicroSeconds();
    forward_time += forward_timer.MicroSeconds();
    backward_timer.Start();
    for (int i = layers.size() - 1; i >= 0; --i) {
      timer.Start();
      layers[i]->Backward(top_vecs[i], bottom_need_backward[i],
                          bottom_vecs[i]);
      backward_time_per_layer[i] += timer.MicroSeconds();
    backward_time += backward_timer.MicroSeconds();
    LOG(INFO) << "Iteration: " << j + 1 << " forward-backward time: "
      << iter_timer.MilliSeconds() << " ms.";
  LOG(INFO) << "Average time per layer: ";
  for (int i = 0; i < layers.size(); ++i) {
    const caffe::string& layername = layers[i]->layer_param().name();
    LOG(INFO) << std::setfill(' ') << std::setw(10) << layername <<
      "\tforward: " << forward_time_per_layer[i] / 1000 /
      FLAGS_iterations << " ms.";
    LOG(INFO) << std::setfill(' ') << std::setw(10) << layername  <<
      "\tbackward: " << backward_time_per_layer[i] / 1000 /
      FLAGS_iterations << " ms.";
  total_timer.Stop();
  LOG(INFO) << "Average Forward pass: " << forward_time / 1000 /
    FLAGS_iterations << " ms.";
  LOG(INFO) << "Average Backward pass: " << backward_time / 1000 /
    FLAGS_iterations << " ms.";
  LOG(INFO) << "Average Forward-Backward: " << total_timer.MilliSeconds() /
    FLAGS_iterations << " ms.";
  LOG(INFO) << "Total Time: " << total_timer.MilliSeconds() << " ms.";
  LOG(INFO) << "*** Benchmark ends ***";
  return 0;
RegisterBrewFunction(time);
int main(int argc, char** argv) {
  // Print output to stderr (while still logging).
  cout<<"This is test"<<std::endl;
  FLAGS_alsologtostderr = 1;
  // Set version
  gflags::SetVersionString(AS_STRING(CAFFE_VERSION));
  // Usage message.
  gflags::SetUsageMessage("command line brew\n"
      "usage: caffe <command> <args>\n\n"
      "commands:\n"
      "  train           train or finetune a model\n"
      "  test            score a model\n"
      "  device_query    show GPU diagnostic information\n"
      "  time            benchmark model execution time");
  // Run tool or show usage.
  // 解析输入参数
  caffe::GlobalInit(&argc, &argv);
  if (argc == 2) {
#ifdef WITH_PYTHON_LAYER
      cout<<"~~~~~~~~~~~~~~~~~~"<<endl;
      // 注意GetBrewFunction返回的是函数指针，然后通过"()"调用操作符执行函数
      return GetBrewFunction(caffe::string(argv[1])) ();
      cout<<"~~~~~~~~~~~~~~~~~~"<<endl;
#ifdef WITH_PYTHON_LAYER
    } catch (bp::error_already_set) {
      cout<<"heheda"<<endl;
      PyErr_Print();
      return 1;
    gflags::ShowUsageWithFlagsRestrict(argv[0], "tools/caffe");
Provide feedback

Saved searches

Use saved searches to filter your results more quickly

FilesExpand file tree

caffe.cpp

Latest commit

History

caffe.cpp

File metadata and controls