NVIDIA/caffe build memo

Here is a brief memo on how I installed NVIDIA/DIGITS and Caffe.

Tips

  • Make sure boost.python is built
  • Don't mix protobuf 2.x and 3.x.  It works with protobuf-3.1.0 as long as all relevant modules consistently use version 3.1.0.
  • Dependencies.cmake under the caffe/cmake directory is missing ${HDF5_HL_LIBRARIES} at line 28, which leads to the link error described below.
  • The nccl module needs to be built to enable multi-GPU support.  See https://github.com/NVIDIA/nccl
  • Make sure pip is installed (see below), then run 'sudo pip install -r python/requirements.txt'

Download ez_setup.py from https://pypi.python.org/pypi/setuptools (see "Installation Instructions" section) then:

$ sudo python ez_setup.py

Errors and resolutions

I got exceptions while running 'pip install -r ./python/requirements.txt'; they were resolved by:

$ sudo python -m pip install --upgrade --force setuptools
$ sudo python -m pip install --upgrade --force pip 

Building the NVIDIA fork of caffe (following 'https://github.com/NVIDIA/DIGITS/blob/master/docs/BuildCaffe.md') failed with the following errors:

[ 87%] Linking CXX executable device_query
../lib/libcaffe-nv.so.0.15.14: undefined reference to `H5LTget_dataset_ndims'
../lib/libcaffe-nv.so.0.15.14: undefined reference to `H5LTmake_dataset_double'
../lib/libcaffe-nv.so.0.15.14: undefined reference to `H5LTmake_dataset_int'
../lib/libcaffe-nv.so.0.15.14: undefined reference to `H5LTread_dataset_float'
../lib/libcaffe-nv.so.0.15.14: undefined reference to `H5LTmake_dataset_float'
../lib/libcaffe-nv.so.0.15.14: undefined reference to `H5LTread_dataset_double'
../lib/libcaffe-nv.so.0.15.14: undefined reference to `H5LTmake_dataset_string'
../lib/libcaffe-nv.so.0.15.14: undefined reference to `H5LTread_dataset_int'
../lib/libcaffe-nv.so.0.15.14: undefined reference to `H5LTfind_dataset'
../lib/libcaffe-nv.so.0.15.14: undefined reference to `H5LTget_dataset_info'
../lib/libcaffe-nv.so.0.15.14: undefined reference to `H5LTread_dataset_string'
collect2: error: ld returned 1 exit status

 

This can be fixed by editing "caffe/cmake/Dependencies.cmake" at line 28, changing

list(APPEND Caffe_LINKER_LIBS ${HDF5_LIBRARIES})

to

list(APPEND Caffe_LINKER_LIBS ${HDF5_LIBRARIES} ${HDF5_HL_LIBRARIES})

This problem does not exist in the original BVLC/caffe.

Protobuf version issue

After a successful build of NVIDIA/caffe, I got the following error:

$ ./digits-devserver 
  ___ ___ ___ ___ _____ ___
 |   \_ _/ __|_ _|_   _/ __|
 | |) | | (_ || |  | | \__ \
 |___/___\___|___| |_| |___/ 5.1-dev

"/home/toshi/src/NVIDIA/caffe" from CAFFE_ROOT does not point to a valid installation of Caffe.
Use the envvar CAFFE_ROOT to indicate a valid installation.
Traceback (most recent call last):
  File "/usr/lib/python2.7/runpy.py", line 162, in _run_module_as_main
    "__main__", fname, loader, pkg_name)
  File "/usr/lib/python2.7/runpy.py", line 72, in _run_code
    exec code in run_globals
  File "/media/toshi/EXT4/src/NVIDIA/digits/digits/__main__.py", line 70, in <module>
    main()
  File "/media/toshi/EXT4/src/NVIDIA/digits/digits/__main__.py", line 53, in main
    import digits.config
  File "digits/config/__init__.py", line 7, in <module>
    from . import (  # noqa
  File "digits/config/caffe.py", line 226, in <module>
    executable, version, flavor = load_from_envvar('CAFFE_ROOT')
  File "digits/config/caffe.py", line 37, in load_from_envvar
    import_pycaffe(python_dir)
  File "digits/config/caffe.py", line 126, in import_pycaffe
    import caffe
  File "/home/toshi/src/NVIDIA/caffe/python/caffe/__init__.py", line 1, in <module>
    from .pycaffe import Net, SGDSolver, NesterovSolver, AdaGradSolver, RMSPropSolver, AdaDeltaSolver, AdamSolver
  File "/home/toshi/src/NVIDIA/caffe/python/caffe/pycaffe.py", line 15, in <module>
    import caffe.io
  File "/home/toshi/src/NVIDIA/caffe/python/caffe/io.py", line 8, in <module>
    from caffe.proto import caffe_pb2
  File "/home/toshi/src/NVIDIA/caffe/python/caffe/proto/caffe_pb2.py", line 23, in <module>
    \x10\x64\x65t_bg_threshold\x18\x37 \x01(\x02:\x03\x30.5\x12\x1d\n\x0f\x64\x65t_fg_fraction\x18\x38 \x01(\x02:\x04\x30.25\x12\x1a\n\x0f\x64\x65t_context_pad\x18: \x01(\r:\x01\x30\x12\x1b\n\rdet_crop_mode\x18; \x01(\t:\x04warp\x12\x12\n\x07new_num\x18< \x01(\x05:\x01\x30\x12\x17\n\x0cnew_channels\x18= \x01(\x05:\x01\x30\x12\x15\n\nnew_height\x18> \x01(\x05:\x01\x30\x12\x14\n\tnew_width\x18? \x01(\x05:\x01\x30\x12\x1d\n\x0eshuffle_images\x18@ \x01(\x08:\x05\x66\x61lse\x12\x15\n\nconcat_dim\x18\x41 \x01(\r:\x01\x31\x12\x36\n\x11hdf5_output_param\x18\xe9\x07 \x01(\x0b\x32\x1a.caffe.HDF5OutputParameter\".\n\nPoolMethod\x12\x07\n\x03MAX\x10\x00\x12\x07\n\x03\x41VE\x10\x01\x12\x0e\n\nSTOCHASTIC\x10\x02\"W\n\x0ePReLUParameter\x12&\n\x06\x66iller\x18\x01 \x01(\x0b\x32\x16.caffe.FillerParameter\x12\x1d\n\x0e\x63hannel_shared\x18\x02 \x01(\x08:\x05\x66\x61lse*\x1c\n\x05Phase\x12\t\n\x05TRAIN\x10\x00\x12\x08\n\x04TEST\x10\x01')
TypeError: __init__() got an unexpected keyword argument 'syntax'

This was caused by the protobuf 3.1.0 C++ module being installed while the Python modules were still using protobuf 2.x.

To fix this, edit "caffe/python/requirements.txt" as follows; change

protobuf>=2.5.0

to 

protobuf>=3.1.0

Then, run "sudo pip install -r ./python/requirements.txt"

That's it.

 

 

Undefined

User login