To my surprise, it was actually the GPU that was lagging behind this time, at least for the current network I am training. I would like to share how I found out whether the GPU or the CPU was the bottleneck. Below is the code, most of which is taken from Patrick Rodriguez's repository keras-multiprocess-image-data-generator.
import time
import datetime
import threading

import matplotlib.pyplot as plt
import psutil
from pynvml import (nvmlInit,
                    nvmlDeviceGetCount,
                    nvmlDeviceGetHandleByIndex,
                    nvmlDeviceGetUtilizationRates,
                    nvmlDeviceGetName)


def gpu_info():
    """Return a list of (GPU id, description, % utilization) tuples, one per device."""
    nvmlInit()
    device_count = nvmlDeviceGetCount()
    info = []
    for i in range(device_count):
        handle = nvmlDeviceGetHandleByIndex(i)
        util = nvmlDeviceGetUtilizationRates(handle)
        desc = nvmlDeviceGetName(handle)
        info.append((i, desc, util.gpu))
    return info


class SysMonitor(threading.Thread):
    """Background thread that samples GPU and CPU utilization every 0.1 s."""

    def __init__(self):
        self.shutdown = False
        self.utils = []
        self.start_time = time.time()
        self.duration = 0
        threading.Thread.__init__(self)

    def run(self):
        while not self.shutdown:
            dt = datetime.datetime.now()
            util = gpu_info()
            cpu_percent = psutil.cpu_percent()
            # Each sample is [timestamp, gpu_0 %, ..., gpu_n %, cpu %].
            self.utils.append([dt] + [x[2] for x in util] + [cpu_percent])
            time.sleep(.1)

    def stop(self):
        self.shutdown = True
        self.duration = time.time() - self.start_time

    def plot(self, title, vert=False):
        # Note: indices u[1] and u[2] assume a single-GPU machine; with more
        # GPUs, u[2] would be the second GPU rather than the CPU.
        if vert:
            fig, ax = plt.subplots(2, 1, figsize=(15, 6))
        else:
            fig, ax = plt.subplots(1, 2, figsize=(15, 6))
        fig.suptitle(title, size=24)
        ax[0].title.set_text('GPU Utilization')
        ax[0].plot([u[1] for u in self.utils])
        ax[0].set_ylim([0, 100])
        ax[1].title.set_text('CPU Utilization')
        ax[1].plot([u[2] for u in self.utils])
        ax[1].set_ylim([0, 100])
        plt.tight_layout(rect=[0, 0.03, 1, 0.9])
        plt.show()


if __name__ == '__main__':
    # Sample for ten seconds, then plot the two utilization curves.
    t1 = SysMonitor()
    t1.start()
    time.sleep(10)
    t1.stop()
    t1.plot('System Resources Usage')
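Rather than just sleeping for ten seconds, you can also wrap an actual training call between start() and stop(). Here is a minimal sketch of that usage, assuming a compiled Keras model and a data generator; model, train_gen, and the step counts below are placeholders for your own objects, not part of the script above:

# Hypothetical usage: sample utilization while a real training run is active.
monitor = SysMonitor()
monitor.start()

# Placeholder training call; substitute your own model and generator.
model.fit_generator(train_gen, steps_per_epoch=500, epochs=1)

monitor.stop()
monitor.plot('fit_generator, 1 epoch')
print('Took %.1f seconds' % monitor.duration)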
To run the script, you first need to install the necessary modules. Save the following as requirement.txt:
cycler
functools32
matplotlib
numpy
nvidia-ml-py
pkg-resources
psutil
pyparsing
python-dateutil
pytz
six
subprocess32
Next, run the command below to install all the necessary modules in one go:
$ pip install -r requirement.txt
Lastly, you also need the python-tk module (matplotlib's default Tk backend depends on it), so install it via
$ sudo apt-get install python-tk
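If you are on a headless server without a display, python-tk will not help; a common workaround (my own addition, not part of the original script) is to switch matplotlib to the Agg backend and save the figure to disk instead of calling plt.show():

# Hypothetical headless variant: select the Agg backend before pyplot is
# imported, then write the figure to a file instead of opening a window.
import matplotlib
matplotlib.use('Agg')     # must run before `import matplotlib.pyplot`
import matplotlib.pyplot as plt

# ... build the figure as in SysMonitor.plot(), then:
plt.savefig('usage.png')  # instead of plt.show()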
Now you can run the script (with your training job running in another terminal, so there is activity to measure):
$ python sysmonitor.py
Note that you must have an NVIDIA GPU (and its driver installed) for the script to work, since it queries utilization through NVML. If the GPU line sits near 100% while the CPU line stays low, the GPU is the bottleneck; the reverse pattern points at a CPU-bound data pipeline.
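If you want the script to fail gracefully on machines without one, a small guard (again my own addition, not in the original) can probe NVML first:

# Hypothetical guard: probe NVML before starting the monitor.
from pynvml import nvmlInit, NVMLError

def has_nvidia_gpu():
    try:
        nvmlInit()
        return True
    except NVMLError:
        return False

if __name__ == '__main__':
    if not has_nvidia_gpu():
        raise SystemExit('No NVIDIA GPU/driver found; nothing to monitor.')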