Introduction
HTTPS is HTTP layered on top of SSL/TLS with certificates. It protects data in transit and reduces the chance of malicious hijacking, addressing HTTP's three main weaknesses: eavesdropping, tampering, and impersonation.
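As a hedged illustration (not part of the original text), Python's standard library can serve content over HTTPS by wrapping a plain HTTP server socket in a TLS context. The cert.pem and key.pem file names below are assumptions for a locally generated self-signed certificate.

import http.server
import ssl

# Serve the current directory over HTTPS on port 8443 (illustrative values).
server_address = ("0.0.0.0", 8443)
httpd = http.server.HTTPServer(server_address, http.server.SimpleHTTPRequestHandler)

# Wrap the listening socket with TLS; cert.pem/key.pem are assumed to exist,
# e.g. generated beforehand with openssl.
context = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
context.load_cert_chain(certfile="cert.pem", keyfile="key.pem")
httpd.socket = context.wrap_socket(httpd.socket, server_side=True)

httpd.serve_forever()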
How the TIME_WAIT state works
----------------------------
After the two peers establish a TCP connection, the side that actively closes the connection enters the TIME_WAIT state.
When the client actively closes the connection, it sends the final ACK, enters TIME_WAIT, remains there for two MSL (Maximum Segment Lifetime) periods, and only then moves to CLOSED.
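One practical consequence of TIME_WAIT: a server restarted immediately after shutdown may fail to bind its old port with "Address already in use" until the 2-MSL wait expires. Below is a minimal sketch of the usual mitigation, SO_REUSEADDR (an illustration, not from the original text).

import socket

sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
# Allow rebinding the port even while old connections linger in TIME_WAIT.
sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
sock.bind(("0.0.0.0", 8000))
sock.listen(5)
print("listening on port 8000")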
__version__ = "0.1"
__all__ = ["SimpleHTTPRequestHandler"]

import html
import http.server
import mimetypes
import os
import posixpath
import re
import shutil
import urllib.error
import urllib.parse
import urllib.request

from io import BytesIO


class SimpleHTTPRequestHandler(http.server.BaseHTTPRequestHandler):
    """Simple HTTP file server with upload and download support."""

    server_version = "SimpleHTTPWithUpload/" + __version__

    def do_GET(self):
        f = self.send_head()
        if f:
            self.copyfile(f, self.wfile)
            f.close()

    def do_HEAD(self):
        f = self.send_head()
        if f:
            f.close()

    def do_POST(self):
        r, info = self.deal_post_data()
        print((r, info, "by: ", self.client_address))
        f = BytesIO()
        f.write(b'<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">')
        f.write(b"<html>\n<title>Upload Result Page</title>\n")
        f.write(b"<body>\n<h2>Upload Result Page</h2>\n")
        f.write(b"<hr>\n")
        if r:
            f.write(b"<strong>Success:</strong>")
        else:
            f.write(b"<strong>Failed:</strong>")
        f.write(info.encode())
        f.write(("<br><a href=\"%s\">back</a>" % self.headers['referer']).encode())
        f.write(b"</body>\n</html>\n")
        length = f.tell()
        f.seek(0)
        self.send_response(200)
        self.send_header("Content-type", "text/html")
        self.send_header("Content-Length", str(length))
        self.end_headers()
        if f:
            self.copyfile(f, self.wfile)
            f.close()

    def deal_post_data(self):
        content_type = self.headers['content-type']
        if not content_type:
            return (False, "Content-Type header doesn't contain boundary")
        boundary = content_type.split("=")[1].encode()
        remainbytes = int(self.headers['content-length'])
        line = self.rfile.readline()
        remainbytes -= len(line)
        if not boundary in line:
            return (False, "Content NOT begin with boundary")
        line = self.rfile.readline()
        remainbytes -= len(line)
        fn = re.findall(
            r'Content-Disposition.*name="file"; filename="(.*)"', line.decode())
        if not fn:
            return (False, "Can't find out file name...")
        path = self.translate_path(self.path)
        fn = os.path.join(path, fn[0])
        line = self.rfile.readline()
        remainbytes -= len(line)
        line = self.rfile.readline()
        remainbytes -= len(line)
        try:
            out = open(fn, 'wb')
        except IOError:
            return (False, "Can't create file to write, do you have permission to write?")
        preline = self.rfile.readline()
        remainbytes -= len(preline)
        while remainbytes > 0:
            line = self.rfile.readline()
            remainbytes -= len(line)
            if boundary in line:
                preline = preline[0:-1]
                if preline.endswith(b'\r'):
                    preline = preline[0:-1]
                out.write(preline)
                out.close()
                return (True, "File '%s' upload success!" % fn)
            else:
                out.write(preline)
                preline = line
        return (False, "Unexpect Ends of data.")

    def send_head(self):
        path = self.translate_path(self.path)
        f = None
        if os.path.isdir(path):
            if not self.path.endswith('/'):
                # redirect browser - doing basically what apache does
                self.send_response(301)
                self.send_header("Location", self.path + "/")
                self.end_headers()
                return None
            for index in "index.html", "index.htm":
                index = os.path.join(path, index)
                if os.path.exists(index):
                    path = index
                    break
            else:
                return self.list_directory(path)
        ctype = self.guess_type(path)
        try:
            # Always read in binary mode. Opening files in text mode may cause
            # newline translations, making the actual size of the content
            # transmitted *less* than the content-length!
            f = open(path, 'rb')
        except IOError:
            self.send_error(404, "File not found")
            return None
        self.send_response(200)
        self.send_header("Content-type", ctype)
        fs = os.fstat(f.fileno())
        self.send_header("Content-Length", str(fs[6]))
        self.send_header("Last-Modified", self.date_time_string(fs.st_mtime))
        self.end_headers()
        return f

    def list_directory(self, path):
        try:
            list = os.listdir(path)
        except os.error:
            self.send_error(404, "No permission to list directory")
            return None
        list.sort(key=lambda a: a.lower())
        f = BytesIO()
        displaypath = html.escape(urllib.parse.unquote(self.path))
        f.write(b'<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">')
        f.write(("<html>\n<title>Directory listing for %s</title>\n" % displaypath).encode())
        # Declare UTF-8 in the page head so non-ASCII (e.g. Chinese) file names are not garbled
        f.write(
            b"<head>\n<meta http-equiv='Content-Type' content='text/html; charset=utf-8' />\n</head>\n")
        f.write(("<body>\n<h2>Directory listing for %s</h2>\n" % displaypath).encode())
        f.write(b"<hr>\n")
        f.write(b"<form ENCTYPE=\"multipart/form-data\" method=\"post\">")
        f.write(b"<input name=\"file\" type=\"file\"/>")
        f.write(b"<input type=\"submit\" value=\"upload\"/></form>\n")
        f.write(b"<hr>\n<ul>\n")
        for name in list:
            fullname = os.path.join(path, name)
            displayname = linkname = name
            # Append / for directories or @ for symbolic links
            if os.path.isdir(fullname):
                displayname = name + "/"
                linkname = name + "/"
            if os.path.islink(fullname):
                displayname = name + "@"
                # Note: a link to a directory displays with @ and links with /
            f.write(('<li><a href="%s">%s</a>\n' % (urllib.parse.quote(linkname),
                                                    html.escape(displayname))).encode())
        f.write(b"</ul>\n<hr>\n</body>\n</html>\n")
        length = f.tell()
        f.seek(0)
        self.send_response(200)
        self.send_header("Content-type", "text/html")
        self.send_header("Content-Length", str(length))
        self.end_headers()
        return f

    def translate_path(self, path):
        path = path.split('?', 1)[0]
        path = path.split('#', 1)[0]
        path = posixpath.normpath(urllib.parse.unquote(path))
        words = path.split('/')
        words = [_f for _f in words if _f]
        path = os.getcwd()
        for word in words:
            drive, word = os.path.splitdrive(word)
            head, word = os.path.split(word)
            if word in (os.curdir, os.pardir):
                continue
            path = os.path.join(path, word)
        return path

    def copyfile(self, source, outputfile):
        shutil.copyfileobj(source, outputfile)

    def guess_type(self, path):
        base, ext = posixpath.splitext(path)
        if ext in self.extensions_map:
            return self.extensions_map[ext]
        ext = ext.lower()
        if ext in self.extensions_map:
            return self.extensions_map[ext]
        else:
            return self.extensions_map['']

    if not mimetypes.inited:
        mimetypes.init()  # try to read system mime.types
    extensions_map = mimetypes.types_map.copy()
    extensions_map.update({
        '': 'application/octet-stream',  # Default
        '.py': 'text/plain',
        '.c': 'text/plain',
        '.h': 'text/plain',
    })


if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--bind', '-b', default='', metavar='ADDRESS',
                        help='Specify alternate bind address '
                             '[default: all interfaces]')
    parser.add_argument('--port', '-p', default=8000, type=int,
                        help='Specify alternate port [default: 8000]')
    args = parser.parse_args()
    http.server.test(HandlerClass=SimpleHTTPRequestHandler,
                     port=args.port, bind=args.bind)
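To exercise the upload path from a client, here is a hedged sketch using the third-party requests package (an assumption; the original post does not use it). It assumes the server above is listening on localhost:8000; the form field must be named "file", matching the <input name="file"> element the server emits.

import requests

# Upload a local file to the directory currently being served.
with open("test.txt", "rb") as fh:
    resp = requests.post("http://localhost:8000/", files={"file": fh})
print(resp.status_code)
print(resp.text)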
When remotely debugging PyTorch model training with Visual Studio Code, the following error is reported:
File "/home/xxxx/.vscode-server/extensions/ms-python.python-2019.5.18875/pythonFiles/lib/python/ptvsd/daemon.py", line 145, in start
    raise RuntimeError('already started')
RuntimeError: already started
Fix: add the following lines at the top of the .py file:
import multiprocessing
multiprocessing.set_start_method('spawn', True)
Update:
If the project is based on PyTorch, check whether the DataLoader uses the num_workers parameter. When it does, the error above can appear; the workaround is to set num_workers to 0.
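For illustration, a hedged sketch of a DataLoader created with num_workers=0, so no worker subprocesses are spawned for the debugger to trip over (the dataset and batch size here are made-up placeholders):

import torch
from torch.utils.data import DataLoader, TensorDataset

dataset = TensorDataset(torch.randn(100, 3), torch.randint(0, 2, (100,)))
# num_workers=0 keeps data loading in the main process while debugging.
loader = DataLoader(dataset, batch_size=16, shuffle=True, num_workers=0)

for inputs, labels in loader:
    pass  # training step goes here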
System: Windows 7
Problem:
After resizing the Windows 7 system partition with gparted under Ubuntu 18.04, the desktop no longer appears after logging in; the screen stays black or shows a blank blue background.
The lower-right corner of the screen shows:

Windows 7
Build 7601
This copy of Windows is not genuine
Pressing Ctrl+Shift+Esc still opens Task Manager, but right-clicking a process and opening its properties shows every system executable as residing on drive E:, so the drive letters have been scrambled and Windows cannot load the system files and user profile correctly.
Solution:
Boot from a USB PE environment and run regedit.exe from the command line. Navigate to HKEY_LOCAL_MACHINE\SYSTEM\MountedDevices and copy the value of \DosDevices\C:. The value varies in length, but as long as the disk and its partitions are unchanged, the value seen under PE is identical to the one stored in the on-disk system, so it can be copied over directly.
Select HKEY_LOCAL_MACHINE; the File menu (or the right-click menu) then offers "Load Hive". Browse to the installed system's registry location, C:\Windows\System32\config, choose the SYSTEM hive file, and type a key name in the dialog (any name will do), for example TEST. A new TEST node then appears under HKEY_LOCAL_MACHINE holding the loaded SYSTEM hive. Navigate to its MountedDevices key, find the matching \DosDevices\C: value, and paste in the value copied earlier.
Reboot the system.
OpenConnect is a replacement for Cisco AnyConnect with the advantages of being open source, easy to obtain, and reliable. The official Cisco AnyConnect is comparatively tedious to set up: supporting multiple platforms requires deploying a client for each platform from the management console. OpenConnect has the edge here and can stand in whenever the official client does not cover a platform.
Install it from the command line:
$ sudo apt-get install -y \
    openconnect libopenconnect-dev \
    network-manager-openconnect-gnome \
    resolvconf
After installation, Cisco AnyConnect-compatible connections can be configured in the VPN section of the network-manager settings.
If the option does not appear in the VPN settings right after installation, a reboot may be required.
CUDA-9.1.85 no longer supports the Fermi architecture, so the following architecture flag:
arch=compute_20,code=sm_20
causes every .cu file to fail to compile; the only option is to build against CUDA-8.x.
The pragmatic route is to install Ubuntu 16.04 for the build, either on bare metal or with nvidia-docker.
On Ubuntu 18.04, sudo apt-get install nvidia-cuda-toolkit currently installs NVIDIA CUDA 9.1.85. The version is old, but it is stable and works on a wide range of hardware.
When a project requires a specific version of PyTorch, the officially provided prebuilt packages are not compatible with all hardware, which is a real nuisance in practice.
For now, the safest approach is to build directly from source.
If the graphics card is a few years old (GeForce GTX 760, compute capability 3.0; GeForce GT 720M in a Lenovo ThinkPad T440, compute capability 2.1), PyTorch warns at startup:
Found GPU0 GeForce GTX 760 which is of cuda capability 3.0.
    PyTorch no longer supports this GPU because it is too old.
    The minimum cuda capability that we support is 3.5.
and execution fails with:
RuntimeError: cuda runtime error (48) : no kernel image is available for execution on the device
The compute capability of each card can be looked up on NVIDIA's "Recommended GPU for Developers" page.
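If a PyTorch build that still loads on the hardware is available, the compute capability can also be queried directly; a hedged sketch:

import torch

if torch.cuda.is_available():
    major, minor = torch.cuda.get_device_capability(0)
    print(torch.cuda.get_device_name(0), "compute capability:", f"{major}.{minor}")
else:
    print("CUDA device not available")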
------------------------------------------------------------------------------------
To install cuda-9.1.85 from the official package repository (newer graphics drivers are not supported with it):
# Remove the nvidia-340 driver and fall back to the open-source Nouveau driver,
# otherwise installing nvidia-cuda-toolkit later will conflict with it
$ sudo apt-get remove nvidia-340

# Install the CUDA toolkit shipped with the distribution
$ sudo apt-get install nvidia-cuda-toolkit

# Install the 390 series driver
$ sudo apt-get install nvidia-driver-390

# After updating the driver, always reboot; otherwise all sorts of odd failures can appear
$ sudo reboot
If the installation fails with an error like this:
$ sudo apt-get install nvidia-cuda-toolkit
Reading package lists... Done
Building dependency tree
Reading state information... Done
nvidia-cuda-toolkit is already the newest version (9.1.85-3ubuntu1).
You might want to run 'apt --fix-broken install' to correct these.
The following packages have unmet dependencies:
 libcuinj64-9.1 : Depends: libcuda1 (>= 387.26) or
                           libcuda-9.1-1
E: Unmet dependencies. Try 'apt --fix-broken install' with no packages (or specify a solution).
and sudo apt --fix-broken install does not help, force-purge the conflicting package:
$ sudo dpkg -P nvidia-340
The Lenovo T440 (compute capability 2.1) does not support cuDNN, so there is no point installing it. In fact even the latest CUDA-10.1 cannot be installed: the NVIDIA GT 720M driver only goes up to the 390 series, while CUDA-10.1 requires driver 418 or newer. The symptom is that the graphics driver is not loaded after boot, and dmesg shows:
[   72.533870] NVRM: The NVIDIA GeForce GT 720M GPU installed in this system is
               NVRM:  supported through the NVIDIA 390.xx Legacy drivers. Please
               NVRM:  visit http://www.nvidia.com/object/unix.html for more
               NVRM:  information.  The 430.50 NVIDIA driver will ignore
               NVRM:  this GPU.  Continuing probe...
[   72.533875] NVRM: No NVIDIA graphics adapter found!
------------------------------------------------------------------------------------
Switch the GCC version to GCC-5:
$ sudo apt install gcc-5
$ sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-7 70
$ sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-6 60
$ sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-5 50

$ sudo apt install g++-5
$ sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-7 70
$ sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-6 60
$ sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-5 50

$ sudo update-alternatives --config g++

# Be sure to exit the current shell afterwards, otherwise the environment may not be refreshed
$ exit
------------------------------------------------------------------------------------
As before, the recommendation is to create a dedicated build environment in Anaconda and then run the build:
$ sudo apt-get install git

# conda remove -n pytorch --all
$ conda create -n pytorch -y python=3.6.8 pip
$ source activate pytorch

$ conda install numpy pyyaml mkl=2019.1 mkl-include=2019.1 setuptools cmake cffi typing pybind11
$ conda install ninja
$ conda install -c soumith magma-cuda80 cudatoolkit=8.0

$ git clone https://github.com/pytorch/pytorch
$ cd pytorch

# pytorch 1.0.1 still supports hardware with "Compute Capability" below 3.0;
# pytorch 1.2.0 needs at least capability 3.5 to run
# https://github.com/pytorch/pytorch/blob/v1.3.0/torch/utils/cpp_extension.py
$ git checkout v1.0.1 -b v1.0.1
$ git submodule sync
$ git submodule update --init --recursive

$ export CMAKE_PREFIX_PATH=${CONDA_PREFIX:-"$(dirname $(which conda))/../"}

# If CUDA is not needed, also add: export NO_CUDA=1
$ python setup.py clean

# Uninstall any previously installed pytorch
$ conda uninstall pytorch

$ export CUDA_HOST_COMPILER=/usr/bin/gcc-5
$ export CUDAHOSTCXX=/usr/bin/gcc-5
$ export CMAKE_CXX_COMPILER=/usr/bin/gcc-5

# Work around a known build check: the code insists on GCC 6.0 or newer and
# aborts otherwise; lower the requirement to 5.0
$ sed -i "s/6.0.0/5.0.0/g" cmake/MiscCheck.cmake

# Look up your hardware's "Compute Capability" on the NVIDIA developer site.
# For example "GeForce GTX 760" corresponds to "3.0"; a wrong value leads to
# RuntimeError: cuda runtime error (48) : no kernel image is available for execution on the device
$ python setup.py install

# For developer mode, use instead:
# python setup.py build develop

# Be sure to leave the pytorch build directory afterwards; running commands
# inside the source tree causes errors
$ cd ..
If the following error appears:
[ 68%] Building NVCC (Device) object caffe2/CMakeFiles/caffe2_gpu.dir/__/aten/src/ATen/native/sparse/cuda/caffe2_gpu_generated_SparseCUDABlas.cu.o
~/pytorch/aten/src/ATen/native/sparse/cuda/SparseCUDABlas.cu(58): error: more than one instance of function "at::native::sparse::cuda::cusparseGetErrorString" matches the argument list:
            function "cusparseGetErrorString(cusparseStatus_t)"
            function "at::native::sparse::cuda::cusparseGetErrorString(cusparseStatus_t)"
            argument types are: (cusparseStatus_t)
then edit aten/src/ATen/native/sparse/cuda/SparseCUDABlas.cu and wrap its cusparseGetErrorString function in #if (!((CUSPARSE_VER_MAJOR >= 10) && (CUSPARSE_VER_MINOR >= 2))), as follows:
#if (!((CUSPARSE_VER_MAJOR >= 10) && (CUSPARSE_VER_MINOR >= 2)))
const char* cusparseGetErrorString(cusparseStatus_t status) {
  switch(status)
  {
    case CUSPARSE_STATUS_SUCCESS:
      return "success";

    case CUSPARSE_STATUS_NOT_INITIALIZED:
      return "library not initialized";

    case CUSPARSE_STATUS_ALLOC_FAILED:
      return "resource allocation failed";

    case CUSPARSE_STATUS_INVALID_VALUE:
      return "an invalid numeric value was used as an argument";

    case CUSPARSE_STATUS_ARCH_MISMATCH:
      return "an absent device architectural feature is required";

    case CUSPARSE_STATUS_MAPPING_ERROR:
      return "an access to GPU memory space failed";

    case CUSPARSE_STATUS_EXECUTION_FAILED:
      return "the GPU program failed to execute";

    case CUSPARSE_STATUS_INTERNAL_ERROR:
      return "an internal operation failed";

    case CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED:
      return "the matrix type is not supported by this function";

    case CUSPARSE_STATUS_ZERO_PIVOT:
      return "an entry of the matrix is either structural zero or numerical zero (singular block)";

    default:
      return "unknown error";
  }
}
#endif
This resolves the conflict with the function that CUDA-10.1 itself already provides.
For reference: https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/native/sparse/cuda/SparseCUDABlas.cu
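Once the build finishes, a quick sanity check can be run from outside the source tree; a hedged sketch (not part of the original instructions):

import torch

print("torch version:", torch.__version__)
print("CUDA available:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("device:", torch.cuda.get_device_name(0))
    # A tiny GPU op; this fails with "no kernel image is available" if the
    # compute capability used at build time did not match the card.
    print((torch.ones(2, 2, device="cuda") * 2).sum().item())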
To uninstall a PyTorch that was installed from source:
# conda uninstall pytorch
$ pip uninstall torch
$ python setup.py clean
Cloning the PyTorch source can be very slow; a synced copy of the source code can be downloaded from this site.
Running MaskTextSpotter requires at least 4 GB of GPU memory; with less than that, it will not run.
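A hedged sketch (illustrative only) for checking the installed GPU's total memory before attempting a run:

import torch

if torch.cuda.is_available():
    props = torch.cuda.get_device_properties(0)
    total_gb = props.total_memory / (1024 ** 3)
    print(f"{props.name}: {total_gb:.1f} GB total GPU memory")
    if total_gb < 4:
        print("Less than 4 GB of GPU memory; MaskTextSpotter is unlikely to run.")
else:
    print("CUDA device not available")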
Install the latest cuda-10.1; building against older versions runs into problems:
# Remove any previously installed CUDA
$ sudo apt-get remove nvidia-cuda-toolkit

$ wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/cuda-ubuntu1804.pin
$ sudo mv cuda-ubuntu1804.pin /etc/apt/preferences.d/cuda-repository-pin-600
$ wget http://developer.download.nvidia.com/compute/cuda/10.1/Prod/local_installers/cuda-repo-ubuntu1804-10-1-local-10.1.243-418.87.00_1.0-1_amd64.deb
$ sudo dpkg -i cuda-repo-ubuntu1804-10-1-local-10.1.243-418.87.00_1.0-1_amd64.deb
$ sudo apt-key add /var/cuda-repo-10-1-local-10.1.243-418.87.00/7fa2af80.pub
$ sudo apt-get update
$ sudo apt-get -y install cuda

# Some drivers may be updated in the process; run the upgrade, otherwise
# things may still misbehave
$ sudo apt-get dist-upgrade
$ sudo apt-get autoremove

# The XWindow configuration file may need to be removed, otherwise the driver
# may not load properly
$ sudo rm -rf ~/.Xauthority

# If you hit the error:
# ubuntu 18.04 "nvidia-340 causes /usr/lib/x86_64-linux-gnu/libGL.so.1
# to be diverted to /usr/lib/x86_64-linux-gnu/libGL.so.1.distrib"
# see http://www.mobibrw.com/?p=21739

# Remove the local installer repository; it is no longer needed after
# installation and removing it frees several GB of disk space
$ sudo apt-get remove --purge cuda-repo-ubuntu1804-10-1-local-10.1.243-418.87.00
$ sudo apt-get update

# Some drivers may be updated in the process; run the upgrade, otherwise
# things may still misbehave
$ sudo apt-get dist-upgrade
$ sudo apt-get autoremove
Set up a dedicated environment
# first, make sure that your conda is setup properly with the right environment
# for that, check that `which conda`, `which pip` and `which python` points to the
# right path. From a clean conda env, this is what you need to do

# conda remove -n MaskTextSpotter --all
$ conda create -n MaskTextSpotter -y python=3.6.8 pip
Build and install PyTorch
$ sudo apt-get install git

# Enter the environment
$ source activate MaskTextSpotter

$ conda install numpy pyyaml mkl=2019.1 mkl-include=2019.1 setuptools cmake cffi typing pybind11
$ conda install ninja

# magma-cuda90 / magma-cuda91 / magma-cuda92 fail to build
$ conda install -c pytorch magma-cuda101

$ git clone https://github.com/pytorch/pytorch
# A synced copy can also be downloaded from this site:
# wget https://www.mobibrw.com/wp-content/uploads/2019/11/pytorch.zip
$ cd pytorch

# pytorch 1.0.1 still supports hardware with "Compute Capability" below 3.0;
# pytorch 1.2.0 needs at least capability 3.5 to run
# https://github.com/pytorch/pytorch/blob/v1.3.0/torch/utils/cpp_extension.py
$ git checkout v1.0.1 -b v1.0.1
$ git submodule sync
$ git submodule update --init --recursive

$ export CMAKE_PREFIX_PATH=${CONDA_PREFIX:-"$(dirname $(which conda))/../"}

$ python setup.py clean

# Uninstall any previously installed pytorch
$ conda uninstall pytorch
$ pip uninstall pytorch

# Look up your hardware's "Compute Capability" on the NVIDIA developer site.
# For example "GeForce GTX 760" corresponds to "3.0"; a wrong value leads to
# RuntimeError: cuda runtime error (48) : no kernel image is available for execution on the device
$ TORCH_CUDA_ARCH_LIST="3.0" python setup.py install

# Be sure to leave the pytorch build directory afterwards; running commands
# inside the source tree causes errors
$ cd ..

# Leave the environment
$ conda deactivate
If the build stops with the same "more than one instance of function cusparseGetErrorString" error seen during the earlier PyTorch build, apply the same fix: wrap the cusparseGetErrorString function in aten/src/ATen/native/sparse/cuda/SparseCUDABlas.cu in #if (!((CUSPARSE_VER_MAJOR >= 10) && (CUSPARSE_VER_MINOR >= 2))) ... #endif, which avoids the conflict with the function CUDA-10.1 already provides (see https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/native/sparse/cuda/SparseCUDABlas.cu).
Build and install TorchVision
$ sudo apt-get install git

# Enter the environment
$ source activate MaskTextSpotter

$ git clone https://github.com/pytorch/vision.git
# A copy can also be downloaded from this site:
# wget https://www.mobibrw.com/wp-content/uploads/2019/11/vision.zip
$ cd vision
$ git checkout v0.2.1 -b v0.2.1

$ python setup.py install

# Leave the environment
$ conda deactivate
Build MaskTextSpotter from source
$ source activate MaskTextSpotter

# this installs the right pip and dependencies for the fresh python
$ conda install ipython pip

# python dependencies
$ pip install ninja yacs cython matplotlib tqdm opencv-python shapely scipy tensorboardX

$ export INSTALL_DIR=$PWD

# install pycocotools
$ cd $INSTALL_DIR
$ git clone https://github.com/cocodataset/cocoapi.git
# Also available from this site: https://www.mobibrw.com/wp-content/uploads/2019/11/cocoapi.zip
$ cd cocoapi/PythonAPI
$ python setup.py build_ext install

# install apex (optional)
$ cd $INSTALL_DIR
$ git clone https://github.com/NVIDIA/apex.git
# Also available from this site: wget https://www.mobibrw.com/wp-content/uploads/2019/11/apex.zip
$ cd apex
$ python setup.py install --cuda_ext --cpp_ext

# clone repo
$ cd $INSTALL_DIR
$ git clone https://github.com/MhLiao/MaskTextSpotter.git
# Also available from this site: wget https://www.mobibrw.com/wp-content/uploads/2019/11/MaskTextSpotter.zip
$ cd MaskTextSpotter

# build
$ python setup.py build develop

$ unset INSTALL_DIR
Prepare the test data
# Create the output directories (from the source root)
$ mkdir outputs
$ cd outputs
$ mkdir finetune
$ cd finetune

# Download the pretrained model
# https://drive.google.com/open?id=1pPRS7qS_K1keXjSye0kksqhvoyD0SARz
# Also available from this site:
$ wget https://www.mobibrw.com/wp-content/uploads/2019/11/model_finetune.zip
$ unzip model_finetune.zip
$ cd ../../

$ mkdir datasets
$ cd datasets

# Download the icdar2013 dataset
$ wget https://www.mobibrw.com/wp-content/uploads/2019/11/icdar2013.zip
$ unzip icdar2013.zip
$ cd icdar2013

# Download the test ground-truth files
$ git clone https://github.com/zazaliu/ICDAR2PASCAL_VOC.git
# Also available from this site: wget https://www.mobibrw.com/wp-content/uploads/2019/11/ICDAR2PASCAL_VOC.zip
$ cp -r ICDAR2PASCAL_VOC/ICDAR2015/ch4_training_localization_transcription_gt/ test_gts

# Run the test
$ cd ../../
# Remove previously generated output first, otherwise the run may crash right after starting
$ rm -rf outputs/finetune/inference/
$ bash test.sh
If the following error appears while running the test:
File "tools/test_net.py", line 95, in <module>
    main()
File "tools/test_net.py", line 89, in main
    cfg=cfg,
File "~/MaskTextSpotter/maskrcnn_benchmark/engine/text_inference.py", line 380, in inference
    predictions = compute_on_dataset(model, data_loader, device)
File "~/MaskTextSpotter/maskrcnn_benchmark/engine/text_inference.py", line 55, in compute_on_dataset
    for i, batch in tqdm(enumerate(data_loader)):
File "~.conda/envs/MaskTextSpotter/lib/python3.6/site-packages/tqdm/std.py", line 1091, in __iter__
    for obj in iterable:
File "~.conda/envs/MaskTextSpotter/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 637, in __next__
    return self._process_next_batch(batch)
File "~.conda/envs/MaskTextSpotter/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 658, in _process_next_batch
    raise batch.exc_type(batch.exc_msg)
ValueError: Traceback (most recent call last):
  File "~.conda/envs/MaskTextSpotter/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 138, in _worker_loop
    samples = collate_fn([dataset[i] for i in batch_indices])
  File "~.conda/envs/MaskTextSpotter/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 138, in <listcomp>
    samples = collate_fn([dataset[i] for i in batch_indices])
  File "~/MaskTextSpotter/maskrcnn_benchmark/data/datasets/icdar.py", line 32, in __getitem__
    words,boxes,charsbbs,segmentations=self.load_gt_from_txt(gt_path,height,width)
  File "~/MaskTextSpotter/maskrcnn_benchmark/data/datasets/icdar.py", line 94, in load_gt_from_txt
    strs, loc = self.line2boxes(line)
  File "~/MaskTextSpotter/maskrcnn_benchmark/data/datasets/icdar.py", line 153, in line2boxes
    loc = np.vstack(v).transpose()
  File "<__array_function__ internals>", line 6, in vstack
  File "~.conda/envs/MaskTextSpotter/lib/python3.6/site-packages/numpy/core/shape_base.py", line 282, in vstack
    return _nx.concatenate(arrs, 0)
  File "<__array_function__ internals>", line 6, in concatenate
ValueError: all the input array dimensions for the concatenation axis must match exactly, but along dimension 1, the array at index 0 has size 2 and the array at index 1 has size 1
The problem occurs when maskrcnn_benchmark/data/datasets/icdar.py parses a ground-truth line such as 478,239,511,241,511,255,478,253,$5,000, in which the transcription itself contains a comma. The following test code reproduces the failure:
import numpy as np

line = '478,239,511,241,511,255,478,253,$5,000'

def line2boxes(line):
    parts = line.strip().split(',')
    if '\xef\xbb\xbf' in parts[0]:
        parts[0] = parts[0][3:]
    if '\ufeff' in parts[0]:
        parts[0] = parts[0].replace('\ufeff', '')
    x1 = np.array([int(float(x)) for x in parts[::9]])
    y1 = np.array([int(float(x)) for x in parts[1::9]])
    x2 = np.array([int(float(x)) for x in parts[2::9]])
    y2 = np.array([int(float(x)) for x in parts[3::9]])
    x3 = np.array([int(float(x)) for x in parts[4::9]])
    y3 = np.array([int(float(x)) for x in parts[5::9]])
    x4 = np.array([int(float(x)) for x in parts[6::9]])
    y4 = np.array([int(float(x)) for x in parts[7::9]])
    strs = parts[8::9]
    print(x1)
    loc = np.vstack((x1, y1, x2, y2, x3, y3, x4, y4)).transpose()
    print(loc)
    return strs, loc

line2boxes(line)
The fixed code limits the split to the first eight commas, so a transcription that itself contains commas (such as $5,000) stays in one piece:
import numpy as np

line = '478,239,511,241,511,255,478,253,$5,000'

def line2boxes(line):
    parts = line.strip().split(',', 8)
    if '\xef\xbb\xbf' in parts[0]:
        parts[0] = parts[0][3:]
    if '\ufeff' in parts[0]:
        parts[0] = parts[0].replace('\ufeff', '')
    x1 = np.array([int(float(x)) for x in parts[::9]])
    y1 = np.array([int(float(x)) for x in parts[1::9]])
    x2 = np.array([int(float(x)) for x in parts[2::9]])
    y2 = np.array([int(float(x)) for x in parts[3::9]])
    x3 = np.array([int(float(x)) for x in parts[4::9]])
    y3 = np.array([int(float(x)) for x in parts[5::9]])
    x4 = np.array([int(float(x)) for x in parts[6::9]])
    y4 = np.array([int(float(x)) for x in parts[7::9]])
    strs = parts[8::9]
    print(x1)
    loc = np.vstack((x1, y1, x2, y2, x3, y3, x4, y4)).transpose()
    print(loc)
    return strs, loc

line2boxes(line)
Other errors are usually version conflicts caused by packages installed and removed along the way; in that case, delete the environment, create a clean one, and rebuild.
After upgrading Ubuntu from 16.04.5 to 18.04.1, I started setting up the various software environments again.
Once the Android development environment was configured and I tried to create an emulator and run an app to check that everything was OK, the problem appeared.
Both creating and launching the emulator report /dev/kvm device: permission denied or /dev/kvm device: open failed, and the emulator will not start.
Check with:
$ ls -al /dev/kvm
crw------- 1 root root 10, 232 11月 17 22:37 /dev/kvm
Install qemu-kvm and add the current user to the kvm group:
$ sudo apt install qemu-kvm
$ sudo adduser `whoami` kvm

$ ls -al /dev/kvm
crw-rw---- 1 root kvm 10, 232 11月 18 14:40 /dev/kvm
Log out and back in so the new group membership takes effect, then launch the emulator again.
If it still fails, change the owner of /dev/kvm to the current user:
$ sudo chown `whoami` /dev/kvm