iser_verbs.c

/*
 * Copyright (c) 2004, 2005, 2006 Voltaire, Inc. All rights reserved.
 * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved.
 * Copyright (c) 2013-2014 Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/delay.h>

#include "iscsi_iser.h"

#define ISCSI_ISER_MAX_CONN     8
#define ISER_MAX_RX_LEN         (ISER_QP_MAX_RECV_DTOS * ISCSI_ISER_MAX_CONN)
#define ISER_MAX_TX_LEN         (ISER_QP_MAX_REQ_DTOS * ISCSI_ISER_MAX_CONN)

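/*
 * A completion queue is shared by up to ISCSI_ISER_MAX_CONN connections, so
 * it is sized for the worst case: all receive and send work requests of every
 * connection, plus one extra entry per connection (e.g. for the termination
 * beacon posted in iser_conn_terminate()).
 */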
#define ISER_MAX_CQ_LEN         (ISER_MAX_RX_LEN + ISER_MAX_TX_LEN + \
                                 ISCSI_ISER_MAX_CONN)

static int iser_cq_poll_limit = 512;

static void iser_cq_tasklet_fn(unsigned long data);
static void iser_cq_callback(struct ib_cq *cq, void *cq_context);

static void iser_cq_event_callback(struct ib_event *cause, void *context)
{
        iser_err("got cq event %d\n", cause->event);
}

static void iser_qp_event_callback(struct ib_event *cause, void *context)
{
        iser_err("got qp event %d\n", cause->event);
}

static void iser_event_handler(struct ib_event_handler *handler,
                               struct ib_event *event)
{
        iser_err("async event %d on device %s port %d\n", event->event,
                 event->device->name, event->element.port_num);
}

/**
 * iser_create_device_ib_res - creates Protection Domain (PD), Completion
 * Queue (CQ), DMA Memory Region (DMA MR) with the device associated with
 * the adapter.
 *
 * returns 0 on success, -1 on failure
 */
static int iser_create_device_ib_res(struct iser_device *device)
{
        struct ib_device_attr *dev_attr = &device->dev_attr;
        int ret, i;

        ret = ib_query_device(device->ib_device, dev_attr);
        if (ret) {
                pr_warn("Query device failed for %s\n", device->ib_device->name);
                return ret;
        }

        /* Assign function handles - based on FMR support */
        if (device->ib_device->alloc_fmr && device->ib_device->dealloc_fmr &&
            device->ib_device->map_phys_fmr && device->ib_device->unmap_fmr) {
                iser_info("FMR supported, using FMR for registration\n");
                device->iser_alloc_rdma_reg_res = iser_create_fmr_pool;
                device->iser_free_rdma_reg_res = iser_free_fmr_pool;
                device->iser_reg_rdma_mem = iser_reg_rdma_mem_fmr;
                device->iser_unreg_rdma_mem = iser_unreg_mem_fmr;
        } else if (dev_attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) {
                iser_info("FastReg supported, using FastReg for registration\n");
                device->iser_alloc_rdma_reg_res = iser_create_fastreg_pool;
                device->iser_free_rdma_reg_res = iser_free_fastreg_pool;
                device->iser_reg_rdma_mem = iser_reg_rdma_mem_fastreg;
                device->iser_unreg_rdma_mem = iser_unreg_mem_fastreg;
        } else {
                iser_err("IB device does not support FMRs nor FastRegs, can't register memory\n");
                return -1;
        }

        device->comps_used = min(ISER_MAX_CQ,
                                 device->ib_device->num_comp_vectors);
        iser_info("using %d CQs, device %s supports %d vectors\n",
                  device->comps_used, device->ib_device->name,
                  device->ib_device->num_comp_vectors);

        device->pd = ib_alloc_pd(device->ib_device);
        if (IS_ERR(device->pd))
                goto pd_err;

        for (i = 0; i < device->comps_used; i++) {
                struct iser_comp *comp = &device->comps[i];

                comp->device = device;
                comp->cq = ib_create_cq(device->ib_device,
                                        iser_cq_callback,
                                        iser_cq_event_callback,
                                        (void *)comp,
                                        ISER_MAX_CQ_LEN, i);
                if (IS_ERR(comp->cq)) {
                        comp->cq = NULL;
                        goto cq_err;
                }

                if (ib_req_notify_cq(comp->cq, IB_CQ_NEXT_COMP))
                        goto cq_err;

                tasklet_init(&comp->tasklet, iser_cq_tasklet_fn,
                             (unsigned long)comp);
        }

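        /*
         * Register a global DMA MR on the PD; its lkey is used for buffers
         * posted directly by the driver, e.g. the login response buffer in
         * iser_post_recvl() below.
         */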
        device->mr = ib_get_dma_mr(device->pd, IB_ACCESS_LOCAL_WRITE |
                                   IB_ACCESS_REMOTE_WRITE |
                                   IB_ACCESS_REMOTE_READ);
        if (IS_ERR(device->mr))
                goto dma_mr_err;

        INIT_IB_EVENT_HANDLER(&device->event_handler, device->ib_device,
                              iser_event_handler);
        if (ib_register_event_handler(&device->event_handler))
                goto handler_err;

        return 0;

handler_err:
        ib_dereg_mr(device->mr);
dma_mr_err:
        for (i = 0; i < device->comps_used; i++)
                tasklet_kill(&device->comps[i].tasklet);
cq_err:
        for (i = 0; i < device->comps_used; i++) {
                struct iser_comp *comp = &device->comps[i];

                if (comp->cq)
                        ib_destroy_cq(comp->cq);
        }
        ib_dealloc_pd(device->pd);
pd_err:
        iser_err("failed to allocate an IB resource\n");
        return -1;
}

/**
 * iser_free_device_ib_res - destroy/dealloc/dereg the DMA MR,
 * CQ and PD created with the device associated with the adapter.
 */
static void iser_free_device_ib_res(struct iser_device *device)
{
        int i;

        BUG_ON(device->mr == NULL);

        for (i = 0; i < device->comps_used; i++) {
                struct iser_comp *comp = &device->comps[i];

                tasklet_kill(&comp->tasklet);
                ib_destroy_cq(comp->cq);
                comp->cq = NULL;
        }

        (void)ib_unregister_event_handler(&device->event_handler);
        (void)ib_dereg_mr(device->mr);
        (void)ib_dealloc_pd(device->pd);

        device->mr = NULL;
        device->pd = NULL;
}

/**
 * iser_create_fmr_pool - Creates FMR pool and page_vector
 *
 * returns 0 on success, or errno code on failure
 */
int iser_create_fmr_pool(struct ib_conn *ib_conn, unsigned cmds_max)
{
        struct iser_device *device = ib_conn->device;
        struct ib_fmr_pool_param params;
        int ret = -ENOMEM;

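        /*
         * The page vector and its page array are carved out of a single
         * allocation: the u64 page array starts immediately after the
         * iser_page_vec struct itself.
         */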
        ib_conn->fmr.page_vec = kmalloc(sizeof(*ib_conn->fmr.page_vec) +
                                        (sizeof(u64) * (ISCSI_ISER_SG_TABLESIZE + 1)),
                                        GFP_KERNEL);
        if (!ib_conn->fmr.page_vec)
                return ret;

        ib_conn->fmr.page_vec->pages = (u64 *)(ib_conn->fmr.page_vec + 1);

        params.page_shift = SHIFT_4K;
        /* when the first/last SG element are not start/end *
         * page aligned, the map would be of N+1 pages */
        params.max_pages_per_fmr = ISCSI_ISER_SG_TABLESIZE + 1;
        /* make the pool size twice the max number of SCSI commands *
         * the ML is expected to queue, watermark for unmap at 50% */
        params.pool_size = cmds_max * 2;
        params.dirty_watermark = cmds_max;
        params.cache = 0;
        params.flush_function = NULL;
        params.access = (IB_ACCESS_LOCAL_WRITE |
                         IB_ACCESS_REMOTE_WRITE |
                         IB_ACCESS_REMOTE_READ);

        ib_conn->fmr.pool = ib_create_fmr_pool(device->pd, &params);
        if (!IS_ERR(ib_conn->fmr.pool))
                return 0;

        /* no FMR => no need for page_vec */
        kfree(ib_conn->fmr.page_vec);
        ib_conn->fmr.page_vec = NULL;

        ret = PTR_ERR(ib_conn->fmr.pool);
        ib_conn->fmr.pool = NULL;
        if (ret != -ENOSYS) {
                iser_err("FMR allocation failed, err %d\n", ret);
                return ret;
        } else {
                iser_warn("FMRs are not supported, using unaligned mode\n");
                return 0;
        }
}

/**
 * iser_free_fmr_pool - releases the FMR pool and page vec
 */
void iser_free_fmr_pool(struct ib_conn *ib_conn)
{
        iser_info("freeing conn %p fmr pool %p\n",
                  ib_conn, ib_conn->fmr.pool);

        if (ib_conn->fmr.pool != NULL)
                ib_destroy_fmr_pool(ib_conn->fmr.pool);

        ib_conn->fmr.pool = NULL;

        kfree(ib_conn->fmr.page_vec);
        ib_conn->fmr.page_vec = NULL;
}

static int
iser_create_fastreg_desc(struct ib_device *ib_device, struct ib_pd *pd,
                         bool pi_enable, struct fast_reg_descriptor *desc)
{
        int ret;

        desc->data_frpl = ib_alloc_fast_reg_page_list(ib_device,
                                                      ISCSI_ISER_SG_TABLESIZE + 1);
        if (IS_ERR(desc->data_frpl)) {
                ret = PTR_ERR(desc->data_frpl);
                iser_err("Failed to allocate ib_fast_reg_page_list err=%d\n",
                         ret);
                return PTR_ERR(desc->data_frpl);
        }

        desc->data_mr = ib_alloc_fast_reg_mr(pd, ISCSI_ISER_SG_TABLESIZE + 1);
        if (IS_ERR(desc->data_mr)) {
                ret = PTR_ERR(desc->data_mr);
                iser_err("Failed to allocate ib_fast_reg_mr err=%d\n", ret);
                goto fast_reg_mr_failure;
        }
        desc->reg_indicators |= ISER_DATA_KEY_VALID;

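        /*
         * For T10-PI enabled connections each descriptor also carries a
         * protection fast-reg MR/page-list plus a signature-enabled MR used
         * for the signature handover.
         */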
        if (pi_enable) {
                struct ib_mr_init_attr mr_init_attr = {0};
                struct iser_pi_context *pi_ctx = NULL;

                desc->pi_ctx = kzalloc(sizeof(*desc->pi_ctx), GFP_KERNEL);
                if (!desc->pi_ctx) {
                        iser_err("Failed to allocate pi context\n");
                        ret = -ENOMEM;
                        goto pi_ctx_alloc_failure;
                }
                pi_ctx = desc->pi_ctx;

                pi_ctx->prot_frpl = ib_alloc_fast_reg_page_list(ib_device,
                                                                ISCSI_ISER_SG_TABLESIZE);
                if (IS_ERR(pi_ctx->prot_frpl)) {
                        ret = PTR_ERR(pi_ctx->prot_frpl);
                        iser_err("Failed to allocate prot frpl ret=%d\n",
                                 ret);
                        goto prot_frpl_failure;
                }

                pi_ctx->prot_mr = ib_alloc_fast_reg_mr(pd,
                                                       ISCSI_ISER_SG_TABLESIZE + 1);
                if (IS_ERR(pi_ctx->prot_mr)) {
                        ret = PTR_ERR(pi_ctx->prot_mr);
                        iser_err("Failed to allocate prot frmr ret=%d\n",
                                 ret);
                        goto prot_mr_failure;
                }
                desc->reg_indicators |= ISER_PROT_KEY_VALID;

                mr_init_attr.max_reg_descriptors = 2;
                mr_init_attr.flags |= IB_MR_SIGNATURE_EN;
                pi_ctx->sig_mr = ib_create_mr(pd, &mr_init_attr);
                if (IS_ERR(pi_ctx->sig_mr)) {
                        ret = PTR_ERR(pi_ctx->sig_mr);
                        iser_err("Failed to allocate signature enabled mr err=%d\n",
                                 ret);
                        goto sig_mr_failure;
                }
                desc->reg_indicators |= ISER_SIG_KEY_VALID;
        }
        desc->reg_indicators &= ~ISER_FASTREG_PROTECTED;

        iser_dbg("Create fr_desc %p page_list %p\n",
                 desc, desc->data_frpl->page_list);

        return 0;

sig_mr_failure:
        ib_dereg_mr(desc->pi_ctx->prot_mr);
prot_mr_failure:
        ib_free_fast_reg_page_list(desc->pi_ctx->prot_frpl);
prot_frpl_failure:
        kfree(desc->pi_ctx);
pi_ctx_alloc_failure:
        ib_dereg_mr(desc->data_mr);
fast_reg_mr_failure:
        ib_free_fast_reg_page_list(desc->data_frpl);

        return ret;
}

/**
 * iser_create_fastreg_pool - Creates pool of fast_reg descriptors
 * for fast registration work requests.
 * returns 0 on success, or errno code on failure
 */
int iser_create_fastreg_pool(struct ib_conn *ib_conn, unsigned cmds_max)
{
        struct iser_device *device = ib_conn->device;
        struct fast_reg_descriptor *desc;
        int i, ret;

        INIT_LIST_HEAD(&ib_conn->fastreg.pool);
        ib_conn->fastreg.pool_size = 0;
        for (i = 0; i < cmds_max; i++) {
                desc = kzalloc(sizeof(*desc), GFP_KERNEL);
                if (!desc) {
                        iser_err("Failed to allocate a new fast_reg descriptor\n");
                        ret = -ENOMEM;
                        goto err;
                }

                ret = iser_create_fastreg_desc(device->ib_device, device->pd,
                                               ib_conn->pi_support, desc);
                if (ret) {
                        iser_err("Failed to create fastreg descriptor err=%d\n",
                                 ret);
                        kfree(desc);
                        goto err;
                }

                list_add_tail(&desc->list, &ib_conn->fastreg.pool);
                ib_conn->fastreg.pool_size++;
        }

        return 0;

err:
        iser_free_fastreg_pool(ib_conn);
        return ret;
}

/**
 * iser_free_fastreg_pool - releases the pool of fast_reg descriptors
 */
void iser_free_fastreg_pool(struct ib_conn *ib_conn)
{
        struct fast_reg_descriptor *desc, *tmp;
        int i = 0;

        if (list_empty(&ib_conn->fastreg.pool))
                return;

        iser_info("freeing conn %p fr pool\n", ib_conn);

        list_for_each_entry_safe(desc, tmp, &ib_conn->fastreg.pool, list) {
                list_del(&desc->list);
                ib_free_fast_reg_page_list(desc->data_frpl);
                ib_dereg_mr(desc->data_mr);
                if (desc->pi_ctx) {
                        ib_free_fast_reg_page_list(desc->pi_ctx->prot_frpl);
                        ib_dereg_mr(desc->pi_ctx->prot_mr);
                        ib_destroy_mr(desc->pi_ctx->sig_mr);
                        kfree(desc->pi_ctx);
                }
                kfree(desc);
                ++i;
        }

        if (i < ib_conn->fastreg.pool_size)
                iser_warn("pool still has %d regions registered\n",
                          ib_conn->fastreg.pool_size - i);
}

/**
 * iser_create_ib_conn_res - creates a Queue-Pair (QP)
 *
 * returns 0 on success, -1 on failure
 */
static int iser_create_ib_conn_res(struct ib_conn *ib_conn)
{
        struct iser_device *device;
        struct ib_qp_init_attr init_attr;
        int ret = -ENOMEM;
        int index, min_index = 0;

        BUG_ON(ib_conn->device == NULL);

        device = ib_conn->device;

        memset(&init_attr, 0, sizeof init_attr);

        mutex_lock(&ig.connlist_mutex);
        /* select the CQ with the minimal number of usages */
        for (index = 0; index < device->comps_used; index++) {
                if (device->comps[index].active_qps <
                    device->comps[min_index].active_qps)
                        min_index = index;
        }
        ib_conn->comp = &device->comps[min_index];
        ib_conn->comp->active_qps++;
        mutex_unlock(&ig.connlist_mutex);
        iser_info("cq index %d used for ib_conn %p\n", min_index, ib_conn);

        init_attr.event_handler = iser_qp_event_callback;
        init_attr.qp_context = (void *)ib_conn;
        init_attr.send_cq = ib_conn->comp->cq;
        init_attr.recv_cq = ib_conn->comp->cq;
        init_attr.cap.max_recv_wr = ISER_QP_MAX_RECV_DTOS;
        init_attr.cap.max_send_sge = 2;
        init_attr.cap.max_recv_sge = 1;
        init_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
        init_attr.qp_type = IB_QPT_RC;

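        /*
         * One send WR beyond the maximum number of data-transfer requests is
         * reserved so that the termination beacon posted in
         * iser_conn_terminate() always has room on the send queue.
         */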
        if (ib_conn->pi_support) {
                init_attr.cap.max_send_wr = ISER_QP_SIG_MAX_REQ_DTOS + 1;
                init_attr.create_flags |= IB_QP_CREATE_SIGNATURE_EN;
        } else {
                init_attr.cap.max_send_wr = ISER_QP_MAX_REQ_DTOS + 1;
        }

        ret = rdma_create_qp(ib_conn->cma_id, device->pd, &init_attr);
        if (ret)
                goto out_err;

        ib_conn->qp = ib_conn->cma_id->qp;
        iser_info("setting conn %p cma_id %p qp %p\n",
                  ib_conn, ib_conn->cma_id,
                  ib_conn->cma_id->qp);
        return ret;

out_err:
        iser_err("unable to alloc mem or create resource, err %d\n", ret);
        return ret;
}

/**
 * Based on the resolved device's node GUID, see if an iser device has
 * already been allocated for it. If there's no such device, create one.
 */
static
struct iser_device *iser_device_find_by_ib_device(struct rdma_cm_id *cma_id)
{
        struct iser_device *device;

        mutex_lock(&ig.device_list_mutex);

        list_for_each_entry(device, &ig.device_list, ig_list)
                /* find if there's a match using the node GUID */
                if (device->ib_device->node_guid == cma_id->device->node_guid)
                        goto inc_refcnt;

        device = kzalloc(sizeof *device, GFP_KERNEL);
        if (device == NULL)
                goto out;

        /* assign the IB device to the new iser device */
        device->ib_device = cma_id->device;
        /* init the device and link it into ig device list */
        if (iser_create_device_ib_res(device)) {
                kfree(device);
                device = NULL;
                goto out;
        }
        list_add(&device->ig_list, &ig.device_list);

inc_refcnt:
        device->refcount++;
out:
        mutex_unlock(&ig.device_list_mutex);
        return device;
}

/* if there's no demand for this device, release it */
static void iser_device_try_release(struct iser_device *device)
{
        mutex_lock(&ig.device_list_mutex);
        device->refcount--;
        iser_info("device %p refcount %d\n", device, device->refcount);
        if (!device->refcount) {
                iser_free_device_ib_res(device);
                list_del(&device->ig_list);
                kfree(device);
        }
        mutex_unlock(&ig.device_list_mutex);
}

/**
 * Called with state mutex held
 **/
static int iser_conn_state_comp_exch(struct iser_conn *iser_conn,
                                     enum iser_conn_state comp,
                                     enum iser_conn_state exch)
{
        int ret;

        ret = (iser_conn->state == comp);
        if (ret)
                iser_conn->state = exch;

        return ret;
}

void iser_release_work(struct work_struct *work)
{
        struct iser_conn *iser_conn;

        iser_conn = container_of(work, struct iser_conn, release_work);

        /* Wait for conn_stop to complete */
        wait_for_completion(&iser_conn->stop_completion);
        /* Wait for IB resources cleanup to complete */
        wait_for_completion(&iser_conn->ib_completion);

        mutex_lock(&iser_conn->state_mutex);
        iser_conn->state = ISER_CONN_DOWN;
        mutex_unlock(&iser_conn->state_mutex);

        iser_conn_release(iser_conn);
}

/**
 * iser_free_ib_conn_res - release IB related resources
 * @iser_conn: iser connection struct
 * @destroy: indicator if we need to try to release the
 *     iser device and memory regions pool (only iscsi
 *     shutdown and DEVICE_REMOVAL will use this).
 *
 * This routine is called with the iser state mutex held
 * so the cm_id removal is out of here. It is safe to
 * be invoked multiple times.
 */
static void iser_free_ib_conn_res(struct iser_conn *iser_conn,
                                  bool destroy)
{
        struct ib_conn *ib_conn = &iser_conn->ib_conn;
        struct iser_device *device = ib_conn->device;

        iser_info("freeing conn %p cma_id %p qp %p\n",
                  iser_conn, ib_conn->cma_id, ib_conn->qp);

        if (ib_conn->qp != NULL) {
                ib_conn->comp->active_qps--;
                rdma_destroy_qp(ib_conn->cma_id);
                ib_conn->qp = NULL;
        }

        if (destroy) {
                if (iser_conn->rx_descs)
                        iser_free_rx_descriptors(iser_conn);

                if (device != NULL) {
                        iser_device_try_release(device);
                        ib_conn->device = NULL;
                }
        }
}

/**
 * Frees all conn objects and deallocs conn descriptor
 */
void iser_conn_release(struct iser_conn *iser_conn)
{
        struct ib_conn *ib_conn = &iser_conn->ib_conn;

        mutex_lock(&ig.connlist_mutex);
        list_del(&iser_conn->conn_list);
        mutex_unlock(&ig.connlist_mutex);

        mutex_lock(&iser_conn->state_mutex);
        if (iser_conn->state != ISER_CONN_DOWN)
                iser_warn("iser conn %p state %d, expected state down.\n",
                          iser_conn, iser_conn->state);
        /*
         * In case we never got to bind stage, we still need to
         * release IB resources (which is safe to call more than once).
         */
        iser_free_ib_conn_res(iser_conn, true);
        mutex_unlock(&iser_conn->state_mutex);

        if (ib_conn->cma_id != NULL) {
                rdma_destroy_id(ib_conn->cma_id);
                ib_conn->cma_id = NULL;
        }

        kfree(iser_conn);
}

/**
 * triggers start of the disconnect procedures and waits for them to be done
 * Called with state mutex held
 */
int iser_conn_terminate(struct iser_conn *iser_conn)
{
        struct ib_conn *ib_conn = &iser_conn->ib_conn;
        struct ib_send_wr *bad_wr;
        int err = 0;

        /* terminate the iser conn only if the conn state is UP */
        if (!iser_conn_state_comp_exch(iser_conn, ISER_CONN_UP,
                                       ISER_CONN_TERMINATING))
                return 0;

        iser_info("iser_conn %p state %d\n", iser_conn, iser_conn->state);

        /* suspend queuing of new iscsi commands */
        if (iser_conn->iscsi_conn)
                iscsi_suspend_queue(iser_conn->iscsi_conn);

        /*
         * In case we didn't already clean up the cma_id (peer initiated
         * a disconnection), we need to cause the CMA to change the QP
         * state to ERROR.
         */
        if (ib_conn->cma_id) {
                err = rdma_disconnect(ib_conn->cma_id);
                if (err)
                        iser_err("Failed to disconnect, conn: 0x%p err %d\n",
                                 iser_conn, err);

                /* post an indication that all flush errors were consumed */
                err = ib_post_send(ib_conn->qp, &ib_conn->beacon, &bad_wr);
                if (err)
                        iser_err("conn %p failed to post beacon\n", ib_conn);

                wait_for_completion(&ib_conn->flush_comp);
        }

        return 1;
}

/**
 * Called with state mutex held
 **/
static void iser_connect_error(struct rdma_cm_id *cma_id)
{
        struct iser_conn *iser_conn;

        iser_conn = (struct iser_conn *)cma_id->context;
        iser_conn->state = ISER_CONN_DOWN;
}

/**
 * Called with state mutex held
 **/
static void iser_addr_handler(struct rdma_cm_id *cma_id)
{
        struct iser_device *device;
        struct iser_conn *iser_conn;
        struct ib_conn *ib_conn;
        int ret;

        iser_conn = (struct iser_conn *)cma_id->context;
        if (iser_conn->state != ISER_CONN_PENDING)
                /* bailout */
                return;

        ib_conn = &iser_conn->ib_conn;
        device = iser_device_find_by_ib_device(cma_id);
        if (!device) {
                iser_err("device lookup/creation failed\n");
                iser_connect_error(cma_id);
                return;
        }

        ib_conn->device = device;

        /* connection T10-PI support */
        if (iser_pi_enable) {
                if (!(device->dev_attr.device_cap_flags &
                      IB_DEVICE_SIGNATURE_HANDOVER)) {
                        iser_warn("T10-PI requested but not supported on %s, "
                                  "continue without T10-PI\n",
                                  ib_conn->device->ib_device->name);
                        ib_conn->pi_support = false;
                } else {
                        ib_conn->pi_support = true;
                }
        }

        ret = rdma_resolve_route(cma_id, 1000);
        if (ret) {
                iser_err("resolve route failed: %d\n", ret);
                iser_connect_error(cma_id);
                return;
        }
}

/**
 * Called with state mutex held
 **/
static void iser_route_handler(struct rdma_cm_id *cma_id)
{
        struct rdma_conn_param conn_param;
        int ret;
        struct iser_cm_hdr req_hdr;
        struct iser_conn *iser_conn = (struct iser_conn *)cma_id->context;
        struct ib_conn *ib_conn = &iser_conn->ib_conn;
        struct iser_device *device = ib_conn->device;

        if (iser_conn->state != ISER_CONN_PENDING)
                /* bailout */
                return;

        ret = iser_create_ib_conn_res(ib_conn);
        if (ret)
                goto failure;

        memset(&conn_param, 0, sizeof conn_param);
        conn_param.responder_resources = device->dev_attr.max_qp_rd_atom;
        conn_param.initiator_depth = 1;
        conn_param.retry_count = 7;
        conn_param.rnr_retry_count = 6;

        memset(&req_hdr, 0, sizeof(req_hdr));
        req_hdr.flags = (ISER_ZBVA_NOT_SUPPORTED |
                         ISER_SEND_W_INV_NOT_SUPPORTED);
        conn_param.private_data = (void *)&req_hdr;
        conn_param.private_data_len = sizeof(struct iser_cm_hdr);

        ret = rdma_connect(cma_id, &conn_param);
        if (ret) {
                iser_err("failure connecting: %d\n", ret);
                goto failure;
        }

        return;
failure:
        iser_connect_error(cma_id);
}

static void iser_connected_handler(struct rdma_cm_id *cma_id)
{
        struct iser_conn *iser_conn;
        struct ib_qp_attr attr;
        struct ib_qp_init_attr init_attr;

        iser_conn = (struct iser_conn *)cma_id->context;
        if (iser_conn->state != ISER_CONN_PENDING)
                /* bailout */
                return;

        (void)ib_query_qp(cma_id->qp, &attr, ~0, &init_attr);
        iser_info("remote qpn:%x my qpn:%x\n", attr.dest_qp_num, cma_id->qp->qp_num);

        iser_conn->state = ISER_CONN_UP;
        complete(&iser_conn->up_completion);
}

static void iser_disconnected_handler(struct rdma_cm_id *cma_id)
{
        struct iser_conn *iser_conn = (struct iser_conn *)cma_id->context;

        if (iser_conn_terminate(iser_conn)) {
                if (iser_conn->iscsi_conn)
                        iscsi_conn_failure(iser_conn->iscsi_conn,
                                           ISCSI_ERR_CONN_FAILED);
                else
                        iser_err("iscsi_iser connection isn't bound\n");
        }
}

static void iser_cleanup_handler(struct rdma_cm_id *cma_id,
                                 bool destroy)
{
        struct iser_conn *iser_conn = (struct iser_conn *)cma_id->context;

        /*
         * We are not guaranteed that we visited disconnected_handler
         * by now, call it here to be safe that we handle CM drep
         * and flush errors.
         */
        iser_disconnected_handler(cma_id);
        iser_free_ib_conn_res(iser_conn, destroy);
        complete(&iser_conn->ib_completion);
}

static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
{
        struct iser_conn *iser_conn;
        int ret = 0;

        iser_conn = (struct iser_conn *)cma_id->context;
        iser_info("event %d status %d conn %p id %p\n",
                  event->event, event->status, cma_id->context, cma_id);

        mutex_lock(&iser_conn->state_mutex);
        switch (event->event) {
        case RDMA_CM_EVENT_ADDR_RESOLVED:
                iser_addr_handler(cma_id);
                break;
        case RDMA_CM_EVENT_ROUTE_RESOLVED:
                iser_route_handler(cma_id);
                break;
        case RDMA_CM_EVENT_ESTABLISHED:
                iser_connected_handler(cma_id);
                break;
        case RDMA_CM_EVENT_ADDR_ERROR:
        case RDMA_CM_EVENT_ROUTE_ERROR:
        case RDMA_CM_EVENT_CONNECT_ERROR:
        case RDMA_CM_EVENT_UNREACHABLE:
        case RDMA_CM_EVENT_REJECTED:
                iser_connect_error(cma_id);
                break;
        case RDMA_CM_EVENT_DISCONNECTED:
        case RDMA_CM_EVENT_ADDR_CHANGE:
                iser_disconnected_handler(cma_id);
                break;
        case RDMA_CM_EVENT_DEVICE_REMOVAL:
                /*
                 * we *must* destroy the device as we cannot rely
                 * on iscsid to be around to initiate error handling.
                 * also implicitly destroy the cma_id.
                 */
                iser_cleanup_handler(cma_id, true);
                iser_conn->ib_conn.cma_id = NULL;
                ret = 1;
                break;
        case RDMA_CM_EVENT_TIMEWAIT_EXIT:
                iser_cleanup_handler(cma_id, false);
                break;
        default:
                iser_err("Unexpected RDMA CM event (%d)\n", event->event);
                break;
        }
        mutex_unlock(&iser_conn->state_mutex);

        return ret;
}

void iser_conn_init(struct iser_conn *iser_conn)
{
        iser_conn->state = ISER_CONN_INIT;
        iser_conn->ib_conn.post_recv_buf_count = 0;
        init_completion(&iser_conn->ib_conn.flush_comp);
        init_completion(&iser_conn->stop_completion);
        init_completion(&iser_conn->ib_completion);
        init_completion(&iser_conn->up_completion);
        INIT_LIST_HEAD(&iser_conn->conn_list);
        spin_lock_init(&iser_conn->ib_conn.lock);
        mutex_init(&iser_conn->state_mutex);
}

/**
 * starts the process of connecting to the target
 * sleeps until the connection is established or rejected
 */
int iser_connect(struct iser_conn *iser_conn,
                 struct sockaddr *src_addr,
                 struct sockaddr *dst_addr,
                 int non_blocking)
{
        struct ib_conn *ib_conn = &iser_conn->ib_conn;
        int err = 0;

        mutex_lock(&iser_conn->state_mutex);

        sprintf(iser_conn->name, "%pISp", dst_addr);
        iser_info("connecting to: %s\n", iser_conn->name);

        /* the device is known only --after-- address resolution */
        ib_conn->device = NULL;

        iser_conn->state = ISER_CONN_PENDING;

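        /*
         * The beacon is an empty SEND work request posted at teardown
         * (see iser_conn_terminate()); its completion with wr_id
         * ISER_BEACON_WRID indicates that all preceding flush errors have
         * been drained from the CQ.
         */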
        ib_conn->beacon.wr_id = ISER_BEACON_WRID;
        ib_conn->beacon.opcode = IB_WR_SEND;

        ib_conn->cma_id = rdma_create_id(iser_cma_handler,
                                         (void *)iser_conn,
                                         RDMA_PS_TCP, IB_QPT_RC);
        if (IS_ERR(ib_conn->cma_id)) {
                err = PTR_ERR(ib_conn->cma_id);
                iser_err("rdma_create_id failed: %d\n", err);
                goto id_failure;
        }

        err = rdma_resolve_addr(ib_conn->cma_id, src_addr, dst_addr, 1000);
        if (err) {
                iser_err("rdma_resolve_addr failed: %d\n", err);
                goto addr_failure;
        }

        if (!non_blocking) {
                wait_for_completion_interruptible(&iser_conn->up_completion);

                if (iser_conn->state != ISER_CONN_UP) {
                        err = -EIO;
                        goto connect_failure;
                }
        }
        mutex_unlock(&iser_conn->state_mutex);

        mutex_lock(&ig.connlist_mutex);
        list_add(&iser_conn->conn_list, &ig.connlist);
        mutex_unlock(&ig.connlist_mutex);
        return 0;

id_failure:
        ib_conn->cma_id = NULL;
addr_failure:
        iser_conn->state = ISER_CONN_DOWN;
connect_failure:
        mutex_unlock(&iser_conn->state_mutex);
        iser_conn_release(iser_conn);
        return err;
}

/**
 * iser_reg_page_vec - Register physical memory
 *
 * returns: 0 on success, errno code on failure
 */
int iser_reg_page_vec(struct ib_conn *ib_conn,
                      struct iser_page_vec *page_vec,
                      struct iser_mem_reg *mem_reg)
{
        struct ib_pool_fmr *mem;
        u64 io_addr;
        u64 *page_list;
        int status;

        page_list = page_vec->pages;
        io_addr = page_list[0];

        mem = ib_fmr_pool_map_phys(ib_conn->fmr.pool,
                                   page_list,
                                   page_vec->length,
                                   io_addr);

        if (IS_ERR(mem)) {
                status = (int)PTR_ERR(mem);
                iser_err("ib_fmr_pool_map_phys failed: %d\n", status);
                return status;
        }

        mem_reg->lkey = mem->fmr->lkey;
        mem_reg->rkey = mem->fmr->rkey;
        mem_reg->len = page_vec->length * SIZE_4K;
        mem_reg->va = io_addr;
        mem_reg->is_mr = 1;
        mem_reg->mem_h = (void *)mem;

        mem_reg->va += page_vec->offset;
        mem_reg->len = page_vec->data_size;

        iser_dbg("PHYSICAL Mem.register, [PHYS p_array: 0x%p, sz: %d, "
                 "entry[0]: (0x%08lx,%ld)] -> "
                 "[lkey: 0x%08X mem_h: 0x%p va: 0x%08lX sz: %ld]\n",
                 page_vec, page_vec->length,
                 (unsigned long)page_vec->pages[0],
                 (unsigned long)page_vec->data_size,
                 (unsigned int)mem_reg->lkey, mem_reg->mem_h,
                 (unsigned long)mem_reg->va, (unsigned long)mem_reg->len);
        return 0;
}

/**
 * Unregister (previously registered using FMR) memory.
 * If memory is non-FMR does nothing.
 */
void iser_unreg_mem_fmr(struct iscsi_iser_task *iser_task,
                        enum iser_data_dir cmd_dir)
{
        struct iser_mem_reg *reg = &iser_task->rdma_regd[cmd_dir].reg;
        int ret;

        if (!reg->is_mr)
                return;

        iser_dbg("PHYSICAL Mem.Unregister mem_h %p\n", reg->mem_h);

        ret = ib_fmr_pool_unmap((struct ib_pool_fmr *)reg->mem_h);
        if (ret)
                iser_err("ib_fmr_pool_unmap failed %d\n", ret);

        reg->mem_h = NULL;
}

void iser_unreg_mem_fastreg(struct iscsi_iser_task *iser_task,
                            enum iser_data_dir cmd_dir)
{
        struct iser_mem_reg *reg = &iser_task->rdma_regd[cmd_dir].reg;
        struct iser_conn *iser_conn = iser_task->iser_conn;
        struct ib_conn *ib_conn = &iser_conn->ib_conn;
        struct fast_reg_descriptor *desc = reg->mem_h;

        if (!reg->is_mr)
                return;

        reg->mem_h = NULL;
        reg->is_mr = 0;
        spin_lock_bh(&ib_conn->lock);
        list_add_tail(&desc->list, &ib_conn->fastreg.pool);
        spin_unlock_bh(&ib_conn->lock);
}

int iser_post_recvl(struct iser_conn *iser_conn)
{
        struct ib_recv_wr rx_wr, *rx_wr_failed;
        struct ib_conn *ib_conn = &iser_conn->ib_conn;
        struct ib_sge sge;
        int ib_ret;

        sge.addr = iser_conn->login_resp_dma;
        sge.length = ISER_RX_LOGIN_SIZE;
        sge.lkey = ib_conn->device->mr->lkey;

        rx_wr.wr_id = (unsigned long)iser_conn->login_resp_buf;
        rx_wr.sg_list = &sge;
        rx_wr.num_sge = 1;
        rx_wr.next = NULL;

        ib_conn->post_recv_buf_count++;
        ib_ret = ib_post_recv(ib_conn->qp, &rx_wr, &rx_wr_failed);
        if (ib_ret) {
                iser_err("ib_post_recv failed ret=%d\n", ib_ret);
                ib_conn->post_recv_buf_count--;
        }
        return ib_ret;
}

int iser_post_recvm(struct iser_conn *iser_conn, int count)
{
        struct ib_recv_wr *rx_wr, *rx_wr_failed;
        int i, ib_ret;
        struct ib_conn *ib_conn = &iser_conn->ib_conn;
        unsigned int my_rx_head = iser_conn->rx_desc_head;
        struct iser_rx_desc *rx_desc;

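        /*
         * Chain count receive WRs over the next slots of the rx descriptor
         * ring; the ring head wraps via the power-of-two mask and is written
         * back to the connection only if the post succeeds.
         */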
        for (rx_wr = ib_conn->rx_wr, i = 0; i < count; i++, rx_wr++) {
                rx_desc = &iser_conn->rx_descs[my_rx_head];
                rx_wr->wr_id = (unsigned long)rx_desc;
                rx_wr->sg_list = &rx_desc->rx_sg;
                rx_wr->num_sge = 1;
                rx_wr->next = rx_wr + 1;
                my_rx_head = (my_rx_head + 1) & iser_conn->qp_max_recv_dtos_mask;
        }

        rx_wr--;
        rx_wr->next = NULL; /* mark end of work requests list */

        ib_conn->post_recv_buf_count += count;
        ib_ret = ib_post_recv(ib_conn->qp, ib_conn->rx_wr, &rx_wr_failed);
        if (ib_ret) {
                iser_err("ib_post_recv failed ret=%d\n", ib_ret);
                ib_conn->post_recv_buf_count -= count;
        } else
                iser_conn->rx_desc_head = my_rx_head;
        return ib_ret;
}

/**
 * iser_post_send - Initiate a Send DTO operation
 *
 * returns 0 on success, -1 on failure
 */
int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc,
                   bool signal)
{
        int ib_ret;
        struct ib_send_wr send_wr, *send_wr_failed;

        ib_dma_sync_single_for_device(ib_conn->device->ib_device,
                                      tx_desc->dma_addr, ISER_HEADERS_LEN,
                                      DMA_TO_DEVICE);

        send_wr.next = NULL;
        send_wr.wr_id = (unsigned long)tx_desc;
        send_wr.sg_list = tx_desc->tx_sg;
        send_wr.num_sge = tx_desc->num_sge;
        send_wr.opcode = IB_WR_SEND;
        send_wr.send_flags = signal ? IB_SEND_SIGNALED : 0;

        ib_ret = ib_post_send(ib_conn->qp, &send_wr, &send_wr_failed);
        if (ib_ret)
                iser_err("ib_post_send failed, ret:%d\n", ib_ret);

        return ib_ret;
}

/**
 * is_iser_tx_desc - Indicate if the completion wr_id
 * is a TX descriptor or not.
 * @iser_conn: iser connection
 * @wr_id: completion WR identifier
 *
 * Since we cannot rely on wc opcode in FLUSH errors
 * we must work around it by checking if the wr_id address
 * falls in the iser connection rx_descs buffer. If so
 * it is an RX descriptor, otherwise it is a TX.
 */
static inline bool
is_iser_tx_desc(struct iser_conn *iser_conn, void *wr_id)
{
        void *start = iser_conn->rx_descs;
        int len = iser_conn->num_rx_descs * sizeof(*iser_conn->rx_descs);

        if (wr_id >= start && wr_id < start + len)
                return false;

        return true;
}

/**
 * iser_handle_comp_error() - Handle error completion
 * @ib_conn: connection RDMA resources
 * @wc: work completion
 *
 * Notes: We may handle a FLUSH error completion and in this case
 * we only cleanup in case TX type was DATAOUT. For non-FLUSH
 * error completion we should also notify iscsi layer that
 * connection is failed (in case we passed bind stage).
 */
static void
iser_handle_comp_error(struct ib_conn *ib_conn,
                       struct ib_wc *wc)
{
        struct iser_conn *iser_conn = container_of(ib_conn, struct iser_conn,
                                                   ib_conn);

        if (wc->status != IB_WC_WR_FLUSH_ERR)
                if (iser_conn->iscsi_conn)
                        iscsi_conn_failure(iser_conn->iscsi_conn,
                                           ISCSI_ERR_CONN_FAILED);

        if (is_iser_tx_desc(iser_conn, (void *)wc->wr_id)) {
                struct iser_tx_desc *desc = (struct iser_tx_desc *)wc->wr_id;

                if (desc->type == ISCSI_TX_DATAOUT)
                        kmem_cache_free(ig.desc_cache, desc);
        } else {
                ib_conn->post_recv_buf_count--;
        }
}

/**
 * iser_handle_wc - handle a single work completion
 * @wc: work completion
 *
 * Soft-IRQ context, work completion can be either
 * SEND or RECV, and can turn out successful or
 * with error (or flush error).
 */
static void iser_handle_wc(struct ib_wc *wc)
{
        struct ib_conn *ib_conn;
        struct iser_tx_desc *tx_desc;
        struct iser_rx_desc *rx_desc;

        ib_conn = wc->qp->qp_context;
        if (wc->status == IB_WC_SUCCESS) {
                if (wc->opcode == IB_WC_RECV) {
                        rx_desc = (struct iser_rx_desc *)wc->wr_id;
                        iser_rcv_completion(rx_desc, wc->byte_len,
                                            ib_conn);
                } else if (wc->opcode == IB_WC_SEND) {
                        tx_desc = (struct iser_tx_desc *)wc->wr_id;
                        iser_snd_completion(tx_desc, ib_conn);
                } else {
                        iser_err("Unknown wc opcode %d\n", wc->opcode);
                }
        } else {
                if (wc->status != IB_WC_WR_FLUSH_ERR)
                        iser_err("wr id %llx status %d vend_err %x\n",
                                 wc->wr_id, wc->status, wc->vendor_err);
                else
                        iser_dbg("flush error: wr id %llx\n", wc->wr_id);

                if (wc->wr_id != ISER_FASTREG_LI_WRID &&
                    wc->wr_id != ISER_BEACON_WRID)
                        iser_handle_comp_error(ib_conn, wc);

                /* complete in case all flush errors were consumed */
                if (wc->wr_id == ISER_BEACON_WRID)
                        complete(&ib_conn->flush_comp);
        }
}

/**
 * iser_cq_tasklet_fn - iSER completion polling loop
 * @data: iSER completion context
 *
 * Soft-IRQ context, polling connection CQ until
 * either CQ was empty or we exhausted polling budget
 */
static void iser_cq_tasklet_fn(unsigned long data)
{
        struct iser_comp *comp = (struct iser_comp *)data;
        struct ib_cq *cq = comp->cq;
        struct ib_wc *const wcs = comp->wcs;
        int i, n, completed = 0;

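        /*
         * Poll the CQ in batches of up to ARRAY_SIZE(comp->wcs) entries and
         * stop once iser_cq_poll_limit completions have been handled, so a
         * single busy CQ cannot monopolize softirq time.
         */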
        while ((n = ib_poll_cq(cq, ARRAY_SIZE(comp->wcs), wcs)) > 0) {
                for (i = 0; i < n; i++)
                        iser_handle_wc(&wcs[i]);

                completed += n;
                if (completed >= iser_cq_poll_limit)
                        break;
        }

        /*
         * It is assumed here that arming CQ only once it's empty
         * would not cause interrupts to be missed.
         */
        ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);

        iser_dbg("got %d completions\n", completed);
}

static void iser_cq_callback(struct ib_cq *cq, void *cq_context)
{
        struct iser_comp *comp = cq_context;

        tasklet_schedule(&comp->tasklet);
}

u8 iser_check_task_pi_status(struct iscsi_iser_task *iser_task,
                             enum iser_data_dir cmd_dir, sector_t *sector)
{
        struct iser_mem_reg *reg = &iser_task->rdma_regd[cmd_dir].reg;
        struct fast_reg_descriptor *desc = reg->mem_h;
        unsigned long sector_size = iser_task->sc->device->sector_size;
        struct ib_mr_status mr_status;
        int ret;

        if (desc && desc->reg_indicators & ISER_FASTREG_PROTECTED) {
                desc->reg_indicators &= ~ISER_FASTREG_PROTECTED;
                ret = ib_check_mr_status(desc->pi_ctx->sig_mr,
                                         IB_MR_CHECK_SIG_STATUS, &mr_status);
                if (ret) {
                        pr_err("ib_check_mr_status failed, ret %d\n", ret);
                        goto err;
                }
                if (mr_status.fail_status & IB_MR_CHECK_SIG_STATUS) {
                        sector_t sector_off = mr_status.sig_err.sig_err_offset;

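                        /*
                         * With protection information interleaved on the
                         * wire, each logical block is followed by an 8-byte
                         * DIF tuple, so the byte offset of the error is
                         * divided by (sector_size + 8) to recover the
                         * failing sector.
                         */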
                        do_div(sector_off, sector_size + 8);
                        *sector = scsi_get_lba(iser_task->sc) + sector_off;

                        pr_err("PI error found type %d at sector %llx "
                               "expected %x vs actual %x\n",
                               mr_status.sig_err.err_type,
                               (unsigned long long)*sector,
                               mr_status.sig_err.expected,
                               mr_status.sig_err.actual);

                        switch (mr_status.sig_err.err_type) {
                        case IB_SIG_BAD_GUARD:
                                return 0x1;
                        case IB_SIG_BAD_REFTAG:
                                return 0x3;
                        case IB_SIG_BAD_APPTAG:
                                return 0x2;
                        }
                }
        }

        return 0;
err:
        /* Not a lot we can do here, return ambiguous guard error */
        return 0x1;
}