1
0
mirror of https://git.FreeBSD.org/src.git synced 2025-01-31 16:57:10 +00:00

Import the mthca kernel-side InfiniBand driver from Linux 4.9 and fix
compilation under FreeBSD. The mthca driver was temporarily removed as
part of the Linux 4.9 RoCE/InfiniBand upgrade.

Top commit in Linux source tree:
69973b830859bc6529a7a0468ba0d80ee5117826

Sponsored by:	Mellanox Technologies
This commit is contained in:
Hans Petter Selasky 2018-02-13 17:04:34 +00:00
parent 3cdd74bb3c
commit 33ec1ccbae
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=329222
33 changed files with 15581 additions and 0 deletions

View File

@ -315,6 +315,7 @@ options DRM_DEBUG # Include debug printfs (slow)
# Requires the iwn firmware module
# ixl: Intel XL710 40Gbe PCIE Ethernet
# ixlv: Intel XL710 40Gbe VF PCIE Ethernet
# mthca: Mellanox HCA InfiniBand
# mlx4ib: Mellanox ConnectX HCA InfiniBand
# mlx4en: Mellanox ConnectX HCA Ethernet
# nfe: nVidia nForce MCP on-board Ethernet Networking (BSD open source)
@ -334,6 +335,7 @@ device iwn # Intel 4965/1000/5000/6000 wireless NICs.
device ixl # Intel XL710 40Gbe PCIE Ethernet
options IXL_IW # Enable iWARP Client Interface in ixl(4)
device ixlv # Intel XL710 40Gbe VF PCIE Ethernet
device mthca # Mellanox HCA InfiniBand
device mlx4 # Shared code module between IB and Ethernet
device mlx4ib # Mellanox ConnectX HCA InfiniBand
device mlx4en # Mellanox ConnectX HCA Ethernet

View File

@ -4551,6 +4551,43 @@ ofed/drivers/infiniband/ulp/sdp/sdp_cma.c optional sdp inet \
ofed/drivers/infiniband/ulp/sdp/sdp_tx.c optional sdp inet \
compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/ulp/sdp/"
dev/mthca/mthca_allocator.c optional mthca pci ofed \
compile-with "${OFED_C}"
dev/mthca/mthca_av.c optional mthca pci ofed \
compile-with "${OFED_C}"
dev/mthca/mthca_catas.c optional mthca pci ofed \
compile-with "${OFED_C}"
dev/mthca/mthca_cmd.c optional mthca pci ofed \
compile-with "${OFED_C}"
dev/mthca/mthca_cq.c optional mthca pci ofed \
compile-with "${OFED_C}"
dev/mthca/mthca_eq.c optional mthca pci ofed \
compile-with "${OFED_C}"
dev/mthca/mthca_mad.c optional mthca pci ofed \
compile-with "${OFED_C}"
dev/mthca/mthca_main.c optional mthca pci ofed \
compile-with "${OFED_C}"
dev/mthca/mthca_mcg.c optional mthca pci ofed \
compile-with "${OFED_C}"
dev/mthca/mthca_memfree.c optional mthca pci ofed \
compile-with "${OFED_C}"
dev/mthca/mthca_mr.c optional mthca pci ofed \
compile-with "${OFED_C}"
dev/mthca/mthca_pd.c optional mthca pci ofed \
compile-with "${OFED_C}"
dev/mthca/mthca_profile.c optional mthca pci ofed \
compile-with "${OFED_C}"
dev/mthca/mthca_provider.c optional mthca pci ofed \
compile-with "${OFED_C}"
dev/mthca/mthca_qp.c optional mthca pci ofed \
compile-with "${OFED_C}"
dev/mthca/mthca_reset.c optional mthca pci ofed \
compile-with "${OFED_C}"
dev/mthca/mthca_srq.c optional mthca pci ofed \
compile-with "${OFED_C}"
dev/mthca/mthca_uar.c optional mthca pci ofed \
compile-with "${OFED_C}"
dev/mlx4/mlx4_ib/mlx4_ib_alias_GUID.c optional mlx4ib pci ofed \
compile-with "${OFED_C}"
dev/mlx4/mlx4_ib/mlx4_ib_mcg.c optional mlx4ib pci ofed \

View File

@ -0,0 +1,301 @@
/*
* Copyright (c) 2004 Topspin Communications. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/bitmap.h>
#include "mthca_dev.h"
/* Trivial bitmap-based allocator */
/*
 * Take one free object number out of the bitmap allocator.
 *
 * The scan for a clear bit starts at alloc->last.  If nothing is free
 * between there and alloc->max, alloc->top is advanced (wrapping under
 * alloc->mask) and the scan restarts from bit 0.  The value returned is
 * the bit index OR'ed with alloc->top, or (u32) -1 when every bit is
 * already taken.  The whole operation runs under alloc->lock.
 */
u32 mthca_alloc(struct mthca_alloc *alloc)
{
	unsigned long irq_flags;
	u32 slot;

	spin_lock_irqsave(&alloc->lock, irq_flags);

	slot = find_next_zero_bit(alloc->table, alloc->max, alloc->last);
	if (slot >= alloc->max) {
		/* nothing free past 'last': bump the top bits and rescan */
		alloc->top = (alloc->top + alloc->max) & alloc->mask;
		slot = find_first_zero_bit(alloc->table, alloc->max);
	}

	if (slot >= alloc->max) {
		/* bitmap is completely full */
		slot = -1;
	} else {
		set_bit(slot, alloc->table);
		slot |= alloc->top;
	}

	spin_unlock_irqrestore(&alloc->lock, irq_flags);

	return slot;
}
/*
 * Hand an object number back to the bitmap allocator.
 *
 * Only the low bits of 'obj' (obj & (max - 1)) select the bitmap slot.
 * Besides clearing that slot, the search hint alloc->last is lowered to
 * the freed slot if it lies below the current hint, and alloc->top is
 * advanced (wrapping under alloc->mask).
 */
void mthca_free(struct mthca_alloc *alloc, u32 obj)
{
	unsigned long irq_flags;
	u32 bit = obj & (alloc->max - 1);

	spin_lock_irqsave(&alloc->lock, irq_flags);

	clear_bit(bit, alloc->table);
	alloc->last = min(alloc->last, bit);
	alloc->top = (alloc->top + alloc->max) & alloc->mask;

	spin_unlock_irqrestore(&alloc->lock, irq_flags);
}
/*
 * Set up a bitmap allocator managing 'num' object numbers.
 *
 * alloc    - allocator state to initialise
 * num      - number of bitmap slots; must be a non-zero power of 2
 * mask     - mask applied whenever alloc->top is advanced
 * reserved - count of leading slots marked busy from the start; callers
 *            are expected to keep this <= num (not checked here)
 *
 * Returns 0 on success, -EINVAL if 'num' is not a non-zero power of 2,
 * or -ENOMEM if the bitmap cannot be allocated.
 */
int mthca_alloc_init(struct mthca_alloc *alloc, u32 num, u32 mask,
		     u32 reserved)
{
	u32 i;

	/*
	 * num must be a power of 2.  Test it with unsigned arithmetic so
	 * that num == 0 is rejected outright instead of being fed to a
	 * negative shift count, and so that 2^31 does not overflow a
	 * signed int.
	 */
	if (num == 0 || (num & (num - 1)) != 0)
		return -EINVAL;

	alloc->last = 0;
	alloc->top  = 0;
	alloc->max  = num;
	alloc->mask = mask;
	spin_lock_init(&alloc->lock);
	alloc->table = kmalloc(BITS_TO_LONGS(num) * sizeof (long),
			       GFP_KERNEL);
	if (!alloc->table)
		return -ENOMEM;

	bitmap_zero(alloc->table, num);
	/* the first 'reserved' slots are never handed out */
	for (i = 0; i < reserved; ++i)
		set_bit(i, alloc->table);

	return 0;
}
/*
 * Release the bitmap that mthca_alloc_init() allocated for 'alloc'.
 * The remaining fields of 'alloc' are left untouched.
 */
void mthca_alloc_cleanup(struct mthca_alloc *alloc)
{
kfree(alloc->table);
}
/*
* Array of pointers with lazy allocation of leaf pages. Callers of
* _get, _set and _clear methods must use a lock or otherwise
* serialize access to the array.
*/
/* Mask extracting the slot number inside one leaf page from an index. */
#define MTHCA_ARRAY_MASK (PAGE_SIZE / sizeof (void *) - 1)

/*
 * Look up the pointer stored at 'index'.  Returns NULL when the leaf
 * page covering that index has not been allocated.
 */
void *mthca_array_get(struct mthca_array *array, int index)
{
	int leaf = (index * sizeof (void *)) >> PAGE_SHIFT;
	void **slots = array->page_list[leaf].page;

	return slots ? slots[index & MTHCA_ARRAY_MASK] : NULL;
}
int mthca_array_set(struct mthca_array *array, int index, void *value)
{
int p = (index * sizeof (void *)) >> PAGE_SHIFT;
/* Allocate with GFP_ATOMIC because we'll be called with locks held. */
if (!array->page_list[p].page)
array->page_list[p].page = (void **) get_zeroed_page(GFP_ATOMIC);
if (!array->page_list[p].page)
return -ENOMEM;
array->page_list[p].page[index & MTHCA_ARRAY_MASK] = value;
++array->page_list[p].used;
return 0;
}
/*
 * Drop the entry at 'index'.  The leaf's use count is decremented; when
 * it reaches zero the whole leaf page is freed, otherwise only the slot
 * is reset to NULL.  A use count that went negative is reported through
 * pr_debug().
 */
void mthca_array_clear(struct mthca_array *array, int index)
{
	int leaf = (index * sizeof (void *)) >> PAGE_SHIFT;

	array->page_list[leaf].used -= 1;

	if (array->page_list[leaf].used != 0) {
		array->page_list[leaf].page[index & MTHCA_ARRAY_MASK] = NULL;
	} else {
		/* last user of this leaf is gone: give the page back */
		free_page((unsigned long) array->page_list[leaf].page);
		array->page_list[leaf].page = NULL;
	}

	if (array->page_list[leaf].used < 0)
		pr_debug("Array %p index %d page %d with ref count %d < 0\n",
			 array, index, leaf, array->page_list[leaf].used);
}
/*
 * Prepare 'array' to hold 'nent' pointers.  Only the table of leaf
 * descriptors is allocated here; every leaf starts out unallocated
 * with a use count of zero.
 *
 * Returns 0 on success or -ENOMEM.
 */
int mthca_array_init(struct mthca_array *array, int nent)
{
	int npage = (nent * sizeof (void *) + PAGE_SIZE - 1) / PAGE_SIZE;
	int i = 0;

	array->page_list = kmalloc(npage * sizeof *array->page_list, GFP_KERNEL);
	if (array->page_list == NULL)
		return -ENOMEM;

	while (i < npage) {
		array->page_list[i].page = NULL;
		array->page_list[i].used = 0;
		++i;
	}

	return 0;
}
void mthca_array_cleanup(struct mthca_array *array, int nent)
{
int i;
for (i = 0; i < (nent * sizeof (void *) + PAGE_SIZE - 1) / PAGE_SIZE; ++i)
free_page((unsigned long) array->page_list[i].page);
kfree(array->page_list);
}
/*
* Handling for queue buffers -- we allocate a bunch of memory and
* register it in a memory region at HCA virtual address 0. If the
* requested size is > max_direct, we split the allocation into
* multiple pages, so we don't require too much contiguous memory.
*/
/*
 * Allocate a DMA-coherent queue buffer of 'size' bytes and register it
 * as a memory region through mthca_mr_alloc_phys().
 *
 * dev        - device whose pdev is used for the DMA allocations
 * size       - requested buffer size in bytes
 * max_direct - largest size still served by one single allocation
 * buf        - receives either direct.buf or a page_list array
 * is_direct  - set to 1 for the single-allocation layout, 0 for the
 *              page-by-page layout
 * pd         - protection domain whose pd_num is used for the MR
 * hca_write  - non-zero adds MTHCA_MPT_FLAG_LOCAL_WRITE to the MR flags
 * mr         - memory region object passed to mthca_mr_alloc_phys()
 *
 * Returns 0 on success, -ENOMEM when an allocation fails, or the error
 * returned by mthca_mr_alloc_phys().  The temporary dma_list is always
 * freed before returning.
 */
int mthca_buf_alloc(struct mthca_dev *dev, int size, int max_direct,
union mthca_buf *buf, int *is_direct, struct mthca_pd *pd,
int hca_write, struct mthca_mr *mr)
{
int err = -ENOMEM;
int npages, shift;
u64 *dma_list = NULL;
dma_addr_t t;
int i;
if (size <= max_direct) {
/* Small buffer: one contiguous coherent allocation. */
*is_direct = 1;
npages = 1;
shift = get_order(size) + PAGE_SHIFT;
buf->direct.buf = dma_alloc_coherent(&dev->pdev->dev,
size, &t, GFP_KERNEL);
if (!buf->direct.buf)
return -ENOMEM;
dma_unmap_addr_set(&buf->direct, mapping, t);
memset(buf->direct.buf, 0, size);
/*
 * Halve the page size (and double the page count) until the DMA
 * address 't' is aligned to the page size described by 'shift'.
 */
while (t & ((1 << shift) - 1)) {
--shift;
npages *= 2;
}
dma_list = kmalloc(npages * sizeof *dma_list, GFP_KERNEL);
if (!dma_list)
goto err_free;
/* Consecutive (1 << shift)-sized pieces of the same allocation. */
for (i = 0; i < npages; ++i)
dma_list[i] = t + i * (1 << shift);
} else {
/* Large buffer: one PAGE_SIZE coherent allocation per page. */
*is_direct = 0;
npages = (size + PAGE_SIZE - 1) / PAGE_SIZE;
shift = PAGE_SHIFT;
dma_list = kmalloc(npages * sizeof *dma_list, GFP_KERNEL);
if (!dma_list)
return -ENOMEM;
buf->page_list = kmalloc(npages * sizeof *buf->page_list,
GFP_KERNEL);
if (!buf->page_list)
goto err_out;
/* Pre-clear every slot before any page is allocated. */
for (i = 0; i < npages; ++i)
buf->page_list[i].buf = NULL;
for (i = 0; i < npages; ++i) {
buf->page_list[i].buf =
dma_alloc_coherent(&dev->pdev->dev, PAGE_SIZE,
&t, GFP_KERNEL);
/*
 * NOTE(review): on a partial failure mthca_buf_free() below walks all
 * npages slots, including the ones still NULL -- confirm that
 * dma_free_coherent() tolerates a NULL buffer on this platform.
 */
if (!buf->page_list[i].buf)
goto err_free;
dma_list[i] = t;
dma_unmap_addr_set(&buf->page_list[i], mapping, t);
clear_page(buf->page_list[i].buf);
}
}
/* Register the pages as a memory region starting at address 0. */
err = mthca_mr_alloc_phys(dev, pd->pd_num,
dma_list, shift, npages,
0, size,
MTHCA_MPT_FLAG_LOCAL_READ |
(hca_write ? MTHCA_MPT_FLAG_LOCAL_WRITE : 0),
mr);
if (err)
goto err_free;
kfree(dma_list);
return 0;
err_free:
/* mr is passed as NULL: only the buffer memory is released here. */
mthca_buf_free(dev, size, buf, *is_direct, NULL);
err_out:
kfree(dma_list);
return err;
}
/*
 * Release a queue buffer obtained from mthca_buf_alloc().
 *
 * 'size' and 'is_direct' must match the values used or produced at
 * allocation time.  A non-NULL 'mr' is freed first via mthca_free_mr();
 * pass NULL when no memory region is to be released.
 */
void mthca_buf_free(struct mthca_dev *dev, int size, union mthca_buf *buf,
		    int is_direct, struct mthca_mr *mr)
{
	int i;

	if (mr)
		mthca_free_mr(dev, mr);

	if (is_direct) {
		/* single contiguous allocation */
		dma_free_coherent(&dev->pdev->dev, size, buf->direct.buf,
				  dma_unmap_addr(&buf->direct, mapping));
		return;
	}

	/* page-by-page layout: free each page, then the page table */
	for (i = 0; i < (size + PAGE_SIZE - 1) / PAGE_SIZE; ++i) {
		dma_free_coherent(&dev->pdev->dev, PAGE_SIZE,
				  buf->page_list[i].buf,
				  dma_unmap_addr(&buf->page_list[i],
						 mapping));
	}
	kfree(buf->page_list);
}

374
sys/dev/mthca/mthca_av.c Normal file
View File

@ -0,0 +1,374 @@
/*
* Copyright (c) 2004 Topspin Communications. All rights reserved.
* Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/string.h>
#include <linux/slab.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_cache.h>
#include "mthca_dev.h"
/*
 * Static rate encodings used on the non-mem-free path; see
 * tavor_rate_to_ib() and ib_rate_to_tavor() for the mapping to and
 * from enum ib_rate.
 */
enum {
MTHCA_RATE_TAVOR_FULL = 0,
MTHCA_RATE_TAVOR_1X = 1,
MTHCA_RATE_TAVOR_4X = 2,
MTHCA_RATE_TAVOR_1X_DDR = 3
};
/*
 * Static rate encodings used on the mem-free path.  Each value names a
 * fraction of the port rate; see memfree_rate_to_ib() and
 * ib_rate_to_memfree().
 */
enum {
MTHCA_RATE_MEMFREE_FULL = 0,
MTHCA_RATE_MEMFREE_QUARTER = 1,
MTHCA_RATE_MEMFREE_EIGHTH = 2,
MTHCA_RATE_MEMFREE_HALF = 3
};
/*
 * Address vector as filled in by mthca_create_ah().  Multi-byte fields
 * are stored big-endian.  The structure is cleared and copied using
 * MTHCA_AV_SIZE, so the field order and sizes must not change.
 */
struct mthca_av {
__be32 port_pd; /* port number in the top byte, PD number below */
u8 reserved1;
u8 g_slid; /* bit 7: GRH present; low 7 bits: source path bits */
__be16 dlid;
u8 reserved2;
u8 gid_index;
u8 msg_sr; /* static rate in the low bits, message size code above */
u8 hop_limit;
__be32 sl_tclass_flowlabel; /* SL [31:28], traffic class [27:20], flow label [19:0] */
__be32 dgid[4];
};
/*
 * Convert a mem-free static rate encoding to an ib_rate by scaling the
 * port rate multiplier down by the encoded fraction.  Unknown encodings
 * are treated like MTHCA_RATE_MEMFREE_FULL.
 */
static enum ib_rate memfree_rate_to_ib(u8 mthca_rate, u8 port_rate)
{
	unsigned int shift;

	switch (mthca_rate) {
	case MTHCA_RATE_MEMFREE_EIGHTH:
		shift = 3;
		break;
	case MTHCA_RATE_MEMFREE_QUARTER:
		shift = 2;
		break;
	case MTHCA_RATE_MEMFREE_HALF:
		shift = 1;
		break;
	case MTHCA_RATE_MEMFREE_FULL:
	default:
		shift = 0;
		break;
	}

	return mult_to_ib_rate(port_rate >> shift);
}
/*
 * Convert a Tavor static rate encoding to an ib_rate.  The three fixed
 * encodings map to fixed rates; anything else yields the rate derived
 * from the port rate multiplier.
 */
static enum ib_rate tavor_rate_to_ib(u8 mthca_rate, u8 port_rate)
{
	if (mthca_rate == MTHCA_RATE_TAVOR_1X)
		return IB_RATE_2_5_GBPS;
	if (mthca_rate == MTHCA_RATE_TAVOR_1X_DDR)
		return IB_RATE_5_GBPS;
	if (mthca_rate == MTHCA_RATE_TAVOR_4X)
		return IB_RATE_10_GBPS;

	return mult_to_ib_rate(port_rate);
}
/*
 * Translate a device static rate encoding for 'port' (1-based) into an
 * ib_rate, choosing the decoder that matches the HCA family.
 */
enum ib_rate mthca_rate_to_ib(struct mthca_dev *dev, u8 mthca_rate, u8 port)
{
	if (!mthca_is_memfree(dev))
		return tavor_rate_to_ib(mthca_rate, dev->rate[port - 1]);

	/* Handle old Arbel FW */
	if (dev->limits.stat_rate_support == 0x3 && mthca_rate)
		return IB_RATE_2_5_GBPS;

	return memfree_rate_to_ib(mthca_rate, dev->rate[port - 1]);
}
/*
 * Pick the mem-free rate encoding that slows a port running at
 * multiplier 'cur_rate' down to no more than 'req_rate'.  Returns 0
 * when the port is not faster than the requested rate.
 */
static u8 ib_rate_to_memfree(u8 req_rate, u8 cur_rate)
{
	int ipd;

	if (cur_rate <= req_rate)
		return 0;

	/*
	 * Inter-packet delay (IPD) to get from rate X down to a rate
	 * no more than Y is (X - 1) / Y.
	 */
	ipd = (cur_rate - 1) / req_rate;

	if (ipd == 0)
		return MTHCA_RATE_MEMFREE_FULL;
	if (ipd == 1)
		return MTHCA_RATE_MEMFREE_HALF;
	if (ipd == 2 || ipd == 3)
		return MTHCA_RATE_MEMFREE_QUARTER;

	return MTHCA_RATE_MEMFREE_EIGHTH;
}
/*
 * Map an ib_rate value to the Tavor static rate encoding.  Rates with
 * no dedicated encoding fall back to MTHCA_RATE_TAVOR_FULL.
 */
static u8 ib_rate_to_tavor(u8 static_rate)
{
	if (static_rate == IB_RATE_2_5_GBPS)
		return MTHCA_RATE_TAVOR_1X;
	if (static_rate == IB_RATE_5_GBPS)
		return MTHCA_RATE_TAVOR_1X_DDR;
	if (static_rate == IB_RATE_10_GBPS)
		return MTHCA_RATE_TAVOR_4X;

	return MTHCA_RATE_TAVOR_FULL;
}
/*
 * Compute the device static rate encoding for a requested ib_rate on
 * 'port' (1-based).
 *
 * Returns 0 when no rate was requested or the request is not below the
 * port rate.  Otherwise the family-specific encoding is returned, or 1
 * if that encoding's bit is not set in limits.stat_rate_support.
 */
u8 mthca_get_rate(struct mthca_dev *dev, int static_rate, u8 port)
{
	u8 rate;

	if (!static_rate)
		return 0;
	if (ib_rate_to_mult(static_rate) >= dev->rate[port - 1])
		return 0;

	rate = mthca_is_memfree(dev) ?
		ib_rate_to_memfree(ib_rate_to_mult(static_rate),
				   dev->rate[port - 1]) :
		ib_rate_to_tavor(static_rate);

	if ((dev->limits.stat_rate_support & (1 << rate)) == 0)
		rate = 1;

	return rate;
}
/*
 * Build the address vector (AV) for 'ah' from 'ah_attr'.
 *
 * Where the AV lives depends on the device:
 *  - mem-free HCA: kmalloc'ed host memory (MTHCA_AH_KMALLOC);
 *  - otherwise, when the PD has no special QPs and DDR is not hidden:
 *    a slot of the on-HCA AV table (MTHCA_AH_ON_HCA), filled through a
 *    temporary host copy that is written out with memcpy_toio();
 *  - otherwise, or when the on-HCA attempt fails: the PCI pool
 *    (MTHCA_AH_PCI_POOL).
 *
 * Returns 0 on success or -ENOMEM if no AV memory could be obtained.
 */
int mthca_create_ah(struct mthca_dev *dev,
		    struct mthca_pd *pd,
		    struct ib_ah_attr *ah_attr,
		    struct mthca_ah *ah)
{
	u32 index = -1;
	struct mthca_av *av = NULL;

	ah->type = MTHCA_AH_PCI_POOL;

	if (mthca_is_memfree(dev)) {
		ah->av   = kmalloc(sizeof *ah->av, GFP_ATOMIC);
		if (!ah->av)
			return -ENOMEM;

		ah->type = MTHCA_AH_KMALLOC;
		av       = ah->av;
	} else if (!atomic_read(&pd->sqp_count) &&
		 !(dev->mthca_flags & MTHCA_FLAG_DDR_HIDDEN)) {
		index = mthca_alloc(&dev->av_table.alloc);

		/* fall back to allocate in host memory */
		if (index == -1)
			goto on_hca_fail;

		av = kmalloc(sizeof *av, GFP_ATOMIC);
		if (!av) {
			/*
			 * Give the AV table slot back before falling back
			 * to the PCI pool; otherwise the slot would stay
			 * marked busy forever.
			 */
			mthca_free(&dev->av_table.alloc, index);
			index = -1;
			goto on_hca_fail;
		}

		ah->type = MTHCA_AH_ON_HCA;
		ah->avdma  = dev->av_table.ddr_av_base +
			index * MTHCA_AV_SIZE;
	}

on_hca_fail:
	if (ah->type == MTHCA_AH_PCI_POOL) {
		ah->av = pci_pool_alloc(dev->av_table.pool,
					GFP_ATOMIC, &ah->avdma);
		if (!ah->av)
			return -ENOMEM;

		av = ah->av;
	}

	ah->key = pd->ntmr.ibmr.lkey;

	memset(av, 0, MTHCA_AV_SIZE);

	av->port_pd = cpu_to_be32(pd->pd_num | (ah_attr->port_num << 24));
	av->g_slid  = ah_attr->src_path_bits;
	av->dlid    = cpu_to_be16(ah_attr->dlid);
	av->msg_sr  = (3 << 4) | /* 2K message */
		mthca_get_rate(dev, ah_attr->static_rate, ah_attr->port_num);
	av->sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 28);
	if (ah_attr->ah_flags & IB_AH_GRH) {
		av->g_slid |= 0x80;
		av->gid_index = (ah_attr->port_num - 1) * dev->limits.gid_table_len +
			ah_attr->grh.sgid_index;
		av->hop_limit = ah_attr->grh.hop_limit;
		av->sl_tclass_flowlabel |=
			cpu_to_be32((ah_attr->grh.traffic_class << 20) |
				    ah_attr->grh.flow_label);
		memcpy(av->dgid, ah_attr->grh.dgid.raw, 16);
	} else {
		/* Arbel workaround -- low byte of GID must be 2 */
		av->dgid[3] = cpu_to_be32(2);
	}

	/* Compiled-out debug dump of the AV contents. */
	if (0) {
		int j;

		mthca_dbg(dev, "Created UDAV at %p/%08lx:\n",
			  av, (unsigned long) ah->avdma);
		for (j = 0; j < 8; ++j)
			printk(KERN_DEBUG "  [%2x] %08x\n",
			       j * 4, be32_to_cpu(((__be32 *) av)[j]));
	}

	if (ah->type == MTHCA_AH_ON_HCA) {
		/* copy the staged AV into device memory, drop the staging copy */
		memcpy_toio(dev->av_table.av_map + index * MTHCA_AV_SIZE,
			    av, MTHCA_AV_SIZE);
		kfree(av);
	}

	return 0;
}
/*
 * Release the address vector storage of 'ah' according to where
 * mthca_create_ah() placed it.  Always returns 0.
 */
int mthca_destroy_ah(struct mthca_dev *dev, struct mthca_ah *ah)
{
	if (ah->type == MTHCA_AH_ON_HCA) {
		/* recover the AV table index from the AV's device address */
		mthca_free(&dev->av_table.alloc,
			   (ah->avdma - dev->av_table.ddr_av_base) /
			   MTHCA_AV_SIZE);
	} else if (ah->type == MTHCA_AH_PCI_POOL) {
		pci_pool_free(dev->av_table.pool, ah->av, ah->avdma);
	} else if (ah->type == MTHCA_AH_KMALLOC) {
		kfree(ah->av);
	}

	return 0;
}
/*
 * Returns 1 if the AV of 'ah' has the GRH bit (bit 7 of g_slid) set,
 * 0 otherwise.
 */
int mthca_ah_grh_present(struct mthca_ah *ah)
{
	return (ah->av->g_slid & 0x80) != 0;
}
int mthca_read_ah(struct mthca_dev *dev, struct mthca_ah *ah,
struct ib_ud_header *header)
{
if (ah->type == MTHCA_AH_ON_HCA)
return -EINVAL;
header->lrh.service_level = be32_to_cpu(ah->av->sl_tclass_flowlabel) >> 28;
header->lrh.destination_lid = ah->av->dlid;
header->lrh.source_lid = cpu_to_be16(ah->av->g_slid & 0x7f);
if (mthca_ah_grh_present(ah)) {
header->grh.traffic_class =
(be32_to_cpu(ah->av->sl_tclass_flowlabel) >> 20) & 0xff;
header->grh.flow_label =
ah->av->sl_tclass_flowlabel & cpu_to_be32(0xfffff);
header->grh.hop_limit = ah->av->hop_limit;
ib_get_cached_gid(&dev->ib_dev,
be32_to_cpu(ah->av->port_pd) >> 24,
ah->av->gid_index % dev->limits.gid_table_len,
&header->grh.source_gid, NULL);
memcpy(header->grh.destination_gid.raw,
ah->av->dgid, 16);
}
return 0;
}
int mthca_ah_query(struct ib_ah *ibah, struct ib_ah_attr *attr)
{
struct mthca_ah *ah = to_mah(ibah);
struct mthca_dev *dev = to_mdev(ibah->device);
/* Only implement for MAD and memfree ah for now. */
if (ah->type == MTHCA_AH_ON_HCA)
return -ENOSYS;
memset(attr, 0, sizeof *attr);
attr->dlid = be16_to_cpu(ah->av->dlid);
attr->sl = be32_to_cpu(ah->av->sl_tclass_flowlabel) >> 28;
attr->port_num = be32_to_cpu(ah->av->port_pd) >> 24;
attr->static_rate = mthca_rate_to_ib(dev, ah->av->msg_sr & 0x7,
attr->port_num);
attr->src_path_bits = ah->av->g_slid & 0x7F;
attr->ah_flags = mthca_ah_grh_present(ah) ? IB_AH_GRH : 0;
if (attr->ah_flags) {
attr->grh.traffic_class =
be32_to_cpu(ah->av->sl_tclass_flowlabel) >> 20;
attr->grh.flow_label =
be32_to_cpu(ah->av->sl_tclass_flowlabel) & 0xfffff;
attr->grh.hop_limit = ah->av->hop_limit;
attr->grh.sgid_index = ah->av->gid_index &
(dev->limits.gid_table_len - 1);
memcpy(attr->grh.dgid.raw, ah->av->dgid, 16);
}
return 0;
}
/*
 * Set up the AV resources of a non-mem-free HCA: the index allocator
 * for on-HCA AVs, the PCI pool for host AVs and, unless DDR is hidden,
 * the mapping of the on-HCA AV table.  Nothing is done for mem-free
 * devices.
 *
 * Returns 0 on success, the error from mthca_alloc_init(), or -ENOMEM.
 */
int mthca_init_av_table(struct mthca_dev *dev)
{
	int err;

	if (mthca_is_memfree(dev))
		return 0;

	err = mthca_alloc_init(&dev->av_table.alloc,
			       dev->av_table.num_ddr_avs,
			       dev->av_table.num_ddr_avs - 1,
			       0);
	if (err)
		return err;

	dev->av_table.pool = pci_pool_create("mthca_av", dev->pdev,
					     MTHCA_AV_SIZE,
					     MTHCA_AV_SIZE, 0);
	if (!dev->av_table.pool)
		goto out_free_alloc;

	if (dev->mthca_flags & MTHCA_FLAG_DDR_HIDDEN) {
		/* no access to HCA DDR: there is nothing to map */
		dev->av_table.av_map = NULL;
		return 0;
	}

	dev->av_table.av_map = ioremap(pci_resource_start(dev->pdev, 4) +
				       dev->av_table.ddr_av_base -
				       dev->ddr_start,
				       dev->av_table.num_ddr_avs *
				       MTHCA_AV_SIZE);
	if (!dev->av_table.av_map)
		goto out_free_pool;

	return 0;

out_free_pool:
	pci_pool_destroy(dev->av_table.pool);

out_free_alloc:
	mthca_alloc_cleanup(&dev->av_table.alloc);
	return -ENOMEM;
}
void mthca_cleanup_av_table(struct mthca_dev *dev)
{
if (mthca_is_memfree(dev))
return;
if (dev->av_table.av_map)
iounmap(dev->av_table.av_map);
pci_pool_destroy(dev->av_table.pool);
mthca_alloc_cleanup(&dev->av_table.alloc);
}

202
sys/dev/mthca/mthca_catas.c Normal file
View File

@ -0,0 +1,202 @@
/*
* Copyright (c) 2005 Cisco Systems. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#define LINUXKPI_PARAM_PREFIX mthca_
#include <linux/jiffies.h>
#include <linux/module.h>
#include <linux/timer.h>
#include <linux/workqueue.h>
#include "mthca_dev.h"
/* Delay, in jiffies, between two polls of the catastrophic error buffer. */
#define MTHCA_CATAS_POLL_INTERVAL (5 * HZ)
/*
 * Error type codes; handle_catas() compares them against the top byte
 * of the first (byte-swapped) word of the error buffer.
 */
enum {
MTHCA_CATAS_TYPE_INTERNAL = 0,
MTHCA_CATAS_TYPE_UPLINK = 3,
MTHCA_CATAS_TYPE_DDR = 4,
MTHCA_CATAS_TYPE_PARITY = 5,
};
/*
 * catas_list collects devices that reported a catastrophic error and
 * await a reset by catas_reset(); additions, the splice in
 * catas_reset() and removals are done under catas_lock.
 */
static DEFINE_SPINLOCK(catas_lock);
static LIST_HEAD(catas_list);
/* Workqueue and work item that run catas_reset(). */
static struct workqueue_struct *catas_wq;
static struct work_struct catas_work;
/* When non-zero, handle_catas() only reports the error and skips the reset. */
static int catas_reset_disable;
module_param_named(catas_reset_disable, catas_reset_disable, int, 0644);
MODULE_PARM_DESC(catas_reset_disable, "disable reset on catastrophic event if nonzero");
static void catas_reset(struct work_struct *work)
{
struct mthca_dev *dev, *tmpdev;
LIST_HEAD(tlist);
int ret;
mutex_lock(&mthca_device_mutex);
spin_lock_irq(&catas_lock);
list_splice_init(&catas_list, &tlist);
spin_unlock_irq(&catas_lock);
list_for_each_entry_safe(dev, tmpdev, &tlist, catas_err.list) {
struct pci_dev *pdev = dev->pdev;
ret = __mthca_restart_one(dev->pdev);
/* 'dev' now is not valid */
if (ret)
printk(KERN_ERR "mthca %s: Reset failed (%d)\n",
pci_name(pdev), ret);
else {
struct mthca_dev *d = pci_get_drvdata(pdev);
mthca_dbg(d, "Reset succeeded\n");
}
}
mutex_unlock(&mthca_device_mutex);
}
static void handle_catas(struct mthca_dev *dev)
{
struct ib_event event;
unsigned long flags;
const char *type;
int i;
event.device = &dev->ib_dev;
event.event = IB_EVENT_DEVICE_FATAL;
event.element.port_num = 0;
dev->active = false;
ib_dispatch_event(&event);
switch (swab32(readl(dev->catas_err.map)) >> 24) {
case MTHCA_CATAS_TYPE_INTERNAL:
type = "internal error";
break;
case MTHCA_CATAS_TYPE_UPLINK:
type = "uplink bus error";
break;
case MTHCA_CATAS_TYPE_DDR:
type = "DDR data error";
break;
case MTHCA_CATAS_TYPE_PARITY:
type = "internal parity error";
break;
default:
type = "unknown error";
break;
}
mthca_err(dev, "Catastrophic error detected: %s\n", type);
for (i = 0; i < dev->catas_err.size; ++i)
mthca_err(dev, " buf[%02x]: %08x\n",
i, swab32(readl(dev->catas_err.map + i)));
if (catas_reset_disable)
return;
spin_lock_irqsave(&catas_lock, flags);
list_add(&dev->catas_err.list, &catas_list);
queue_work(catas_wq, &catas_work);
spin_unlock_irqrestore(&catas_lock, flags);
}
/*
 * Timer callback: scan the catastrophic error buffer.  Any non-zero
 * word triggers handle_catas() and the timer is not re-armed;
 * otherwise the next poll is scheduled.
 */
static void poll_catas(unsigned long dev_ptr)
{
	struct mthca_dev *dev = (struct mthca_dev *) dev_ptr;
	int word = 0;

	while (word < dev->catas_err.size) {
		if (readl(dev->catas_err.map + word)) {
			handle_catas(dev);
			return;
		}
		++word;
	}

	mod_timer(&dev->catas_err.timer,
		  round_jiffies(jiffies + MTHCA_CATAS_POLL_INTERVAL));
}
/*
 * Map the catastrophic error buffer of 'dev' and start the timer that
 * polls it every MTHCA_CATAS_POLL_INTERVAL.
 *
 * If the buffer cannot be mapped a warning is logged and polling is
 * simply not started; catas_err.map stays NULL in that case.
 */
void mthca_start_catas_poll(struct mthca_dev *dev)
{
	phys_addr_t addr;

	init_timer(&dev->catas_err.timer);
	dev->catas_err.map  = NULL;
	/*
	 * Initialise the list node before anything can fail:
	 * mthca_stop_catas_poll() calls list_del() on it unconditionally,
	 * including after the early return below.
	 */
	INIT_LIST_HEAD(&dev->catas_err.list);

	addr = pci_resource_start(dev->pdev, 0) +
		((pci_resource_len(dev->pdev, 0) - 1) &
		 dev->catas_err.addr);

	/* catas_err.size counts 32-bit words, hence the factor of 4 */
	dev->catas_err.map = ioremap(addr, dev->catas_err.size * 4);
	if (!dev->catas_err.map) {
		mthca_warn(dev, "couldn't map catastrophic error region "
			   "at 0x%llx/0x%x\n", (unsigned long long) addr,
			   dev->catas_err.size * 4);
		return;
	}

	dev->catas_err.timer.data     = (unsigned long) dev;
	dev->catas_err.timer.function = poll_catas;
	dev->catas_err.timer.expires  = jiffies + MTHCA_CATAS_POLL_INTERVAL;
	add_timer(&dev->catas_err.timer);
}
/*
 * Stop polling for catastrophic errors on 'dev' and release what
 * mthca_start_catas_poll() set up.
 */
void mthca_stop_catas_poll(struct mthca_dev *dev)
{
/* Stop the timer first: poll_catas() reads through catas_err.map. */
del_timer_sync(&dev->catas_err.timer);
/* map is NULL when mthca_start_catas_poll() failed to map the buffer */
if (dev->catas_err.map)
iounmap(dev->catas_err.map);
/* Unlink the device from whatever list its catas_err.list node is on. */
spin_lock_irq(&catas_lock);
list_del(&dev->catas_err.list);
spin_unlock_irq(&catas_lock);
}
/*
 * Module-wide setup for catastrophic error handling: prepare the reset
 * work item and create the ordered workqueue that runs it.
 *
 * Returns 0 on success or -ENOMEM if the workqueue cannot be created.
 */
int __init mthca_catas_init(void)
{
	INIT_WORK(&catas_work, catas_reset);

	catas_wq = alloc_ordered_workqueue("mthca_catas", WQ_MEM_RECLAIM);

	return catas_wq ? 0 : -ENOMEM;
}
/* Destroy the workqueue created by mthca_catas_init(). */
void mthca_catas_cleanup(void)
{
destroy_workqueue(catas_wq);
}

1968
sys/dev/mthca/mthca_cmd.c Normal file

File diff suppressed because it is too large Load Diff

325
sys/dev/mthca/mthca_cmd.h Normal file
View File

@ -0,0 +1,325 @@
/*
* Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
* Copyright (c) 2005 Mellanox Technologies. All rights reserved.
* Copyright (c) 2006 Cisco Systems. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef MTHCA_CMD_H
#define MTHCA_CMD_H
#include <rdma/ib_verbs.h>
#define MTHCA_MAILBOX_SIZE 4096
/* Command status codes; each value is described by the comment above it. */
enum {
/* command completed successfully: */
MTHCA_CMD_STAT_OK = 0x00,
/* Internal error (such as a bus error) occurred while processing command: */
MTHCA_CMD_STAT_INTERNAL_ERR = 0x01,
/* Operation/command not supported or opcode modifier not supported: */
MTHCA_CMD_STAT_BAD_OP = 0x02,
/* Parameter not supported or parameter out of range: */
MTHCA_CMD_STAT_BAD_PARAM = 0x03,
/* System not enabled or bad system state: */
MTHCA_CMD_STAT_BAD_SYS_STATE = 0x04,
/* Attempt to access reserved or unallocated resource: */
MTHCA_CMD_STAT_BAD_RESOURCE = 0x05,
/* Requested resource is currently executing a command, or is otherwise busy: */
MTHCA_CMD_STAT_RESOURCE_BUSY = 0x06,
/* memory error: */
MTHCA_CMD_STAT_DDR_MEM_ERR = 0x07,
/* Required capability exceeds device limits: */
MTHCA_CMD_STAT_EXCEED_LIM = 0x08,
/* Resource is not in the appropriate state or ownership: */
MTHCA_CMD_STAT_BAD_RES_STATE = 0x09,
/* Index out of range: */
MTHCA_CMD_STAT_BAD_INDEX = 0x0a,
/* FW image corrupted: */
MTHCA_CMD_STAT_BAD_NVMEM = 0x0b,
/* Attempt to modify a QP/EE which is not in the presumed state: */
MTHCA_CMD_STAT_BAD_QPEE_STATE = 0x10,
/* Bad segment parameters (Address/Size): */
MTHCA_CMD_STAT_BAD_SEG_PARAM = 0x20,
/* Memory Region has Memory Windows bound to: */
MTHCA_CMD_STAT_REG_BOUND = 0x21,
/* HCA local attached memory not present: */
MTHCA_CMD_STAT_LAM_NOT_PRE = 0x22,
/* Bad management packet (silently discarded): */
MTHCA_CMD_STAT_BAD_PKT = 0x30,
/* More outstanding CQEs in CQ than new CQ size: */
MTHCA_CMD_STAT_BAD_SIZE = 0x40
};
/*
 * State transition identifiers, numbered consecutively from
 * MTHCA_TRANS_INVALID = 0.  The names follow a <from>2<to> pattern.
 * NOTE(review): presumably these name QP/EE state transitions for
 * mthca_MODIFY_QP() -- confirm against the users of these constants.
 */
enum {
MTHCA_TRANS_INVALID = 0,
MTHCA_TRANS_RST2INIT,
MTHCA_TRANS_INIT2INIT,
MTHCA_TRANS_INIT2RTR,
MTHCA_TRANS_RTR2RTS,
MTHCA_TRANS_RTS2RTS,
MTHCA_TRANS_SQERR2RTS,
MTHCA_TRANS_ANY2ERR,
MTHCA_TRANS_RTS2SQD,
MTHCA_TRANS_SQD2SQD,
MTHCA_TRANS_SQD2RTS,
MTHCA_TRANS_ANY2RST,
};
/*
 * Single-bit capability flags.  Bits 10-15 are not assigned here.
 * NOTE(review): presumably these are the bits of
 * struct mthca_dev_lim.flags -- confirm against mthca_QUERY_DEV_LIM().
 */
enum {
DEV_LIM_FLAG_RC = 1 << 0,
DEV_LIM_FLAG_UC = 1 << 1,
DEV_LIM_FLAG_UD = 1 << 2,
DEV_LIM_FLAG_RD = 1 << 3,
DEV_LIM_FLAG_RAW_IPV6 = 1 << 4,
DEV_LIM_FLAG_RAW_ETHER = 1 << 5,
DEV_LIM_FLAG_SRQ = 1 << 6,
DEV_LIM_FLAG_IPOIB_CSUM = 1 << 7,
DEV_LIM_FLAG_BAD_PKEY_CNTR = 1 << 8,
DEV_LIM_FLAG_BAD_QKEY_CNTR = 1 << 9,
DEV_LIM_FLAG_MW = 1 << 16,
DEV_LIM_FLAG_AUTO_PATH_MIG = 1 << 17,
DEV_LIM_FLAG_ATOMIC = 1 << 18,
DEV_LIM_FLAG_RAW_MULTI = 1 << 19,
DEV_LIM_FLAG_UD_AV_PORT_ENFORCE = 1 << 20,
DEV_LIM_FLAG_UD_MULTI = 1 << 21,
};
/*
 * Command mailbox handle, obtained from mthca_alloc_mailbox() and
 * released with mthca_free_mailbox().
 * NOTE(review): presumably 'buf' is the CPU address and 'dma' the bus
 * address of the same buffer -- confirm in mthca_cmd.c.
 */
struct mthca_mailbox {
dma_addr_t dma;
void *buf;
};
/*
 * Device limits and capabilities; this is the output argument of
 * mthca_QUERY_DEV_LIM().
 * NOTE(review): units and encodings of the individual fields are not
 * visible in this header -- consult mthca_QUERY_DEV_LIM() before
 * relying on them.
 */
struct mthca_dev_lim {
int max_srq_sz;
int max_qp_sz;
int reserved_qps;
int max_qps;
int reserved_srqs;
int max_srqs;
int reserved_eecs;
int max_eecs;
int max_cq_sz;
int reserved_cqs;
int max_cqs;
int max_mpts;
int reserved_eqs;
int max_eqs;
int reserved_mtts;
int max_mrw_sz;
int reserved_mrws;
int max_mtt_seg;
int max_requester_per_qp;
int max_responder_per_qp;
int max_rdma_global;
int local_ca_ack_delay;
int max_mtu;
int max_port_width;
int max_vl;
int num_ports;
int max_gids;
u16 stat_rate_support;
int max_pkeys;
u32 flags;
int reserved_uars;
int uar_size;
int min_page_sz;
int max_sg;
int max_desc_sz;
int max_qp_per_mcg;
int reserved_mgms;
int max_mcgs;
int reserved_pds;
int max_pds;
int reserved_rdds;
int max_rdds;
int eec_entry_sz;
int qpc_entry_sz;
int eeec_entry_sz;
int eqpc_entry_sz;
int eqc_entry_sz;
int cqc_entry_sz;
int srq_entry_sz;
int uar_scratch_entry_sz;
int mpt_entry_sz;
/* Family-specific limits; only one member applies to a given device. */
union {
struct {
int max_avs;
} tavor;
struct {
int resize_srq;
int max_pbl_sz;
u8 bmme_flags;
u32 reserved_lkey;
int lam_required;
u64 max_icm_sz;
} arbel;
} hca;
};
/*
 * Adapter identification; this is the output argument of
 * mthca_QUERY_ADAPTER().  MTHCA_BOARD_ID_LEN is defined outside this
 * header.
 */
struct mthca_adapter {
u32 vendor_id;
u32 device_id;
u32 revision_id;
char board_id[MTHCA_BOARD_ID_LEN];
u8 inta_pin;
};
/*
 * Parameter block passed to mthca_INIT_HCA(): a set of 64-bit base
 * addresses followed by size fields, most of them named log_*.
 * NOTE(review): the log_* fields are presumably base-2 logarithms of
 * the corresponding table sizes -- confirm in mthca_INIT_HCA().
 */
struct mthca_init_hca_param {
u64 qpc_base;
u64 eec_base;
u64 srqc_base;
u64 cqc_base;
u64 eqpc_base;
u64 eeec_base;
u64 eqc_base;
u64 rdb_base;
u64 mc_base;
u64 mpt_base;
u64 mtt_base;
u64 uar_scratch_base;
u64 uarc_base;
u16 log_mc_entry_sz;
u16 mc_hash_sz;
u8 log_num_qps;
u8 log_num_eecs;
u8 log_num_srqs;
u8 log_num_cqs;
u8 log_num_eqs;
u8 log_mc_table_sz;
u8 mtt_seg_sz;
u8 log_mpt_sz;
u8 log_uar_sz;
u8 log_uarc_sz;
};
/*
 * Per-port parameter block passed to mthca_INIT_IB().
 * NOTE(review): each set_* flag presumably tells the command whether
 * the GUID value that follows it should be applied -- confirm in
 * mthca_INIT_IB().
 */
struct mthca_init_ib_param {
int port_width;
int vl_cap;
int mtu_cap;
u16 gid_cap;
u16 pkey_cap;
int set_guid0;
u64 guid0;
int set_node_guid;
u64 node_guid;
int set_si_guid;
u64 si_guid;
};
/*
 * Per-port parameter block passed to mthca_SET_IB().
 * NOTE(review): field semantics are defined by the command
 * implementation, which is not visible here -- confirm in
 * mthca_SET_IB().
 */
struct mthca_set_ib_param {
int set_si_guid;
int reset_qkey_viol;
u64 si_guid;
u32 cap_mask;
};
int mthca_cmd_init(struct mthca_dev *dev);
void mthca_cmd_cleanup(struct mthca_dev *dev);
int mthca_cmd_use_events(struct mthca_dev *dev);
void mthca_cmd_use_polling(struct mthca_dev *dev);
void mthca_cmd_event(struct mthca_dev *dev, u16 token,
u8 status, u64 out_param);
struct mthca_mailbox *mthca_alloc_mailbox(struct mthca_dev *dev,
gfp_t gfp_mask);
void mthca_free_mailbox(struct mthca_dev *dev, struct mthca_mailbox *mailbox);
/*
 * Device-level commands: one wrapper per command, named after the
 * command it issues.  All return int; presumably 0 on success and
 * non-zero on failure -- confirm in the command implementation.
 */
int mthca_SYS_EN(struct mthca_dev *dev);
int mthca_SYS_DIS(struct mthca_dev *dev);
int mthca_MAP_FA(struct mthca_dev *dev, struct mthca_icm *icm);
int mthca_UNMAP_FA(struct mthca_dev *dev);
int mthca_RUN_FW(struct mthca_dev *dev);
int mthca_QUERY_FW(struct mthca_dev *dev);
int mthca_ENABLE_LAM(struct mthca_dev *dev);
int mthca_DISABLE_LAM(struct mthca_dev *dev);
int mthca_QUERY_DDR(struct mthca_dev *dev);
int mthca_QUERY_DEV_LIM(struct mthca_dev *dev,
struct mthca_dev_lim *dev_lim);
int mthca_QUERY_ADAPTER(struct mthca_dev *dev,
struct mthca_adapter *adapter);
int mthca_INIT_HCA(struct mthca_dev *dev,
struct mthca_init_hca_param *param);
int mthca_INIT_IB(struct mthca_dev *dev,
struct mthca_init_ib_param *param,
int port);
int mthca_CLOSE_IB(struct mthca_dev *dev, int port);
int mthca_CLOSE_HCA(struct mthca_dev *dev, int panic);
int mthca_SET_IB(struct mthca_dev *dev, struct mthca_set_ib_param *param,
int port);
/* ICM mapping commands. */
int mthca_MAP_ICM(struct mthca_dev *dev, struct mthca_icm *icm, u64 virt);
int mthca_MAP_ICM_page(struct mthca_dev *dev, u64 dma_addr, u64 virt);
int mthca_UNMAP_ICM(struct mthca_dev *dev, u64 virt, u32 page_count);
int mthca_MAP_ICM_AUX(struct mthca_dev *dev, struct mthca_icm *icm);
int mthca_UNMAP_ICM_AUX(struct mthca_dev *dev);
int mthca_SET_ICM_SIZE(struct mthca_dev *dev, u64 icm_size, u64 *aux_pages);
/*
 * Per-resource commands (MPT/MTT, EQ, CQ, SRQ, QP, MAD, multicast).
 * Most take a mailbox obtained from mthca_alloc_mailbox().  The CQ code
 * in the driver treats a non-zero return from mthca_SW2HW_CQ() and
 * mthca_HW2SW_CQ() as failure; the others presumably follow the same
 * convention -- confirm in the command implementation.
 */
int mthca_SW2HW_MPT(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
int mpt_index);
int mthca_HW2SW_MPT(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
int mpt_index);
int mthca_WRITE_MTT(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
int num_mtt);
int mthca_SYNC_TPT(struct mthca_dev *dev);
/* Event queues. */
int mthca_MAP_EQ(struct mthca_dev *dev, u64 event_mask, int unmap,
int eq_num);
int mthca_SW2HW_EQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
int eq_num);
int mthca_HW2SW_EQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
int eq_num);
/* Completion queues. */
int mthca_SW2HW_CQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
int cq_num);
int mthca_HW2SW_CQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
int cq_num);
int mthca_RESIZE_CQ(struct mthca_dev *dev, int cq_num, u32 lkey, u8 log_size);
/* Shared receive queues. */
int mthca_SW2HW_SRQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
int srq_num);
int mthca_HW2SW_SRQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
int srq_num);
int mthca_QUERY_SRQ(struct mthca_dev *dev, u32 num,
struct mthca_mailbox *mailbox);
int mthca_ARM_SRQ(struct mthca_dev *dev, int srq_num, int limit);
/* Queue pairs. */
int mthca_MODIFY_QP(struct mthca_dev *dev, enum ib_qp_state cur,
enum ib_qp_state next, u32 num, int is_ee,
struct mthca_mailbox *mailbox, u32 optmask);
int mthca_QUERY_QP(struct mthca_dev *dev, u32 num, int is_ee,
struct mthca_mailbox *mailbox);
int mthca_CONF_SPECIAL_QP(struct mthca_dev *dev, int type, u32 qpn);
/* MAD processing and multicast group management. */
int mthca_MAD_IFC(struct mthca_dev *dev, int ignore_mkey, int ignore_bkey,
int port, const struct ib_wc *in_wc, const struct ib_grh *in_grh,
const void *in_mad, void *response_mad);
int mthca_READ_MGM(struct mthca_dev *dev, int index,
struct mthca_mailbox *mailbox);
int mthca_WRITE_MGM(struct mthca_dev *dev, int index,
struct mthca_mailbox *mailbox);
int mthca_MGID_HASH(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
u16 *hash);
int mthca_NOP(struct mthca_dev *dev);
#endif /* MTHCA_CMD_H */

View File

@ -0,0 +1,48 @@
/*
* Copyright (c) 2004 Topspin Communications. All rights reserved.
* Copyright (c) 2005 Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef MTHCA_CONFIG_REG_H
#define MTHCA_CONFIG_REG_H
/*
 * *_BASE / *_SIZE pairs describing fixed register regions.  The names
 * suggest HCR = command register, ECR = event cause register and
 * CLR_INT = interrupt clear register -- confirm against the code that
 * maps these regions.
 */
#define MTHCA_HCR_BASE 0x80680
#define MTHCA_HCR_SIZE 0x0001c
#define MTHCA_ECR_BASE 0x80700
#define MTHCA_ECR_SIZE 0x00008
#define MTHCA_ECR_CLR_BASE 0x80708
#define MTHCA_ECR_CLR_SIZE 0x00008
/* ECR_CLR starts right where ECR ends, so one span covers both. */
#define MTHCA_MAP_ECR_SIZE (MTHCA_ECR_SIZE + MTHCA_ECR_CLR_SIZE)
#define MTHCA_CLR_INT_BASE 0xf00d8
#define MTHCA_CLR_INT_SIZE 0x00008
#define MTHCA_EQ_SET_CI_SIZE (8 * 32)
#endif /* MTHCA_CONFIG_REG_H */

981
sys/dev/mthca/mthca_cq.c Normal file
View File

@ -0,0 +1,981 @@
/*
* Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
* Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2005, 2006 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2005 Mellanox Technologies. All rights reserved.
* Copyright (c) 2004 Voltaire, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/gfp.h>
#include <linux/hardirq.h>
#include <linux/sched.h>
#include <asm/io.h>
#include <rdma/ib_pack.h>
#include "mthca_dev.h"
#include "mthca_cmd.h"
#include "mthca_memfree.h"
/*
 * Size limit handed to mthca_buf_alloc() next to the requested buffer
 * size when a CQ buffer is allocated (see mthca_alloc_cq_buf()).
 */
enum {
MTHCA_MAX_DIRECT_CQ_SIZE = 4 * PAGE_SIZE
};
/*
 * MTHCA_CQ_ENTRY_SIZE:   bytes occupied by one CQE slot in a CQ buffer.
 * MTHCA_ATOMIC_BYTE_LEN: byte_len reported for atomic completions.
 */
enum {
	MTHCA_CQ_ENTRY_SIZE   = 32,
	MTHCA_ATOMIC_BYTE_LEN = 8
};
/*
 * CQ context as written into a command mailbox by mthca_init_cq() and
 * handed to the HCA with mthca_SW2HW_CQ().  All fields are big-endian.
 *
 * Must be packed because start is 64 bits but only aligned to 32 bits.
 * With packing the structure is 64 bytes, which equals
 * MTHCA_CQ_CONTEXT_SIZE in mthca_dev.h.
 *
 * mthca_init_cq() zeroes the whole context and then fills in flags,
 * logsize_usrpage, error_eqn, comp_eqn, pd, lkey, cqn and, on mem-free
 * devices, ci_db and state_db.
 */
struct mthca_cq_context {
__be32 flags;
__be64 start;
__be32 logsize_usrpage;
__be32 error_eqn; /* Tavor only */
__be32 comp_eqn;
__be32 pd;
__be32 lkey;
__be32 last_notified_index;
__be32 solicit_producer_index;
__be32 consumer_index;
__be32 producer_index;
__be32 cqn;
__be32 ci_db; /* Arbel only */
__be32 state_db; /* Arbel only */
u32 reserved;
} __attribute__((packed));
/*
 * Values OR-ed into mthca_cq_context.flags.  mthca_init_cq() uses
 * MTHCA_CQ_STATUS_OK, MTHCA_CQ_STATE_DISARMED and MTHCA_CQ_FLAG_TR; the
 * remaining values are not referenced by the CQ code in this file.
 *
 * NOTE(review): MTHCA_EQ_STATE_FIRED carries an EQ prefix in a list of
 * CQ values -- possibly a naming slip; left unchanged.
 */
#define MTHCA_CQ_STATUS_OK ( 0 << 28)
#define MTHCA_CQ_STATUS_OVERFLOW ( 9 << 28)
#define MTHCA_CQ_STATUS_WRITE_FAIL (10 << 28)
#define MTHCA_CQ_FLAG_TR ( 1 << 18)
#define MTHCA_CQ_FLAG_OI ( 1 << 17)
#define MTHCA_CQ_STATE_DISARMED ( 0 << 8)
#define MTHCA_CQ_STATE_ARMED ( 1 << 8)
#define MTHCA_CQ_STATE_ARMED_SOL ( 4 << 8)
#define MTHCA_EQ_STATE_FIRED (10 << 8)
/*
 * A CQE is an error CQE when all bits of this mask are set in its
 * opcode byte; bit 0 of the opcode then tells send (1) from receive (0).
 * See is_recv_cqe() and mthca_poll_one().
 */
enum {
MTHCA_ERROR_CQE_OPCODE_MASK = 0xfe
};
/*
 * Values of mthca_err_cqe.syndrome.  handle_error_cqe() maps each one
 * to the corresponding IB_WC_* status; any value not listed here is
 * reported as IB_WC_GENERAL_ERR.
 */
enum {
SYNDROME_LOCAL_LENGTH_ERR = 0x01,
SYNDROME_LOCAL_QP_OP_ERR = 0x02,
SYNDROME_LOCAL_EEC_OP_ERR = 0x03,
SYNDROME_LOCAL_PROT_ERR = 0x04,
SYNDROME_WR_FLUSH_ERR = 0x05,
SYNDROME_MW_BIND_ERR = 0x06,
SYNDROME_BAD_RESP_ERR = 0x10,
SYNDROME_LOCAL_ACCESS_ERR = 0x11,
SYNDROME_REMOTE_INVAL_REQ_ERR = 0x12,
SYNDROME_REMOTE_ACCESS_ERR = 0x13,
SYNDROME_REMOTE_OP_ERR = 0x14,
SYNDROME_RETRY_EXC_ERR = 0x15,
SYNDROME_RNR_RETRY_EXC_ERR = 0x16,
SYNDROME_LOCAL_RDD_VIOL_ERR = 0x20,
SYNDROME_REMOTE_INVAL_RD_REQ_ERR = 0x21,
SYNDROME_REMOTE_ABORTED_ERR = 0x22,
SYNDROME_INVAL_EECN_ERR = 0x23,
SYNDROME_INVAL_EEC_STATE_ERR = 0x24
};
/*
 * One completion queue entry as read by mthca_poll_one().  The members
 * add up to 32 bytes, i.e. MTHCA_CQ_ENTRY_SIZE.
 */
struct mthca_cqe {
__be32 my_qpn; /* local QP number; used to look up the QP */
__be32 my_ee; /* bits 31:24 feed the receive checksum test */
__be32 rqpn; /* low 24 bits: source QP; top byte: checksum test */
u8 sl_ipok; /* bits 7:4: SL; bit 0: IP checksum OK candidate */
u8 g_mlpath; /* bit 7: GRH present; bits 6:0: DLID path bits */
__be16 rlid; /* reported as the source LID */
__be32 imm_etype_pkey_eec; /* immediate data, or P_Key index in bits 31:16 */
__be32 byte_cnt;
__be32 wqe; /* WQE address/offset this completion refers to */
u8 opcode;
u8 is_send; /* bit 7 set for send completions (non-error CQEs) */
u8 reserved;
u8 owner; /* MTHCA_CQ_ENTRY_OWNER_HW set while hardware owns the slot */
};
/*
 * View of a CQE that completed in error; mthca_poll_one() casts a
 * struct mthca_cqe pointer to this type before calling
 * handle_error_cqe().  The members also add up to 32 bytes, with wqe,
 * opcode and owner at the same offsets as in struct mthca_cqe.
 */
struct mthca_err_cqe {
__be32 my_qpn;
u32 reserved1[3];
u8 syndrome; /* one of the SYNDROME_* values */
u8 vendor_err; /* copied verbatim into ib_wc.vendor_err */
__be16 db_cnt; /* doorbell count, decremented by handle_error_cqe() */
u32 reserved2;
__be32 wqe;
u8 opcode;
u8 reserved3[2];
u8 owner;
};
/* Values of the owner byte of a CQE (see cqe_sw() and set_cqe_hw()). */
#define MTHCA_CQ_ENTRY_OWNER_SW (0 << 7)
#define MTHCA_CQ_ENTRY_OWNER_HW (1 << 7)
/*
 * Command codes OR-ed with the CQ number to form the high word of a
 * Tavor CQ doorbell (see update_cons_index() and mthca_tavor_arm_cq()).
 */
#define MTHCA_TAVOR_CQ_DB_INC_CI (1 << 24)
#define MTHCA_TAVOR_CQ_DB_REQ_NOT (2 << 24)
#define MTHCA_TAVOR_CQ_DB_REQ_NOT_SOL (3 << 24)
#define MTHCA_TAVOR_CQ_DB_SET_CI (4 << 24)
#define MTHCA_TAVOR_CQ_DB_REQ_NOT_MULT (5 << 24)
/* Same for the Arbel (mem-free) CQ doorbell (see mthca_arbel_arm_cq()). */
#define MTHCA_ARBEL_CQ_DB_REQ_NOT_SOL (1 << 24)
#define MTHCA_ARBEL_CQ_DB_REQ_NOT (2 << 24)
#define MTHCA_ARBEL_CQ_DB_REQ_NOT_MULT (3 << 24)
/*
 * Return the address of CQE slot "entry" inside "buf".
 *
 * A direct buffer is one contiguous region indexed by byte offset.  An
 * indirect buffer is a list of pages, so the byte offset is split into
 * a page number and an offset within that page.
 */
static inline struct mthca_cqe *get_cqe_from_buf(struct mthca_cq_buf *buf,
						 int entry)
{
	int byte_off = entry * MTHCA_CQ_ENTRY_SIZE;

	if (!buf->is_direct)
		return buf->queue.page_list[byte_off / PAGE_SIZE].buf
			+ byte_off % PAGE_SIZE;

	return buf->queue.direct.buf + byte_off;
}
/* Return the address of CQE slot "entry" in the CQ's current buffer. */
static inline struct mthca_cqe *get_cqe(struct mthca_cq *cq, int entry)
{
	struct mthca_cq_buf *cur = &cq->buf;

	return get_cqe_from_buf(cur, entry);
}
/*
 * Return "cqe" if it is owned by software, or NULL while the hardware
 * ownership bit is still set in its owner byte.
 */
static inline struct mthca_cqe *cqe_sw(struct mthca_cqe *cqe)
{
	if (cqe->owner & MTHCA_CQ_ENTRY_OWNER_HW)
		return NULL;

	return cqe;
}
/*
 * Return the CQE at the consumer index if software owns it, else NULL.
 * ibcq.cqe holds (number of entries - 1) and is used as an index mask.
 */
static inline struct mthca_cqe *next_cqe_sw(struct mthca_cq *cq)
{
	struct mthca_cqe *head;

	head = get_cqe(cq, cq->cons_index & cq->ibcq.cqe);

	return cqe_sw(head);
}
/*
 * Give a CQE slot back to the hardware by overwriting its owner byte
 * with the hardware ownership value.
 */
static inline void set_cqe_hw(struct mthca_cqe *cqe)
{
	cqe->owner = MTHCA_CQ_ENTRY_OWNER_HW;
}
/*
 * Print the eight 32-bit words of a CQE, converted from big-endian,
 * through mthca_dbg().
 */
static void dump_cqe(struct mthca_dev *dev, void *cqe_ptr)
{
	__be32 *word = cqe_ptr;

	/* Keeps "word" referenced when mthca_dbg() compiles to nothing. */
	(void) word;

	mthca_dbg(dev, "CQE contents %08x %08x %08x %08x %08x %08x %08x %08x\n",
		  be32_to_cpu(word[0]), be32_to_cpu(word[1]),
		  be32_to_cpu(word[2]), be32_to_cpu(word[3]),
		  be32_to_cpu(word[4]), be32_to_cpu(word[5]),
		  be32_to_cpu(word[6]), be32_to_cpu(word[7]));
}
/*
 * Tell the HCA how far software has consumed the CQ.
 *
 * On mem-free (native Arbel) devices the absolute cq->cons_index is
 * stored in the set_ci doorbell record and "incr" is not used, so
 * cq->cons_index must already be up to date when this is called.  On
 * other devices an INC_CI doorbell carrying (incr - 1) is rung.
 */
static inline void update_cons_index(struct mthca_dev *dev, struct mthca_cq *cq,
				     int incr)
{
	if (!mthca_is_memfree(dev)) {
		mthca_write64(MTHCA_TAVOR_CQ_DB_INC_CI | cq->cqn, incr - 1,
			      dev->kar + MTHCA_CQ_DOORBELL,
			      MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
		/*
		 * Keep the doorbell write from leaking out of the CQ
		 * spinlock and reaching the HCA out of order.
		 */
		mmiowb();
		return;
	}

	*cq->set_ci_db = cpu_to_be32(cq->cons_index);
	wmb();
}
/*
 * Handle a completion event for CQ number "cqn": look the CQ up in the
 * CQ table, bump its arm sequence number and call the consumer's
 * completion handler.  An unknown CQ number is only logged.
 *
 * Unlike mthca_cq_event(), this path takes neither the CQ table lock
 * nor a reference on the CQ.
 */
void mthca_cq_completion(struct mthca_dev *dev, u32 cqn)
{
	struct mthca_cq *cq = mthca_array_get(&dev->cq_table.cq,
					      cqn & (dev->limits.num_cqs - 1));

	if (cq == NULL) {
		mthca_warn(dev, "Completion event for bogus CQ %08x\n", cqn);
		return;
	}

	cq->arm_sn++;
	cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
}
/*
 * Deliver an asynchronous event of type "event_type" for CQ number
 * "cqn" to the consumer's event handler, if one is registered.
 *
 * A reference is taken under the CQ table lock for the duration of the
 * callback; dropping the last reference wakes anyone sleeping on
 * cq->wait (mthca_free_cq() waits there).
 */
void mthca_cq_event(struct mthca_dev *dev, u32 cqn,
		    enum ib_event_type event_type)
{
	struct ib_event event;
	struct mthca_cq *cq;

	spin_lock(&dev->cq_table.lock);
	cq = mthca_array_get(&dev->cq_table.cq, cqn & (dev->limits.num_cqs - 1));
	if (cq != NULL)
		cq->refcount++;
	spin_unlock(&dev->cq_table.lock);

	if (cq == NULL) {
		mthca_warn(dev, "Async event for bogus CQ %08x\n", cqn);
		return;
	}

	event.device     = &dev->ib_dev;
	event.event      = event_type;
	event.element.cq = &cq->ibcq;

	if (cq->ibcq.event_handler != NULL)
		cq->ibcq.event_handler(&event, cq->ibcq.cq_context);

	spin_lock(&dev->cq_table.lock);
	cq->refcount--;
	if (cq->refcount == 0)
		wake_up(&cq->wait);
	spin_unlock(&dev->cq_table.lock);
}
/*
 * Return non-zero if "cqe" is a receive completion.  For error CQEs the
 * direction is bit 0 of the opcode; otherwise it is bit 7 of is_send.
 */
static inline int is_recv_cqe(struct mthca_cqe *cqe)
{
	int is_err = (cqe->opcode & MTHCA_ERROR_CQE_OPCODE_MASK) ==
		     MTHCA_ERROR_CQE_OPCODE_MASK;
	int send_bit;

	if (is_err)
		send_bit = cqe->opcode & 0x01;
	else
		send_bit = cqe->is_send & 0x80;

	return !send_bit;
}
/*
 * Remove every CQE that belongs to QP number "qpn" from "cq".
 *
 * dev: device owning the CQ
 * cq:  CQ to clean; its lock is taken (IRQs disabled) for the duration
 * qpn: QP number whose completions are discarded
 * srq: if non-NULL, receive WQEs referenced by discarded CQEs are
 *      returned to this SRQ with mthca_free_srq_wqe()
 *
 * Surviving entries are compacted towards the producer end, the freed
 * slots at the consumer end are handed back to hardware, and the
 * consumer index is advanced by the number of entries removed.
 */
void mthca_cq_clean(struct mthca_dev *dev, struct mthca_cq *cq, u32 qpn,
struct mthca_srq *srq)
{
struct mthca_cqe *cqe;
u32 prod_index;
int i, nfreed = 0;
spin_lock_irq(&cq->lock);
/*
* First we need to find the current producer index, so we
* know where to start cleaning from. It doesn't matter if HW
* adds new entries after this loop -- the QP we're worried
* about is already in RESET, so the new entries won't come
* from our QP and therefore don't need to be checked.
*/
/* The walk also stops after cq->ibcq.cqe entries if all are SW-owned. */
for (prod_index = cq->cons_index;
cqe_sw(get_cqe(cq, prod_index & cq->ibcq.cqe));
++prod_index)
if (prod_index == cq->cons_index + cq->ibcq.cqe)
break;
/* Debug trace, disabled at compile time. */
if (0)
mthca_dbg(dev, "Cleaning QPN %06x from CQN %06x; ci %d, pi %d\n",
qpn, cq->cqn, cq->cons_index, prod_index);
/*
* Now sweep backwards through the CQ, removing CQ entries
* that match our QP by copying older entries on top of them.
*/
/* The signed difference keeps the comparison correct across index wrap. */
while ((int) --prod_index - (int) cq->cons_index >= 0) {
cqe = get_cqe(cq, prod_index & cq->ibcq.cqe);
if (cqe->my_qpn == cpu_to_be32(qpn)) {
if (srq && is_recv_cqe(cqe))
mthca_free_srq_wqe(srq, be32_to_cpu(cqe->wqe));
++nfreed;
} else if (nfreed)
/* Shift a surviving entry forward by the number of removed ones. */
memcpy(get_cqe(cq, (prod_index + nfreed) & cq->ibcq.cqe),
cqe, MTHCA_CQ_ENTRY_SIZE);
}
if (nfreed) {
/* The first nfreed slots are now stale: return them to hardware. */
for (i = 0; i < nfreed; ++i)
set_cqe_hw(get_cqe(cq, (cq->cons_index + i) & cq->ibcq.cqe));
/* Ownership writes must be visible before the index update. */
wmb();
cq->cons_index += nfreed;
update_cons_index(dev, cq, nfreed);
}
spin_unlock_irq(&cq->lock);
}
/*
 * Copy every software-owned CQE, starting at the consumer index, from
 * the CQ's current buffer into cq->resize_buf, placing each entry at
 * its index masked with the new buffer's size.
 */
void mthca_cq_resize_copy_cqes(struct mthca_cq *cq)
{
	struct mthca_dev *dev = to_mdev(cq->ibcq.device);
	int idx;

	/*
	 * In Tavor mode, the hardware keeps the consumer and producer
	 * indices mod the CQ size.  Since we might be making the CQ
	 * bigger, we need to deal with the case where the producer
	 * index wrapped around before the CQ was resized.
	 */
	if (!mthca_is_memfree(dev) && cq->ibcq.cqe < cq->resize_buf->cqe) {
		cq->cons_index &= cq->ibcq.cqe;
		if (cqe_sw(get_cqe(cq, cq->ibcq.cqe)))
			cq->cons_index -= cq->ibcq.cqe + 1;
	}

	idx = cq->cons_index;
	while (cqe_sw(get_cqe(cq, idx & cq->ibcq.cqe))) {
		memcpy(get_cqe_from_buf(&cq->resize_buf->buf,
					idx & cq->resize_buf->cqe),
		       get_cqe(cq, idx & cq->ibcq.cqe), MTHCA_CQ_ENTRY_SIZE);
		++idx;
	}
}
/*
 * Allocate a CQ buffer with room for "nent" entries and mark every
 * entry as hardware-owned.
 *
 * Returns 0 on success, or the non-zero value returned by
 * mthca_buf_alloc() on failure.
 */
int mthca_alloc_cq_buf(struct mthca_dev *dev, struct mthca_cq_buf *buf, int nent)
{
	int slot;
	int rc;

	rc = mthca_buf_alloc(dev, nent * MTHCA_CQ_ENTRY_SIZE,
			     MTHCA_MAX_DIRECT_CQ_SIZE,
			     &buf->queue, &buf->is_direct,
			     &dev->driver_pd, 1, &buf->mr);
	if (rc != 0)
		return rc;

	slot = 0;
	while (slot < nent) {
		set_cqe_hw(get_cqe_from_buf(buf, slot));
		++slot;
	}

	return 0;
}
/*
 * Release a CQ buffer.  "cqe" is the highest entry index (number of
 * entries minus one), so the buffer size is (cqe + 1) entries.
 */
void mthca_free_cq_buf(struct mthca_dev *dev, struct mthca_cq_buf *buf, int cqe)
{
	int nbytes = (cqe + 1) * MTHCA_CQ_ENTRY_SIZE;

	mthca_buf_free(dev, nbytes, &buf->queue, buf->is_direct, &buf->mr);
}
static void handle_error_cqe(struct mthca_dev *dev, struct mthca_cq *cq,
struct mthca_qp *qp, int wqe_index, int is_send,
struct mthca_err_cqe *cqe,
struct ib_wc *entry, int *free_cqe)
{
int dbd;
__be32 new_wqe;
if (cqe->syndrome == SYNDROME_LOCAL_QP_OP_ERR) {
mthca_dbg(dev, "local QP operation err "
"(QPN %06x, WQE @ %08x, CQN %06x, index %d)\n",
be32_to_cpu(cqe->my_qpn), be32_to_cpu(cqe->wqe),
cq->cqn, cq->cons_index);
dump_cqe(dev, cqe);
}
/*
* For completions in error, only work request ID, status, vendor error
* (and freed resource count for RD) have to be set.
*/
switch (cqe->syndrome) {
case SYNDROME_LOCAL_LENGTH_ERR:
entry->status = IB_WC_LOC_LEN_ERR;
break;
case SYNDROME_LOCAL_QP_OP_ERR:
entry->status = IB_WC_LOC_QP_OP_ERR;
break;
case SYNDROME_LOCAL_EEC_OP_ERR:
entry->status = IB_WC_LOC_EEC_OP_ERR;
break;
case SYNDROME_LOCAL_PROT_ERR:
entry->status = IB_WC_LOC_PROT_ERR;
break;
case SYNDROME_WR_FLUSH_ERR:
entry->status = IB_WC_WR_FLUSH_ERR;
break;
case SYNDROME_MW_BIND_ERR:
entry->status = IB_WC_MW_BIND_ERR;
break;
case SYNDROME_BAD_RESP_ERR:
entry->status = IB_WC_BAD_RESP_ERR;
break;
case SYNDROME_LOCAL_ACCESS_ERR:
entry->status = IB_WC_LOC_ACCESS_ERR;
break;
case SYNDROME_REMOTE_INVAL_REQ_ERR:
entry->status = IB_WC_REM_INV_REQ_ERR;
break;
case SYNDROME_REMOTE_ACCESS_ERR:
entry->status = IB_WC_REM_ACCESS_ERR;
break;
case SYNDROME_REMOTE_OP_ERR:
entry->status = IB_WC_REM_OP_ERR;
break;
case SYNDROME_RETRY_EXC_ERR:
entry->status = IB_WC_RETRY_EXC_ERR;
break;
case SYNDROME_RNR_RETRY_EXC_ERR:
entry->status = IB_WC_RNR_RETRY_EXC_ERR;
break;
case SYNDROME_LOCAL_RDD_VIOL_ERR:
entry->status = IB_WC_LOC_RDD_VIOL_ERR;
break;
case SYNDROME_REMOTE_INVAL_RD_REQ_ERR:
entry->status = IB_WC_REM_INV_RD_REQ_ERR;
break;
case SYNDROME_REMOTE_ABORTED_ERR:
entry->status = IB_WC_REM_ABORT_ERR;
break;
case SYNDROME_INVAL_EECN_ERR:
entry->status = IB_WC_INV_EECN_ERR;
break;
case SYNDROME_INVAL_EEC_STATE_ERR:
entry->status = IB_WC_INV_EEC_STATE_ERR;
break;
default:
entry->status = IB_WC_GENERAL_ERR;
break;
}
entry->vendor_err = cqe->vendor_err;
/*
* Mem-free HCAs always generate one CQE per WQE, even in the
* error case, so we don't have to check the doorbell count, etc.
*/
if (mthca_is_memfree(dev))
return;
mthca_free_err_wqe(dev, qp, is_send, wqe_index, &dbd, &new_wqe);
/*
* If we're at the end of the WQE chain, or we've used up our
* doorbell count, free the CQE. Otherwise just update it for
* the next poll operation.
*/
if (!(new_wqe & cpu_to_be32(0x3f)) || (!cqe->db_cnt && dbd))
return;
be16_add_cpu(&cqe->db_cnt, -dbd);
cqe->wqe = new_wqe;
cqe->syndrome = SYNDROME_WR_FLUSH_ERR;
*free_cqe = 0;
}
/*
 * Consume at most one CQE from "cq" and translate it into *entry.
 *
 * dev:    device owning the CQ
 * cq:     CQ being polled; called from mthca_poll_cq() with cq->lock held
 * cur_qp: in/out cache of the QP the previous CQE belonged to, so that
 *         consecutive CQEs of one QP skip the QP table lookup
 * freed:  incremented when the CQE slot is handed back to hardware
 * entry:  work completion to fill in
 *
 * Returns 0 when *entry was filled in (including error completions,
 * which are reported through entry->status), -EAGAIN when the CQE at
 * the consumer index is still owned by hardware, and -EINVAL when the
 * CQE names a QP that is not in the QP table.
 */
static inline int mthca_poll_one(struct mthca_dev *dev,
struct mthca_cq *cq,
struct mthca_qp **cur_qp,
int *freed,
struct ib_wc *entry)
{
struct mthca_wq *wq;
struct mthca_cqe *cqe;
int wqe_index;
int is_error;
int is_send;
int free_cqe = 1;
int err = 0;
u16 checksum;
cqe = next_cqe_sw(cq);
if (!cqe)
return -EAGAIN;
/*
* Make sure we read CQ entry contents after we've checked the
* ownership bit.
*/
rmb();
/* Debug trace, disabled at compile time. */
if (0) {
mthca_dbg(dev, "%x/%d: CQE -> QPN %06x, WQE @ %08x\n",
cq->cqn, cq->cons_index, be32_to_cpu(cqe->my_qpn),
be32_to_cpu(cqe->wqe));
dump_cqe(dev, cqe);
}
/* Error CQEs encode the direction in opcode bit 0, others in is_send bit 7. */
is_error = (cqe->opcode & MTHCA_ERROR_CQE_OPCODE_MASK) ==
MTHCA_ERROR_CQE_OPCODE_MASK;
is_send = is_error ? cqe->opcode & 0x01 : cqe->is_send & 0x80;
if (!*cur_qp || be32_to_cpu(cqe->my_qpn) != (*cur_qp)->qpn) {
/*
* We do not have to take the QP table lock here,
* because CQs will be locked while QPs are removed
* from the table.
*/
*cur_qp = mthca_array_get(&dev->qp_table.qp,
be32_to_cpu(cqe->my_qpn) &
(dev->limits.num_qps - 1));
if (!*cur_qp) {
mthca_warn(dev, "CQ entry for unknown QP %06x\n",
be32_to_cpu(cqe->my_qpn) & 0xffffff);
err = -EINVAL;
goto out;
}
}
entry->qp = &(*cur_qp)->ibqp;
/*
* Work out which work queue and WQE the completion refers to and
* fetch the wr_id stored for it.  Send wr_ids are kept after the
* rq.max receive wr_ids in the QP's wrid array.
*/
if (is_send) {
wq = &(*cur_qp)->sq;
wqe_index = ((be32_to_cpu(cqe->wqe) - (*cur_qp)->send_wqe_offset)
>> wq->wqe_shift);
entry->wr_id = (*cur_qp)->wrid[wqe_index +
(*cur_qp)->rq.max];
} else if ((*cur_qp)->ibqp.srq) {
struct mthca_srq *srq = to_msrq((*cur_qp)->ibqp.srq);
u32 wqe = be32_to_cpu(cqe->wqe);
/* SRQ receives have no per-QP work queue to advance. */
wq = NULL;
wqe_index = wqe >> srq->wqe_shift;
entry->wr_id = srq->wrid[wqe_index];
mthca_free_srq_wqe(srq, wqe);
} else {
s32 wqe;
wq = &(*cur_qp)->rq;
wqe = be32_to_cpu(cqe->wqe);
wqe_index = wqe >> wq->wqe_shift;
/*
* WQE addr == base - 1 might be reported in receive completion
* with error instead of (rq size - 1) by Sinai FW 1.0.800 and
* Arbel FW 5.1.400. This bug should be fixed in later FW revs.
*/
if (unlikely(wqe_index < 0))
wqe_index = wq->max - 1;
entry->wr_id = (*cur_qp)->wrid[wqe_index];
}
/* Advance the queue tail by the distance from the last completed WQE, modulo wq->max. */
if (wq) {
if (wq->last_comp < wqe_index)
wq->tail += wqe_index - wq->last_comp;
else
wq->tail += wqe_index + wq->max - wq->last_comp;
wq->last_comp = wqe_index;
}
if (is_error) {
/* May clear free_cqe so the CQE is kept for the next poll. */
handle_error_cqe(dev, cq, *cur_qp, wqe_index, is_send,
(struct mthca_err_cqe *) cqe,
entry, &free_cqe);
goto out;
}
if (is_send) {
entry->wc_flags = 0;
switch (cqe->opcode) {
case MTHCA_OPCODE_RDMA_WRITE:
entry->opcode = IB_WC_RDMA_WRITE;
break;
case MTHCA_OPCODE_RDMA_WRITE_IMM:
entry->opcode = IB_WC_RDMA_WRITE;
entry->wc_flags |= IB_WC_WITH_IMM;
break;
case MTHCA_OPCODE_SEND:
entry->opcode = IB_WC_SEND;
break;
case MTHCA_OPCODE_SEND_IMM:
entry->opcode = IB_WC_SEND;
entry->wc_flags |= IB_WC_WITH_IMM;
break;
case MTHCA_OPCODE_RDMA_READ:
entry->opcode = IB_WC_RDMA_READ;
entry->byte_len = be32_to_cpu(cqe->byte_cnt);
break;
case MTHCA_OPCODE_ATOMIC_CS:
entry->opcode = IB_WC_COMP_SWAP;
entry->byte_len = MTHCA_ATOMIC_BYTE_LEN;
break;
case MTHCA_OPCODE_ATOMIC_FA:
entry->opcode = IB_WC_FETCH_ADD;
entry->byte_len = MTHCA_ATOMIC_BYTE_LEN;
break;
default:
/* NOTE(review): stores a driver opcode value, not an IB_WC_* one. */
entry->opcode = MTHCA_OPCODE_INVALID;
break;
}
} else {
entry->byte_len = be32_to_cpu(cqe->byte_cnt);
switch (cqe->opcode & 0x1f) {
case IB_OPCODE_SEND_LAST_WITH_IMMEDIATE:
case IB_OPCODE_SEND_ONLY_WITH_IMMEDIATE:
entry->wc_flags = IB_WC_WITH_IMM;
entry->ex.imm_data = cqe->imm_etype_pkey_eec;
entry->opcode = IB_WC_RECV;
break;
case IB_OPCODE_RDMA_WRITE_LAST_WITH_IMMEDIATE:
case IB_OPCODE_RDMA_WRITE_ONLY_WITH_IMMEDIATE:
entry->wc_flags = IB_WC_WITH_IMM;
entry->ex.imm_data = cqe->imm_etype_pkey_eec;
entry->opcode = IB_WC_RECV_RDMA_WITH_IMM;
break;
default:
entry->wc_flags = 0;
entry->opcode = IB_WC_RECV;
break;
}
entry->slid = be16_to_cpu(cqe->rlid);
entry->sl = cqe->sl_ipok >> 4;
entry->src_qp = be32_to_cpu(cqe->rqpn) & 0xffffff;
entry->dlid_path_bits = cqe->g_mlpath & 0x7f;
entry->pkey_index = be32_to_cpu(cqe->imm_etype_pkey_eec) >> 16;
entry->wc_flags |= cqe->g_mlpath & 0x80 ? IB_WC_GRH : 0;
/* 16-bit checksum: low byte from rqpn[31:24], high byte from my_ee[31:24]. */
checksum = (be32_to_cpu(cqe->rqpn) >> 24) |
((be32_to_cpu(cqe->my_ee) >> 16) & 0xff00);
entry->wc_flags |= (cqe->sl_ipok & 1 && checksum == 0xffff) ?
IB_WC_IP_CSUM_OK : 0;
}
entry->status = IB_WC_SUCCESS;
out:
/* Also reached for the unknown-QP case, which discards the CQE. */
if (likely(free_cqe)) {
set_cqe_hw(cqe);
++(*freed);
++cq->cons_index;
}
return err;
}
/*
 * Poll up to "num_entries" completions from "ibcq" into the "entry"
 * array.
 *
 * Returns the number of completions stored (possibly 0) when polling
 * ended normally or because the CQ ran empty, or the negative error
 * from mthca_poll_one() otherwise.  Runs with cq->lock held and
 * interrupts disabled.
 */
int mthca_poll_cq(struct ib_cq *ibcq, int num_entries,
struct ib_wc *entry)
{
struct mthca_dev *dev = to_mdev(ibcq->device);
struct mthca_cq *cq = to_mcq(ibcq);
struct mthca_qp *qp = NULL;
unsigned long flags;
int err = 0;
int freed = 0;
int npolled;
spin_lock_irqsave(&cq->lock, flags);
npolled = 0;
repoll:
while (npolled < num_entries) {
err = mthca_poll_one(dev, cq, &qp,
&freed, entry + npolled);
if (err)
break;
++npolled;
}
if (freed) {
/* Ownership writes must be visible before the consumer index update. */
wmb();
/* NOTE(review): freed is not reset before "goto repoll" below -- confirm that is intended. */
update_cons_index(dev, cq, freed);
}
/*
* If a CQ resize is in progress and we discovered that the
* old buffer is empty, then peek in the new buffer, and if
* it's not empty, switch to the new buffer and continue
* polling there.
*/
if (unlikely(err == -EAGAIN && cq->resize_buf &&
cq->resize_buf->state == CQ_RESIZE_READY)) {
/*
* In Tavor mode, the hardware keeps the producer
* index modulo the CQ size. Since we might be making
* the CQ bigger, we need to mask our consumer index
* using the size of the old CQ buffer before looking
* in the new CQ buffer.
*/
if (!mthca_is_memfree(dev))
cq->cons_index &= cq->ibcq.cqe;
if (cqe_sw(get_cqe_from_buf(&cq->resize_buf->buf,
cq->cons_index & cq->resize_buf->cqe))) {
struct mthca_cq_buf tbuf;
int tcqe;
/* Swap buffers: the old one is parked in resize_buf. */
tbuf = cq->buf;
tcqe = cq->ibcq.cqe;
cq->buf = cq->resize_buf->buf;
cq->ibcq.cqe = cq->resize_buf->cqe;
cq->resize_buf->buf = tbuf;
cq->resize_buf->cqe = tcqe;
cq->resize_buf->state = CQ_RESIZE_SWAPPED;
goto repoll;
}
}
spin_unlock_irqrestore(&cq->lock, flags);
/* An empty CQ (-EAGAIN) is not an error for the caller. */
return err == 0 || err == -EAGAIN ? npolled : err;
}
int mthca_tavor_arm_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags)
{
u32 dbhi = ((flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ?
MTHCA_TAVOR_CQ_DB_REQ_NOT_SOL :
MTHCA_TAVOR_CQ_DB_REQ_NOT) |
to_mcq(cq)->cqn;
mthca_write64(dbhi, 0xffffffff, to_mdev(cq->device)->kar + MTHCA_CQ_DOORBELL,
MTHCA_GET_DOORBELL_LOCK(&to_mdev(cq->device)->doorbell_lock));
return 0;
}
int mthca_arbel_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
{
struct mthca_cq *cq = to_mcq(ibcq);
__be32 db_rec[2];
u32 dbhi;
u32 sn = cq->arm_sn & 3;
db_rec[0] = cpu_to_be32(cq->cons_index);
db_rec[1] = cpu_to_be32((cq->cqn << 8) | (2 << 5) | (sn << 3) |
((flags & IB_CQ_SOLICITED_MASK) ==
IB_CQ_SOLICITED ? 1 : 2));
mthca_write_db_rec(db_rec, cq->arm_db);
/*
* Make sure that the doorbell record in host memory is
* written before ringing the doorbell via PCI MMIO.
*/
wmb();
dbhi = (sn << 28) |
((flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ?
MTHCA_ARBEL_CQ_DB_REQ_NOT_SOL :
MTHCA_ARBEL_CQ_DB_REQ_NOT) | cq->cqn;
mthca_write64(dbhi, cq->cons_index,
to_mdev(ibcq->device)->kar + MTHCA_CQ_DOORBELL,
MTHCA_GET_DOORBELL_LOCK(&to_mdev(ibcq->device)->doorbell_lock));
return 0;
}
/*
 * Create a completion queue and hand its context to the HCA.
 *
 * dev:  device that will own the CQ
 * nent: number of CQ entries.  ibcq.cqe is set to nent - 1 and used as
 *       an index mask throughout this file, and the log2 size written to
 *       the context is derived with ffs(), so nent is expected to be a
 *       power of two.
 * ctx:  user context, or NULL for a kernel CQ.  Only kernel CQs get a
 *       buffer and (on mem-free devices) doorbell records allocated
 *       here.
 * pdn:  protection domain number stored in the CQ context
 * cq:   CQ object to initialise
 *
 * Returns 0 on success or a negative errno; on failure everything
 * acquired so far is released.
 *
 * Two failure paths used to jump to the cleanup labels with err == 0,
 * so the caller saw success for a CQ whose resources had been freed:
 * mailbox allocation failure for a userspace CQ on a mem-free device,
 * and mthca_array_set() failure.  Both now set err before the jump.
 */
int mthca_init_cq(struct mthca_dev *dev, int nent,
		  struct mthca_ucontext *ctx, u32 pdn,
		  struct mthca_cq *cq)
{
	struct mthca_mailbox *mailbox;
	struct mthca_cq_context *cq_context;
	int err = -ENOMEM;

	cq->ibcq.cqe  = nent - 1;
	cq->is_kernel = !ctx;

	cq->cqn = mthca_alloc(&dev->cq_table.alloc);
	if (cq->cqn == -1)
		return -ENOMEM;

	if (mthca_is_memfree(dev)) {
		err = mthca_table_get(dev, dev->cq_table.table, cq->cqn);
		if (err)
			goto err_out;

		if (cq->is_kernel) {
			cq->arm_sn = 1;

			/* The doorbell allocators report failure as a negative index. */
			err = -ENOMEM;

			cq->set_ci_db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_CQ_SET_CI,
							     cq->cqn, &cq->set_ci_db);
			if (cq->set_ci_db_index < 0)
				goto err_out_icm;

			cq->arm_db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_CQ_ARM,
							  cq->cqn, &cq->arm_db);
			if (cq->arm_db_index < 0)
				goto err_out_ci;
		}
	}

	mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
	if (IS_ERR(mailbox)) {
		/* err may be 0 here (mem-free device, userspace CQ). */
		err = PTR_ERR(mailbox);
		goto err_out_arm;
	}

	cq_context = mailbox->buf;

	if (cq->is_kernel) {
		err = mthca_alloc_cq_buf(dev, &cq->buf, nent);
		if (err)
			goto err_out_mailbox;
	}

	spin_lock_init(&cq->lock);
	cq->refcount = 1;
	init_waitqueue_head(&cq->wait);
	mutex_init(&cq->mutex);

	memset(cq_context, 0, sizeof *cq_context);
	cq_context->flags           = cpu_to_be32(MTHCA_CQ_STATUS_OK      |
						  MTHCA_CQ_STATE_DISARMED |
						  MTHCA_CQ_FLAG_TR);
	/* log2(nent) goes into the top byte, the UAR index into the rest. */
	cq_context->logsize_usrpage = cpu_to_be32((ffs(nent) - 1) << 24);
	if (ctx)
		cq_context->logsize_usrpage |= cpu_to_be32(ctx->uar.index);
	else
		cq_context->logsize_usrpage |= cpu_to_be32(dev->driver_uar.index);
	cq_context->error_eqn       = cpu_to_be32(dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn);
	cq_context->comp_eqn        = cpu_to_be32(dev->eq_table.eq[MTHCA_EQ_COMP].eqn);
	cq_context->pd              = cpu_to_be32(pdn);
	cq_context->lkey            = cpu_to_be32(cq->buf.mr.ibmr.lkey);
	cq_context->cqn             = cpu_to_be32(cq->cqn);

	if (mthca_is_memfree(dev)) {
		cq_context->ci_db    = cpu_to_be32(cq->set_ci_db_index);
		cq_context->state_db = cpu_to_be32(cq->arm_db_index);
	}

	err = mthca_SW2HW_CQ(dev, mailbox, cq->cqn);
	if (err) {
		mthca_warn(dev, "SW2HW_CQ failed (%d)\n", err);
		goto err_out_free_mr;
	}

	spin_lock_irq(&dev->cq_table.lock);
	/* Keep the result: err is 0 at this point from SW2HW_CQ. */
	err = mthca_array_set(&dev->cq_table.cq,
			      cq->cqn & (dev->limits.num_cqs - 1),
			      cq);
	if (err) {
		spin_unlock_irq(&dev->cq_table.lock);
		/*
		 * NOTE(review): the CQ context was already passed to
		 * hardware by SW2HW_CQ and is not taken back on this
		 * path -- confirm whether HW2SW_CQ is needed here.
		 */
		goto err_out_free_mr;
	}
	spin_unlock_irq(&dev->cq_table.lock);

	cq->cons_index = 0;

	mthca_free_mailbox(dev, mailbox);

	return 0;

err_out_free_mr:
	if (cq->is_kernel)
		mthca_free_cq_buf(dev, &cq->buf, cq->ibcq.cqe);

err_out_mailbox:
	mthca_free_mailbox(dev, mailbox);

err_out_arm:
	if (cq->is_kernel && mthca_is_memfree(dev))
		mthca_free_db(dev, MTHCA_DB_TYPE_CQ_ARM, cq->arm_db_index);

err_out_ci:
	if (cq->is_kernel && mthca_is_memfree(dev))
		mthca_free_db(dev, MTHCA_DB_TYPE_CQ_SET_CI, cq->set_ci_db_index);

err_out_icm:
	mthca_table_put(dev, dev->cq_table.table, cq->cqn);

err_out:
	mthca_free(&dev->cq_table.alloc, cq->cqn);

	return err;
}
/* Read cq->refcount under the CQ table lock and return the snapshot. */
static inline int get_cq_refcount(struct mthca_dev *dev, struct mthca_cq *cq)
{
	int refs;

	spin_lock_irq(&dev->cq_table.lock);
	refs = cq->refcount;
	spin_unlock_irq(&dev->cq_table.lock);

	return refs;
}
/*
 * Destroy a CQ created by mthca_init_cq().
 *
 * The CQ is taken back from hardware with HW2SW_CQ, removed from the CQ
 * table, and the initial reference is dropped.  After pending interrupt
 * handlers have finished and all remaining references are gone, the
 * buffer and doorbell records of kernel CQs, the ICM table reference
 * and the CQ number are released.
 *
 * NOTE(review): if no mailbox can be allocated the function returns
 * before any of that happens, so nothing is released on that path.
 */
void mthca_free_cq(struct mthca_dev *dev,
struct mthca_cq *cq)
{
struct mthca_mailbox *mailbox;
int err;
mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
if (IS_ERR(mailbox)) {
mthca_warn(dev, "No memory for mailbox to free CQ.\n");
return;
}
err = mthca_HW2SW_CQ(dev, mailbox, cq->cqn);
/* A failure is only logged; teardown continues regardless. */
if (err)
mthca_warn(dev, "HW2SW_CQ failed (%d)\n", err);
/* Debug dump of the returned context, disabled at compile time. */
if (0) {
__be32 *ctx = mailbox->buf;
int j;
printk(KERN_ERR "context for CQN %x (cons index %x, next sw %d)\n",
cq->cqn, cq->cons_index,
cq->is_kernel ? !!next_cqe_sw(cq) : 0);
for (j = 0; j < 16; ++j)
printk(KERN_ERR "[%2x] %08x\n", j * 4, be32_to_cpu(ctx[j]));
}
/* Unpublish the CQ and drop the reference taken in mthca_init_cq(). */
spin_lock_irq(&dev->cq_table.lock);
mthca_array_clear(&dev->cq_table.cq,
cq->cqn & (dev->limits.num_cqs - 1));
--cq->refcount;
spin_unlock_irq(&dev->cq_table.lock);
/* Wait for interrupt handlers that may still be running. */
if (dev->mthca_flags & MTHCA_FLAG_MSI_X)
synchronize_irq(dev->eq_table.eq[MTHCA_EQ_COMP].msi_x_vector);
else
synchronize_irq(dev->pdev->irq);
/* mthca_cq_event() wakes cq->wait when it drops the last reference. */
wait_event(cq->wait, !get_cq_refcount(dev, cq));
if (cq->is_kernel) {
mthca_free_cq_buf(dev, &cq->buf, cq->ibcq.cqe);
if (mthca_is_memfree(dev)) {
mthca_free_db(dev, MTHCA_DB_TYPE_CQ_ARM, cq->arm_db_index);
mthca_free_db(dev, MTHCA_DB_TYPE_CQ_SET_CI, cq->set_ci_db_index);
}
}
mthca_table_put(dev, dev->cq_table.table, cq->cqn);
mthca_free(&dev->cq_table.alloc, cq->cqn);
mthca_free_mailbox(dev, mailbox);
}
/*
 * Set up the per-device CQ table: its lock, the CQ number allocator
 * (sized from dev->limits, skipping the reserved CQs) and the array
 * that maps CQ numbers to CQ objects.
 *
 * Returns 0 on success or the error from the failing initialiser; the
 * allocator is cleaned up again if the array cannot be initialised.
 */
int mthca_init_cq_table(struct mthca_dev *dev)
{
	int rc;

	spin_lock_init(&dev->cq_table.lock);

	rc = mthca_alloc_init(&dev->cq_table.alloc,
			      dev->limits.num_cqs,
			      (1 << 24) - 1,
			      dev->limits.reserved_cqs);
	if (rc != 0)
		return rc;

	rc = mthca_array_init(&dev->cq_table.cq, dev->limits.num_cqs);
	if (rc == 0)
		return 0;

	mthca_alloc_cleanup(&dev->cq_table.alloc);
	return rc;
}
/*
 * Undo mthca_init_cq_table(): release the CQ array first, then the CQ
 * number allocator.
 */
void mthca_cleanup_cq_table(struct mthca_dev *dev)
{
	int ncqs = dev->limits.num_cqs;

	mthca_array_cleanup(&dev->cq_table.cq, ncqs);
	mthca_alloc_cleanup(&dev->cq_table.alloc);
}

599
sys/dev/mthca/mthca_dev.h Normal file
View File

@ -0,0 +1,599 @@
/*
* Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
* Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2005, 2006 Cisco Systems. All rights reserved.
* Copyright (c) 2005 Mellanox Technologies. All rights reserved.
* Copyright (c) 2004 Voltaire, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef MTHCA_DEV_H
#define MTHCA_DEV_H
#include <linux/spinlock.h>
#include <linux/kernel.h>
#include <linux/pci.h>
#include <linux/dma-mapping.h>
#include <linux/timer.h>
#include <linux/mutex.h>
#include <linux/list.h>
#include <linux/semaphore.h>
#include "mthca_provider.h"
#include "mthca_doorbell.h"
/* Driver identification strings. */
#define DRV_NAME "ib_mthca"
/* Message prefix built from the driver name. */
#define PFX DRV_NAME ": "
/* DRV_VERSION may be supplied by the build; this is only the fallback. */
#ifndef DRV_VERSION
#define DRV_VERSION "1.0"
#endif
#define DRV_RELDATE "April 4, 2008"
/*
 * Single-bit device flags.  The CQ code tests MTHCA_FLAG_MSI_X against
 * dev->mthca_flags; presumably the others live in the same word --
 * confirm at their points of use.  Bit 0 is not assigned here.
 */
enum {
MTHCA_FLAG_DDR_HIDDEN = 1 << 1,
MTHCA_FLAG_SRQ = 1 << 2,
MTHCA_FLAG_MSI_X = 1 << 3,
MTHCA_FLAG_NO_LAM = 1 << 4,
MTHCA_FLAG_FMR = 1 << 5,
MTHCA_FLAG_MEMFREE = 1 << 6,
MTHCA_FLAG_PCIE = 1 << 7,
MTHCA_FLAG_SINAI_OPT = 1 << 8
};
/* Upper bound on the number of ports (going by the name). */
enum {
MTHCA_MAX_PORTS = 2
};
/* Size of the board_id character array declared in mthca_cmd.h. */
enum {
MTHCA_BOARD_ID_LEN = 64
};
/*
 * Sizes, in bytes, of hardware context and table entries (going by the
 * names).  MTHCA_CQ_CONTEXT_SIZE matches the packed size of struct
 * mthca_cq_context in mthca_cq.c.
 */
enum {
MTHCA_EQ_CONTEXT_SIZE = 0x40,
MTHCA_CQ_CONTEXT_SIZE = 0x40,
MTHCA_QP_CONTEXT_SIZE = 0x200,
MTHCA_RDB_ENTRY_SIZE = 0x20,
MTHCA_AV_SIZE = 0x20,
MTHCA_MGM_ENTRY_SIZE = 0x100,
/* Arbel FW gives us these, but we need them for Tavor */
MTHCA_MPT_ENTRY_SIZE = 0x40,
MTHCA_MTT_SEG_SIZE = 0x40,
/* Evaluates to 56 with the entry size above. */
MTHCA_QP_PER_MGM = 4 * (MTHCA_MGM_ENTRY_SIZE / 16 - 2)
};
/*
 * Indices into mthca_eq_table.eq[]; MTHCA_NUM_EQ is the array size.
 * The CQ code programs the ASYNC and COMP event queue numbers into
 * every CQ context.
 */
enum {
MTHCA_EQ_CMD,
MTHCA_EQ_ASYNC,
MTHCA_EQ_COMP,
MTHCA_NUM_EQ
};
/*
 * Hardware opcodes.  mthca_poll_one() compares the opcode byte of send
 * CQEs against these values and stores MTHCA_OPCODE_INVALID in the work
 * completion for any opcode it does not recognise.
 */
enum {
MTHCA_OPCODE_NOP = 0x00,
MTHCA_OPCODE_RDMA_WRITE = 0x08,
MTHCA_OPCODE_RDMA_WRITE_IMM = 0x09,
MTHCA_OPCODE_SEND = 0x0a,
MTHCA_OPCODE_SEND_IMM = 0x0b,
MTHCA_OPCODE_RDMA_READ = 0x10,
MTHCA_OPCODE_ATOMIC_CS = 0x11,
MTHCA_OPCODE_ATOMIC_FA = 0x12,
MTHCA_OPCODE_BIND_MW = 0x18,
MTHCA_OPCODE_INVALID = 0xff
};
/* Single-bit flags; presumably stored in mthca_cmd.flags -- confirm at use. */
enum {
MTHCA_CMD_USE_EVENTS = 1 << 0,
MTHCA_CMD_POST_DOORBELLS = 1 << 1
};
/* Number of elements in mthca_cmd.dbell_offsets[]. */
enum {
MTHCA_CMD_NUM_DBELL_DWORDS = 8
};
/*
 * State of the firmware command interface.  The fields are used by the
 * command code, which is not visible from this header; the notes below
 * are inferred from names and types only and should be confirmed there.
 */
struct mthca_cmd {
struct pci_pool *pool; /* presumably backs the command mailboxes */
struct mutex hcr_mutex;
struct semaphore poll_sem;
struct semaphore event_sem;
int max_cmds;
spinlock_t context_lock;
int free_head;
struct mthca_cmd_context *context;
u16 token_mask;
u32 flags; /* presumably MTHCA_CMD_* bits */
void __iomem *dbell_map;
u16 dbell_offsets[MTHCA_CMD_NUM_DBELL_DWORDS];
};
/*
 * Device limits and capabilities; embedded in struct mthca_dev as the
 * "limits" member.  The values are filled in outside this header.
 */
struct mthca_limits {
int num_ports;
int vl_cap;
int mtu_cap;
int gid_table_len;
int pkey_table_len;
int local_ca_ack_delay;
int num_uars;
int max_sg;
int num_qps;
int max_wqes;
int max_desc_sz;
int max_qp_init_rdma;
int reserved_qps;
int num_srqs;
int max_srq_wqes;
int max_srq_sge;
int reserved_srqs;
int num_eecs;
int reserved_eecs;
int num_cqs;
int max_cqes;
int reserved_cqs;
int num_eqs;
int reserved_eqs;
int num_mpts;
int num_mtt_segs;
int mtt_seg_size;
int fmr_reserved_mtts;
int reserved_mtts;
int reserved_mrws;
int reserved_uars;
int num_mgms;
int num_amgms;
int reserved_mcgs;
int num_pds;
int reserved_pds;
u32 page_size_cap;
u32 flags;
u16 stat_rate_support;
u8 port_width_cap;
};
/*
 * Bookkeeping for the object-number allocator driven by mthca_alloc(),
 * mthca_free(), mthca_alloc_init() and mthca_alloc_cleanup() (declared
 * further down in this header).
 */
struct mthca_alloc {
u32 last;
u32 top;
u32 max;
u32 mask;
spinlock_t lock;
/* NOTE(review): presumably a bitmap of allocated numbers -- confirm in mthca_allocator.c. */
unsigned long *table;
};
/*
 * Container accessed through mthca_array_get()/mthca_array_set()/
 * mthca_array_clear(): a list of pages, each holding an array of
 * pointers plus a "used" counter.
 */
struct mthca_array {
struct {
void **page;
int used;
} *page_list;
};
/* UAR number allocator plus the base/size of the "uarc" region; see mthca_init_uar_table(). */
struct mthca_uar_table {
struct mthca_alloc alloc;
u64 uarc_base;
int uarc_size;
};
/* PD table: consists solely of a number allocator; see mthca_init_pd_table(). */
struct mthca_pd_table {
struct mthca_alloc alloc;
};
/*
 * Allocator state used by the MR table (mtt_buddy members below).
 * NOTE(review): by its name and shape (bits[], num_free[], max_order)
 * this looks like a buddy allocator with one bitmap and free count per
 * order -- confirm in mthca_mr.c.
 */
struct mthca_buddy {
unsigned long **bits;
int *num_free;
int max_order;
spinlock_t lock;
};
/*
 * Memory-region table state; embedded in struct mthca_dev as
 * "mr_table".  Holds the MPT number allocator, the MTT allocator(s),
 * the MTT/MPT base addresses and ICM table handles, and a separate set
 * of MMIO mappings and an allocator under "tavor_fmr".
 */
struct mthca_mr_table {
struct mthca_alloc mpt_alloc;
struct mthca_buddy mtt_buddy;
/* Pointer, not a copy: may refer to mtt_buddy or tavor_fmr.mtt_buddy (TODO confirm). */
struct mthca_buddy *fmr_mtt_buddy;
u64 mtt_base;
u64 mpt_base;
struct mthca_icm_table *mtt_table;
struct mthca_icm_table *mpt_table;
struct {
void __iomem *mpt_base;
void __iomem *mtt_base;
struct mthca_buddy mtt_buddy;
} tavor_fmr;
};
/*
 * Event-queue table; embedded in struct mthca_dev as "eq_table".
 */
struct mthca_eq_table {
/* EQ number allocator; mthca_create_eq() takes eqn values from here. */
struct mthca_alloc alloc;
/* The INTx handlers write clr_mask to clr_int when clr_mask is nonzero. */
void __iomem *clr_int;
u32 clr_mask;
/* OR of eqn_mask of all live EQs; updated by mthca_create_eq()/mthca_free_eq(). */
u32 arm_mask;
/* Indexed by MTHCA_EQ_CMD / MTHCA_EQ_ASYNC / MTHCA_EQ_COMP. */
struct mthca_eq eq[MTHCA_NUM_EQ];
u64 icm_virt;
struct page *icm_page;
dma_addr_t icm_dma;
/* Nonzero while the shared interrupt (dev->pdev->irq) is requested; see mthca_free_irqs(). */
int have_irq;
u8 inta_pin;
};
/* CQ table: number allocator, lock, CQ lookup array and ICM table handle. */
struct mthca_cq_table {
struct mthca_alloc alloc;
spinlock_t lock;
struct mthca_array cq;
struct mthca_icm_table *table;
};
/* SRQ table: same layout as the CQ table, with an SRQ lookup array. */
struct mthca_srq_table {
struct mthca_alloc alloc;
spinlock_t lock;
struct mthca_array srq;
struct mthca_icm_table *table;
};
/*
 * QP table: number allocator, RDB parameters, first special-QP number
 * (sqp_start), lock, QP lookup array and three ICM table handles.
 */
struct mthca_qp_table {
struct mthca_alloc alloc;
u32 rdb_base;
int rdb_shift;
int sqp_start;
spinlock_t lock;
struct mthca_array qp;
struct mthca_icm_table *qp_table;
struct mthca_icm_table *eqp_table;
struct mthca_icm_table *rdb_table;
};
/*
 * Address-vector table: a PCI pool, the count/base of AVs kept in HCA
 * DDR memory, an MMIO mapping (av_map) and a number allocator.
 */
struct mthca_av_table {
struct pci_pool *pool;
int num_ddr_avs;
u64 ddr_av_base;
void __iomem *av_map;
struct mthca_alloc alloc;
};
/* Multicast-group table: mutex, number allocator and ICM table handle. */
struct mthca_mcg_table {
struct mutex mutex;
struct mthca_alloc alloc;
struct mthca_icm_table *table;
};
/*
 * Catastrophic-error state: address/size and MMIO mapping of the error
 * buffer, a timer (see mthca_start_catas_poll()/mthca_stop_catas_poll())
 * and a list linkage.
 */
struct mthca_catas_err {
u64 addr;
u32 __iomem *map;
u32 size;
struct timer_list timer;
struct list_head list;
};
/* Driver-wide mutex; defined outside this header. */
extern struct mutex mthca_device_mutex;
/*
 * Per-HCA driver state.  ib_dev is embedded (not pointed to) so that
 * to_mdev() can recover the mthca_dev from a struct ib_device pointer
 * with container_of().
 */
struct mthca_dev {
struct ib_device ib_dev;
struct pci_dev *pdev;
int hca_type;
/* MTHCA_FLAG_* bits. */
unsigned long mthca_flags;
unsigned long device_cap_flags;
u32 rev_id;
char board_id[MTHCA_BOARD_ID_LEN];
/* firmware info */
u64 fw_ver;
/* Firmware data; one variant per HCA family (tavor / arbel). */
union {
struct {
u64 fw_start;
u64 fw_end;
} tavor;
struct {
u64 clr_int_base;
u64 eq_arm_base;
u64 eq_set_ci_base;
struct mthca_icm *fw_icm;
struct mthca_icm *aux_icm;
u16 fw_pages;
} arbel;
} fw;
u64 ddr_start;
u64 ddr_end;
/*
 * Expands to "spinlock_t doorbell_lock;" when BITS_PER_LONG != 64 and
 * to nothing on 64-bit builds (see mthca_doorbell.h); the macro
 * supplies its own semicolon, hence none here.
 */
MTHCA_DECLARE_DOORBELL_LOCK(doorbell_lock)
struct mutex cap_mask_mutex;
/* MMIO mappings. */
void __iomem *hcr;
void __iomem *kar;
void __iomem *clr_base;
/* EQ register mappings; one variant per HCA family. */
union {
struct {
void __iomem *ecr_base;
} tavor;
struct {
void __iomem *eq_arm;
void __iomem *eq_set_ci_base;
} arbel;
} eq_regs;
struct mthca_cmd cmd;
struct mthca_limits limits;
struct mthca_uar_table uar_table;
struct mthca_pd_table pd_table;
struct mthca_mr_table mr_table;
struct mthca_eq_table eq_table;
struct mthca_cq_table cq_table;
struct mthca_srq_table srq_table;
struct mthca_qp_table qp_table;
struct mthca_av_table av_table;
struct mthca_mcg_table mcg_table;
struct mthca_catas_err catas_err;
/* Resources owned by the driver itself (UAR, PD, MR). */
struct mthca_uar driver_uar;
struct mthca_db_table *db_tab;
struct mthca_pd driver_pd;
struct mthca_mr driver_mr;
/* Per-port MAD agents and SM address handles. */
struct ib_mad_agent *send_agent[MTHCA_MAX_PORTS][2];
struct ib_ah *sm_ah[MTHCA_MAX_PORTS];
spinlock_t sm_lock;
u8 rate[MTHCA_MAX_PORTS];
bool active;
};
/*
 * Logging helpers.  Each takes a struct mthca_dev pointer as first
 * argument and forwards the format and arguments to the matching
 * dev_*() call on the HCA's PCI device.
 *
 * The mdev argument is parenthesized in every expansion so that any
 * pointer-valued expression (not just a plain identifier) can be passed
 * without operator-precedence surprises.
 *
 * mthca_dbg() prints only when CONFIG_INFINIBAND_MTHCA_DEBUG is defined
 * and mthca_debug_level is nonzero; otherwise it merely evaluates mdev.
 */
#ifdef CONFIG_INFINIBAND_MTHCA_DEBUG
extern int mthca_debug_level;
#define mthca_dbg(mdev, format, arg...)					\
	do {								\
		if (mthca_debug_level)					\
			dev_printk(KERN_DEBUG, &(mdev)->pdev->dev,	\
				   format, ## arg);			\
	} while (0)
#else /* CONFIG_INFINIBAND_MTHCA_DEBUG */
#define mthca_dbg(mdev, format, arg...) do { (void) (mdev); } while (0)
#endif /* CONFIG_INFINIBAND_MTHCA_DEBUG */
#define mthca_err(mdev, format, arg...) \
	dev_err(&(mdev)->pdev->dev, format, ## arg)
#define mthca_info(mdev, format, arg...) \
	dev_info(&(mdev)->pdev->dev, format, ## arg)
#define mthca_warn(mdev, format, arg...) \
	dev_warn(&(mdev)->pdev->dev, format, ## arg)
/*
 * MTHCA_GET(dest, source, offset): load a big-endian field located
 * "offset" bytes into "source" and store it, converted to CPU byte
 * order, in the lvalue "dest".  The field width is sizeof(dest) and
 * must be 1, 2, 4 or 8.
 *
 * MTHCA_PUT(dest, source, offset): the reverse -- store "source",
 * converted to big-endian, "offset" bytes into "dest".  The field
 * width is sizeof(source).
 *
 * Any other size reaches a call to __buggy_use_of_MTHCA_GET() /
 * __buggy_use_of_MTHCA_PUT().  Only declarations of those functions are
 * visible here; presumably they are deliberately left undefined so that
 * a misuse fails at link time (TODO confirm).
 *
 * Note that "dest" (GET) and "source" (PUT) appear several times in the
 * expansion, so arguments with side effects should be avoided.
 */
extern void __buggy_use_of_MTHCA_GET(void);
extern void __buggy_use_of_MTHCA_PUT(void);
#define MTHCA_GET(dest, source, offset) \
do { \
void *__p = (char *) (source) + (offset); \
switch (sizeof (dest)) { \
case 1: (dest) = *(u8 *) __p; break; \
case 2: (dest) = be16_to_cpup(__p); break; \
case 4: (dest) = be32_to_cpup(__p); break; \
case 8: (dest) = be64_to_cpup(__p); break; \
default: __buggy_use_of_MTHCA_GET(); \
} \
} while (0)
#define MTHCA_PUT(dest, source, offset) \
do { \
void *__d = ((char *) (dest) + (offset)); \
switch (sizeof(source)) { \
case 1: *(u8 *) __d = (source); break; \
case 2: *(__be16 *) __d = cpu_to_be16(source); break; \
case 4: *(__be32 *) __d = cpu_to_be32(source); break; \
case 8: *(__be64 *) __d = cpu_to_be64(source); break; \
default: __buggy_use_of_MTHCA_PUT(); \
} \
} while (0)
/*
 * Prototypes for functions implemented in other mthca source files.
 */
/* Device reset. */
int mthca_reset(struct mthca_dev *mdev);
/*
 * Object-number allocator operating on struct mthca_alloc.
 * mthca_create_eq() treats a return value of -1 from mthca_alloc() as
 * failure.
 */
u32 mthca_alloc(struct mthca_alloc *alloc);
void mthca_free(struct mthca_alloc *alloc, u32 obj);
int mthca_alloc_init(struct mthca_alloc *alloc, u32 num, u32 mask,
u32 reserved);
void mthca_alloc_cleanup(struct mthca_alloc *alloc);
/* Index -> pointer container operating on struct mthca_array. */
void *mthca_array_get(struct mthca_array *array, int index);
int mthca_array_set(struct mthca_array *array, int index, void *value);
void mthca_array_clear(struct mthca_array *array, int index);
int mthca_array_init(struct mthca_array *array, int nent);
void mthca_array_cleanup(struct mthca_array *array, int nent);
/* Buffer allocation/release for union mthca_buf, with an associated MR. */
int mthca_buf_alloc(struct mthca_dev *dev, int size, int max_direct,
union mthca_buf *buf, int *is_direct, struct mthca_pd *pd,
int hca_write, struct mthca_mr *mr);
void mthca_buf_free(struct mthca_dev *dev, int size, union mthca_buf *buf,
int is_direct, struct mthca_mr *mr);
/*
 * Per-table setup and teardown.  Each mthca_init_*_table() has a
 * matching mthca_cleanup_*_table(); the tables are the *_table members
 * of struct mthca_dev.
 */
int mthca_init_uar_table(struct mthca_dev *dev);
int mthca_init_pd_table(struct mthca_dev *dev);
int mthca_init_mr_table(struct mthca_dev *dev);
int mthca_init_eq_table(struct mthca_dev *dev);
int mthca_init_cq_table(struct mthca_dev *dev);
int mthca_init_srq_table(struct mthca_dev *dev);
int mthca_init_qp_table(struct mthca_dev *dev);
int mthca_init_av_table(struct mthca_dev *dev);
int mthca_init_mcg_table(struct mthca_dev *dev);
void mthca_cleanup_uar_table(struct mthca_dev *dev);
void mthca_cleanup_pd_table(struct mthca_dev *dev);
void mthca_cleanup_mr_table(struct mthca_dev *dev);
void mthca_cleanup_eq_table(struct mthca_dev *dev);
void mthca_cleanup_cq_table(struct mthca_dev *dev);
void mthca_cleanup_srq_table(struct mthca_dev *dev);
void mthca_cleanup_qp_table(struct mthca_dev *dev);
void mthca_cleanup_av_table(struct mthca_dev *dev);
void mthca_cleanup_mcg_table(struct mthca_dev *dev);
/* Registration of the device (implemented outside this header). */
int mthca_register_device(struct mthca_dev *dev);
void mthca_unregister_device(struct mthca_dev *dev);
/* Catastrophic-error polling and device restart. */
void mthca_start_catas_poll(struct mthca_dev *dev);
void mthca_stop_catas_poll(struct mthca_dev *dev);
int __mthca_restart_one(struct pci_dev *pdev);
int mthca_catas_init(void);
void mthca_catas_cleanup(void);
/* UAR and PD allocation. */
int mthca_uar_alloc(struct mthca_dev *dev, struct mthca_uar *uar);
void mthca_uar_free(struct mthca_dev *dev, struct mthca_uar *uar);
int mthca_pd_alloc(struct mthca_dev *dev, int privileged, struct mthca_pd *pd);
void mthca_pd_free(struct mthca_dev *dev, struct mthca_pd *pd);
/* MTT management. */
int mthca_write_mtt_size(struct mthca_dev *dev);
struct mthca_mtt *mthca_alloc_mtt(struct mthca_dev *dev, int size);
void mthca_free_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt);
int mthca_write_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt,
int start_index, u64 *buffer_list, int list_len);
/*
 * Memory-region allocation variants and release.
 * mthca_mr_alloc_phys() is what mthca_create_eq() uses to register the
 * EQ pages from a list of DMA addresses.
 */
int mthca_mr_alloc(struct mthca_dev *dev, u32 pd, int buffer_size_shift,
u64 iova, u64 total_size, u32 access, struct mthca_mr *mr);
int mthca_mr_alloc_notrans(struct mthca_dev *dev, u32 pd,
u32 access, struct mthca_mr *mr);
int mthca_mr_alloc_phys(struct mthca_dev *dev, u32 pd,
u64 *buffer_list, int buffer_size_shift,
int list_len, u64 iova, u64 total_size,
u32 access, struct mthca_mr *mr);
void mthca_free_mr(struct mthca_dev *dev, struct mthca_mr *mr);
/* FMR allocation, per-family (tavor/arbel) map and unmap, and release. */
int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd,
u32 access, struct mthca_fmr *fmr);
int mthca_tavor_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
int list_len, u64 iova);
void mthca_tavor_fmr_unmap(struct mthca_dev *dev, struct mthca_fmr *fmr);
int mthca_arbel_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
int list_len, u64 iova);
void mthca_arbel_fmr_unmap(struct mthca_dev *dev, struct mthca_fmr *fmr);
int mthca_free_fmr(struct mthca_dev *dev, struct mthca_fmr *fmr);
/* Mapping of the ICM used by the EQ table (see icm_* in struct mthca_eq_table). */
int mthca_map_eq_icm(struct mthca_dev *dev, u64 icm_virt);
void mthca_unmap_eq_icm(struct mthca_dev *dev);
/*
 * CQ entry points.  mthca_cq_completion() and mthca_cq_event() are
 * called from the EQ handler (mthca_eq_int()) with the CQ number taken
 * from the event queue entry.
 */
int mthca_poll_cq(struct ib_cq *ibcq, int num_entries,
struct ib_wc *entry);
int mthca_tavor_arm_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags);
int mthca_arbel_arm_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags);
int mthca_init_cq(struct mthca_dev *dev, int nent,
struct mthca_ucontext *ctx, u32 pdn,
struct mthca_cq *cq);
void mthca_free_cq(struct mthca_dev *dev,
struct mthca_cq *cq);
void mthca_cq_completion(struct mthca_dev *dev, u32 cqn);
void mthca_cq_event(struct mthca_dev *dev, u32 cqn,
enum ib_event_type event_type);
void mthca_cq_clean(struct mthca_dev *dev, struct mthca_cq *cq, u32 qpn,
struct mthca_srq *srq);
void mthca_cq_resize_copy_cqes(struct mthca_cq *cq);
int mthca_alloc_cq_buf(struct mthca_dev *dev, struct mthca_cq_buf *buf, int nent);
void mthca_free_cq_buf(struct mthca_dev *dev, struct mthca_cq_buf *buf, int cqe);
/*
 * SRQ entry points.  mthca_srq_event() is called from the EQ handler
 * (mthca_eq_int()) with the SRQ number taken from the event queue entry.
 */
int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd,
struct ib_srq_attr *attr, struct mthca_srq *srq);
void mthca_free_srq(struct mthca_dev *dev, struct mthca_srq *srq);
int mthca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
enum ib_srq_attr_mask attr_mask, struct ib_udata *udata);
int mthca_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr);
int mthca_max_srq_sge(struct mthca_dev *dev);
void mthca_srq_event(struct mthca_dev *dev, u32 srqn,
enum ib_event_type event_type);
void mthca_free_srq_wqe(struct mthca_srq *srq, u32 wqe_addr);
/* Receive posting, one implementation per HCA family. */
int mthca_tavor_post_srq_recv(struct ib_srq *srq, struct ib_recv_wr *wr,
struct ib_recv_wr **bad_wr);
int mthca_arbel_post_srq_recv(struct ib_srq *srq, struct ib_recv_wr *wr,
struct ib_recv_wr **bad_wr);
/*
 * QP entry points.  mthca_qp_event() is called from the EQ handler
 * (mthca_eq_int()) with the QP number taken from the event queue entry.
 */
void mthca_qp_event(struct mthca_dev *dev, u32 qpn,
enum ib_event_type event_type);
int mthca_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask,
struct ib_qp_init_attr *qp_init_attr);
int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
struct ib_udata *udata);
/* Send/receive posting, one implementation per HCA family. */
int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
struct ib_send_wr **bad_wr);
int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
struct ib_recv_wr **bad_wr);
int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
struct ib_send_wr **bad_wr);
int mthca_arbel_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
struct ib_recv_wr **bad_wr);
void mthca_free_err_wqe(struct mthca_dev *dev, struct mthca_qp *qp, int is_send,
int index, int *dbd, __be32 *new_wqe);
/* Allocation of regular QPs and of special QPs (fixed qpn and port), and release. */
int mthca_alloc_qp(struct mthca_dev *dev,
struct mthca_pd *pd,
struct mthca_cq *send_cq,
struct mthca_cq *recv_cq,
enum ib_qp_type type,
enum ib_sig_type send_policy,
struct ib_qp_cap *cap,
struct mthca_qp *qp);
int mthca_alloc_sqp(struct mthca_dev *dev,
struct mthca_pd *pd,
struct mthca_cq *send_cq,
struct mthca_cq *recv_cq,
enum ib_sig_type send_policy,
struct ib_qp_cap *cap,
int qpn,
int port,
struct mthca_sqp *sqp);
void mthca_free_qp(struct mthca_dev *dev, struct mthca_qp *qp);
/* Address-handle (AH) entry points and rate conversion helpers. */
int mthca_create_ah(struct mthca_dev *dev,
struct mthca_pd *pd,
struct ib_ah_attr *ah_attr,
struct mthca_ah *ah);
int mthca_destroy_ah(struct mthca_dev *dev, struct mthca_ah *ah);
int mthca_read_ah(struct mthca_dev *dev, struct mthca_ah *ah,
struct ib_ud_header *header);
int mthca_ah_query(struct ib_ah *ibah, struct ib_ah_attr *attr);
int mthca_ah_grh_present(struct mthca_ah *ah);
u8 mthca_get_rate(struct mthca_dev *dev, int static_rate, u8 port);
enum ib_rate mthca_rate_to_ib(struct mthca_dev *dev, u8 mthca_rate, u8 port);
/* Multicast attach/detach of a QP for a given GID/LID. */
int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid);
int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid);
/* MAD processing and creation/release of the per-port MAD agents. */
int mthca_process_mad(struct ib_device *ibdev,
int mad_flags,
u8 port_num,
const struct ib_wc *in_wc,
const struct ib_grh *in_grh,
const struct ib_mad_hdr *in, size_t in_mad_size,
struct ib_mad_hdr *out, size_t *out_mad_size,
u16 *out_mad_pkey_index);
int mthca_create_agents(struct mthca_dev *dev);
void mthca_free_agents(struct mthca_dev *dev);
/*
 * Map a struct ib_device pointer back to the struct mthca_dev that
 * embeds it as its "ib_dev" member.
 */
static inline struct mthca_dev *to_mdev(struct ib_device *ibdev)
{
	struct mthca_dev *mdev;

	mdev = container_of(ibdev, struct mthca_dev, ib_dev);
	return mdev;
}
/*
 * Return nonzero when MTHCA_FLAG_MEMFREE is set in dev->mthca_flags,
 * zero otherwise.  The nonzero value is the flag bit itself, not 1.
 */
static inline int mthca_is_memfree(struct mthca_dev *dev)
{
	unsigned long memfree_bit = dev->mthca_flags & MTHCA_FLAG_MEMFREE;

	return memfree_bit;
}
#endif /* MTHCA_DEV_H */

View File

@ -0,0 +1,109 @@
/*
* Copyright (c) 2004 Topspin Communications. All rights reserved.
* Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2005 Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/types.h>
/*
 * Doorbell register offsets.  MTHCA_EQ_DOORBELL is added to dev->kar by
 * the EQ code when ringing a doorbell; the other offsets are presumably
 * used the same way by the corresponding queue code (TODO confirm).
 */
#define MTHCA_RD_DOORBELL 0x00
#define MTHCA_SEND_DOORBELL 0x10
#define MTHCA_RECEIVE_DOORBELL 0x18
#define MTHCA_CQ_DOORBELL 0x20
#define MTHCA_EQ_DOORBELL 0x28
#if BITS_PER_LONG == 64
/*
* Assume that we can just write a 64-bit doorbell atomically. s390
* actually doesn't have writeq() but S/390 systems don't even have
* PCI so we won't worry about it.
*/
/*
 * 64-bit build: no doorbell lock exists.  DECLARE expands to nothing,
 * INIT is a no-op, and GET yields NULL without evaluating its argument
 * (so the argument may name a member that is not declared).
 */
#define MTHCA_DECLARE_DOORBELL_LOCK(name)
#define MTHCA_INIT_DOORBELL_LOCK(ptr) do { } while (0)
#define MTHCA_GET_DOORBELL_LOCK(ptr) (NULL)
/*
 * Write an already big-endian 64-bit value to the MMIO address "dest"
 * with a single raw (non-swapping) 64-bit store.
 */
static inline void mthca_write64_raw(__be64 val, void __iomem *dest)
{
	u64 raw_bits = (__force u64) val;

	__raw_writeq(raw_bits, dest);
}
/*
 * Ring a 64-bit doorbell: combine "hi" (upper 32 bits) and "lo" (lower
 * 32 bits), convert the result to big-endian and issue one raw 64-bit
 * MMIO write to "dest".
 *
 * doorbell_lock is not used in this 64-bit variant; it exists only to
 * keep the signature identical to the 32-bit variant.
 */
static inline void mthca_write64(u32 hi, u32 lo, void __iomem *dest,
				 spinlock_t *doorbell_lock)
{
	u64 combined = ((u64) hi << 32) | lo;
	__be64 wire = cpu_to_be64(combined);

	__raw_writeq((__force u64) wire, dest);
}
/*
 * Copy the two 32-bit words of "val" into the doorbell record at "db"
 * as one 64-bit access.
 */
static inline void mthca_write_db_rec(__be32 val[2], __be32 *db)
{
	u64 *record = (u64 *) db;
	u64 *source = (u64 *) val;

	*record = *source;
}
#else
/*
* Just fall back to a spinlock to protect the doorbell if
* BITS_PER_LONG is 32 -- there's no portable way to do atomic 64-bit
* MMIO writes.
*/
/*
 * 32-bit build: a real spinlock.  DECLARE emits the member together
 * with its terminating semicolon, INIT initialises it, and GET passes
 * the pointer through unchanged.
 */
#define MTHCA_DECLARE_DOORBELL_LOCK(name) spinlock_t name;
#define MTHCA_INIT_DOORBELL_LOCK(ptr) spin_lock_init(ptr)
#define MTHCA_GET_DOORBELL_LOCK(ptr) (ptr)
/*
 * Write an already big-endian 64-bit value to "dest" as two raw 32-bit
 * MMIO stores: the first 32-bit word in memory goes to dest, the second
 * to dest + 4.  No locking is done here.
 */
static inline void mthca_write64_raw(__be64 val, void __iomem *dest)
{
	u32 *halves = (__force u32 *) &val;

	__raw_writel(halves[0], dest);
	__raw_writel(halves[1], dest + 4);
}
/*
 * Ring a 64-bit doorbell on a 32-bit build: convert "hi" and "lo" to
 * big-endian and write them to dest and dest + 4 respectively.  The two
 * stores are performed with doorbell_lock held and interrupts disabled
 * so that they cannot interleave with another doorbell write.
 */
static inline void mthca_write64(u32 hi, u32 lo, void __iomem *dest,
				 spinlock_t *doorbell_lock)
{
	unsigned long irq_state;
	u32 be_hi = (__force u32) cpu_to_be32(hi);
	u32 be_lo = (__force u32) cpu_to_be32(lo);

	spin_lock_irqsave(doorbell_lock, irq_state);
	__raw_writel(be_hi, dest);
	__raw_writel(be_lo, dest + 4);
	spin_unlock_irqrestore(doorbell_lock, irq_state);
}
/*
 * Copy the two 32-bit words of "val" into the doorbell record at "db".
 * A write barrier separates the stores so that the first word is
 * written before the second.
 */
static inline void mthca_write_db_rec(__be32 val[2], __be32 *db)
{
	*db = *val;
	wmb();
	*(db + 1) = *(val + 1);
}
#endif

905
sys/dev/mthca/mthca_eq.c Normal file
View File

@ -0,0 +1,905 @@
/*
* Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
* Copyright (c) 2005 Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/pci.h>
#include <linux/slab.h>
#include "mthca_dev.h"
#include "mthca_cmd.h"
#include "mthca_config_reg.h"
/*
 * EQ sizing constants.  MTHCA_EQ_ENTRY_SIZE is the size in bytes of one
 * event queue entry (used by get_eqe() and for the page count).
 * MTHCA_NUM_SPARE_EQE is also the number of processed entries after
 * which mthca_eq_int() updates the consumer index on its own.
 */
enum {
MTHCA_NUM_ASYNC_EQE = 0x80,
MTHCA_NUM_CMD_EQE = 0x80,
MTHCA_NUM_SPARE_EQE = 0x80,
MTHCA_EQ_ENTRY_SIZE = 0x20
};
/*
 * EQ context as handed to the HCA: mthca_create_eq() fills one in
 * inside a command mailbox and passes it to mthca_SW2HW_EQ().
 * Multi-byte fields are big-endian.
 *
 * Must be packed because start is 64 bits but only aligned to 32 bits.
 */
struct mthca_eq_context {
__be32 flags;
__be64 start;
__be32 logsize_usrpage;
__be32 tavor_pd; /* reserved for Arbel */
u8 reserved1[3];
u8 intr;
__be32 arbel_pd; /* lost_count for Tavor */
__be32 lkey;
u32 reserved2[2];
__be32 consumer_index;
__be32 producer_index;
u32 reserved3[4];
} __attribute__((packed));
/*
 * Values OR-ed together (in CPU byte order, then converted with
 * cpu_to_be32()) to form mthca_eq_context.flags; see mthca_create_eq().
 */
#define MTHCA_EQ_STATUS_OK ( 0 << 28)
#define MTHCA_EQ_STATUS_OVERFLOW ( 9 << 28)
#define MTHCA_EQ_STATUS_WRITE_FAIL (10 << 28)
#define MTHCA_EQ_OWNER_SW ( 0 << 24)
#define MTHCA_EQ_OWNER_HW ( 1 << 24)
#define MTHCA_EQ_FLAG_TR ( 1 << 18)
#define MTHCA_EQ_FLAG_OI ( 1 << 17)
#define MTHCA_EQ_STATE_ARMED ( 1 << 8)
#define MTHCA_EQ_STATE_FIRED ( 2 << 8)
#define MTHCA_EQ_STATE_ALWAYS_ARMED ( 3 << 8)
#define MTHCA_EQ_STATE_ARBEL ( 8 << 8)
/*
 * Values of mthca_eqe.type, dispatched on by mthca_eq_int().  The same
 * numbers are used as bit positions in the event masks defined below,
 * so every value must stay below 64.  The enumerators are not listed in
 * numeric order.
 */
enum {
MTHCA_EVENT_TYPE_COMP = 0x00,
MTHCA_EVENT_TYPE_PATH_MIG = 0x01,
MTHCA_EVENT_TYPE_COMM_EST = 0x02,
MTHCA_EVENT_TYPE_SQ_DRAINED = 0x03,
MTHCA_EVENT_TYPE_SRQ_QP_LAST_WQE = 0x13,
MTHCA_EVENT_TYPE_SRQ_LIMIT = 0x14,
MTHCA_EVENT_TYPE_CQ_ERROR = 0x04,
MTHCA_EVENT_TYPE_WQ_CATAS_ERROR = 0x05,
MTHCA_EVENT_TYPE_EEC_CATAS_ERROR = 0x06,
MTHCA_EVENT_TYPE_PATH_MIG_FAILED = 0x07,
MTHCA_EVENT_TYPE_WQ_INVAL_REQ_ERROR = 0x10,
MTHCA_EVENT_TYPE_WQ_ACCESS_ERROR = 0x11,
MTHCA_EVENT_TYPE_SRQ_CATAS_ERROR = 0x12,
MTHCA_EVENT_TYPE_LOCAL_CATAS_ERROR = 0x08,
MTHCA_EVENT_TYPE_PORT_CHANGE = 0x09,
MTHCA_EVENT_TYPE_EQ_OVERFLOW = 0x0f,
MTHCA_EVENT_TYPE_ECC_DETECT = 0x0e,
MTHCA_EVENT_TYPE_CMD = 0x0a
};
/*
 * 64-bit event masks with one bit per MTHCA_EVENT_TYPE_* value.
 * async_mask() returns MTHCA_ASYNC_EVENT_MASK, with MTHCA_SRQ_EVENT_MASK
 * OR-ed in when the device has MTHCA_FLAG_SRQ set.
 * MTHCA_CMD_EVENT_MASK selects only the command-completion event.
 */
#define MTHCA_ASYNC_EVENT_MASK ((1ULL << MTHCA_EVENT_TYPE_PATH_MIG) | \
(1ULL << MTHCA_EVENT_TYPE_COMM_EST) | \
(1ULL << MTHCA_EVENT_TYPE_SQ_DRAINED) | \
(1ULL << MTHCA_EVENT_TYPE_CQ_ERROR) | \
(1ULL << MTHCA_EVENT_TYPE_WQ_CATAS_ERROR) | \
(1ULL << MTHCA_EVENT_TYPE_EEC_CATAS_ERROR) | \
(1ULL << MTHCA_EVENT_TYPE_PATH_MIG_FAILED) | \
(1ULL << MTHCA_EVENT_TYPE_WQ_INVAL_REQ_ERROR) | \
(1ULL << MTHCA_EVENT_TYPE_WQ_ACCESS_ERROR) | \
(1ULL << MTHCA_EVENT_TYPE_LOCAL_CATAS_ERROR) | \
(1ULL << MTHCA_EVENT_TYPE_PORT_CHANGE) | \
(1ULL << MTHCA_EVENT_TYPE_ECC_DETECT))
#define MTHCA_SRQ_EVENT_MASK ((1ULL << MTHCA_EVENT_TYPE_SRQ_CATAS_ERROR) | \
(1ULL << MTHCA_EVENT_TYPE_SRQ_QP_LAST_WQE) | \
(1ULL << MTHCA_EVENT_TYPE_SRQ_LIMIT))
#define MTHCA_CMD_EVENT_MASK (1ULL << MTHCA_EVENT_TYPE_CMD)
/*
 * EQ doorbell commands.  The callers in this file OR one of these with
 * the EQ number to form the high word passed to mthca_write64()
 * (SET_CI in tavor_set_eq_ci(), REQ_NOT in tavor_eq_req_not(),
 * DISARM_CQ in disarm_cq()).
 */
#define MTHCA_EQ_DB_INC_CI (1 << 24)
#define MTHCA_EQ_DB_REQ_NOT (2 << 24)
#define MTHCA_EQ_DB_DISARM_CQ (3 << 24)
#define MTHCA_EQ_DB_SET_CI (4 << 24)
#define MTHCA_EQ_DB_ALWAYS_ARM (5 << 24)
/*
 * One event queue entry as read by mthca_eq_int().  "type" selects
 * which member of the "event" union is meaningful; multi-byte fields
 * are big-endian.  With 4 header bytes, a 24-byte union and 4 trailer
 * bytes the packed size is 32 bytes, matching MTHCA_EQ_ENTRY_SIZE.
 */
struct mthca_eqe {
u8 reserved1;
u8 type;
u8 reserved2;
u8 subtype;
union {
u32 raw[6];
/* MTHCA_EVENT_TYPE_COMP */
struct {
__be32 cqn;
} __attribute__((packed)) comp;
/* MTHCA_EVENT_TYPE_CMD */
struct {
u16 reserved1;
__be16 token;
u32 reserved2;
u8 reserved3[3];
u8 status;
__be64 out_param;
} __attribute__((packed)) cmd;
/* QP events; only the low 24 bits of qpn are used by mthca_eq_int(). */
struct {
__be32 qpn;
} __attribute__((packed)) qp;
/* SRQ events; only the low 24 bits of srqn are used by mthca_eq_int(). */
struct {
__be32 srqn;
} __attribute__((packed)) srq;
/* MTHCA_EVENT_TYPE_CQ_ERROR; syndrome 1 is reported as "overrun". */
struct {
__be32 cqn;
u32 reserved1;
u8 reserved2[3];
u8 syndrome;
} __attribute__((packed)) cq_err;
/* MTHCA_EVENT_TYPE_PORT_CHANGE; the port number is taken from bits 29:28. */
struct {
u32 reserved1[2];
__be32 port;
} __attribute__((packed)) port_change;
} event;
u8 reserved3[3];
/* Ownership byte; bit 7 set means the entry belongs to the hardware. */
u8 owner;
} __attribute__((packed));
/* Values for mthca_eqe.owner (bit 7). */
#define MTHCA_EQ_ENTRY_OWNER_SW (0 << 7)
#define MTHCA_EQ_ENTRY_OWNER_HW (1 << 7)
/*
 * Build the asynchronous event mask for a device: always
 * MTHCA_ASYNC_EVENT_MASK, plus MTHCA_SRQ_EVENT_MASK when the device has
 * MTHCA_FLAG_SRQ set.
 */
static inline u64 async_mask(struct mthca_dev *dev)
{
	u64 mask = MTHCA_ASYNC_EVENT_MASK;

	if (dev->mthca_flags & MTHCA_FLAG_SRQ)
		mask |= MTHCA_SRQ_EVENT_MASK;

	return mask;
}
/*
 * Tavor: publish consumer index "ci" (reduced modulo the queue size)
 * for "eq" through the EQ doorbell.
 */
static inline void tavor_set_eq_ci(struct mthca_dev *dev, struct mthca_eq *eq, u32 ci)
{
	u32 db_hi;
	u32 db_lo;

	/*
	 * The ownership bits written by set_eqe_hw() must reach memory
	 * before the consumer index is updated: once the index moves
	 * the HCA may write new EQ entries, and we must not let a late
	 * set_eqe_hw() store clobber the owner field of such an entry,
	 * however unlikely that is.
	 */
	wmb();

	db_hi = MTHCA_EQ_DB_SET_CI | eq->eqn;
	db_lo = ci & (eq->nent - 1);
	mthca_write64(db_hi, db_lo,
		      dev->kar + MTHCA_EQ_DOORBELL,
		      MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
}
/*
 * Arbel: publish consumer index "ci" for "eq" by writing it, in
 * big-endian form, to this EQ's slot (8 bytes per EQ) in the
 * set-CI register block.
 */
static inline void arbel_set_eq_ci(struct mthca_dev *dev, struct mthca_eq *eq, u32 ci)
{
	void __iomem *ci_reg;

	/* Same ordering requirement as in tavor_set_eq_ci(). */
	wmb();

	ci_reg = dev->eq_regs.arbel.eq_set_ci_base + eq->eqn * 8;
	__raw_writel((__force u32) cpu_to_be32(ci), ci_reg);

	/* The raw write does no byte swapping but also no ordering; add it back. */
	mb();
}
/*
 * Publish the consumer index using the method that matches the HCA
 * family: Arbel (mem-free) or Tavor.
 */
static inline void set_eq_ci(struct mthca_dev *dev, struct mthca_eq *eq, u32 ci)
{
	if (!mthca_is_memfree(dev)) {
		tavor_set_eq_ci(dev, eq, ci);
		return;
	}

	arbel_set_eq_ci(dev, eq, ci);
}
static inline void tavor_eq_req_not(struct mthca_dev *dev, int eqn)
{
mthca_write64(MTHCA_EQ_DB_REQ_NOT | eqn, 0,
dev->kar + MTHCA_EQ_DOORBELL,
MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
}
/* Arbel: write "eqn_mask" to the EQ arm register. */
static inline void arbel_eq_req_not(struct mthca_dev *dev, u32 eqn_mask)
{
	void __iomem *arm_reg = dev->eq_regs.arbel.eq_arm;

	writel(eqn_mask, arm_reg);
}
/*
 * Ring the EQ doorbell with the DISARM_CQ command for CQ "cqn" on EQ
 * "eqn".  Nothing is done on mem-free (Arbel) devices.
 */
static inline void disarm_cq(struct mthca_dev *dev, int eqn, int cqn)
{
	if (mthca_is_memfree(dev))
		return;

	mthca_write64(MTHCA_EQ_DB_DISARM_CQ | eqn, cqn,
		      dev->kar + MTHCA_EQ_DOORBELL,
		      MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
}
/*
 * Return the address of entry number "entry" (taken modulo eq->nent,
 * which is a power of two) inside the page-wise allocated EQ buffer.
 */
static inline struct mthca_eqe *get_eqe(struct mthca_eq *eq, u32 entry)
{
	unsigned long byte_off = (entry & (eq->nent - 1)) * MTHCA_EQ_ENTRY_SIZE;
	unsigned long page_idx = byte_off / PAGE_SIZE;
	unsigned long in_page = byte_off % PAGE_SIZE;

	return eq->page_list[page_idx].buf + in_page;
}
/*
 * Return the entry at the current consumer index if software owns it,
 * or NULL if it is still owned by the hardware.
 */
static inline struct mthca_eqe *next_eqe_sw(struct mthca_eq *eq)
{
	struct mthca_eqe *candidate = get_eqe(eq, eq->cons_index);

	if (candidate->owner & MTHCA_EQ_ENTRY_OWNER_HW)
		return NULL;

	return candidate;
}
static inline void set_eqe_hw(struct mthca_eqe *eqe)
{
eqe->owner = MTHCA_EQ_ENTRY_OWNER_HW;
}
/*
 * Report a port state change: dispatch IB_EVENT_PORT_ACTIVE when
 * "active" is nonzero and IB_EVENT_PORT_ERR otherwise, for port "port"
 * of the device's ib_device.
 */
static void port_change(struct mthca_dev *dev, int port, int active)
{
	struct ib_event record;

	mthca_dbg(dev, "Port change to %s for port %d\n",
		  active ? "active" : "down", port);

	if (active)
		record.event = IB_EVENT_PORT_ACTIVE;
	else
		record.event = IB_EVENT_PORT_ERR;
	record.element.port_num = port;
	record.device = &dev->ib_dev;

	ib_dispatch_event(&record);
}
/*
 * Drain an event queue: process every entry currently owned by
 * software, dispatch it according to its type, hand the entry back to
 * the hardware and advance eq->cons_index.
 *
 * Returns 1 if at least one entry was processed, 0 otherwise.
 *
 * The consumer index is only written to the HCA from here after every
 * MTHCA_NUM_SPARE_EQE processed entries; the final update is left to
 * the caller.
 *
 * Queue numbers taken from an entry are masked to their low 24 bits
 * before being passed on; this includes the CQ number of a CQ error
 * event, so that mthca_cq_event() receives the same value that is
 * printed in the warning.
 */
static int mthca_eq_int(struct mthca_dev *dev, struct mthca_eq *eq)
{
	struct mthca_eqe *eqe;
	int disarm_cqn;
	int eqes_found = 0;
	int set_ci = 0;

	while ((eqe = next_eqe_sw(eq))) {
		/*
		 * Make sure we read EQ entry contents after we've
		 * checked the ownership bit.
		 */
		rmb();

		switch (eqe->type) {
		case MTHCA_EVENT_TYPE_COMP:
			disarm_cqn = be32_to_cpu(eqe->event.comp.cqn) & 0xffffff;
			disarm_cq(dev, eq->eqn, disarm_cqn);
			mthca_cq_completion(dev, disarm_cqn);
			break;

		case MTHCA_EVENT_TYPE_PATH_MIG:
			mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff,
				       IB_EVENT_PATH_MIG);
			break;

		case MTHCA_EVENT_TYPE_COMM_EST:
			mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff,
				       IB_EVENT_COMM_EST);
			break;

		case MTHCA_EVENT_TYPE_SQ_DRAINED:
			mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff,
				       IB_EVENT_SQ_DRAINED);
			break;

		case MTHCA_EVENT_TYPE_SRQ_QP_LAST_WQE:
			mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff,
				       IB_EVENT_QP_LAST_WQE_REACHED);
			break;

		case MTHCA_EVENT_TYPE_SRQ_LIMIT:
			mthca_srq_event(dev, be32_to_cpu(eqe->event.srq.srqn) & 0xffffff,
					IB_EVENT_SRQ_LIMIT_REACHED);
			break;

		case MTHCA_EVENT_TYPE_WQ_CATAS_ERROR:
			mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff,
				       IB_EVENT_QP_FATAL);
			break;

		case MTHCA_EVENT_TYPE_PATH_MIG_FAILED:
			mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff,
				       IB_EVENT_PATH_MIG_ERR);
			break;

		case MTHCA_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
			mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff,
				       IB_EVENT_QP_REQ_ERR);
			break;

		case MTHCA_EVENT_TYPE_WQ_ACCESS_ERROR:
			mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff,
				       IB_EVENT_QP_ACCESS_ERR);
			break;

		case MTHCA_EVENT_TYPE_CMD:
			mthca_cmd_event(dev,
					be16_to_cpu(eqe->event.cmd.token),
					eqe->event.cmd.status,
					be64_to_cpu(eqe->event.cmd.out_param));
			break;

		case MTHCA_EVENT_TYPE_PORT_CHANGE:
			/* Port number in bits 29:28; subtype 0x4 means "active". */
			port_change(dev,
				    (be32_to_cpu(eqe->event.port_change.port) >> 28) & 3,
				    eqe->subtype == 0x4);
			break;

		case MTHCA_EVENT_TYPE_CQ_ERROR:
			mthca_warn(dev, "CQ %s on CQN %06x\n",
				   eqe->event.cq_err.syndrome == 1 ?
				   "overrun" : "access violation",
				   be32_to_cpu(eqe->event.cq_err.cqn) & 0xffffff);
			/* Mask to 24 bits like every other queue number above. */
			mthca_cq_event(dev,
				       be32_to_cpu(eqe->event.cq_err.cqn) & 0xffffff,
				       IB_EVENT_CQ_ERR);
			break;

		case MTHCA_EVENT_TYPE_EQ_OVERFLOW:
			mthca_warn(dev, "EQ overrun on EQN %d\n", eq->eqn);
			break;

		case MTHCA_EVENT_TYPE_EEC_CATAS_ERROR:
		case MTHCA_EVENT_TYPE_SRQ_CATAS_ERROR:
		case MTHCA_EVENT_TYPE_LOCAL_CATAS_ERROR:
		case MTHCA_EVENT_TYPE_ECC_DETECT:
		default:
			mthca_warn(dev, "Unhandled event %02x(%02x) on EQ %d\n",
				   eqe->type, eqe->subtype, eq->eqn);
			break;
		}

		set_eqe_hw(eqe);
		++eq->cons_index;
		eqes_found = 1;
		++set_ci;

		/*
		 * The HCA will think the queue has overflowed if we
		 * don't tell it we've been processing events.  We
		 * create our EQs with MTHCA_NUM_SPARE_EQE extra
		 * entries, so we must update our consumer index at
		 * least that often.
		 */
		if (unlikely(set_ci >= MTHCA_NUM_SPARE_EQE)) {
			/*
			 * Conditional on hca_type is OK here because
			 * this is a rare case, not the fast path.
			 */
			set_eq_ci(dev, eq, eq->cons_index);
			set_ci = 0;
		}
	}

	/*
	 * Rely on caller to set consumer index so that we don't have
	 * to test hca_type in our interrupt handling fast path.
	 */
	return eqes_found;
}
static irqreturn_t mthca_tavor_interrupt(int irq, void *dev_ptr)
{
struct mthca_dev *dev = dev_ptr;
u32 ecr;
int i;
if (dev->eq_table.clr_mask)
writel(dev->eq_table.clr_mask, dev->eq_table.clr_int);
ecr = readl(dev->eq_regs.tavor.ecr_base + 4);
if (!ecr)
return IRQ_NONE;
writel(ecr, dev->eq_regs.tavor.ecr_base +
MTHCA_ECR_CLR_BASE - MTHCA_ECR_BASE + 4);
for (i = 0; i < MTHCA_NUM_EQ; ++i)
if (ecr & dev->eq_table.eq[i].eqn_mask) {
if (mthca_eq_int(dev, &dev->eq_table.eq[i]))
tavor_set_eq_ci(dev, &dev->eq_table.eq[i],
dev->eq_table.eq[i].cons_index);
tavor_eq_req_not(dev, dev->eq_table.eq[i].eqn);
}
return IRQ_HANDLED;
}
/*
 * Tavor MSI-X handler for a single EQ: drain the queue, write the
 * consumer index unconditionally and re-arm the EQ.
 */
static irqreturn_t mthca_tavor_msi_x_interrupt(int irq, void *eq_ptr)
{
	struct mthca_eq *queue = eq_ptr;
	struct mthca_dev *hca = queue->dev;

	mthca_eq_int(hca, queue);
	tavor_set_eq_ci(hca, queue, queue->cons_index);
	tavor_eq_req_not(hca, queue->eqn);

	/* An MSI-X vector is never shared, so the interrupt was ours. */
	return IRQ_HANDLED;
}
static irqreturn_t mthca_arbel_interrupt(int irq, void *dev_ptr)
{
struct mthca_dev *dev = dev_ptr;
int work = 0;
int i;
if (dev->eq_table.clr_mask)
writel(dev->eq_table.clr_mask, dev->eq_table.clr_int);
for (i = 0; i < MTHCA_NUM_EQ; ++i)
if (mthca_eq_int(dev, &dev->eq_table.eq[i])) {
work = 1;
arbel_set_eq_ci(dev, &dev->eq_table.eq[i],
dev->eq_table.eq[i].cons_index);
}
arbel_eq_req_not(dev, dev->eq_table.arm_mask);
return IRQ_RETVAL(work);
}
/*
 * Arbel MSI-X handler for a single EQ: drain the queue, write the
 * consumer index unconditionally and re-arm the EQ via its eqn_mask.
 */
static irqreturn_t mthca_arbel_msi_x_interrupt(int irq, void *eq_ptr)
{
	struct mthca_eq *queue = eq_ptr;
	struct mthca_dev *hca = queue->dev;

	mthca_eq_int(hca, queue);
	arbel_set_eq_ci(hca, queue, queue->cons_index);
	arbel_eq_req_not(hca, queue->eqn_mask);

	/* An MSI-X vector is never shared, so the interrupt was ours. */
	return IRQ_HANDLED;
}
/*
 * Create an event queue and hand it to the hardware.
 *
 * @dev:  device the EQ belongs to
 * @nent: requested number of entries; rounded up to a power of two,
 *        with a minimum of 2
 * @intr: value stored in the "intr" field of the EQ context
 * @eq:   EQ structure to initialise
 *
 * Allocates the EQ buffer page by page from coherent DMA memory, marks
 * every entry as hardware-owned, allocates an EQ number, registers the
 * pages as a memory region, fills in the EQ context and issues
 * SW2HW_EQ.
 *
 * Returns 0 on success.  On failure everything acquired so far is
 * released and a negative errno is returned: -ENOMEM for allocation
 * failures (including EQ number exhaustion), otherwise the error from
 * mthca_mr_alloc_phys() or mthca_SW2HW_EQ().
 */
static int mthca_create_eq(struct mthca_dev *dev,
			   int nent,
			   u8 intr,
			   struct mthca_eq *eq)
{
	int npages;
	u64 *dma_list = NULL;
	dma_addr_t t;
	struct mthca_mailbox *mailbox;
	struct mthca_eq_context *eq_context;
	int err = -ENOMEM;
	int i;

	eq->dev  = dev;
	eq->nent = roundup_pow_of_two(max(nent, 2));
	npages = ALIGN(eq->nent * MTHCA_EQ_ENTRY_SIZE, PAGE_SIZE) / PAGE_SIZE;

	eq->page_list = kmalloc(npages * sizeof *eq->page_list,
				GFP_KERNEL);
	if (!eq->page_list)
		goto err_out;

	/* NULL marks "not allocated" for the cleanup loop below. */
	for (i = 0; i < npages; ++i)
		eq->page_list[i].buf = NULL;

	dma_list = kmalloc(npages * sizeof *dma_list, GFP_KERNEL);
	if (!dma_list)
		goto err_out_free;

	mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
	if (IS_ERR(mailbox))
		goto err_out_free;
	eq_context = mailbox->buf;

	for (i = 0; i < npages; ++i) {
		eq->page_list[i].buf = dma_alloc_coherent(&dev->pdev->dev,
							  PAGE_SIZE, &t, GFP_KERNEL);
		if (!eq->page_list[i].buf)
			goto err_out_free_pages;

		dma_list[i] = t;
		dma_unmap_addr_set(&eq->page_list[i], mapping, t);

		clear_page(eq->page_list[i].buf);
	}

	/* All entries start out owned by the hardware. */
	for (i = 0; i < eq->nent; ++i)
		set_eqe_hw(get_eqe(eq, i));

	eq->eqn = mthca_alloc(&dev->eq_table.alloc);
	if (eq->eqn == -1)
		goto err_out_free_pages;

	err = mthca_mr_alloc_phys(dev, dev->driver_pd.pd_num,
				  dma_list, PAGE_SHIFT, npages,
				  0, npages * PAGE_SIZE,
				  MTHCA_MPT_FLAG_LOCAL_WRITE |
				  MTHCA_MPT_FLAG_LOCAL_READ,
				  &eq->mr);
	if (err)
		goto err_out_free_eq;

	memset(eq_context, 0, sizeof *eq_context);
	eq_context->flags           = cpu_to_be32(MTHCA_EQ_STATUS_OK   |
						  MTHCA_EQ_OWNER_HW    |
						  MTHCA_EQ_STATE_ARMED |
						  MTHCA_EQ_FLAG_TR);
	if (mthca_is_memfree(dev))
		eq_context->flags  |= cpu_to_be32(MTHCA_EQ_STATE_ARBEL);

	/* log2 of the queue size goes into the top byte. */
	eq_context->logsize_usrpage = cpu_to_be32((ffs(eq->nent) - 1) << 24);
	if (mthca_is_memfree(dev)) {
		eq_context->arbel_pd = cpu_to_be32(dev->driver_pd.pd_num);
	} else {
		eq_context->logsize_usrpage |= cpu_to_be32(dev->driver_uar.index);
		eq_context->tavor_pd         = cpu_to_be32(dev->driver_pd.pd_num);
	}
	eq_context->intr            = intr;
	eq_context->lkey            = cpu_to_be32(eq->mr.ibmr.lkey);

	err = mthca_SW2HW_EQ(dev, mailbox, eq->eqn);
	if (err) {
		mthca_warn(dev, "SW2HW_EQ returned %d\n", err);
		goto err_out_free_mr;
	}

	kfree(dma_list);
	mthca_free_mailbox(dev, mailbox);

	/*
	 * Use an unsigned constant: eqn may be as large as 31, and
	 * shifting a signed 1 into the sign bit is undefined behaviour.
	 */
	eq->eqn_mask   = swab32(1U << eq->eqn);
	eq->cons_index = 0;

	dev->eq_table.arm_mask |= eq->eqn_mask;

	mthca_dbg(dev, "Allocated EQ %d with %d entries\n",
		  eq->eqn, eq->nent);

	return err;

 err_out_free_mr:
	mthca_free_mr(dev, &eq->mr);

 err_out_free_eq:
	mthca_free(&dev->eq_table.alloc, eq->eqn);

 err_out_free_pages:
	for (i = 0; i < npages; ++i)
		if (eq->page_list[i].buf)
			dma_free_coherent(&dev->pdev->dev, PAGE_SIZE,
					  eq->page_list[i].buf,
					  dma_unmap_addr(&eq->page_list[i],
							 mapping));

	mthca_free_mailbox(dev, mailbox);

 err_out_free:
	kfree(eq->page_list);
	kfree(dma_list);

 err_out:
	return err;
}
/*
 * Take an event queue back from the hardware and release its resources.
 *
 * Issues HW2SW_EQ (a failure is only logged), removes the EQ from the
 * arm mask, frees the memory region, the EQ pages and the page list.
 *
 * If no command mailbox can be allocated the function returns without
 * releasing anything.
 *
 * The pages are released with dma_free_coherent(), matching the
 * dma_alloc_coherent() call that allocated them in mthca_create_eq().
 */
static void mthca_free_eq(struct mthca_dev *dev,
			  struct mthca_eq *eq)
{
	struct mthca_mailbox *mailbox;
	int err;
	int npages = (eq->nent * MTHCA_EQ_ENTRY_SIZE + PAGE_SIZE - 1) /
		PAGE_SIZE;
	int i;

	mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
	if (IS_ERR(mailbox))
		return;

	err = mthca_HW2SW_EQ(dev, mailbox, eq->eqn);
	if (err)
		mthca_warn(dev, "HW2SW_EQ returned %d\n", err);

	dev->eq_table.arm_mask &= ~eq->eqn_mask;

	/* Debugging aid, disabled: dump the EQ context returned by HW2SW_EQ. */
	if (0) {
		mthca_dbg(dev, "Dumping EQ context %02x:\n", eq->eqn);
		for (i = 0; i < sizeof (struct mthca_eq_context) / 4; ++i) {
			if (i % 4 == 0)
				printk("[%02x] ", i * 4);
			printk(" %08x", be32_to_cpup(mailbox->buf + i * 4));
			if ((i + 1) % 4 == 0)
				printk("\n");
		}
	}

	mthca_free_mr(dev, &eq->mr);
	for (i = 0; i < npages; ++i)
		dma_free_coherent(&dev->pdev->dev, PAGE_SIZE,
				  eq->page_list[i].buf,
				  dma_unmap_addr(&eq->page_list[i], mapping));

	kfree(eq->page_list);
	mthca_free_mailbox(dev, mailbox);
}
/*
 * Release every interrupt handler this driver has registered.
 *
 * Covers both the single shared interrupt (tracked by
 * dev->eq_table.have_irq) and the per-EQ MSI-X vectors (tracked by each
 * EQ's have_irq flag).  Each flag is cleared once its interrupt has been
 * released, so calling this function again is harmless.
 */
static void mthca_free_irqs(struct mthca_dev *dev)
{
	int i;

	if (dev->eq_table.have_irq) {
		free_irq(dev->pdev->irq, dev);
		/*
		 * Clear the flag just as is done for the per-EQ vectors
		 * below; otherwise a second call would free the same
		 * interrupt twice.
		 */
		dev->eq_table.have_irq = 0;
	}

	for (i = 0; i < MTHCA_NUM_EQ; ++i)
		if (dev->eq_table.eq[i].have_irq) {
			free_irq(dev->eq_table.eq[i].msi_x_vector,
				 dev->eq_table.eq + i);
			dev->eq_table.eq[i].have_irq = 0;
		}
}
/*
 * Map "size" bytes located "offset" bytes into PCI resource 0 of the
 * device.
 *
 * The resulting mapping (NULL on failure) is stored in *map.
 * Returns 0 on success or -ENOMEM if the mapping could not be created.
 */
static int mthca_map_reg(struct mthca_dev *dev,
			 unsigned long offset, unsigned long size,
			 void __iomem **map)
{
	phys_addr_t bar_start = pci_resource_start(dev->pdev, 0);
	void __iomem *mapping = ioremap(bar_start + offset, size);

	*map = mapping;
	return mapping ? 0 : -ENOMEM;
}
/*
 * Map the registers used for event queue handling.
 *
 * Mem-free devices get three mappings (interrupt clear, EQ arm and
 * EQ set-CI); the other devices get two (interrupt clear and the ECR
 * registers).  Mappings already made are undone when a later one
 * fails.  Returns 0 on success or -ENOMEM.
 */
static int mthca_map_eq_regs(struct mthca_dev *dev)
{
	if (!mthca_is_memfree(dev)) {
		if (mthca_map_reg(dev, MTHCA_CLR_INT_BASE, MTHCA_CLR_INT_SIZE,
				  &dev->clr_base)) {
			mthca_err(dev, "Couldn't map interrupt clear register, "
				  "aborting.\n");
			return -ENOMEM;
		}

		if (mthca_map_reg(dev, MTHCA_ECR_BASE,
				  MTHCA_ECR_SIZE + MTHCA_ECR_CLR_SIZE,
				  &dev->eq_regs.tavor.ecr_base)) {
			mthca_err(dev, "Couldn't map ecr register, "
				  "aborting.\n");
			goto err_unmap_clr;
		}

		return 0;
	}

	/*
	 * We assume that the EQ arm and EQ set CI registers
	 * fall within the first BAR.  We can't trust the
	 * values firmware gives us, since those addresses are
	 * valid on the HCA's side of the PCI bus but not
	 * necessarily the host side.
	 */
	if (mthca_map_reg(dev, (pci_resource_len(dev->pdev, 0) - 1) &
			  dev->fw.arbel.clr_int_base, MTHCA_CLR_INT_SIZE,
			  &dev->clr_base)) {
		mthca_err(dev, "Couldn't map interrupt clear register, "
			  "aborting.\n");
		return -ENOMEM;
	}

	/*
	 * Add 4 because we limit ourselves to EQs 0 ... 31,
	 * so we only need the low word of the register.
	 */
	if (mthca_map_reg(dev, ((pci_resource_len(dev->pdev, 0) - 1) &
				dev->fw.arbel.eq_arm_base) + 4, 4,
			  &dev->eq_regs.arbel.eq_arm)) {
		mthca_err(dev, "Couldn't map EQ arm register, aborting.\n");
		goto err_unmap_clr;
	}

	if (mthca_map_reg(dev, (pci_resource_len(dev->pdev, 0) - 1) &
			  dev->fw.arbel.eq_set_ci_base,
			  MTHCA_EQ_SET_CI_SIZE,
			  &dev->eq_regs.arbel.eq_set_ci_base)) {
		mthca_err(dev, "Couldn't map EQ CI register, aborting.\n");
		goto err_unmap_arm;
	}

	return 0;

err_unmap_arm:
	iounmap(dev->eq_regs.arbel.eq_arm);
err_unmap_clr:
	iounmap(dev->clr_base);
	return -ENOMEM;
}
/*
 * Undo mthca_map_eq_regs(): drop the device-specific register mappings
 * first and the interrupt clear mapping, which both device kinds share,
 * last.
 */
static void mthca_unmap_eq_regs(struct mthca_dev *dev)
{
	if (!mthca_is_memfree(dev)) {
		iounmap(dev->eq_regs.tavor.ecr_base);
	} else {
		iounmap(dev->eq_regs.arbel.eq_set_ci_base);
		iounmap(dev->eq_regs.arbel.eq_arm);
	}

	iounmap(dev->clr_base);
}
/*
 * Allocate one page, DMA-map it and hand it to the device as the ICM
 * page at virtual address icm_virt.
 *
 * The page, its DMA address and icm_virt are recorded in dev->eq_table
 * so that mthca_unmap_eq_icm() can release them.  Returns 0 on success,
 * -ENOMEM if the page cannot be allocated or mapped, or the error
 * reported by mthca_MAP_ICM_page().
 */
int mthca_map_eq_icm(struct mthca_dev *dev, u64 icm_virt)
{
	int rc;

	/*
	 * We assume that mapping one page is enough for the whole EQ
	 * context table.  This is fine with all current HCAs, because
	 * we only use 32 EQs and each EQ uses 32 bytes of context
	 * memory, or 1 KB total.
	 */
	dev->eq_table.icm_virt = icm_virt;

	dev->eq_table.icm_page = alloc_page(GFP_HIGHUSER);
	if (!dev->eq_table.icm_page)
		return -ENOMEM;

	dev->eq_table.icm_dma = pci_map_page(dev->pdev, dev->eq_table.icm_page, 0,
					     PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
	if (pci_dma_mapping_error(dev->pdev, dev->eq_table.icm_dma)) {
		rc = -ENOMEM;
		goto release_page;
	}

	rc = mthca_MAP_ICM_page(dev, dev->eq_table.icm_dma, icm_virt);
	if (!rc)
		return 0;

	pci_unmap_page(dev->pdev, dev->eq_table.icm_dma, PAGE_SIZE,
		       PCI_DMA_BIDIRECTIONAL);
release_page:
	__free_page(dev->eq_table.icm_page);
	return rc;
}
/*
 * Reverse mthca_map_eq_icm(): unmap the single ICM page recorded in
 * dev->eq_table from the device, remove its DMA mapping and free it.
 * The result of mthca_UNMAP_ICM() is not checked.
 */
void mthca_unmap_eq_icm(struct mthca_dev *dev)
{
mthca_UNMAP_ICM(dev, dev->eq_table.icm_virt, 1);
pci_unmap_page(dev->pdev, dev->eq_table.icm_dma, PAGE_SIZE,
PCI_DMA_BIDIRECTIONAL);
__free_page(dev->eq_table.icm_page);
}
/*
 * Set up the event queue table: the EQ number allocator, the EQ
 * register mappings, the completion, async and command EQs, and the
 * interrupt handlers that service them.
 *
 * Returns 0 on success.  On failure everything set up so far is undone
 * in reverse order and the error code of the failing step is returned.
 * A failing MAP_EQ command is only logged and does not fail the call.
 */
int mthca_init_eq_table(struct mthca_dev *dev)
{
int err;
u8 intr;
int i;
/* Allocator that hands out EQ numbers, skipping the reserved ones. */
err = mthca_alloc_init(&dev->eq_table.alloc,
dev->limits.num_eqs,
dev->limits.num_eqs - 1,
dev->limits.reserved_eqs);
if (err)
return err;
err = mthca_map_eq_regs(dev);
if (err)
goto err_out_free;
/*
 * With MSI-X no interrupt clear mask is used.  Otherwise derive the
 * (byte-swapped) clear mask from the INTA pin number and select the
 * word of the clear register that holds that pin's bit.
 */
if (dev->mthca_flags & MTHCA_FLAG_MSI_X) {
dev->eq_table.clr_mask = 0;
} else {
dev->eq_table.clr_mask =
swab32(1 << (dev->eq_table.inta_pin & 31));
dev->eq_table.clr_int = dev->clr_base +
(dev->eq_table.inta_pin < 32 ? 4 : 0);
}
dev->eq_table.arm_mask = 0;
intr = dev->eq_table.inta_pin;
/*
 * Create the three EQs.  Under MSI-X each one is given its own
 * interrupt value (128, 129, 130); otherwise all of them use the
 * INTA pin.
 */
err = mthca_create_eq(dev, dev->limits.num_cqs + MTHCA_NUM_SPARE_EQE,
(dev->mthca_flags & MTHCA_FLAG_MSI_X) ? 128 : intr,
&dev->eq_table.eq[MTHCA_EQ_COMP]);
if (err)
goto err_out_unmap;
err = mthca_create_eq(dev, MTHCA_NUM_ASYNC_EQE + MTHCA_NUM_SPARE_EQE,
(dev->mthca_flags & MTHCA_FLAG_MSI_X) ? 129 : intr,
&dev->eq_table.eq[MTHCA_EQ_ASYNC]);
if (err)
goto err_out_comp;
err = mthca_create_eq(dev, MTHCA_NUM_CMD_EQE + MTHCA_NUM_SPARE_EQE,
(dev->mthca_flags & MTHCA_FLAG_MSI_X) ? 130 : intr,
&dev->eq_table.eq[MTHCA_EQ_CMD]);
if (err)
goto err_out_async;
/*
 * Register interrupt handlers: one named handler per EQ under MSI-X,
 * or a single shared handler for the whole device otherwise.  The
 * have_irq flags record what mthca_free_irqs() has to release.
 */
if (dev->mthca_flags & MTHCA_FLAG_MSI_X) {
static const char *eq_name[] = {
[MTHCA_EQ_COMP] = DRV_NAME "-comp",
[MTHCA_EQ_ASYNC] = DRV_NAME "-async",
[MTHCA_EQ_CMD] = DRV_NAME "-cmd"
};
for (i = 0; i < MTHCA_NUM_EQ; ++i) {
snprintf(dev->eq_table.eq[i].irq_name,
IB_DEVICE_NAME_MAX,
"%s@pci:%s", eq_name[i],
pci_name(dev->pdev));
err = request_irq(dev->eq_table.eq[i].msi_x_vector,
mthca_is_memfree(dev) ?
mthca_arbel_msi_x_interrupt :
mthca_tavor_msi_x_interrupt,
0, dev->eq_table.eq[i].irq_name,
dev->eq_table.eq + i);
if (err)
goto err_out_cmd;
dev->eq_table.eq[i].have_irq = 1;
}
} else {
snprintf(dev->eq_table.eq[0].irq_name, IB_DEVICE_NAME_MAX,
DRV_NAME "@pci:%s", pci_name(dev->pdev));
err = request_irq(dev->pdev->irq,
mthca_is_memfree(dev) ?
mthca_arbel_interrupt :
mthca_tavor_interrupt,
IRQF_SHARED, dev->eq_table.eq[0].irq_name, dev);
if (err)
goto err_out_cmd;
dev->eq_table.have_irq = 1;
}
/*
 * Route async events to the async EQ and command events to the
 * command EQ.  Failures here are reported but not treated as fatal.
 */
err = mthca_MAP_EQ(dev, async_mask(dev),
0, dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn);
if (err)
mthca_warn(dev, "MAP_EQ for async EQ %d failed (%d)\n",
dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn, err);
err = mthca_MAP_EQ(dev, MTHCA_CMD_EVENT_MASK,
0, dev->eq_table.eq[MTHCA_EQ_CMD].eqn);
if (err)
mthca_warn(dev, "MAP_EQ for cmd EQ %d failed (%d)\n",
dev->eq_table.eq[MTHCA_EQ_CMD].eqn, err);
/* Request event notification on every EQ. */
for (i = 0; i < MTHCA_NUM_EQ; ++i)
if (mthca_is_memfree(dev))
arbel_eq_req_not(dev, dev->eq_table.eq[i].eqn_mask);
else
tavor_eq_req_not(dev, dev->eq_table.eq[i].eqn);
return 0;
/* Error unwinding: each label releases one more step, newest first. */
err_out_cmd:
mthca_free_irqs(dev);
mthca_free_eq(dev, &dev->eq_table.eq[MTHCA_EQ_CMD]);
err_out_async:
mthca_free_eq(dev, &dev->eq_table.eq[MTHCA_EQ_ASYNC]);
err_out_comp:
mthca_free_eq(dev, &dev->eq_table.eq[MTHCA_EQ_COMP]);
err_out_unmap:
mthca_unmap_eq_regs(dev);
err_out_free:
mthca_alloc_cleanup(&dev->eq_table.alloc);
return err;
}
/*
 * Tear down everything mthca_init_eq_table() set up: release the
 * interrupts, unmap the async and command events from their EQs
 * (results are not checked), free all EQs, unmap the EQ registers and
 * destroy the EQ number allocator.
 */
void mthca_cleanup_eq_table(struct mthca_dev *dev)
{
	int eq_idx = 0;

	mthca_free_irqs(dev);

	mthca_MAP_EQ(dev, async_mask(dev),
		     1, dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn);
	mthca_MAP_EQ(dev, MTHCA_CMD_EVENT_MASK,
		     1, dev->eq_table.eq[MTHCA_EQ_CMD].eqn);

	while (eq_idx < MTHCA_NUM_EQ) {
		mthca_free_eq(dev, &dev->eq_table.eq[eq_idx]);
		++eq_idx;
	}

	mthca_unmap_eq_regs(dev);

	mthca_alloc_cleanup(&dev->eq_table.alloc);
}

350
sys/dev/mthca/mthca_mad.c Normal file
View File

@ -0,0 +1,350 @@
/*
* Copyright (c) 2004 Topspin Communications. All rights reserved.
* Copyright (c) 2005 Mellanox Technologies. All rights reserved.
* Copyright (c) 2004 Voltaire, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/string.h>
#include <linux/slab.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_mad.h>
#include <rdma/ib_smi.h>
#include "mthca_dev.h"
#include "mthca_cmd.h"
/*
 * Management class values that mthca_process_mad() treats as Mellanox
 * vendor-specific classes (only gets and sets are handled for them).
 */
enum {
MTHCA_VENDOR_CLASS1 = 0x9,
MTHCA_VENDOR_CLASS2 = 0xa
};
/*
 * Query the given port (1-based) and cache the product of its active
 * speed and active width in dev->rate[port_num - 1].
 *
 * Returns 0 on success, -ENOMEM if the temporary attribute buffer
 * cannot be allocated, or the error from ib_query_port() (which is
 * also logged).
 */
static int mthca_update_rate(struct mthca_dev *dev, u8 port_num)
{
	struct ib_port_attr *attr = kmalloc(sizeof *attr, GFP_KERNEL);
	int rc;

	if (!attr)
		return -ENOMEM;

	rc = ib_query_port(&dev->ib_dev, port_num, attr);
	if (rc)
		printk(KERN_WARNING "ib_query_port failed (%d) for %s port %d\n",
		       rc, dev->ib_dev.name, port_num);
	else
		dev->rate[port_num - 1] = attr->active_speed *
			ib_width_enum_to_int(attr->active_width);

	kfree(attr);
	return rc;
}
/*
 * Replace the cached address handle for the subnet manager of the given
 * port (1-based) with one built from the supplied LID and SL.
 *
 * Does nothing if the port has no agent in slot 0 or if the new address
 * handle cannot be created.  The swap is done under dev->sm_lock, and
 * any previous handle is destroyed.
 */
static void update_sm_ah(struct mthca_dev *dev,
			 u8 port_num, u16 lid, u8 sl)
{
	struct ib_mad_agent *agent = dev->send_agent[port_num - 1][0];
	struct ib_ah_attr attr;
	struct ib_ah *fresh_ah;
	unsigned long irq_flags;

	if (!agent)
		return;

	memset(&attr, 0, sizeof attr);
	attr.dlid     = lid;
	attr.sl       = sl;
	attr.port_num = port_num;

	fresh_ah = ib_create_ah(agent->qp->pd, &attr);
	if (IS_ERR(fresh_ah))
		return;

	spin_lock_irqsave(&dev->sm_lock, irq_flags);
	if (dev->sm_ah[port_num - 1])
		ib_destroy_ah(dev->sm_ah[port_num - 1]);
	dev->sm_ah[port_num - 1] = fresh_ah;
	spin_unlock_irqrestore(&dev->sm_lock, irq_flags);
}
/*
 * Inspect subnet management Set MADs so that events the hardware does
 * not report can be generated in software:
 *
 *  - PortInfo: refresh the cached port rate and the SM address handle,
 *    then dispatch a client re-register event if the corresponding bit
 *    is set and a LID change event if the LID differs from prev_lid.
 *  - P_Key table: dispatch a P_Key change event.
 *
 * MADs of any other class or method are ignored.
 */
static void smp_snoop(struct ib_device *ibdev,
		      u8 port_num,
		      const struct ib_mad *mad,
		      u16 prev_lid)
{
	struct ib_event event;

	if (mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_SUBN_LID_ROUTED &&
	    mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
		return;

	if (mad->mad_hdr.method != IB_MGMT_METHOD_SET)
		return;

	if (mad->mad_hdr.attr_id == IB_SMP_ATTR_PORT_INFO) {
		struct ib_port_info *pinfo =
			(struct ib_port_info *) ((struct ib_smp *) mad)->data;
		u16 new_lid = be16_to_cpu(pinfo->lid);

		mthca_update_rate(to_mdev(ibdev), port_num);
		update_sm_ah(to_mdev(ibdev), port_num,
			     be16_to_cpu(pinfo->sm_lid),
			     pinfo->neighbormtu_mastersmsl & 0xf);

		event.device           = ibdev;
		event.element.port_num = port_num;

		if (pinfo->clientrereg_resv_subnetto & 0x80) {
			event.event = IB_EVENT_CLIENT_REREGISTER;
			ib_dispatch_event(&event);
		}

		if (new_lid != prev_lid) {
			event.event = IB_EVENT_LID_CHANGE;
			ib_dispatch_event(&event);
		}
	}

	if (mad->mad_hdr.attr_id == IB_SMP_ATTR_PKEY_TABLE) {
		event.device           = ibdev;
		event.element.port_num = port_num;
		event.event            = IB_EVENT_PKEY_CHANGE;
		ib_dispatch_event(&event);
	}
}
/*
 * If the MAD is a subnet management GetResp for the NodeDescription
 * attribute, overwrite its payload with the node description stored in
 * the ib_device.  The copy is made while holding cap_mask_mutex.
 */
static void node_desc_override(struct ib_device *dev,
			       struct ib_mad *mad)
{
	if (mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_SUBN_LID_ROUTED &&
	    mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
		return;

	if (mad->mad_hdr.method != IB_MGMT_METHOD_GET_RESP ||
	    mad->mad_hdr.attr_id != IB_SMP_ATTR_NODE_DESC)
		return;

	mutex_lock(&to_mdev(dev)->cap_mask_mutex);
	memcpy(((struct ib_smp *) mad)->data, dev->node_desc,
	       IB_DEVICE_NODE_DESC_MAX);
	mutex_unlock(&to_mdev(dev)->cap_mask_mutex);
}
/*
 * Send a copy of a trap MAD to the subnet manager through the cached
 * SM address handle of the given port (1-based).
 *
 * Agent slot 0 is used for the LID-routed subnet management class and
 * slot 1 for every other class.  The trap is dropped silently when the
 * agent is missing, when no send buffer can be created, or when no SM
 * address handle is cached.
 */
static void forward_trap(struct mthca_dev *dev,
			 u8 port_num,
			 const struct ib_mad *mad)
{
	int qpn = mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_SUBN_LID_ROUTED;
	struct ib_mad_agent *agent = dev->send_agent[port_num - 1][qpn];
	struct ib_mad_send_buf *buf;
	unsigned long irq_flags;
	int rc;

	if (!agent)
		return;

	buf = ib_create_send_mad(agent, qpn, 0, 0, IB_MGMT_MAD_HDR,
				 IB_MGMT_MAD_DATA, GFP_ATOMIC,
				 IB_MGMT_BASE_VERSION);
	if (IS_ERR(buf))
		return;

	/*
	 * We rely here on the fact that MLX QPs don't use the
	 * address handle after the send is posted (this is
	 * wrong following the IB spec strictly, but we know
	 * it's OK for our devices).
	 */
	spin_lock_irqsave(&dev->sm_lock, irq_flags);
	memcpy(buf->mad, mad, sizeof *mad);
	buf->ah = dev->sm_ah[port_num - 1];
	rc = buf->ah ? ib_post_send_mad(buf, NULL) : -EINVAL;
	spin_unlock_irqrestore(&dev->sm_lock, irq_flags);

	/* The buffer is still ours if it was never posted. */
	if (rc)
		ib_free_send_mad(buf);
}
/*
 * Process an incoming MAD for the given port.
 *
 * Locally generated traps are forwarded to the SM.  MADs that pass the
 * class/method filter below are executed by firmware through
 * mthca_MAD_IFC(), and the response is post-processed (event snooping,
 * node description override, directed-route return bit).
 *
 * Returns a combination of IB_MAD_RESULT_* flags:
 *  - IB_MAD_RESULT_FAILURE for wrong buffer sizes or a firmware error,
 *  - IB_MAD_RESULT_SUCCESS alone when the MAD is not handled here,
 *  - SUCCESS | CONSUMED when no reply is to be sent,
 *  - SUCCESS | REPLY when out holds a response.
 */
int mthca_process_mad(struct ib_device *ibdev,
int mad_flags,
u8 port_num,
const struct ib_wc *in_wc,
const struct ib_grh *in_grh,
const struct ib_mad_hdr *in, size_t in_mad_size,
struct ib_mad_hdr *out, size_t *out_mad_size,
u16 *out_mad_pkey_index)
{
int err;
/* Without a work completion the source LID is taken as permissive. */
u16 slid = in_wc ? in_wc->slid : be16_to_cpu(IB_LID_PERMISSIVE);
u16 prev_lid = 0;
struct ib_port_attr pattr;
const struct ib_mad *in_mad = (const struct ib_mad *)in;
struct ib_mad *out_mad = (struct ib_mad *)out;
/* Only full-size struct ib_mad buffers are accepted in either direction. */
if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) ||
*out_mad_size != sizeof(*out_mad)))
return IB_MAD_RESULT_FAILURE;
/* Forward locally generated traps to the SM */
if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP &&
slid == 0) {
forward_trap(to_mdev(ibdev), port_num, in_mad);
return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
}
/*
* Only handle SM gets, sets and trap represses for SM class
*
* Only handle PMA and Mellanox vendor-specific class gets and
* sets for other classes.
*/
if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED ||
in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
if (in_mad->mad_hdr.method != IB_MGMT_METHOD_GET &&
in_mad->mad_hdr.method != IB_MGMT_METHOD_SET &&
in_mad->mad_hdr.method != IB_MGMT_METHOD_TRAP_REPRESS)
return IB_MAD_RESULT_SUCCESS;
/*
* Don't process SMInfo queries or vendor-specific
* MADs -- the SMA can't handle them.
*/
if (in_mad->mad_hdr.attr_id == IB_SMP_ATTR_SM_INFO ||
((in_mad->mad_hdr.attr_id & IB_SMP_ATTR_VENDOR_MASK) ==
IB_SMP_ATTR_VENDOR_MASK))
return IB_MAD_RESULT_SUCCESS;
} else if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT ||
in_mad->mad_hdr.mgmt_class == MTHCA_VENDOR_CLASS1 ||
in_mad->mad_hdr.mgmt_class == MTHCA_VENDOR_CLASS2) {
if (in_mad->mad_hdr.method != IB_MGMT_METHOD_GET &&
in_mad->mad_hdr.method != IB_MGMT_METHOD_SET)
return IB_MAD_RESULT_SUCCESS;
} else
return IB_MAD_RESULT_SUCCESS;
/*
 * For a PortInfo Set, remember the current LID first so that
 * smp_snoop() can tell afterwards whether the LID changed.  If the
 * port query fails, prev_lid stays 0.
 */
if ((in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED ||
in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) &&
in_mad->mad_hdr.method == IB_MGMT_METHOD_SET &&
in_mad->mad_hdr.attr_id == IB_SMP_ATTR_PORT_INFO &&
!ib_query_port(ibdev, port_num, &pattr))
prev_lid = pattr.lid;
/* Let firmware execute the MAD and build the response in out_mad. */
err = mthca_MAD_IFC(to_mdev(ibdev),
mad_flags & IB_MAD_IGNORE_MKEY,
mad_flags & IB_MAD_IGNORE_BKEY,
port_num, in_wc, in_grh, in_mad, out_mad);
/* -EBADMSG is reported as plain success without a reply. */
if (err == -EBADMSG)
return IB_MAD_RESULT_SUCCESS;
else if (err) {
mthca_err(to_mdev(ibdev), "MAD_IFC returned %d\n", err);
return IB_MAD_RESULT_FAILURE;
}
/* Post-process only responses whose status field is zero. */
if (!out_mad->mad_hdr.status) {
smp_snoop(ibdev, port_num, in_mad, prev_lid);
node_desc_override(ibdev, out_mad);
}
/* set return bit in status of directed route responses */
if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
out_mad->mad_hdr.status |= cpu_to_be16(1 << 15);
if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP_REPRESS)
/* no response for trap repress */
return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
}
/*
 * Send handler passed to ib_register_mad_agent() by
 * mthca_create_agents(): all it does is free the send buffer of the
 * reported send.
 */
static void send_handler(struct ib_mad_agent *agent,
struct ib_mad_send_wc *mad_send_wc)
{
ib_free_send_mad(mad_send_wc->send_buf);
}
/*
 * Register the MAD agents of every port (slot 0 on the SMI QP, slot 1
 * on the GSI QP) and cache each port's rate.
 *
 * Returns 0 on success.  On failure every agent registered so far is
 * unregistered again, its slot in dev->send_agent is reset to NULL, and
 * the error code of the failing step is returned.
 */
int mthca_create_agents(struct mthca_dev *dev)
{
	struct ib_mad_agent *agent;
	int p, q;
	int ret;

	spin_lock_init(&dev->sm_lock);

	for (p = 0; p < dev->limits.num_ports; ++p)
		for (q = 0; q <= 1; ++q) {
			agent = ib_register_mad_agent(&dev->ib_dev, p + 1,
						      q ? IB_QPT_GSI : IB_QPT_SMI,
						      NULL, 0, send_handler,
						      NULL, NULL, 0);
			if (IS_ERR(agent)) {
				ret = PTR_ERR(agent);
				goto err;
			}
			dev->send_agent[p][q] = agent;
		}

	/* Port numbers are 1-based for mthca_update_rate(). */
	for (p = 1; p <= dev->limits.num_ports; ++p) {
		ret = mthca_update_rate(dev, p);
		if (ret) {
			mthca_err(dev, "Failed to obtain port %d rate."
				  " aborting.\n", p);
			goto err;
		}
	}

	return 0;

err:
	for (p = 0; p < dev->limits.num_ports; ++p)
		for (q = 0; q <= 1; ++q) {
			agent = dev->send_agent[p][q];
			if (!agent)
				continue;
			/*
			 * Clear the slot before unregistering, as
			 * mthca_free_agents() does, so that no stale
			 * agent pointer is left behind in the table.
			 */
			dev->send_agent[p][q] = NULL;
			ib_unregister_mad_agent(agent);
		}

	return ret;
}
/*
 * Unregister both MAD agents of every port and destroy each port's
 * cached SM address handle, if there is one.  Every agent slot is
 * cleared before its agent is unregistered.
 */
void mthca_free_agents(struct mthca_dev *dev)
{
	int port, slot;

	for (port = 0; port < dev->limits.num_ports; ++port) {
		for (slot = 0; slot <= 1; ++slot) {
			struct ib_mad_agent *doomed = dev->send_agent[port][slot];

			dev->send_agent[port][slot] = NULL;
			ib_unregister_mad_agent(doomed);
		}

		if (dev->sm_ah[port])
			ib_destroy_ah(dev->sm_ah[port]);
	}
}

1279
sys/dev/mthca/mthca_main.c Normal file

File diff suppressed because it is too large Load Diff

335
sys/dev/mthca/mthca_mcg.c Normal file
View File

@ -0,0 +1,335 @@
/*
* Copyright (c) 2004 Topspin Communications. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/string.h>
#include <linux/gfp.h>
#include "mthca_dev.h"
#include "mthca_cmd.h"
/*
 * One multicast group entry as read and written with
 * mthca_READ_MGM() / mthca_WRITE_MGM().
 */
struct mthca_mgm {
/* Big-endian; the index of the next entry in the hash chain is this value >> 6 (0 ends the chain). */
__be32 next_gid_index;
u32 reserved[3];
/* Multicast GID of the group; all zeroes marks an unused entry. */
u8 gid[16];
/* Big-endian member QP numbers; bit 31 set marks a slot as in use. */
__be32 qp[MTHCA_QP_PER_MGM];
};
static const u8 zero_gid[16]; /* automatically initialized to 0 */
/*
 * Caller must hold MCG table semaphore.  gid and mgm parameters must
 * be properly aligned for command interface.
 *
 * Returns 0 unless a firmware command error occurs or the temporary
 * mailbox cannot be allocated.
 *
 * If GID is found in MGM or MGM is empty, *index = *hash, *prev = -1
 * and *mgm holds MGM entry.
 *
 * If GID is found in AMGM, *index = index in AMGM, *prev = index of
 * previous entry in hash chain and *mgm holds AMGM entry.
 *
 * If no AMGM exists for given gid, *index = -1, *prev = index of last
 * entry in hash chain and *mgm holds end of hash chain.
 */
static int find_mgm(struct mthca_dev *dev,
		    u8 *gid, struct mthca_mailbox *mgm_mailbox,
		    u16 *hash, int *prev, int *index)
{
	struct mthca_mailbox *mailbox;
	struct mthca_mgm *mgm = mgm_mailbox->buf;
	u8 *mgid;
	int err;

	/* Scratch mailbox that carries the GID to the hash command. */
	mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
	if (IS_ERR(mailbox))
		/*
		 * Propagate the allocator's own error code, as the
		 * callers in this file do for their mailboxes.
		 */
		return PTR_ERR(mailbox);
	mgid = mailbox->buf;

	memcpy(mgid, gid, 16);

	err = mthca_MGID_HASH(dev, mailbox, hash);
	if (err) {
		mthca_err(dev, "MGID_HASH failed (%d)\n", err);
		goto out;
	}

	if (0)
		mthca_dbg(dev, "Hash for %pI6 is %04x\n", gid, *hash);

	*index = *hash;
	*prev  = -1;

	/* Walk the hash chain, starting at the entry selected by the hash. */
	do {
		err = mthca_READ_MGM(dev, *index, mgm_mailbox);
		if (err) {
			mthca_err(dev, "READ_MGM failed (%d)\n", err);
			goto out;
		}

		/* An all-zero GID is only legal in the first entry. */
		if (!memcmp(mgm->gid, zero_gid, 16)) {
			if (*index != *hash) {
				mthca_err(dev, "Found zero MGID in AMGM.\n");
				err = -EINVAL;
			}
			goto out;
		}

		if (!memcmp(mgm->gid, gid, 16))
			goto out;

		*prev = *index;
		*index = be32_to_cpu(mgm->next_gid_index) >> 6;
	} while (*index);

	/* End of chain reached without finding the GID. */
	*index = -1;

out:
	mthca_free_mailbox(dev, mailbox);
	return err;
}
/*
 * Add the QP to the multicast group identified by gid.
 *
 * The group entry is looked up with find_mgm(); if the GID has no entry
 * yet, a new AMGM entry is allocated and linked to the end of the hash
 * chain.  The lid argument is not used.
 *
 * Returns 0 on success (also when the QP already is a member), or a
 * negative error code: the mailbox allocation error, -ENOMEM when no
 * AMGM entry or no free QP slot is left, -EINVAL when writing the entry
 * fails, or the error of a failing firmware command.
 */
int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
{
struct mthca_dev *dev = to_mdev(ibqp->device);
struct mthca_mailbox *mailbox;
struct mthca_mgm *mgm;
u16 hash;
int index, prev;
/* Set when a newly allocated AMGM entry must be linked into the chain. */
int link = 0;
int i;
int err;
mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
if (IS_ERR(mailbox))
return PTR_ERR(mailbox);
mgm = mailbox->buf;
mutex_lock(&dev->mcg_table.mutex);
err = find_mgm(dev, gid->raw, mailbox, &hash, &prev, &index);
if (err)
goto out;
if (index != -1) {
/* Existing entry; if it is still unused, claim it for this GID. */
if (!memcmp(mgm->gid, zero_gid, 16))
memcpy(mgm->gid, gid->raw, 16);
} else {
/* GID not in the chain: allocate a fresh entry and start from zero. */
link = 1;
index = mthca_alloc(&dev->mcg_table.alloc);
if (index == -1) {
mthca_err(dev, "No AMGM entries left\n");
err = -ENOMEM;
goto out;
}
err = mthca_READ_MGM(dev, index, mailbox);
if (err) {
mthca_err(dev, "READ_MGM failed (%d)\n", err);
goto out;
}
memset(mgm, 0, sizeof *mgm);
memcpy(mgm->gid, gid->raw, 16);
}
/* Find the QP in the member list, or take the first free slot. */
for (i = 0; i < MTHCA_QP_PER_MGM; ++i)
if (mgm->qp[i] == cpu_to_be32(ibqp->qp_num | (1 << 31))) {
mthca_dbg(dev, "QP %06x already a member of MGM\n",
ibqp->qp_num);
err = 0;
goto out;
} else if (!(mgm->qp[i] & cpu_to_be32(1 << 31))) {
mgm->qp[i] = cpu_to_be32(ibqp->qp_num | (1 << 31));
break;
}
if (i == MTHCA_QP_PER_MGM) {
mthca_err(dev, "MGM at index %x is full.\n", index);
err = -ENOMEM;
goto out;
}
err = mthca_WRITE_MGM(dev, index, mailbox);
if (err) {
mthca_err(dev, "WRITE_MGM failed %d\n", err);
err = -EINVAL;
goto out;
}
if (!link)
goto out;
/* Point the previous end of the chain at the new entry. */
err = mthca_READ_MGM(dev, prev, mailbox);
if (err) {
mthca_err(dev, "READ_MGM failed %d\n", err);
goto out;
}
mgm->next_gid_index = cpu_to_be32(index << 6);
err = mthca_WRITE_MGM(dev, prev, mailbox);
if (err)
mthca_err(dev, "WRITE_MGM returned %d\n", err);
out:
/* On failure give back the AMGM entry allocated above, if any. */
if (err && link && index != -1) {
BUG_ON(index < dev->limits.num_mgms);
mthca_free(&dev->mcg_table.alloc, index);
}
mutex_unlock(&dev->mcg_table.mutex);
mthca_free_mailbox(dev, mailbox);
return err;
}
/*
 * Remove the QP from the multicast group identified by gid.
 *
 * When the last member is removed, the group entry itself is taken out
 * of the hash chain.  The lid argument is not used.
 *
 * Returns 0 on success, or a negative error code: the mailbox
 * allocation error, -EINVAL when the GID or the QP is not found, or the
 * error of a failing firmware command.
 */
int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
{
struct mthca_dev *dev = to_mdev(ibqp->device);
struct mthca_mailbox *mailbox;
struct mthca_mgm *mgm;
u16 hash;
int prev, index;
int i, loc;
int err;
mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
if (IS_ERR(mailbox))
return PTR_ERR(mailbox);
mgm = mailbox->buf;
mutex_lock(&dev->mcg_table.mutex);
err = find_mgm(dev, gid->raw, mailbox, &hash, &prev, &index);
if (err)
goto out;
if (index == -1) {
mthca_err(dev, "MGID %pI6 not found\n", gid->raw);
err = -EINVAL;
goto out;
}
/*
 * Scan the in-use slots: loc becomes the slot holding this QP, and
 * the loop stops with i at the first free slot (or at
 * MTHCA_QP_PER_MGM if every slot is in use).
 */
for (loc = -1, i = 0; i < MTHCA_QP_PER_MGM; ++i) {
if (mgm->qp[i] == cpu_to_be32(ibqp->qp_num | (1 << 31)))
loc = i;
if (!(mgm->qp[i] & cpu_to_be32(1 << 31)))
break;
}
if (loc == -1) {
mthca_err(dev, "QP %06x not found in MGM\n", ibqp->qp_num);
err = -EINVAL;
goto out;
}
/* Fill the hole with the last in-use slot, then clear that slot. */
mgm->qp[loc] = mgm->qp[i - 1];
mgm->qp[i - 1] = 0;
err = mthca_WRITE_MGM(dev, index, mailbox);
if (err) {
mthca_err(dev, "WRITE_MGM returned %d\n", err);
goto out;
}
/* i == 1 means the QP just removed was the only member. */
if (i != 1)
goto out;
if (prev == -1) {
/* Remove entry from MGM */
int amgm_index_to_free = be32_to_cpu(mgm->next_gid_index) >> 6;
/*
 * If the chain continues, copy the next entry over this one;
 * otherwise just mark this entry unused by zeroing its GID.
 */
if (amgm_index_to_free) {
err = mthca_READ_MGM(dev, amgm_index_to_free,
mailbox);
if (err) {
mthca_err(dev, "READ_MGM returned %d\n", err);
goto out;
}
} else
memset(mgm->gid, 0, 16);
err = mthca_WRITE_MGM(dev, index, mailbox);
if (err) {
mthca_err(dev, "WRITE_MGM returned %d\n", err);
goto out;
}
/* The copied entry's old index is no longer referenced. */
if (amgm_index_to_free) {
BUG_ON(amgm_index_to_free < dev->limits.num_mgms);
mthca_free(&dev->mcg_table.alloc, amgm_index_to_free);
}
} else {
/* Remove entry from AMGM */
int curr_next_index = be32_to_cpu(mgm->next_gid_index) >> 6;
/* Make the previous entry skip over the one being removed. */
err = mthca_READ_MGM(dev, prev, mailbox);
if (err) {
mthca_err(dev, "READ_MGM returned %d\n", err);
goto out;
}
mgm->next_gid_index = cpu_to_be32(curr_next_index << 6);
err = mthca_WRITE_MGM(dev, prev, mailbox);
if (err) {
mthca_err(dev, "WRITE_MGM returned %d\n", err);
goto out;
}
BUG_ON(index < dev->limits.num_mgms);
mthca_free(&dev->mcg_table.alloc, index);
}
out:
mutex_unlock(&dev->mcg_table.mutex);
mthca_free_mailbox(dev, mailbox);
return err;
}
/*
 * Initialise the multicast group table: an index allocator covering
 * num_mgms + num_amgms entries with the first num_mgms reserved, plus
 * the mutex that serialises table updates.
 *
 * Returns 0 on success or the error from mthca_alloc_init().
 */
int mthca_init_mcg_table(struct mthca_dev *dev)
{
	int entries = dev->limits.num_mgms + dev->limits.num_amgms;
	int rc;

	rc = mthca_alloc_init(&dev->mcg_table.alloc,
			      entries,
			      entries - 1,
			      dev->limits.num_mgms);
	if (!rc)
		mutex_init(&dev->mcg_table.mutex);

	return rc;
}
/*
 * Release the index allocator set up by mthca_init_mcg_table().
 */
void mthca_cleanup_mcg_table(struct mthca_dev *dev)
{
mthca_alloc_cleanup(&dev->mcg_table.alloc);
}

View File

@ -0,0 +1,758 @@
/*
* Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
* Copyright (c) 2005 Cisco Systems. All rights reserved.
* Copyright (c) 2005 Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/mm.h>
#include <linux/scatterlist.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/page.h>
#include "mthca_memfree.h"
#include "mthca_dev.h"
#include "mthca_cmd.h"
/*
* We allocate in as big chunks as we can, up to a maximum of 256 KB
* per chunk.
*/
enum {
/* Largest single allocation mthca_alloc_icm() attempts, in bytes. */
MTHCA_ICM_ALLOC_SIZE = 1 << 18,
/* Bytes of an ICM table covered by each entry of table->icm[]. */
MTHCA_TABLE_CHUNK_SIZE = 1 << 18
};
/*
 * Table with a mutex and a trailing variable-length array of per-page
 * records.  NOTE(review): the code using it is outside this part of the
 * file; judging by the names it tracks user doorbell pages -- confirm.
 */
struct mthca_user_db_table {
struct mutex mutex;
struct {
/* presumably the user virtual address of the page -- confirm */
u64 uvirt;
struct scatterlist mem;
int refcount;
} page[0];
};
static void mthca_free_icm_pages(struct mthca_dev *dev, struct mthca_icm_chunk *chunk)
{
int i;
if (chunk->nsg > 0)
pci_unmap_sg(dev->pdev, chunk->mem, chunk->npages,
PCI_DMA_BIDIRECTIONAL);
for (i = 0; i < chunk->npages; ++i)
__free_pages(sg_page(&chunk->mem[i]),
get_order(chunk->mem[i].length));
}
static void mthca_free_icm_coherent(struct mthca_dev *dev, struct mthca_icm_chunk *chunk)
{
int i;
for (i = 0; i < chunk->npages; ++i) {
dma_free_coherent(&dev->pdev->dev, chunk->mem[i].length,
lowmem_page_address(sg_page(&chunk->mem[i])),
sg_dma_address(&chunk->mem[i]));
}
}
void mthca_free_icm(struct mthca_dev *dev, struct mthca_icm *icm, int coherent)
{
struct mthca_icm_chunk *chunk, *tmp;
if (!icm)
return;
list_for_each_entry_safe(chunk, tmp, &icm->chunk_list, list) {
if (coherent)
mthca_free_icm_coherent(dev, chunk);
else
mthca_free_icm_pages(dev, chunk);
kfree(chunk);
}
kfree(icm);
}
/*
 * Allocate 2^order zeroed pages and describe them in the scatterlist
 * entry "mem".  Returns 0 on success or -ENOMEM.
 */
static int mthca_alloc_icm_pages(struct scatterlist *mem, int order, gfp_t gfp_mask)
{
	/*
	 * Use __GFP_ZERO because buggy firmware assumes ICM pages are
	 * cleared, and subtle failures are seen if they aren't.
	 */
	struct page *pg = alloc_pages(gfp_mask | __GFP_ZERO, order);

	if (pg == NULL)
		return -ENOMEM;

	sg_set_page(mem, pg, PAGE_SIZE << order, 0);
	return 0;
}
/*
 * Allocate 2^order pages of coherent DMA memory and describe them in
 * the scatterlist entry "mem", including the DMA address and length.
 * Returns 0 on success or -ENOMEM.
 */
static int mthca_alloc_icm_coherent(struct device *dev, struct scatterlist *mem,
				    int order, gfp_t gfp_mask)
{
	size_t bytes = PAGE_SIZE << order;
	void *vaddr;

	vaddr = dma_alloc_coherent(dev, bytes, &sg_dma_address(mem), gfp_mask);
	if (vaddr == NULL)
		return -ENOMEM;

	sg_set_buf(mem, vaddr, bytes);
	/* The buffer is expected to start on a page boundary. */
	BUG_ON(mem->offset);
	sg_dma_len(mem) = bytes;
	return 0;
}
/*
 * Allocate npages pages of ICM memory, organised as a list of chunks
 * that each hold up to MTHCA_ICM_CHUNK_LEN scatterlist entries.
 *
 * Allocation starts with the largest block size (MTHCA_ICM_ALLOC_SIZE)
 * and falls back to smaller orders when an allocation fails.  With
 * "coherent" set the memory comes from the coherent DMA allocator;
 * otherwise plain pages are allocated and each chunk is DMA-mapped once
 * it is complete.
 *
 * Returns the new allocation, or NULL on failure (everything allocated
 * so far is released again).
 */
struct mthca_icm *mthca_alloc_icm(struct mthca_dev *dev, int npages,
gfp_t gfp_mask, int coherent)
{
struct mthca_icm *icm;
struct mthca_icm_chunk *chunk = NULL;
int cur_order;
int ret;
/* We use sg_set_buf for coherent allocs, which assumes low memory */
BUG_ON(coherent && (gfp_mask & __GFP_HIGHMEM));
/* The bookkeeping structures are allocated without the highmem/nowarn flags. */
icm = kmalloc(sizeof *icm, gfp_mask & ~(__GFP_HIGHMEM | __GFP_NOWARN));
if (!icm)
return icm;
icm->refcount = 0;
INIT_LIST_HEAD(&icm->chunk_list);
cur_order = get_order(MTHCA_ICM_ALLOC_SIZE);
while (npages > 0) {
/* Start a new chunk when there is none or the last one is full. */
if (!chunk) {
chunk = kmalloc(sizeof *chunk,
gfp_mask & ~(__GFP_HIGHMEM | __GFP_NOWARN));
if (!chunk)
goto fail;
sg_init_table(chunk->mem, MTHCA_ICM_CHUNK_LEN);
chunk->npages = 0;
chunk->nsg = 0;
list_add_tail(&chunk->list, &icm->chunk_list);
}
/* Never allocate more than is still needed. */
while (1 << cur_order > npages)
--cur_order;
if (coherent)
ret = mthca_alloc_icm_coherent(&dev->pdev->dev,
&chunk->mem[chunk->npages],
cur_order, gfp_mask);
else
ret = mthca_alloc_icm_pages(&chunk->mem[chunk->npages],
cur_order, gfp_mask);
if (!ret) {
++chunk->npages;
/*
 * Coherent entries already carry their DMA address; page
 * entries are mapped in one go when the chunk is full.
 */
if (coherent)
++chunk->nsg;
else if (chunk->npages == MTHCA_ICM_CHUNK_LEN) {
chunk->nsg = pci_map_sg(dev->pdev, chunk->mem,
chunk->npages,
PCI_DMA_BIDIRECTIONAL);
if (chunk->nsg <= 0)
goto fail;
}
if (chunk->npages == MTHCA_ICM_CHUNK_LEN)
chunk = NULL;
npages -= 1 << cur_order;
} else {
/* Retry with a smaller block; give up below order 0. */
--cur_order;
if (cur_order < 0)
goto fail;
}
}
/* Map the last, partially filled chunk of a non-coherent allocation. */
if (!coherent && chunk) {
chunk->nsg = pci_map_sg(dev->pdev, chunk->mem,
chunk->npages,
PCI_DMA_BIDIRECTIONAL);
if (chunk->nsg <= 0)
goto fail;
}
return icm;
fail:
mthca_free_icm(dev, icm, coherent);
return NULL;
}
/*
 * Take a reference on the ICM chunk that backs object "obj" of the
 * table, allocating the chunk and mapping it into the device first if
 * it is not present yet.
 *
 * Returns 0 on success or -ENOMEM if the chunk cannot be allocated or
 * mapped.
 */
int mthca_table_get(struct mthca_dev *dev, struct mthca_icm_table *table, int obj)
{
	int slot = (obj & (table->num_obj - 1)) * table->obj_size / MTHCA_TABLE_CHUNK_SIZE;
	int rc = 0;

	mutex_lock(&table->mutex);

	if (!table->icm[slot]) {
		table->icm[slot] = mthca_alloc_icm(dev, MTHCA_TABLE_CHUNK_SIZE >> PAGE_SHIFT,
						   (table->lowmem ? GFP_KERNEL : GFP_HIGHUSER) |
						   __GFP_NOWARN, table->coherent);
		if (!table->icm[slot]) {
			rc = -ENOMEM;
			goto unlock;
		}

		if (mthca_MAP_ICM(dev, table->icm[slot],
				  table->virt + slot * MTHCA_TABLE_CHUNK_SIZE)) {
			mthca_free_icm(dev, table->icm[slot], table->coherent);
			table->icm[slot] = NULL;
			rc = -ENOMEM;
			goto unlock;
		}
	}

	/* Existing and freshly mapped chunks alike gain one reference. */
	++table->icm[slot]->refcount;

unlock:
	mutex_unlock(&table->mutex);
	return rc;
}
/*
 * Drop a reference on the ICM chunk that backs object "obj".  When the
 * last reference goes away the chunk is unmapped from the device and
 * freed.  Does nothing on devices that are not mem-free.
 */
void mthca_table_put(struct mthca_dev *dev, struct mthca_icm_table *table, int obj)
{
	int slot;

	if (!mthca_is_memfree(dev))
		return;

	slot = (obj & (table->num_obj - 1)) * table->obj_size / MTHCA_TABLE_CHUNK_SIZE;

	mutex_lock(&table->mutex);

	--table->icm[slot]->refcount;
	if (!table->icm[slot]->refcount) {
		mthca_UNMAP_ICM(dev, table->virt + slot * MTHCA_TABLE_CHUNK_SIZE,
				MTHCA_TABLE_CHUNK_SIZE / MTHCA_ICM_PAGE_SIZE);
		mthca_free_icm(dev, table->icm[slot], table->coherent);
		table->icm[slot] = NULL;
	}

	mutex_unlock(&table->mutex);
}
/*
 * Return the kernel address of object "obj" inside the table and, if
 * dma_handle is not NULL, store the matching DMA address there.
 *
 * Only tables in low memory are supported; NULL is returned for other
 * tables and when the chunk holding the object is not allocated.
 */
void *mthca_table_find(struct mthca_icm_table *table, int obj, dma_addr_t *dma_handle)
{
int idx, offset, dma_offset, i;
struct mthca_icm_chunk *chunk;
struct mthca_icm *icm;
struct page *page = NULL;
if (!table->lowmem)
return NULL;
mutex_lock(&table->mutex);
/* Byte offset of the object in the table, then within its chunk. */
idx = (obj & (table->num_obj - 1)) * table->obj_size;
icm = table->icm[idx / MTHCA_TABLE_CHUNK_SIZE];
dma_offset = offset = idx % MTHCA_TABLE_CHUNK_SIZE;
if (!icm)
goto out;
/*
 * Walk the scatterlist entries, consuming "offset" by the CPU-side
 * lengths and "dma_offset" by the DMA-side lengths until each falls
 * inside an entry.
 */
list_for_each_entry(chunk, &icm->chunk_list, list) {
for (i = 0; i < chunk->npages; ++i) {
if (dma_handle && dma_offset >= 0) {
if (sg_dma_len(&chunk->mem[i]) > dma_offset)
*dma_handle = sg_dma_address(&chunk->mem[i]) +
dma_offset;
dma_offset -= sg_dma_len(&chunk->mem[i]);
}
/* DMA mapping can merge pages but not split them,
* so if we found the page, dma_handle has already
* been assigned to. */
if (chunk->mem[i].length > offset) {
page = sg_page(&chunk->mem[i]);
goto out;
}
offset -= chunk->mem[i].length;
}
}
out:
mutex_unlock(&table->mutex);
return page ? lowmem_page_address(page) + offset : NULL;
}
/*
 * Take a reference on every chunk touched by objects start..end
 * (inclusive), stepping one chunk's worth of objects at a time.
 * Returns 0 on success.  If any mthca_table_get() call fails, the
 * references taken so far are dropped again and that error is returned.
 */
int mthca_table_get_range(struct mthca_dev *dev, struct mthca_icm_table *table,
			  int start, int end)
{
	const int step = MTHCA_TABLE_CHUNK_SIZE / table->obj_size;
	int rc = 0;
	int obj;

	for (obj = start; obj <= end; obj += step) {
		rc = mthca_table_get(dev, table, obj);
		if (rc)
			break;
	}

	if (!rc)
		return 0;

	/* Undo, in reverse order, the gets that succeeded before the failure. */
	while (obj > start) {
		obj -= step;
		mthca_table_put(dev, table, obj);
	}

	return rc;
}
/*
 * Drop the references taken on objects start..end (inclusive), visiting
 * one object per chunk.  Does nothing on devices that are not mem-free.
 */
void mthca_table_put_range(struct mthca_dev *dev, struct mthca_icm_table *table,
			   int start, int end)
{
	int obj = start;

	if (!mthca_is_memfree(dev))
		return;

	while (obj <= end) {
		mthca_table_put(dev, table, obj);
		obj += MTHCA_TABLE_CHUNK_SIZE / table->obj_size;
	}
}
/*
 * Create an ICM table describing 'nobj' objects of 'obj_size' bytes that
 * live at device virtual address 'virt'.
 *
 * Chunks covering the first 'reserved' objects are allocated and mapped
 * immediately and given a reference so that mthca_table_put() never
 * releases them; all other chunk slots start out NULL.
 *
 * Returns the new table, or NULL if any allocation or mapping fails (in
 * which case everything set up so far is torn down again).
 */
struct mthca_icm_table *mthca_alloc_icm_table(struct mthca_dev *dev,
					      u64 virt, int obj_size,
					      int nobj, int reserved,
					      int use_lowmem, int use_coherent)
{
	const gfp_t gfp = (use_lowmem ? GFP_KERNEL : GFP_HIGHUSER) | __GFP_NOWARN;
	struct mthca_icm_table *tbl;
	struct mthca_icm *icm;
	unsigned len;
	int per_chunk;
	int chunks;
	int c;

	per_chunk = MTHCA_TABLE_CHUNK_SIZE / obj_size;
	chunks = DIV_ROUND_UP(nobj, per_chunk);

	tbl = kmalloc(sizeof *tbl + chunks * sizeof *tbl->icm, GFP_KERNEL);
	if (!tbl)
		return NULL;

	tbl->virt     = virt;
	tbl->num_icm  = chunks;
	tbl->num_obj  = nobj;
	tbl->obj_size = obj_size;
	tbl->lowmem   = use_lowmem;
	tbl->coherent = use_coherent;
	mutex_init(&tbl->mutex);

	for (c = 0; c < chunks; ++c)
		tbl->icm[c] = NULL;

	for (c = 0; c * MTHCA_TABLE_CHUNK_SIZE < reserved * obj_size; ++c) {
		/* The last chunk of the table may be shorter than a full chunk. */
		len = MTHCA_TABLE_CHUNK_SIZE;
		if ((c + 1) * MTHCA_TABLE_CHUNK_SIZE > nobj * obj_size)
			len = nobj * obj_size - c * MTHCA_TABLE_CHUNK_SIZE;

		icm = mthca_alloc_icm(dev, len >> PAGE_SHIFT, gfp, use_coherent);
		tbl->icm[c] = icm;
		if (!icm)
			goto err;

		if (mthca_MAP_ICM(dev, icm, virt + c * MTHCA_TABLE_CHUNK_SIZE)) {
			mthca_free_icm(dev, icm, tbl->coherent);
			tbl->icm[c] = NULL;
			goto err;
		}

		/*
		 * Add a reference to this ICM chunk so that it never
		 * gets freed (since it contains reserved firmware objects).
		 */
		++icm->refcount;
	}

	return tbl;

err:
	/* Release every chunk that was successfully mapped before the failure. */
	for (c = 0; c < chunks; ++c) {
		if (!tbl->icm[c])
			continue;
		mthca_UNMAP_ICM(dev, virt + c * MTHCA_TABLE_CHUNK_SIZE,
				MTHCA_TABLE_CHUNK_SIZE / MTHCA_ICM_PAGE_SIZE);
		mthca_free_icm(dev, tbl->icm[c], tbl->coherent);
	}

	kfree(tbl);

	return NULL;
}
/*
 * Destroy an ICM table: unmap and free every chunk that is still
 * allocated, then free the table itself.
 */
void mthca_free_icm_table(struct mthca_dev *dev, struct mthca_icm_table *table)
{
	int chunk;

	for (chunk = 0; chunk < table->num_icm; ++chunk) {
		struct mthca_icm *icm = table->icm[chunk];

		if (!icm)
			continue;

		mthca_UNMAP_ICM(dev,
				table->virt + chunk * MTHCA_TABLE_CHUNK_SIZE,
				MTHCA_TABLE_CHUNK_SIZE / MTHCA_ICM_PAGE_SIZE);
		mthca_free_icm(dev, icm, table->coherent);
	}

	kfree(table);
}
/*
 * Device virtual address of UAR context page 'page' belonging to 'uar':
 * the UARC base, plus one uarc_size region per UAR index, plus the page
 * offset inside that region.
 */
static u64 mthca_uarc_virt(struct mthca_dev *dev, struct mthca_uar *uar, int page)
{
	u64 addr = dev->uar_table.uarc_base;

	addr += uar->index * dev->uar_table.uarc_size;
	addr += page * MTHCA_ICM_PAGE_SIZE;

	return addr;
}
/*
 * Map the user page at 'uaddr' as the doorbell page that holds doorbell
 * record 'index' of 'uar'.
 *
 * The first caller for a given page pins it, DMA-maps it and maps it into
 * the device's UAR context; later callers for the same page (which must
 * pass the same 'uaddr') only bump the page's reference count.
 *
 * Returns 0 on success or on devices that are not mem-free, -EINVAL for
 * an out-of-range index, a misaligned or mismatching 'uaddr', or a page
 * whose reference count is already at its limit, and a negative errno if
 * pinning or mapping the page fails.
 */
int mthca_map_user_db(struct mthca_dev *dev, struct mthca_uar *uar,
		      struct mthca_user_db_table *db_tab, int index, u64 uaddr)
{
	struct page *pages[1];
	int ret = 0;
	int i;

	if (!mthca_is_memfree(dev))
		return 0;

	/*
	 * Valid record indices are 0 .. uarc_size / 8 - 1.  Accepting
	 * index == uarc_size / 8 would select page slot
	 * uarc_size / MTHCA_ICM_PAGE_SIZE, one past the end of
	 * db_tab->page[] as sized by mthca_init_user_db_tab().
	 */
	if (index < 0 || index >= dev->uar_table.uarc_size / 8)
		return -EINVAL;

	mutex_lock(&db_tab->mutex);

	i = index / MTHCA_DB_REC_PER_PAGE;

	if ((db_tab->page[i].refcount >= MTHCA_DB_REC_PER_PAGE)       ||
	    (db_tab->page[i].uvirt && db_tab->page[i].uvirt != uaddr) ||
	    (uaddr & 4095)) {
		ret = -EINVAL;
		goto out;
	}

	/* Page already mapped: just account for one more user. */
	if (db_tab->page[i].refcount) {
		++db_tab->page[i].refcount;
		goto out;
	}

	ret = get_user_pages(uaddr & PAGE_MASK, 1, FOLL_WRITE, pages, NULL);
	if (ret < 0)
		goto out;

	sg_set_page(&db_tab->page[i].mem, pages[0], MTHCA_ICM_PAGE_SIZE,
		    uaddr & ~PAGE_MASK);

	/* A scatterlist mapping reports failure by returning 0 entries. */
	ret = pci_map_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE);
	if (ret <= 0) {
		put_page(pages[0]);
		ret = -ENOMEM;
		goto out;
	}

	ret = mthca_MAP_ICM_page(dev, sg_dma_address(&db_tab->page[i].mem),
				 mthca_uarc_virt(dev, uar, i));
	if (ret) {
		pci_unmap_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE);
		put_page(sg_page(&db_tab->page[i].mem));
		goto out;
	}

	db_tab->page[i].uvirt    = uaddr;
	db_tab->page[i].refcount = 1;

out:
	mutex_unlock(&db_tab->mutex);
	return ret;
}
/*
 * Release one user of the doorbell page that holds record 'index'.
 * Only the reference count is decremented here; does nothing on devices
 * that are not mem-free.
 */
void mthca_unmap_user_db(struct mthca_dev *dev, struct mthca_uar *uar,
			 struct mthca_user_db_table *db_tab, int index)
{
	int slot;

	if (!mthca_is_memfree(dev))
		return;

	slot = index / MTHCA_DB_REC_PER_PAGE;

	/*
	 * To make our bookkeeping simpler, we don't unmap DB
	 * pages until we clean up the whole db table.
	 */

	mutex_lock(&db_tab->mutex);
	db_tab->page[slot].refcount -= 1;
	mutex_unlock(&db_tab->mutex);
}
/*
 * Allocate an empty user doorbell table with one slot per ICM-sized page
 * of the UAR context.  Returns NULL on devices that are not mem-free and
 * ERR_PTR(-ENOMEM) if the allocation fails.
 */
struct mthca_user_db_table *mthca_init_user_db_tab(struct mthca_dev *dev)
{
	struct mthca_user_db_table *tab;
	int count;
	int n;

	if (!mthca_is_memfree(dev))
		return NULL;

	count = dev->uar_table.uarc_size / MTHCA_ICM_PAGE_SIZE;

	tab = kmalloc(sizeof *tab + count * sizeof *tab->page, GFP_KERNEL);
	if (tab == NULL)
		return ERR_PTR(-ENOMEM);

	mutex_init(&tab->mutex);

	/* Every slot starts out unmapped and unreferenced. */
	n = 0;
	while (n < count) {
		tab->page[n].refcount = 0;
		tab->page[n].uvirt    = 0;
		sg_init_table(&tab->page[n].mem, 1);
		++n;
	}

	return tab;
}
/*
 * Tear down a user doorbell table: every page that was ever mapped
 * (non-zero uvirt) is unmapped from the device, DMA-unmapped and
 * unpinned, then the table is freed.  Does nothing on devices that are
 * not mem-free.
 */
void mthca_cleanup_user_db_tab(struct mthca_dev *dev, struct mthca_uar *uar,
			       struct mthca_user_db_table *db_tab)
{
	int n;

	if (!mthca_is_memfree(dev))
		return;

	for (n = 0; n < dev->uar_table.uarc_size / MTHCA_ICM_PAGE_SIZE; ++n) {
		if (!db_tab->page[n].uvirt)
			continue;

		mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, uar, n), 1);
		pci_unmap_sg(dev->pdev, &db_tab->page[n].mem, 1, PCI_DMA_TODEVICE);
		put_page(sg_page(&db_tab->page[n].mem));
	}

	kfree(db_tab);
}
/*
 * Allocate a kernel doorbell record of the given type for queue 'qn'.
 *
 * CQ_ARM and SQ records are taken from group 0, which grows upwards from
 * page 0; CQ_SET_CI, RQ and SRQ records are taken from group 1, which
 * grows downwards from the last page.  The search order is: a page of
 * the group that is allocated and not full, then a page of the group
 * that is currently unallocated, then a brand-new page if the two groups
 * have not met yet.
 *
 * On success '*db' points at the record and the record's table-wide
 * index is returned.  On failure a negative errno is returned: -EINVAL
 * for an unknown type, -ENOMEM when no page is available or the page
 * allocation fails, or the error from mthca_MAP_ICM_page().
 */
int mthca_alloc_db(struct mthca_dev *dev, enum mthca_db_type type,
		   u32 qn, __be32 **db)
{
	int group;
	int start, end, dir;
	int i, j;
	struct mthca_db_page *page;
	int ret = 0;

	mutex_lock(&dev->db_tab->mutex);

	switch (type) {
	case MTHCA_DB_TYPE_CQ_ARM:
	case MTHCA_DB_TYPE_SQ:
		group = 0;
		start = 0;
		end   = dev->db_tab->max_group1;
		dir   = 1;
		break;

	case MTHCA_DB_TYPE_CQ_SET_CI:
	case MTHCA_DB_TYPE_RQ:
	case MTHCA_DB_TYPE_SRQ:
		group = 1;
		start = dev->db_tab->npages - 1;
		end   = dev->db_tab->min_group2;
		dir   = -1;
		break;

	default:
		ret = -EINVAL;
		goto out;
	}

	/* First choice: an allocated page of this group with a free record. */
	for (i = start; i != end; i += dir)
		if (dev->db_tab->page[i].db_rec &&
		    !bitmap_full(dev->db_tab->page[i].used,
				 MTHCA_DB_REC_PER_PAGE)) {
			page = dev->db_tab->page + i;
			goto found;
		}

	/* Second choice: a page of this group that has no memory behind it. */
	for (i = start; i != end; i += dir)
		if (!dev->db_tab->page[i].db_rec) {
			page = dev->db_tab->page + i;
			goto alloc;
		}

	/* Last resort: extend the group, unless the two groups would collide. */
	if (dev->db_tab->max_group1 >= dev->db_tab->min_group2 - 1) {
		ret = -ENOMEM;
		goto out;
	}

	if (group == 0)
		++dev->db_tab->max_group1;
	else
		--dev->db_tab->min_group2;

	/* Both loops above finished with i == end, so i names this page. */
	page = dev->db_tab->page + end;

alloc:
	page->db_rec = dma_alloc_coherent(&dev->pdev->dev, MTHCA_ICM_PAGE_SIZE,
					  &page->mapping, GFP_KERNEL);
	if (!page->db_rec) {
		ret = -ENOMEM;
		goto out;
	}
	memset(page->db_rec, 0, MTHCA_ICM_PAGE_SIZE);

	ret = mthca_MAP_ICM_page(dev, page->mapping,
				 mthca_uarc_virt(dev, &dev->driver_uar, i));
	if (ret) {
		dma_free_coherent(&dev->pdev->dev, MTHCA_ICM_PAGE_SIZE,
				  page->db_rec, page->mapping);
		/*
		 * Mark the page unallocated again.  A stale pointer here
		 * would make later callers hand out records from freed
		 * memory and make mthca_cleanup_db_tab() free it twice.
		 */
		page->db_rec = NULL;
		goto out;
	}

	bitmap_zero(page->used, MTHCA_DB_REC_PER_PAGE);

found:
	j = find_first_zero_bit(page->used, MTHCA_DB_REC_PER_PAGE);
	set_bit(j, page->used);

	/* Group 1 records are handed out from the end of the page backwards. */
	if (group == 1)
		j = MTHCA_DB_REC_PER_PAGE - 1 - j;

	ret = i * MTHCA_DB_REC_PER_PAGE + j;

	page->db_rec[j] = cpu_to_be64((qn << 8) | (type << 5));

	*db = (__be32 *) &page->db_rec[j];

out:
	mutex_unlock(&dev->db_tab->mutex);

	return ret;
}
/*
 * Release the kernel doorbell record with table-wide index 'db_index'.
 * The record is zeroed and its bit in the page's 'used' bitmap cleared.
 * If that leaves the page empty and the page index satisfies the
 * boundary test below, the page is unmapped and freed and the group
 * boundaries are adjusted.  'type' is not used by this function.
 */
void mthca_free_db(struct mthca_dev *dev, int type, int db_index)
{
int i, j;
struct mthca_db_page *page;
/* Split the index into page number (i) and record within the page (j). */
i = db_index / MTHCA_DB_REC_PER_PAGE;
j = db_index % MTHCA_DB_REC_PER_PAGE;
page = dev->db_tab->page + i;
mutex_lock(&dev->db_tab->mutex);
page->db_rec[j] = 0;
/* Mirror of the reversed bit numbering mthca_alloc_db() uses for group 1. */
if (i >= dev->db_tab->min_group2)
j = MTHCA_DB_REC_PER_PAGE - 1 - j;
clear_bit(j, page->used);
/* NOTE(review): the reason for the "max_group1 - 1" bound is not evident from this file. */
if (bitmap_empty(page->used, MTHCA_DB_REC_PER_PAGE) &&
i >= dev->db_tab->max_group1 - 1) {
mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, &dev->driver_uar, i), 1);
dma_free_coherent(&dev->pdev->dev, MTHCA_ICM_PAGE_SIZE,
page->db_rec, page->mapping);
/* NULL marks the page as unallocated for mthca_alloc_db(). */
page->db_rec = NULL;
if (i == dev->db_tab->max_group1) {
--dev->db_tab->max_group1;
/* XXX may be able to unmap more pages now */
}
if (i == dev->db_tab->min_group2)
++dev->db_tab->min_group2;
}
mutex_unlock(&dev->db_tab->mutex);
}
int mthca_init_db_tab(struct mthca_dev *dev)
{
int i;
if (!mthca_is_memfree(dev))
return 0;
dev->db_tab = kmalloc(sizeof *dev->db_tab, GFP_KERNEL);
if (!dev->db_tab)
return -ENOMEM;
mutex_init(&dev->db_tab->mutex);
dev->db_tab->npages = dev->uar_table.uarc_size / MTHCA_ICM_PAGE_SIZE;
dev->db_tab->max_group1 = 0;
dev->db_tab->min_group2 = dev->db_tab->npages - 1;
dev->db_tab->page = kmalloc(dev->db_tab->npages *
sizeof *dev->db_tab->page,
GFP_KERNEL);
if (!dev->db_tab->page) {
kfree(dev->db_tab);
return -ENOMEM;
}
for (i = 0; i < dev->db_tab->npages; ++i)
dev->db_tab->page[i].db_rec = NULL;
return 0;
}
void mthca_cleanup_db_tab(struct mthca_dev *dev)
{
int i;
if (!mthca_is_memfree(dev))
return;
/*
* Because we don't always free our UARC pages when they
* become empty to make mthca_free_db() simpler we need to
* make a sweep through the doorbell pages and free any
* leftover pages now.
*/
for (i = 0; i < dev->db_tab->npages; ++i) {
if (!dev->db_tab->page[i].db_rec)
continue;
if (!bitmap_empty(dev->db_tab->page[i].used, MTHCA_DB_REC_PER_PAGE))
mthca_warn(dev, "Kernel UARC page %d not empty\n", i);
mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, &dev->driver_uar, i), 1);
dma_free_coherent(&dev->pdev->dev, MTHCA_ICM_PAGE_SIZE,
dev->db_tab->page[i].db_rec,
dev->db_tab->page[i].mapping);
}
kfree(dev->db_tab->page);
kfree(dev->db_tab);
}

View File

@ -0,0 +1,179 @@
/*
* Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
* Copyright (c) 2005 Cisco Systems. All rights reserved.
* Copyright (c) 2005 Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef MTHCA_MEMFREE_H
#define MTHCA_MEMFREE_H
#include <linux/list.h>
#include <linux/mutex.h>
/*
 * Number of scatterlist entries in one struct mthca_icm_chunk: whatever
 * fits in 256 bytes after the list head and the two int counters.
 */
#define MTHCA_ICM_CHUNK_LEN \
((256 - sizeof (struct list_head) - 2 * sizeof (int)) / \
(sizeof (struct scatterlist)))
/* ICM page geometry used throughout the mem-free code. */
enum {
/* log2 of the ICM page size */
MTHCA_ICM_PAGE_SHIFT = 12,
/* ICM page size in bytes (4096) */
MTHCA_ICM_PAGE_SIZE = 1 << MTHCA_ICM_PAGE_SHIFT,
/* doorbell records per ICM page; each record takes 8 bytes */
MTHCA_DB_REC_PER_PAGE = MTHCA_ICM_PAGE_SIZE / 8
};
/* One node of an ICM allocation: a fixed-size batch of scatterlist entries. */
struct mthca_icm_chunk {
/* link in mthca_icm.chunk_list */
struct list_head list;
/* number of mem[] entries walked by mthca_table_find() */
int npages;
/* number of mem[] entries walked by the mthca_icm_iter helpers; presumably the DMA-mapped count -- TODO confirm */
int nsg;
struct scatterlist mem[MTHCA_ICM_CHUNK_LEN];
};
/* An ICM allocation: a list of chunks plus a reference count. */
struct mthca_icm {
/* list of struct mthca_icm_chunk, linked through their 'list' member */
struct list_head chunk_list;
/* users of this allocation; mthca_table_put() frees it when this drops to 0 */
int refcount;
};
/* A table of fixed-size objects in ICM, backed chunk by chunk on demand. */
struct mthca_icm_table {
/* device virtual address of the start of the table */
u64 virt;
/* number of entries in icm[] */
int num_icm;
/* number of objects; used as a mask (num_obj - 1) when indexing */
int num_obj;
/* size of one object in bytes */
int obj_size;
/* non-zero: allocated with GFP_KERNEL, and mthca_table_find() may be used */
int lowmem;
/* passed through to mthca_alloc_icm() / mthca_free_icm() */
int coherent;
/* protects icm[] and the chunks' reference counts */
struct mutex mutex;
/* one pointer per chunk; NULL while the chunk is not allocated */
struct mthca_icm *icm[0];
};
/* Cursor used by mthca_icm_first() / mthca_icm_next() to walk an ICM allocation. */
struct mthca_icm_iter {
/* allocation being walked */
struct mthca_icm *icm;
/* current chunk; NULL once the walk is finished */
struct mthca_icm_chunk *chunk;
/* index of the current entry in chunk->mem[] */
int page_idx;
};
struct mthca_dev;
/* Allocate / free an ICM allocation; 'coherent' must match between the two. */
struct mthca_icm *mthca_alloc_icm(struct mthca_dev *dev, int npages,
gfp_t gfp_mask, int coherent);
void mthca_free_icm(struct mthca_dev *dev, struct mthca_icm *icm, int coherent);
/* Create / destroy an ICM table; mthca_alloc_icm_table() returns NULL on failure. */
struct mthca_icm_table *mthca_alloc_icm_table(struct mthca_dev *dev,
u64 virt, int obj_size,
int nobj, int reserved,
int use_lowmem, int use_coherent);
void mthca_free_icm_table(struct mthca_dev *dev, struct mthca_icm_table *table);
/* Take / drop a reference on the chunk backing a single object. */
int mthca_table_get(struct mthca_dev *dev, struct mthca_icm_table *table, int obj);
void mthca_table_put(struct mthca_dev *dev, struct mthca_icm_table *table, int obj);
/* Kernel address (and optionally DMA address) of an object, or NULL. */
void *mthca_table_find(struct mthca_icm_table *table, int obj, dma_addr_t *dma_handle);
/* Take / drop references for the inclusive object range start..end. */
int mthca_table_get_range(struct mthca_dev *dev, struct mthca_icm_table *table,
int start, int end);
void mthca_table_put_range(struct mthca_dev *dev, struct mthca_icm_table *table,
int start, int end);
/*
 * Point 'iter' at the first entry of 'icm'.  If the allocation has no
 * chunks the iterator starts out finished (chunk == NULL).
 */
static inline void mthca_icm_first(struct mthca_icm *icm,
				   struct mthca_icm_iter *iter)
{
	iter->icm = icm;

	if (list_empty(&icm->chunk_list))
		iter->chunk = NULL;
	else
		iter->chunk = list_entry(icm->chunk_list.next,
					 struct mthca_icm_chunk, list);

	iter->page_idx = 0;
}
static inline int mthca_icm_last(struct mthca_icm_iter *iter)
{
return !iter->chunk;
}
/*
 * Advance the iterator by one entry, moving on to the next chunk when the
 * current one is exhausted.  After the last entry of the last chunk the
 * iterator's chunk becomes NULL.
 */
static inline void mthca_icm_next(struct mthca_icm_iter *iter)
{
	struct list_head *next;

	/* Still inside the current chunk: nothing more to do. */
	if (++iter->page_idx < iter->chunk->nsg)
		return;

	next = iter->chunk->list.next;
	if (next == &iter->icm->chunk_list) {
		iter->chunk = NULL;
		return;
	}

	iter->chunk = list_entry(next, struct mthca_icm_chunk, list);
	iter->page_idx = 0;
}
/* DMA address of the entry the iterator currently points at. */
static inline dma_addr_t mthca_icm_addr(struct mthca_icm_iter *iter)
{
	struct scatterlist *sg = &iter->chunk->mem[iter->page_idx];

	return sg_dma_address(sg);
}
/* DMA length of the entry the iterator currently points at. */
static inline unsigned long mthca_icm_size(struct mthca_icm_iter *iter)
{
	struct scatterlist *sg = &iter->chunk->mem[iter->page_idx];

	return sg_dma_len(sg);
}
/* One page of kernel doorbell records. */
struct mthca_db_page {
/* one bit per doorbell record; set while the record is handed out */
DECLARE_BITMAP(used, MTHCA_DB_REC_PER_PAGE);
/* the records themselves; NULL while the page has no memory behind it */
__be64 *db_rec;
/* DMA address of db_rec, as returned by dma_alloc_coherent() */
dma_addr_t mapping;
};
/* Kernel doorbell table: two groups of pages growing towards each other. */
struct mthca_db_table {
/* number of entries in page[] */
int npages;
/* group 0 (CQ arm / SQ) uses pages below this index */
int max_group1;
/* group 1 (CQ set-CI / RQ / SRQ) uses pages above this index */
int min_group2;
struct mthca_db_page *page;
/* protects the whole table */
struct mutex mutex;
};
/*
 * Doorbell record types.  mthca_alloc_db() stores the value in the
 * record (shifted left by 5) and uses it to pick the page group.
 */
enum mthca_db_type {
MTHCA_DB_TYPE_INVALID = 0x0,
/* group 1 */
MTHCA_DB_TYPE_CQ_SET_CI = 0x1,
/* group 0 */
MTHCA_DB_TYPE_CQ_ARM = 0x2,
/* group 0 */
MTHCA_DB_TYPE_SQ = 0x3,
/* group 1 */
MTHCA_DB_TYPE_RQ = 0x4,
/* group 1 */
MTHCA_DB_TYPE_SRQ = 0x5,
MTHCA_DB_TYPE_GROUP_SEP = 0x7
};
struct mthca_user_db_table;
struct mthca_uar;
/* User doorbell pages: map / unmap the page that holds record 'index'. */
int mthca_map_user_db(struct mthca_dev *dev, struct mthca_uar *uar,
struct mthca_user_db_table *db_tab, int index, u64 uaddr);
void mthca_unmap_user_db(struct mthca_dev *dev, struct mthca_uar *uar,
struct mthca_user_db_table *db_tab, int index);
/* Create / destroy a user doorbell table. */
struct mthca_user_db_table *mthca_init_user_db_tab(struct mthca_dev *dev);
void mthca_cleanup_user_db_tab(struct mthca_dev *dev, struct mthca_uar *uar,
struct mthca_user_db_table *db_tab);
/* Create / destroy the kernel doorbell table stored in dev->db_tab. */
int mthca_init_db_tab(struct mthca_dev *dev);
void mthca_cleanup_db_tab(struct mthca_dev *dev);
/* Allocate / free one kernel doorbell record; alloc returns its index or a negative errno. */
int mthca_alloc_db(struct mthca_dev *dev, enum mthca_db_type type,
u32 qn, __be32 **db);
void mthca_free_db(struct mthca_dev *dev, int type, int db_index);
#endif /* MTHCA_MEMFREE_H */

965
sys/dev/mthca/mthca_mr.c Normal file
View File

@ -0,0 +1,965 @@
/*
* Copyright (c) 2004 Topspin Communications. All rights reserved.
* Copyright (c) 2005 Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/slab.h>
#include <linux/errno.h>
#include "mthca_dev.h"
#include "mthca_cmd.h"
#include "mthca_memfree.h"
/* A range of MTT segments handed out by a buddy allocator. */
struct mthca_mtt {
/* allocator the range came from, and must be returned to */
struct mthca_buddy *buddy;
/* the range spans 1 << order segments */
int order;
/* index of the first segment of the range */
u32 first_seg;
};
/*
 * Layout of an MPT entry as exchanged with the device; all multi-byte
 * fields are big-endian.
 *
 * Must be packed because mtt_seg is 64 bits but only aligned to 32 bits.
 */
struct mthca_mpt_entry {
__be32 flags;
/* stored as (page shift - 12) */
__be32 page_size;
__be32 key;
__be32 pd;
__be64 start;
__be64 length;
__be32 lkey;
__be32 window_count;
__be32 window_count_limit;
/* address of the first MTT segment of the region */
__be64 mtt_seg;
__be32 mtt_sz; /* Arbel only */
u32 reserved[2];
} __attribute__((packed));
/* Bits OR-ed into mthca_mpt_entry.flags when an entry is built. */
#define MTHCA_MPT_FLAG_SW_OWNS (0xfUL << 28)
#define MTHCA_MPT_FLAG_MIO (1 << 17)
#define MTHCA_MPT_FLAG_BIND_ENABLE (1 << 15)
/* set by mthca_mr_alloc() when the region has no MTT */
#define MTHCA_MPT_FLAG_PHYSICAL (1 << 9)
#define MTHCA_MPT_FLAG_REGION (1 << 8)
/* OR-ed into every MTT entry written to the device */
#define MTHCA_MTT_FLAG_PRESENT 1
/* Values written to the first byte of an FMR's MPT entry while remapping. */
#define MTHCA_MPT_STATUS_SW 0xF0
#define MTHCA_MPT_STATUS_HW 0x00
/* Key increment per FMR remap when MTHCA_FLAG_SINAI_OPT is set. */
#define SINAI_FMR_KEY_INC 0x1000000
/*
 * Buddy allocator for MTT segments (currently not very efficient
 * since it doesn't keep a free list and just searches linearly
 * through the bitmaps)
 */

/*
 * Allocate a block of 1 << order segments.  Returns the index of the
 * first segment, or (u32) -1 if no free block of sufficient size exists.
 */
static u32 mthca_buddy_alloc(struct mthca_buddy *buddy, int order)
{
	int level;
	int nbits;
	u32 seg;

	spin_lock(&buddy->lock);

	/* Find the smallest order >= 'order' that has a free block. */
	for (level = order; level <= buddy->max_order; ++level) {
		if (!buddy->num_free[level])
			continue;

		nbits = 1 << (buddy->max_order - level);
		seg = find_first_bit(buddy->bits[level], nbits);
		if (seg < nbits)
			break;
	}

	if (level > buddy->max_order) {
		spin_unlock(&buddy->lock);
		return -1;
	}

	clear_bit(seg, buddy->bits[level]);
	--buddy->num_free[level];

	/* Split the block down to the requested order, freeing the upper halves. */
	while (level > order) {
		--level;
		seg <<= 1;
		set_bit(seg ^ 1, buddy->bits[level]);
		++buddy->num_free[level];
	}

	spin_unlock(&buddy->lock);

	/* Convert from a block number at 'order' to a segment index. */
	seg <<= order;

	return seg;
}
/*
 * Return the block of 1 << order segments starting at 'seg' to the
 * allocator, merging it with its buddy for as long as the buddy is free.
 */
static void mthca_buddy_free(struct mthca_buddy *buddy, u32 seg, int order)
{
	u32 sibling;

	/* Convert from a segment index to a block number at 'order'. */
	seg >>= order;

	spin_lock(&buddy->lock);

	for (;;) {
		sibling = seg ^ 1;
		if (!test_bit(sibling, buddy->bits[order]))
			break;

		clear_bit(sibling, buddy->bits[order]);
		--buddy->num_free[order];
		seg >>= 1;
		++order;
	}

	set_bit(seg, buddy->bits[order]);
	++buddy->num_free[order];

	spin_unlock(&buddy->lock);
}
/*
 * Initialise a buddy allocator managing 1 << max_order segments.  All
 * bitmaps start empty except for a single free block of the maximum
 * order.  Returns 0 on success or -ENOMEM.
 */
static int mthca_buddy_init(struct mthca_buddy *buddy, int max_order)
{
	int level;
	int nbits;
	int words;

	buddy->max_order = max_order;
	spin_lock_init(&buddy->lock);

	buddy->bits = kzalloc((buddy->max_order + 1) * sizeof (long *),
			      GFP_KERNEL);
	buddy->num_free = kcalloc((buddy->max_order + 1), sizeof *buddy->num_free,
				  GFP_KERNEL);
	if (!buddy->bits || !buddy->num_free)
		goto err_out;

	/* One bitmap per order; order 'level' has 1 << (max_order - level) blocks. */
	for (level = 0; level <= buddy->max_order; ++level) {
		nbits = 1 << (buddy->max_order - level);
		words = BITS_TO_LONGS(nbits);

		buddy->bits[level] = kmalloc(words * sizeof (long), GFP_KERNEL);
		if (!buddy->bits[level])
			goto err_out_free;

		bitmap_zero(buddy->bits[level], nbits);
	}

	set_bit(0, buddy->bits[buddy->max_order]);
	buddy->num_free[buddy->max_order] = 1;

	return 0;

err_out_free:
	/* bits[] was zero-allocated, so slots never reached are NULL. */
	for (level = 0; level <= buddy->max_order; ++level)
		kfree(buddy->bits[level]);

err_out:
	kfree(buddy->bits);
	kfree(buddy->num_free);

	return -ENOMEM;
}
/* Free every per-order bitmap and the allocator's bookkeeping arrays. */
static void mthca_buddy_cleanup(struct mthca_buddy *buddy)
{
	int level = 0;

	while (level <= buddy->max_order) {
		kfree(buddy->bits[level]);
		++level;
	}

	kfree(buddy->bits);
	kfree(buddy->num_free);
}
/*
 * Allocate 1 << order MTT segments from 'buddy' and, on mem-free
 * devices, take references on the ICM backing them.  Returns the first
 * segment index or (u32) -1 on failure.
 */
static u32 mthca_alloc_mtt_range(struct mthca_dev *dev, int order,
				 struct mthca_buddy *buddy)
{
	u32 seg = mthca_buddy_alloc(buddy, order);

	if (seg == -1)
		return -1;

	if (!mthca_is_memfree(dev))
		return seg;

	if (!mthca_table_get_range(dev, dev->mr_table.mtt_table, seg,
				   seg + (1 << order) - 1))
		return seg;

	/* Could not back the range with ICM: give the segments back. */
	mthca_buddy_free(buddy, seg, order);
	return -1;
}
/*
 * Allocate an MTT able to hold 'size' entries from the given buddy
 * allocator.  Returns the new descriptor, ERR_PTR(-EINVAL) when size is
 * not positive, or ERR_PTR(-ENOMEM) when memory or segments run out.
 */
static struct mthca_mtt *__mthca_alloc_mtt(struct mthca_dev *dev, int size,
					   struct mthca_buddy *buddy)
{
	struct mthca_mtt *mtt;
	int capacity;
	int order;

	if (size <= 0)
		return ERR_PTR(-EINVAL);

	mtt = kmalloc(sizeof *mtt, GFP_KERNEL);
	if (!mtt)
		return ERR_PTR(-ENOMEM);

	/* Smallest power-of-two number of segments holding 'size' 8-byte entries. */
	order = 0;
	capacity = dev->limits.mtt_seg_size / 8;
	while (capacity < size) {
		capacity <<= 1;
		++order;
	}

	mtt->buddy = buddy;
	mtt->order = order;
	mtt->first_seg = mthca_alloc_mtt_range(dev, mtt->order, buddy);
	if (mtt->first_seg == -1) {
		kfree(mtt);
		return ERR_PTR(-ENOMEM);
	}

	return mtt;
}
/* Allocate an MTT of 'size' entries from the device's main MTT allocator. */
struct mthca_mtt *mthca_alloc_mtt(struct mthca_dev *dev, int size)
{
	struct mthca_buddy *buddy = &dev->mr_table.mtt_buddy;

	return __mthca_alloc_mtt(dev, size, buddy);
}
void mthca_free_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt)
{
if (!mtt)
return;
mthca_buddy_free(mtt->buddy, mtt->first_seg, mtt->order);
mthca_table_put_range(dev, dev->mr_table.mtt_table,
mtt->first_seg,
mtt->first_seg + (1 << mtt->order) - 1);
kfree(mtt);
}
/*
 * Write 'list_len' MTT entries, starting at entry 'start_index' of
 * 'mtt', using the WRITE_MTT firmware command.  The list is sent in
 * batches that fit in one mailbox.  Returns 0 on success, or the error
 * from mailbox allocation or from the first failing command.
 */
static int __mthca_write_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt,
			     int start_index, u64 *buffer_list, int list_len)
{
	struct mthca_mailbox *mailbox;
	__be64 *slot;
	int batch;
	int rc = 0;

	mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
	if (IS_ERR(mailbox))
		return PTR_ERR(mailbox);
	slot = mailbox->buf;

	while (list_len > 0) {
		/* Two-entry header: target MTT address, then a zero word. */
		slot[0] = cpu_to_be64(dev->mr_table.mtt_base +
				      mtt->first_seg * dev->limits.mtt_seg_size +
				      start_index * 8);
		slot[1] = 0;

		batch = 0;
		while (batch < list_len && batch < MTHCA_MAILBOX_SIZE / 8 - 2) {
			slot[batch + 2] = cpu_to_be64(buffer_list[batch] |
						      MTHCA_MTT_FLAG_PRESENT);
			++batch;
		}

		/*
		 * If we have an odd number of entries to write, add
		 * one more dummy entry for firmware efficiency.
		 */
		if (batch & 1)
			slot[batch + 2] = 0;

		rc = mthca_WRITE_MTT(dev, mailbox, (batch + 1) & ~1);
		if (rc) {
			mthca_warn(dev, "WRITE_MTT failed (%d)\n", rc);
			break;
		}

		list_len    -= batch;
		start_index += batch;
		buffer_list += batch;
	}

	mthca_free_mailbox(dev, mailbox);
	return rc;
}
/*
 * Largest number of MTT entries mthca_write_mtt() writes in one step.
 * The answer depends on whether entries go through the WRITE_MTT command
 * or are written directly.
 */
int mthca_write_mtt_size(struct mthca_dev *dev)
{
	const int via_cmd =
		dev->mr_table.fmr_mtt_buddy != &dev->mr_table.mtt_buddy ||
		!(dev->mthca_flags & MTHCA_FLAG_FMR);

	/*
	 * Be friendly to WRITE_MTT command
	 * and leave two empty slots for the
	 * index and reserved fields of the
	 * mailbox.
	 */
	if (via_cmd)
		return PAGE_SIZE / sizeof (u64) - 2;

	/* For Arbel, all MTTs must fit in the same page. */
	if (mthca_is_memfree(dev))
		return PAGE_SIZE / sizeof (u64);

	return 0x7ffffff;
}
/*
 * Tavor: write 'list_len' MTT entries directly through the I/O mapping
 * of the MTT area, starting at entry 'start_index' of 'mtt'.
 */
static void mthca_tavor_write_mtt_seg(struct mthca_dev *dev,
				      struct mthca_mtt *mtt, int start_index,
				      u64 *buffer_list, int list_len)
{
	u64 __iomem *dst;
	int n;

	dst = dev->mr_table.tavor_fmr.mtt_base + mtt->first_seg * dev->limits.mtt_seg_size +
		start_index * sizeof (u64);

	for (n = 0; n < list_len; ++n, ++dst)
		mthca_write64_raw(cpu_to_be64(buffer_list[n] | MTHCA_MTT_FLAG_PRESENT),
				  dst);
}
/*
 * Arbel (mem-free): write 'list_len' MTT entries straight into the ICM
 * memory backing the MTT table, starting at entry 'start_index' of
 * 'mtt'.  The entries must not cross a page boundary and must start on a
 * segment boundary; both conditions are enforced with BUG_ON().
 */
static void mthca_arbel_write_mtt_seg(struct mthca_dev *dev,
struct mthca_mtt *mtt, int start_index,
u64 *buffer_list, int list_len)
{
__be64 *mtts;
dma_addr_t dma_handle;
int i;
/* Byte offset of the first entry from the start of the MTT. */
int s = start_index * sizeof (u64);
/* For Arbel, all MTTs must fit in the same page. */
BUG_ON(s / PAGE_SIZE != (s + list_len * sizeof(u64) - 1) / PAGE_SIZE);
/* Require full segments */
BUG_ON(s % dev->limits.mtt_seg_size);
/* Kernel and DMA addresses of the segment holding the first entry. */
mtts = mthca_table_find(dev->mr_table.mtt_table, mtt->first_seg +
s / dev->limits.mtt_seg_size, &dma_handle);
BUG_ON(!mtts);
/* Bracket the CPU writes with DMA syncs for the range being written. */
dma_sync_single_for_cpu(&dev->pdev->dev, dma_handle,
list_len * sizeof (u64), DMA_TO_DEVICE);
for (i = 0; i < list_len; ++i)
mtts[i] = cpu_to_be64(buffer_list[i] | MTHCA_MTT_FLAG_PRESENT);
dma_sync_single_for_device(&dev->pdev->dev, dma_handle,
list_len * sizeof (u64), DMA_TO_DEVICE);
}
/*
 * Write 'list_len' entries from 'buffer_list' into 'mtt', starting at
 * entry 'start_index'.  Uses the WRITE_MTT command when FMRs are not
 * enabled or use a separate MTT allocator; otherwise writes the entries
 * directly, in steps of at most mthca_write_mtt_size() entries.
 * Returns 0 on success or the error from the command path.
 */
int mthca_write_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt,
		    int start_index, u64 *buffer_list, int list_len)
{
	const int limit = mthca_write_mtt_size(dev);
	int count;

	if (dev->mr_table.fmr_mtt_buddy != &dev->mr_table.mtt_buddy ||
	    !(dev->mthca_flags & MTHCA_FLAG_FMR))
		return __mthca_write_mtt(dev, mtt, start_index, buffer_list, list_len);

	for (; list_len > 0; list_len -= count) {
		count = list_len < limit ? list_len : limit;

		if (mthca_is_memfree(dev))
			mthca_arbel_write_mtt_seg(dev, mtt, start_index,
						  buffer_list, count);
		else
			mthca_tavor_write_mtt_seg(dev, mtt, start_index,
						  buffer_list, count);

		start_index += count;
		buffer_list += count;
	}

	return 0;
}
/* Tavor: memory keys and hardware MPT indices are the same value. */
static inline u32 tavor_hw_index_to_key(u32 ind)
{
	const u32 key = ind;

	return key;
}
/* Tavor: memory keys and hardware MPT indices are the same value. */
static inline u32 tavor_key_to_hw_index(u32 key)
{
	const u32 ind = key;

	return ind;
}
/*
 * Arbel: a memory key is the hardware index rotated left by 8 bits, so
 * the top byte of the index becomes the low byte of the key.
 */
static inline u32 arbel_hw_index_to_key(u32 ind)
{
	const u32 top_byte = ind >> 24;
	const u32 shifted  = ind << 8;

	return top_byte | shifted;
}
/*
 * Arbel: a hardware index is the memory key rotated right by 8 bits,
 * the inverse of arbel_hw_index_to_key().
 */
static inline u32 arbel_key_to_hw_index(u32 key)
{
	const u32 low_byte = key << 24;
	const u32 shifted  = key >> 8;

	return low_byte | shifted;
}
/* Convert a hardware MPT index to a memory key for this device type. */
static inline u32 hw_index_to_key(struct mthca_dev *dev, u32 ind)
{
	return mthca_is_memfree(dev) ? arbel_hw_index_to_key(ind)
				     : tavor_hw_index_to_key(ind);
}
/* Convert a memory key back to a hardware MPT index for this device type. */
static inline u32 key_to_hw_index(struct mthca_dev *dev, u32 key)
{
	return mthca_is_memfree(dev) ? arbel_key_to_hw_index(key)
				     : tavor_key_to_hw_index(key);
}
/*
 * With MTHCA_FLAG_SINAI_OPT set, copy bit 3 of the key into bit 23 and
 * keep the low 23 bits; otherwise return the key unchanged.
 */
static inline u32 adjust_key(struct mthca_dev *dev, u32 key)
{
	if (!(dev->mthca_flags & MTHCA_FLAG_SINAI_OPT))
		return key;

	return ((key << 20) & 0x800000) | (key & 0x7fffff);
}
/*
 * Create a memory region: allocate an MPT key, build the MPT entry in a
 * mailbox and pass it to the device with SW2HW_MPT.
 *
 * The caller must set mr->mtt beforehand: NULL marks the region as
 * physical, otherwise the MTT's first segment is recorded in the entry.
 * On success mr->ibmr.lkey and mr->ibmr.rkey hold the new key.
 *
 * Returns 0 on success, -ENOMEM when no key is available, or the error
 * from the ICM table, the mailbox allocation or the firmware command.
 * On failure the key and any ICM reference are released again.
 */
int mthca_mr_alloc(struct mthca_dev *dev, u32 pd, int buffer_size_shift,
u64 iova, u64 total_size, u32 access, struct mthca_mr *mr)
{
struct mthca_mailbox *mailbox;
struct mthca_mpt_entry *mpt_entry;
u32 key;
int i;
int err;
WARN_ON(buffer_size_shift >= 32);
key = mthca_alloc(&dev->mr_table.mpt_alloc);
if (key == -1)
return -ENOMEM;
key = adjust_key(dev, key);
mr->ibmr.rkey = mr->ibmr.lkey = hw_index_to_key(dev, key);
/* Mem-free devices need the ICM behind this MPT entry to be present. */
if (mthca_is_memfree(dev)) {
err = mthca_table_get(dev, dev->mr_table.mpt_table, key);
if (err)
goto err_out_mpt_free;
}
mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
if (IS_ERR(mailbox)) {
err = PTR_ERR(mailbox);
goto err_out_table;
}
mpt_entry = mailbox->buf;
mpt_entry->flags = cpu_to_be32(MTHCA_MPT_FLAG_SW_OWNS |
MTHCA_MPT_FLAG_MIO |
MTHCA_MPT_FLAG_REGION |
access);
if (!mr->mtt)
mpt_entry->flags |= cpu_to_be32(MTHCA_MPT_FLAG_PHYSICAL);
mpt_entry->page_size = cpu_to_be32(buffer_size_shift - 12);
mpt_entry->key = cpu_to_be32(key);
mpt_entry->pd = cpu_to_be32(pd);
mpt_entry->start = cpu_to_be64(iova);
mpt_entry->length = cpu_to_be64(total_size);
/* Zero every field from lkey to the end of the entry. */
memset(&mpt_entry->lkey, 0,
sizeof *mpt_entry - offsetof(struct mthca_mpt_entry, lkey));
if (mr->mtt)
mpt_entry->mtt_seg =
cpu_to_be64(dev->mr_table.mtt_base +
mr->mtt->first_seg * dev->limits.mtt_seg_size);
/* Debug dump of the entry, compiled in but disabled. */
if (0) {
mthca_dbg(dev, "Dumping MPT entry %08x:\n", mr->ibmr.lkey);
for (i = 0; i < sizeof (struct mthca_mpt_entry) / 4; ++i) {
if (i % 4 == 0)
printk("[%02x] ", i * 4);
printk(" %08x", be32_to_cpu(((__be32 *) mpt_entry)[i]));
if ((i + 1) % 4 == 0)
printk("\n");
}
}
err = mthca_SW2HW_MPT(dev, mailbox,
key & (dev->limits.num_mpts - 1));
if (err) {
mthca_warn(dev, "SW2HW_MPT failed (%d)\n", err);
goto err_out_mailbox;
}
mthca_free_mailbox(dev, mailbox);
return err;
/* Error unwinding, in reverse order of acquisition. */
err_out_mailbox:
mthca_free_mailbox(dev, mailbox);
err_out_table:
/* mthca_table_put() is a no-op on devices that are not mem-free. */
mthca_table_put(dev, dev->mr_table.mpt_table, key);
err_out_mpt_free:
mthca_free(&dev->mr_table.mpt_alloc, key);
return err;
}
/*
 * Create a memory region without address translation: no MTT, a page
 * shift of 12, start address 0 and the maximum 64-bit length.
 */
int mthca_mr_alloc_notrans(struct mthca_dev *dev, u32 pd,
			   u32 access, struct mthca_mr *mr)
{
	const int page_shift = 12;
	const u64 whole_space = ~0ULL;

	mr->mtt = NULL;

	return mthca_mr_alloc(dev, pd, page_shift, 0, whole_space, access, mr);
}
/*
 * Create a memory region backed by the 'list_len' buffers in
 * 'buffer_list': allocate an MTT, fill it in and register the region.
 * Returns 0 on success or a negative errno; on failure the MTT is
 * released again.
 */
int mthca_mr_alloc_phys(struct mthca_dev *dev, u32 pd,
			u64 *buffer_list, int buffer_size_shift,
			int list_len, u64 iova, u64 total_size,
			u32 access, struct mthca_mr *mr)
{
	int rc;

	mr->mtt = mthca_alloc_mtt(dev, list_len);
	if (IS_ERR(mr->mtt))
		return PTR_ERR(mr->mtt);

	rc = mthca_write_mtt(dev, mr->mtt, 0, buffer_list, list_len);
	if (!rc)
		rc = mthca_mr_alloc(dev, pd, buffer_size_shift, iova,
				    total_size, access, mr);

	/* Either step failing leaves the MTT unused, so give it back. */
	if (rc)
		mthca_free_mtt(dev, mr->mtt);

	return rc;
}
/*
 * Free mr or fmr: drop the ICM reference for the MPT entry behind
 * 'lkey' and return its index to the MPT allocator.
 */
static void mthca_free_region(struct mthca_dev *dev, u32 lkey)
{
	const u32 hw_index = key_to_hw_index(dev, lkey);

	mthca_table_put(dev, dev->mr_table.mpt_table, hw_index);
	mthca_free(&dev->mr_table.mpt_alloc, hw_index);
}
void mthca_free_mr(struct mthca_dev *dev, struct mthca_mr *mr)
{
int err;
err = mthca_HW2SW_MPT(dev, NULL,
key_to_hw_index(dev, mr->ibmr.lkey) &
(dev->limits.num_mpts - 1));
if (err)
mthca_warn(dev, "HW2SW_MPT failed (%d)\n", err);
mthca_free_region(dev, mr->ibmr.lkey);
mthca_free_mtt(dev, mr->mtt);
}
/*
 * Create a fast memory region (FMR).
 *
 * Allocates an MPT key and an MTT of mr->attr.max_pages entries from the
 * FMR buddy allocator, records where the MPT entry and the MTT entries
 * can be written directly (mr->mem.arbel.* or mr->mem.tavor.*), and
 * registers an MPT entry with zeroed start and length with the device.
 *
 * Returns 0 on success; -EINVAL when the page shift is outside [12, 32)
 * or, on mem-free devices, when the MTT entries would not fit in one
 * page; -ENOMEM when no key is available; or the error from a failing
 * step.  On failure everything acquired so far is released.
 */
int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd,
u32 access, struct mthca_fmr *mr)
{
struct mthca_mpt_entry *mpt_entry;
struct mthca_mailbox *mailbox;
u64 mtt_seg;
u32 key, idx;
int list_len = mr->attr.max_pages;
int err = -ENOMEM;
int i;
if (mr->attr.page_shift < 12 || mr->attr.page_shift >= 32)
return -EINVAL;
/* For Arbel, all MTTs must fit in the same page. */
if (mthca_is_memfree(dev) &&
mr->attr.max_pages * sizeof *mr->mem.arbel.mtts > PAGE_SIZE)
return -EINVAL;
mr->maps = 0;
key = mthca_alloc(&dev->mr_table.mpt_alloc);
if (key == -1)
return -ENOMEM;
key = adjust_key(dev, key);
/* Index of the MPT entry within the MPT table. */
idx = key & (dev->limits.num_mpts - 1);
mr->ibmr.rkey = mr->ibmr.lkey = hw_index_to_key(dev, key);
/* Locate the MPT entry: in ICM on mem-free devices, else in the Tavor FMR mapping. */
if (mthca_is_memfree(dev)) {
err = mthca_table_get(dev, dev->mr_table.mpt_table, key);
if (err)
goto err_out_mpt_free;
mr->mem.arbel.mpt = mthca_table_find(dev->mr_table.mpt_table, key, NULL);
BUG_ON(!mr->mem.arbel.mpt);
} else
mr->mem.tavor.mpt = dev->mr_table.tavor_fmr.mpt_base +
sizeof *(mr->mem.tavor.mpt) * idx;
mr->mtt = __mthca_alloc_mtt(dev, list_len, dev->mr_table.fmr_mtt_buddy);
if (IS_ERR(mr->mtt)) {
err = PTR_ERR(mr->mtt);
goto err_out_table;
}
/* Byte offset of the MTT from the start of the MTT area. */
mtt_seg = mr->mtt->first_seg * dev->limits.mtt_seg_size;
/* Locate the MTT entries the same way as the MPT entry above. */
if (mthca_is_memfree(dev)) {
mr->mem.arbel.mtts = mthca_table_find(dev->mr_table.mtt_table,
mr->mtt->first_seg,
&mr->mem.arbel.dma_handle);
BUG_ON(!mr->mem.arbel.mtts);
} else
mr->mem.tavor.mtts = dev->mr_table.tavor_fmr.mtt_base + mtt_seg;
mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
if (IS_ERR(mailbox)) {
err = PTR_ERR(mailbox);
goto err_out_free_mtt;
}
mpt_entry = mailbox->buf;
mpt_entry->flags = cpu_to_be32(MTHCA_MPT_FLAG_SW_OWNS |
MTHCA_MPT_FLAG_MIO |
MTHCA_MPT_FLAG_REGION |
access);
mpt_entry->page_size = cpu_to_be32(mr->attr.page_shift - 12);
mpt_entry->key = cpu_to_be32(key);
mpt_entry->pd = cpu_to_be32(pd);
/* Zero every field from start to the end of the entry. */
memset(&mpt_entry->start, 0,
sizeof *mpt_entry - offsetof(struct mthca_mpt_entry, start));
mpt_entry->mtt_seg = cpu_to_be64(dev->mr_table.mtt_base + mtt_seg);
/* Debug dump of the entry, compiled in but disabled. */
if (0) {
mthca_dbg(dev, "Dumping MPT entry %08x:\n", mr->ibmr.lkey);
for (i = 0; i < sizeof (struct mthca_mpt_entry) / 4; ++i) {
if (i % 4 == 0)
printk("[%02x] ", i * 4);
printk(" %08x", be32_to_cpu(((__be32 *) mpt_entry)[i]));
if ((i + 1) % 4 == 0)
printk("\n");
}
}
err = mthca_SW2HW_MPT(dev, mailbox,
key & (dev->limits.num_mpts - 1));
if (err) {
mthca_warn(dev, "SW2HW_MPT failed (%d)\n", err);
goto err_out_mailbox_free;
}
mthca_free_mailbox(dev, mailbox);
return 0;
/* Error unwinding, in reverse order of acquisition. */
err_out_mailbox_free:
mthca_free_mailbox(dev, mailbox);
err_out_free_mtt:
mthca_free_mtt(dev, mr->mtt);
err_out_table:
/* mthca_table_put() is a no-op on devices that are not mem-free. */
mthca_table_put(dev, dev->mr_table.mpt_table, key);
err_out_mpt_free:
mthca_free(&dev->mr_table.mpt_alloc, key);
return err;
}
/*
 * Destroy an FMR.  Returns -EBUSY while the FMR still has mappings
 * (fmr->maps != 0); otherwise releases the key and the MTT and
 * returns 0.
 */
int mthca_free_fmr(struct mthca_dev *dev, struct mthca_fmr *fmr)
{
	if (fmr->maps != 0)
		return -EBUSY;

	mthca_free_region(dev, fmr->ibmr.lkey);
	mthca_free_mtt(dev, fmr->mtt);

	return 0;
}
/*
 * Validate the arguments of an FMR map request.
 *
 * Returns -EINVAL when 'list_len' exceeds the FMR's max_pages, when
 * 'iova' is not aligned to the FMR page size, or when the FMR has
 * already been mapped max_maps times; returns 0 otherwise.
 */
static inline int mthca_check_fmr(struct mthca_fmr *fmr, u64 *page_list,
				  int list_len, u64 iova)
{
	u64 page_mask;
	int i;

	if (list_len > fmr->attr.max_pages)
		return -EINVAL;

	/*
	 * Build the mask in 64 bits: mthca_fmr_alloc() accepts page shifts
	 * up to 31, and shifting a signed int 1 left by 31 is undefined.
	 */
	page_mask = (1ULL << fmr->attr.page_shift) - 1;

	/* We are getting page lists, so va must be page aligned. */
	if (iova & page_mask)
		return -EINVAL;

	/*
	 * Trust the user not to pass misaligned data in page_list.  The
	 * check stays disabled; it tests the low (in-page) bits so that it
	 * is correct should it ever be enabled.
	 */
	if (0)
		for (i = 0; i < list_len; ++i) {
			if (page_list[i] & page_mask)
				return -EINVAL;
		}

	if (fmr->maps >= fmr->attr.max_maps)
		return -EINVAL;

	return 0;
}
/*
 * Tavor: map 'list_len' pages starting at I/O virtual address 'iova'
 * into an FMR by writing the MTT entries and the changed MPT fields
 * directly through the I/O mappings set up by mthca_fmr_alloc().
 *
 * Every mapping advances the FMR's key by num_mpts, so the index bits
 * stay the same while the key itself changes.  Returns 0 on success or
 * the error from mthca_check_fmr().
 */
int mthca_tavor_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
int list_len, u64 iova)
{
struct mthca_fmr *fmr = to_mfmr(ibfmr);
struct mthca_dev *dev = to_mdev(ibfmr->device);
/* Staging copy; only lkey, length and start are filled in and copied out. */
struct mthca_mpt_entry mpt_entry;
u32 key;
int i, err;
err = mthca_check_fmr(fmr, page_list, list_len, iova);
if (err)
return err;
++fmr->maps;
key = tavor_key_to_hw_index(fmr->ibmr.lkey);
key += dev->limits.num_mpts;
fmr->ibmr.lkey = fmr->ibmr.rkey = tavor_hw_index_to_key(key);
/* Set the entry's first byte to the SW status value before editing it. */
writeb(MTHCA_MPT_STATUS_SW, fmr->mem.tavor.mpt);
for (i = 0; i < list_len; ++i) {
__be64 mtt_entry = cpu_to_be64(page_list[i] |
MTHCA_MTT_FLAG_PRESENT);
mthca_write64_raw(mtt_entry, fmr->mem.tavor.mtts + i);
}
mpt_entry.lkey = cpu_to_be32(key);
mpt_entry.length = cpu_to_be64(list_len * (1ull << fmr->attr.page_shift));
mpt_entry.start = cpu_to_be64(iova);
__raw_writel((__force u32) mpt_entry.lkey, &fmr->mem.tavor.mpt->key);
/* Copies start, length and lkey: the fields from 'start' up to 'window_count'. */
memcpy_toio(&fmr->mem.tavor.mpt->start, &mpt_entry.start,
offsetof(struct mthca_mpt_entry, window_count) -
offsetof(struct mthca_mpt_entry, start));
/* Set the first byte back to the HW status value. */
writeb(MTHCA_MPT_STATUS_HW, fmr->mem.tavor.mpt);
return 0;
}
/*
 * Map list_len pages from page_list, with start address iova, through
 * an Arbel FMR.  Unlike the Tavor variant, the MPT and MTT entries are
 * written through ordinary pointers (fmr->mem.arbel), with write
 * barriers and DMA syncs around the updates.
 *
 * The request is first validated by mthca_check_fmr(); its error code
 * is returned unchanged.  On success the FMR's lkey and rkey are
 * replaced by a freshly derived key and 0 is returned.
 */
int mthca_arbel_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
int list_len, u64 iova)
{
struct mthca_fmr *fmr = to_mfmr(ibfmr);
struct mthca_dev *dev = to_mdev(ibfmr->device);
u32 key;
int i, err;
err = mthca_check_fmr(fmr, page_list, list_len, iova);
if (err)
return err;
/* Count this mapping; mthca_check_fmr() compares the count with max_maps. */
++fmr->maps;
/*
 * Derive the next key: step by SINAI_FMR_KEY_INC when
 * MTHCA_FLAG_SINAI_OPT is set, by num_mpts otherwise.
 */
key = arbel_key_to_hw_index(fmr->ibmr.lkey);
if (dev->mthca_flags & MTHCA_FLAG_SINAI_OPT)
key += SINAI_FMR_KEY_INC;
else
key += dev->limits.num_mpts;
fmr->ibmr.lkey = fmr->ibmr.rkey = arbel_hw_index_to_key(key);
/* Store MTHCA_MPT_STATUS_SW in the first byte of the MPT entry ... */
*(u8 *) fmr->mem.arbel.mpt = MTHCA_MPT_STATUS_SW;
/* ... and order that store before the updates below. */
wmb();
/* Bracket the CPU writes to the MTT array with DMA syncs over list_len entries. */
dma_sync_single_for_cpu(&dev->pdev->dev, fmr->mem.arbel.dma_handle,
list_len * sizeof(u64), DMA_TO_DEVICE);
for (i = 0; i < list_len; ++i)
fmr->mem.arbel.mtts[i] = cpu_to_be64(page_list[i] |
MTHCA_MTT_FLAG_PRESENT);
dma_sync_single_for_device(&dev->pdev->dev, fmr->mem.arbel.dma_handle,
list_len * sizeof(u64), DMA_TO_DEVICE);
/* Fill in key, lkey, length (list_len pages) and start address, big-endian. */
fmr->mem.arbel.mpt->key = cpu_to_be32(key);
fmr->mem.arbel.mpt->lkey = cpu_to_be32(key);
fmr->mem.arbel.mpt->length = cpu_to_be64(list_len * (1ull << fmr->attr.page_shift));
fmr->mem.arbel.mpt->start = cpu_to_be64(iova);
/* The entry contents must be ordered before the status byte flips to HW. */
wmb();
*(u8 *) fmr->mem.arbel.mpt = MTHCA_MPT_STATUS_HW;
wmb();
return 0;
}
/*
 * Drop all mappings of a Tavor FMR: reset the map counter and write
 * MTHCA_MPT_STATUS_SW to the first byte of its MPT entry.  An FMR
 * with no outstanding mappings is left untouched.
 */
void mthca_tavor_fmr_unmap(struct mthca_dev *dev, struct mthca_fmr *fmr)
{
	if (fmr->maps) {
		fmr->maps = 0;
		writeb(MTHCA_MPT_STATUS_SW, fmr->mem.tavor.mpt);
	}
}
/*
 * Drop all mappings of an Arbel FMR: reset the map counter and store
 * MTHCA_MPT_STATUS_SW in the first byte of its MPT entry.  An FMR
 * with no outstanding mappings is left untouched.
 */
void mthca_arbel_fmr_unmap(struct mthca_dev *dev, struct mthca_fmr *fmr)
{
	if (fmr->maps) {
		fmr->maps = 0;
		*(u8 *) fmr->mem.arbel.mpt = MTHCA_MPT_STATUS_SW;
	}
}
/*
 * Initialise dev->mr_table: the MPT index allocator, the MTT buddy
 * allocator and, where applicable, the ioremapped MPT/MTT windows and
 * the separate MTT buddy used for FMRs.
 *
 * Returns 0 on success.  On failure everything set up so far is torn
 * down through the err_* labels and the error code is returned.
 */
int mthca_init_mr_table(struct mthca_dev *dev)
{
phys_addr_t addr;
int mpts, mtts, err, i;
/* MPT allocator sized by limits.num_mpts; limits.reserved_mrws is the last argument. */
err = mthca_alloc_init(&dev->mr_table.mpt_alloc,
dev->limits.num_mpts,
~0, dev->limits.reserved_mrws);
if (err)
return err;
/*
 * A non-memfree HCA with MTHCA_FLAG_DDR_HIDDEN gets no FMR support
 * (no reserved FMR MTTs, flag left clear); every other configuration
 * sets MTHCA_FLAG_FMR.
 */
if (!mthca_is_memfree(dev) &&
(dev->mthca_flags & MTHCA_FLAG_DDR_HIDDEN))
dev->limits.fmr_reserved_mtts = 0;
else
dev->mthca_flags |= MTHCA_FLAG_FMR;
if (dev->mthca_flags & MTHCA_FLAG_SINAI_OPT)
mthca_dbg(dev, "Memory key throughput optimization activated.\n");
/* Main MTT buddy allocator; its order is fls(num_mtt_segs - 1). */
err = mthca_buddy_init(&dev->mr_table.mtt_buddy,
fls(dev->limits.num_mtt_segs - 1));
if (err)
goto err_mtt_buddy;
/* NULL bases let the unwind code and cleanup skip iounmap() when nothing was mapped. */
dev->mr_table.tavor_fmr.mpt_base = NULL;
dev->mr_table.tavor_fmr.mtt_base = NULL;
/*
 * Decide how many MPT/MTT entries the FMR windows cover: the reserved
 * count rounded up to a power of two, or the whole tables when
 * nothing is reserved.
 */
if (dev->limits.fmr_reserved_mtts) {
i = fls(dev->limits.fmr_reserved_mtts - 1);
if (i >= 31) {
mthca_warn(dev, "Unable to reserve 2^31 FMR MTTs.\n");
err = -EINVAL;
goto err_fmr_mpt;
}
mpts = mtts = 1 << i;
} else {
mtts = dev->limits.num_mtt_segs;
mpts = dev->limits.num_mpts;
}
/*
 * Non-memfree HCAs with FMR enabled: ioremap the MPT and MTT windows
 * from PCI resource 4.  The table base is masked with (resource
 * length - 1) to get the offset inside the resource.
 */
if (!mthca_is_memfree(dev) &&
(dev->mthca_flags & MTHCA_FLAG_FMR)) {
addr = pci_resource_start(dev->pdev, 4) +
((pci_resource_len(dev->pdev, 4) - 1) &
dev->mr_table.mpt_base);
dev->mr_table.tavor_fmr.mpt_base =
ioremap(addr, mpts * sizeof(struct mthca_mpt_entry));
if (!dev->mr_table.tavor_fmr.mpt_base) {
mthca_warn(dev, "MPT ioremap for FMR failed.\n");
err = -ENOMEM;
goto err_fmr_mpt;
}
addr = pci_resource_start(dev->pdev, 4) +
((pci_resource_len(dev->pdev, 4) - 1) &
dev->mr_table.mtt_base);
dev->mr_table.tavor_fmr.mtt_base =
ioremap(addr, mtts * dev->limits.mtt_seg_size);
if (!dev->mr_table.tavor_fmr.mtt_base) {
mthca_warn(dev, "MTT ioremap for FMR failed.\n");
err = -ENOMEM;
goto err_fmr_mtt;
}
}
/*
 * With reserved FMR MTTs, FMRs get their own buddy allocator and the
 * same range is carved out of the main allocator; otherwise FMRs
 * share the main allocator.
 */
if (dev->limits.fmr_reserved_mtts) {
err = mthca_buddy_init(&dev->mr_table.tavor_fmr.mtt_buddy, fls(mtts - 1));
if (err)
goto err_fmr_mtt_buddy;
/* Prevent regular MRs from using FMR keys */
/*
 * NOTE(review): the return value of mthca_buddy_alloc() is treated as
 * an error code here and returned as-is on failure; confirm against
 * its definition that a non-zero result is a proper errno.
 */
err = mthca_buddy_alloc(&dev->mr_table.mtt_buddy, fls(mtts - 1));
if (err)
goto err_reserve_fmr;
dev->mr_table.fmr_mtt_buddy =
&dev->mr_table.tavor_fmr.mtt_buddy;
} else
dev->mr_table.fmr_mtt_buddy = &dev->mr_table.mtt_buddy;
/* FMR table is always the first, take reserved MTTs out of there */
if (dev->limits.reserved_mtts) {
i = fls(dev->limits.reserved_mtts - 1);
if (mthca_alloc_mtt_range(dev, i,
dev->mr_table.fmr_mtt_buddy) == -1) {
mthca_warn(dev, "MTT table of order %d is too small.\n",
dev->mr_table.fmr_mtt_buddy->max_order);
err = -ENOMEM;
goto err_reserve_mtts;
}
}
return 0;
/* Unwind in reverse order of setup; each label falls through to the next. */
err_reserve_mtts:
err_reserve_fmr:
if (dev->limits.fmr_reserved_mtts)
mthca_buddy_cleanup(&dev->mr_table.tavor_fmr.mtt_buddy);
err_fmr_mtt_buddy:
if (dev->mr_table.tavor_fmr.mtt_base)
iounmap(dev->mr_table.tavor_fmr.mtt_base);
err_fmr_mtt:
if (dev->mr_table.tavor_fmr.mpt_base)
iounmap(dev->mr_table.tavor_fmr.mpt_base);
err_fmr_mpt:
mthca_buddy_cleanup(&dev->mr_table.mtt_buddy);
err_mtt_buddy:
mthca_alloc_cleanup(&dev->mr_table.mpt_alloc);
return err;
}
void mthca_cleanup_mr_table(struct mthca_dev *dev)
{
/* XXX check if any MRs are still allocated? */
if (dev->limits.fmr_reserved_mtts)
mthca_buddy_cleanup(&dev->mr_table.tavor_fmr.mtt_buddy);
mthca_buddy_cleanup(&dev->mr_table.mtt_buddy);
if (dev->mr_table.tavor_fmr.mtt_base)
iounmap(dev->mr_table.tavor_fmr.mtt_base);
if (dev->mr_table.tavor_fmr.mpt_base)
iounmap(dev->mr_table.tavor_fmr.mpt_base);
mthca_alloc_cleanup(&dev->mr_table.mpt_alloc);
}

81
sys/dev/mthca/mthca_pd.c Normal file
View File

@ -0,0 +1,81 @@
/*
* Copyright (c) 2004 Topspin Communications. All rights reserved.
* Copyright (c) 2005 Cisco Systems. All rights reserved.
* Copyright (c) 2005 Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/errno.h>
#include "mthca_dev.h"
/*
 * Initialise *pd and allocate a PD number for it.
 *
 * A privileged PD additionally gets an MR in pd->ntmr, created by
 * mthca_mr_alloc_notrans() with local read and local write access.
 *
 * Returns 0 on success, -ENOMEM when no PD number is available, or
 * the error from mthca_mr_alloc_notrans() (the PD number is released
 * again in that case).
 */
int mthca_pd_alloc(struct mthca_dev *dev, int privileged, struct mthca_pd *pd)
{
	int ret;

	pd->privileged = privileged;
	atomic_set(&pd->sqp_count, 0);

	pd->pd_num = mthca_alloc(&dev->pd_table.alloc);
	if (pd->pd_num == -1)
		return -ENOMEM;

	/* Nothing more to set up for an unprivileged PD. */
	if (!privileged)
		return 0;

	ret = mthca_mr_alloc_notrans(dev, pd->pd_num,
				     MTHCA_MPT_FLAG_LOCAL_READ |
				     MTHCA_MPT_FLAG_LOCAL_WRITE,
				     &pd->ntmr);
	if (ret)
		mthca_free(&dev->pd_table.alloc, pd->pd_num);

	return ret;
}
/*
 * Undo mthca_pd_alloc(): free the MR of a privileged PD, then give
 * the PD number back to the allocator.
 */
void mthca_pd_free(struct mthca_dev *dev, struct mthca_pd *pd)
{
	/* pd->ntmr is only set up for privileged PDs. */
	if (pd->privileged != 0) {
		mthca_free_mr(dev, &pd->ntmr);
	}

	mthca_free(&dev->pd_table.alloc, pd->pd_num);
}
/*
 * Set up the PD number allocator for limits.num_pds entries, passing
 * limits.reserved_pds as the reserved count.  Returns whatever
 * mthca_alloc_init() returns.
 */
int mthca_init_pd_table(struct mthca_dev *dev)
{
	/*
	 * All ones in the low 24 bits.  NOTE(review): presumably the
	 * mask applied to allocated PD numbers -- confirm against
	 * mthca_alloc_init().
	 */
	const int low24 = (1 << 24) - 1;

	return mthca_alloc_init(&dev->pd_table.alloc, dev->limits.num_pds,
				low24, dev->limits.reserved_pds);
}
/*
 * Release the PD number allocator set up by mthca_init_pd_table().
 * No check is made for PDs that are still allocated.
 */
void mthca_cleanup_pd_table(struct mthca_dev *dev)
{
/* XXX check if any PDs are still allocated? */
mthca_alloc_cleanup(&dev->pd_table.alloc);
}

View File

@ -0,0 +1,281 @@
/*
* Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
* Copyright (c) 2005 Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/string.h>
#include <linux/slab.h>
#include "mthca_profile.h"
/*
 * Resource types laid out by mthca_make_profile().  Each value is an
 * index into its profile[] array and is also stored in the entry's
 * 'type' field, so an entry can still be identified after the array
 * has been sorted.  MTHCA_RES_NUM is the number of types.
 */
enum {
MTHCA_RES_QP,
MTHCA_RES_EEC,
MTHCA_RES_SRQ,
MTHCA_RES_CQ,
MTHCA_RES_EQP,
MTHCA_RES_EEEC,
MTHCA_RES_EQ,
MTHCA_RES_RDB,
MTHCA_RES_MCG,
MTHCA_RES_MPT,
MTHCA_RES_MTT,
MTHCA_RES_UAR,
MTHCA_RES_UDAV,
MTHCA_RES_UARC,
MTHCA_RES_NUM
};
/*
 * Fixed counts used by mthca_make_profile(): the number of EQ entries
 * in the profile and the value assigned to dev->limits.num_pds.
 */
enum {
MTHCA_NUM_EQS = 32,
MTHCA_NUM_PDS = 1 << 15
};
/*
 * Lay out the HCA context tables in device memory.
 *
 * For every MTHCA_RES_* type the per-entry size (from dev_lim or a
 * driver constant) is multiplied by the requested count, the results
 * are sorted by decreasing size and packed back to back starting at
 * the memory base.  The resulting base addresses, counts and log2
 * counts are stored in dev->limits, other dev tables and *init_hca.
 *
 * Returns the total number of bytes used, or -ENOMEM when the scratch
 * array cannot be allocated or the layout exceeds the available
 * memory.
 */
s64 mthca_make_profile(struct mthca_dev *dev,
struct mthca_profile *request,
struct mthca_dev_lim *dev_lim,
struct mthca_init_hca_param *init_hca)
{
/* Scratch record for one resource type. */
struct mthca_resource {
u64 size;
u64 start;
int type;
int num;
int log_num;
};
u64 mem_base, mem_avail;
s64 total_size = 0;
struct mthca_resource *profile;
int i, j;
profile = kzalloc(MTHCA_RES_NUM * sizeof *profile, GFP_KERNEL);
if (!profile)
return -ENOMEM;
/* Per-entry sizes; multiplied by the entry counts further down. */
profile[MTHCA_RES_QP].size = dev_lim->qpc_entry_sz;
profile[MTHCA_RES_EEC].size = dev_lim->eec_entry_sz;
profile[MTHCA_RES_SRQ].size = dev_lim->srq_entry_sz;
profile[MTHCA_RES_CQ].size = dev_lim->cqc_entry_sz;
profile[MTHCA_RES_EQP].size = dev_lim->eqpc_entry_sz;
profile[MTHCA_RES_EEEC].size = dev_lim->eeec_entry_sz;
profile[MTHCA_RES_EQ].size = dev_lim->eqc_entry_sz;
profile[MTHCA_RES_RDB].size = MTHCA_RDB_ENTRY_SIZE;
profile[MTHCA_RES_MCG].size = MTHCA_MGM_ENTRY_SIZE;
profile[MTHCA_RES_MPT].size = dev_lim->mpt_entry_sz;
profile[MTHCA_RES_MTT].size = dev->limits.mtt_seg_size;
profile[MTHCA_RES_UAR].size = dev_lim->uar_scratch_entry_sz;
profile[MTHCA_RES_UDAV].size = MTHCA_AV_SIZE;
profile[MTHCA_RES_UARC].size = request->uarc_size;
/*
 * Entry counts.  EEC and EEEC get no count, so they stay at zero from
 * kzalloc() and end up with size 0 (before the memfree minimum below).
 */
profile[MTHCA_RES_QP].num = request->num_qp;
profile[MTHCA_RES_SRQ].num = request->num_srq;
profile[MTHCA_RES_EQP].num = request->num_qp;
profile[MTHCA_RES_RDB].num = request->num_qp * request->rdb_per_qp;
profile[MTHCA_RES_CQ].num = request->num_cq;
profile[MTHCA_RES_EQ].num = MTHCA_NUM_EQS;
profile[MTHCA_RES_MCG].num = request->num_mcg;
profile[MTHCA_RES_MPT].num = request->num_mpt;
profile[MTHCA_RES_MTT].num = request->num_mtt;
profile[MTHCA_RES_UAR].num = request->num_uar;
profile[MTHCA_RES_UARC].num = request->num_uar;
profile[MTHCA_RES_UDAV].num = request->num_udav;
/*
 * Turn per-entry sizes into table sizes.  log_num is derived with
 * ffs(), i.e. from the lowest set bit, so it equals log2(num) only
 * when num is a power of two (NOTE(review): presumably guaranteed by
 * the caller -- confirm).  On memfree HCAs every table is at least
 * one page.
 */
for (i = 0; i < MTHCA_RES_NUM; ++i) {
profile[i].type = i;
profile[i].log_num = max(ffs(profile[i].num) - 1, 0);
profile[i].size *= profile[i].num;
if (mthca_is_memfree(dev))
profile[i].size = max(profile[i].size, (u64) PAGE_SIZE);
}
/* Memory window to pack into: ICM on memfree HCAs, DDR below the firmware otherwise. */
if (mthca_is_memfree(dev)) {
mem_base = 0;
mem_avail = dev_lim->hca.arbel.max_icm_sz;
} else {
mem_base = dev->ddr_start;
mem_avail = dev->fw.tavor.fw_start - dev->ddr_start;
}
/*
* Sort the resources in decreasing order of size. Since they
* all have sizes that are powers of 2, we'll be able to keep
* resources aligned to their size and pack them without gaps
* using the sorted order.
*/
for (i = MTHCA_RES_NUM; i > 0; --i)
for (j = 1; j < i; ++j) {
if (profile[j].size > profile[j - 1].size)
swap(profile[j], profile[j - 1]);
}
/* Assign start addresses in sorted order and fail once the window is exceeded. */
for (i = 0; i < MTHCA_RES_NUM; ++i) {
if (profile[i].size) {
profile[i].start = mem_base + total_size;
total_size += profile[i].size;
}
if (total_size > mem_avail) {
mthca_err(dev, "Profile requires 0x%llx bytes; "
"won't fit in 0x%llx bytes of context memory.\n",
(unsigned long long) total_size,
(unsigned long long) mem_avail);
kfree(profile);
return -ENOMEM;
}
if (profile[i].size)
mthca_dbg(dev, "profile[%2d]--%2d/%2d @ 0x%16llx "
"(size 0x%8llx)\n",
i, profile[i].type, profile[i].log_num,
(unsigned long long) profile[i].start,
(unsigned long long) profile[i].size);
}
if (mthca_is_memfree(dev))
mthca_dbg(dev, "HCA context memory: reserving %d KB\n",
(int) (total_size >> 10));
else
mthca_dbg(dev, "HCA memory: allocated %d KB/%d KB (%d KB free)\n",
(int) (total_size >> 10), (int) (mem_avail >> 10),
(int) ((mem_avail - total_size) >> 10));
/*
 * Publish the layout.  The array is sorted now, so dispatch on the
 * saved 'type' rather than on the index.
 */
for (i = 0; i < MTHCA_RES_NUM; ++i) {
switch (profile[i].type) {
case MTHCA_RES_QP:
dev->limits.num_qps = profile[i].num;
init_hca->qpc_base = profile[i].start;
init_hca->log_num_qps = profile[i].log_num;
break;
case MTHCA_RES_EEC:
dev->limits.num_eecs = profile[i].num;
init_hca->eec_base = profile[i].start;
init_hca->log_num_eecs = profile[i].log_num;
break;
case MTHCA_RES_SRQ:
dev->limits.num_srqs = profile[i].num;
init_hca->srqc_base = profile[i].start;
init_hca->log_num_srqs = profile[i].log_num;
break;
case MTHCA_RES_CQ:
dev->limits.num_cqs = profile[i].num;
init_hca->cqc_base = profile[i].start;
init_hca->log_num_cqs = profile[i].log_num;
break;
case MTHCA_RES_EQP:
init_hca->eqpc_base = profile[i].start;
break;
case MTHCA_RES_EEEC:
init_hca->eeec_base = profile[i].start;
break;
case MTHCA_RES_EQ:
dev->limits.num_eqs = profile[i].num;
init_hca->eqc_base = profile[i].start;
init_hca->log_num_eqs = profile[i].log_num;
break;
case MTHCA_RES_RDB:
/* rdb_shift: smallest shift with (num_qp << shift) >= number of RDB entries. */
for (dev->qp_table.rdb_shift = 0;
request->num_qp << dev->qp_table.rdb_shift < profile[i].num;
++dev->qp_table.rdb_shift)
; /* nothing */
dev->qp_table.rdb_base = (u32) profile[i].start;
init_hca->rdb_base = profile[i].start;
break;
case MTHCA_RES_MCG:
/* The MCG entries are split evenly between num_mgms and num_amgms. */
dev->limits.num_mgms = profile[i].num >> 1;
dev->limits.num_amgms = profile[i].num >> 1;
init_hca->mc_base = profile[i].start;
init_hca->log_mc_entry_sz = ffs(MTHCA_MGM_ENTRY_SIZE) - 1;
init_hca->log_mc_table_sz = profile[i].log_num;
init_hca->mc_hash_sz = 1 << (profile[i].log_num - 1);
break;
case MTHCA_RES_MPT:
dev->limits.num_mpts = profile[i].num;
dev->mr_table.mpt_base = profile[i].start;
init_hca->mpt_base = profile[i].start;
init_hca->log_mpt_sz = profile[i].log_num;
break;
case MTHCA_RES_MTT:
dev->limits.num_mtt_segs = profile[i].num;
dev->mr_table.mtt_base = profile[i].start;
init_hca->mtt_base = profile[i].start;
init_hca->mtt_seg_sz = ffs(dev->limits.mtt_seg_size) - 7;
break;
case MTHCA_RES_UAR:
dev->limits.num_uars = profile[i].num;
init_hca->uar_scratch_base = profile[i].start;
break;
case MTHCA_RES_UDAV:
dev->av_table.ddr_av_base = profile[i].start;
dev->av_table.num_ddr_avs = profile[i].num;
break;
case MTHCA_RES_UARC:
dev->uar_table.uarc_size = request->uarc_size;
dev->uar_table.uarc_base = profile[i].start;
init_hca->uarc_base = profile[i].start;
init_hca->log_uarc_sz = ffs(request->uarc_size) - 13;
init_hca->log_uar_sz = ffs(request->num_uar) - 1;
break;
default:
break;
}
}
/*
* PDs don't take any HCA memory, but we assign them as part
* of the HCA profile anyway.
*/
dev->limits.num_pds = MTHCA_NUM_PDS;
/* MTHCA_FLAG_SINAI_OPT is dropped when the MPT table has 2^24 or more entries. */
if (dev->mthca_flags & MTHCA_FLAG_SINAI_OPT &&
init_hca->log_mpt_sz > 23) {
mthca_warn(dev, "MPT table too large (requested size 2^%d >= 2^24)\n",
init_hca->log_mpt_sz);
mthca_warn(dev, "Disabling memory key throughput optimization.\n");
dev->mthca_flags &= ~MTHCA_FLAG_SINAI_OPT;
}
/*
* For Tavor, FMRs use ioremapped PCI memory. For 32 bit
* systems it may use too much vmalloc space to map all MTT
* memory, so we reserve some MTTs for FMR access, taking them
* out of the MR pool. They don't use additional memory, but
* we assign them as part of the HCA profile anyway.
*/
if (mthca_is_memfree(dev) || BITS_PER_LONG == 64)
dev->limits.fmr_reserved_mtts = 0;
else
dev->limits.fmr_reserved_mtts = request->fmr_reserved_mtts;
kfree(profile);
return total_size;
}

View File

@ -0,0 +1,59 @@
/*
* Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
* Copyright (c) 2005 Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef MTHCA_PROFILE_H
#define MTHCA_PROFILE_H
#include "mthca_dev.h"
#include "mthca_cmd.h"
/*
 * Requested resource counts, consumed by mthca_make_profile().
 */
struct mthca_profile {
int num_qp;
/* multiplied by num_qp to give the number of RDB entries */
int rdb_per_qp;
int num_srq;
int num_cq;
int num_mcg;
int num_mpt;
int num_mtt;
int num_udav;
/* used as the entry count for both the UAR and the UARC resources */
int num_uar;
/* per-entry size of the UARC resource */
int uarc_size;
/* copied to dev->limits.fmr_reserved_mtts only on non-memfree HCAs when BITS_PER_LONG != 64 */
int fmr_reserved_mtts;
};
/*
 * Compute the HCA context memory layout for the counts in *request
 * and store it in mdev and *init_hca.  Returns the total size in
 * bytes, or a negative errno on failure.
 */
s64 mthca_make_profile(struct mthca_dev *mdev,
struct mthca_profile *request,
struct mthca_dev_lim *dev_lim,
struct mthca_init_hca_param *init_hca);
#endif /* MTHCA_PROFILE_H */

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,346 @@
/*
* Copyright (c) 2004 Topspin Communications. All rights reserved.
* Copyright (c) 2005, 2006 Cisco Systems. All rights reserved.
* Copyright (c) 2005 Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef MTHCA_PROVIDER_H
#define MTHCA_PROVIDER_H
#include <rdma/ib_verbs.h>
#include <rdma/ib_pack.h>
#include <linux/wait.h>
/*
 * Access flag bits for MPT entries.  For example, mthca_pd_alloc()
 * passes LOCAL_READ | LOCAL_WRITE to mthca_mr_alloc_notrans().
 */
#define MTHCA_MPT_FLAG_ATOMIC (1 << 14)
#define MTHCA_MPT_FLAG_REMOTE_WRITE (1 << 13)
#define MTHCA_MPT_FLAG_REMOTE_READ (1 << 12)
#define MTHCA_MPT_FLAG_LOCAL_WRITE (1 << 11)
#define MTHCA_MPT_FLAG_LOCAL_READ (1 << 10)
/*
 * One buffer: its pointer plus the DMA unmap address declared through
 * DEFINE_DMA_UNMAP_ADDR(mapping).
 */
struct mthca_buf_list {
void *buf;
DEFINE_DMA_UNMAP_ADDR(mapping);
};
/*
 * Queue storage: either a single buffer (direct) or a pointer to an
 * array of buffers (page_list).  The structures embedding this union
 * carry an is_direct field next to it (presumably selecting the
 * active member -- confirm against the allocation code).
 */
union mthca_buf {
struct mthca_buf_list direct;
struct mthca_buf_list *page_list;
};
/*
 * A UAR, described by a page frame number and an index.
 * NOTE(review): how the fields are assigned is not visible in this
 * header.
 */
struct mthca_uar {
unsigned long pfn;
int index;
};
/* Opaque here; only used through the db_tab pointer below. */
struct mthca_user_db_table;
/*
 * Driver user context.  Embeds struct ib_ucontext so that
 * to_mucontext() can recover it with container_of().
 */
struct mthca_ucontext {
struct ib_ucontext ibucontext;
struct mthca_uar uar;
struct mthca_user_db_table *db_tab;
int reg_mr_warned;
};
/* Opaque here; only used through pointers. */
struct mthca_mtt;
/*
 * Driver memory region.  Embeds struct ib_mr so that to_mmr() can
 * recover it with container_of().
 */
struct mthca_mr {
struct ib_mr ibmr;
struct ib_umem *umem;
struct mthca_mtt *mtt;
};
/*
 * Driver FMR.  Embeds struct ib_fmr (member 'ibmr') so that to_mfmr()
 * can recover it with container_of().
 */
struct mthca_fmr {
struct ib_fmr ibmr;
struct ib_fmr_attr attr;
struct mthca_mtt *mtt;
/* number of mappings since the last unmap; checked against attr.max_maps */
int maps;
/* where the MPT entry and MTT array of this FMR live */
union {
/* Tavor: device memory, accessed through __iomem pointers */
struct {
struct mthca_mpt_entry __iomem *mpt;
u64 __iomem *mtts;
} tavor;
/* Arbel: ordinary pointers plus the DMA handle passed to the dma_sync calls for mtts */
struct {
struct mthca_mpt_entry *mpt;
__be64 *mtts;
dma_addr_t dma_handle;
} arbel;
} mem;
};
/*
 * Driver protection domain.  Embeds struct ib_pd so that to_mpd() can
 * recover it with container_of().
 */
struct mthca_pd {
struct ib_pd ibpd;
/* number handed out by the PD allocator in mthca_pd_alloc() */
u32 pd_num;
/* set to 0 by mthca_pd_alloc() */
atomic_t sqp_count;
/* MR set up by mthca_pd_alloc() only when 'privileged' is non-zero */
struct mthca_mr ntmr;
int privileged;
};
/*
 * Per-EQ driver state.
 * NOTE(review): the code that fills these fields is not visible in
 * this header.
 */
struct mthca_eq {
struct mthca_dev *dev;
int eqn;
u32 eqn_mask;
u32 cons_index;
u16 msi_x_vector;
u16 msi_x_entry;
int have_irq;
int nent;
struct mthca_buf_list *page_list;
struct mthca_mr mr;
char irq_name[IB_DEVICE_NAME_MAX];
};
/* Opaque here; only used through the 'av' pointer below. */
struct mthca_av;
/*
 * Kind of an address handle, stored in mthca_ah.type.
 * NOTE(review): judging by the names this records where the AV was
 * allocated -- confirm against the AH allocation code.
 */
enum mthca_ah_type {
MTHCA_AH_ON_HCA,
MTHCA_AH_PCI_POOL,
MTHCA_AH_KMALLOC
};
/*
 * Driver address handle.  Embeds struct ib_ah so that to_mah() can
 * recover it with container_of().
 */
struct mthca_ah {
struct ib_ah ibah;
enum mthca_ah_type type;
u32 key;
struct mthca_av *av;
dma_addr_t avdma;
};
/*
* Quick description of our CQ/QP locking scheme:
*
* We have one global lock that protects dev->cq/qp_table. Each
* struct mthca_cq/qp also has its own lock. An individual qp lock
* may be taken inside of an individual cq lock. Both cqs attached to
* a qp may be locked, with the cq with the lower cqn locked first.
* No other nesting should be done.
*
* Each struct mthca_cq/qp also has a ref count, protected by the
* corresponding table lock. The pointer from the cq/qp_table to the
* struct counts as one reference. This reference also is good for
* access through the consumer API, so modifying the CQ/QP etc doesn't
* need to take another reference. Access to a QP because of a
* completion being polled does not need a reference either.
*
* Finally, each struct mthca_cq/qp has a wait_queue_head_t for the
* destroy function to sleep on.
*
* This means that access from the consumer API requires nothing but
* taking the struct's lock.
*
* Access because of a completion event should go as follows:
* - lock cq/qp_table and look up struct
* - increment ref count in struct
* - drop cq/qp_table lock
* - lock struct, do your thing, and unlock struct
* - decrement ref count; if zero, wake up waiters
*
* To destroy a CQ/QP, we can do the following:
* - lock cq/qp_table
* - remove pointer and decrement ref count
* - unlock cq/qp_table lock
* - wait_event until ref count is zero
*
* It is the consumer's responsibility to make sure that no QP
* operations (WQE posting or state modification) are pending when a
* QP is destroyed. Also, the consumer must make sure that calls to
* qp_modify are serialized. Similarly, the consumer is responsible
* for ensuring that no CQ resize operations are pending when a CQ
* is destroyed.
*
* Possible optimizations (wait for profile data to see if/where we
* have locks bouncing between CPUs):
* - split cq/qp table lock into n separate (cache-aligned) locks,
* indexed (say) by the page in the table
* - split QP struct lock into three (one for common info, one for the
* send queue and one for the receive queue)
*/
/*
 * Backing store of a CQ: the queue buffer(s), an MR, and the
 * is_direct field that accompanies the mthca_buf union.
 */
struct mthca_cq_buf {
union mthca_buf queue;
struct mthca_mr mr;
int is_direct;
};
/*
 * State of a CQ resize: a CQ buffer, a CQE count and a state drawn
 * from the three CQ_RESIZE_* values.  Referenced from
 * mthca_cq.resize_buf.
 */
struct mthca_cq_resize {
struct mthca_cq_buf buf;
int cqe;
enum {
CQ_RESIZE_ALLOC,
CQ_RESIZE_READY,
CQ_RESIZE_SWAPPED
} state;
};
/*
 * Driver completion queue.  Embeds struct ib_cq so that to_mcq() can
 * recover it with container_of().  The lock, refcount and wait fields
 * implement the CQ/QP locking scheme described in the comment earlier
 * in this header.
 */
struct mthca_cq {
struct ib_cq ibcq;
spinlock_t lock;
int refcount;
int cqn;
u32 cons_index;
struct mthca_cq_buf buf;
struct mthca_cq_resize *resize_buf;
int is_kernel;
/* Next fields are Arbel only */
int set_ci_db_index;
__be32 *set_ci_db;
int arm_db_index;
__be32 *arm_db;
int arm_sn;
wait_queue_head_t wait;
struct mutex mutex;
};
/*
 * Driver shared receive queue.  Embeds struct ib_srq so that
 * to_msrq() can recover it with container_of().
 * NOTE(review): lock/refcount/wait mirror the CQ/QP fields; whether
 * the same locking scheme applies is not stated in this header.
 */
struct mthca_srq {
struct ib_srq ibsrq;
spinlock_t lock;
int refcount;
int srqn;
int max;
int max_gs;
int wqe_shift;
int first_free;
int last_free;
u16 counter; /* Arbel only */
int db_index; /* Arbel only */
__be32 *db; /* Arbel only */
void *last;
int is_direct;
u64 *wrid;
union mthca_buf queue;
struct mthca_mr mr;
wait_queue_head_t wait;
struct mutex mutex;
};
/*
 * One work queue.  struct mthca_qp holds two of these, 'rq' and 'sq'.
 */
struct mthca_wq {
spinlock_t lock;
int max;
unsigned next_ind;
unsigned last_comp;
unsigned head;
unsigned tail;
void *last;
int max_gs;
int wqe_shift;
int db_index; /* Arbel only */
__be32 *db;
};
/*
 * Driver queue pair.  Embeds struct ib_qp so that to_mqp() can
 * recover it with container_of().  The refcount and wait fields
 * implement the CQ/QP locking scheme described in the comment earlier
 * in this header; the per-queue spinlocks live in 'rq' and 'sq'.
 */
struct mthca_qp {
struct ib_qp ibqp;
int refcount;
u32 qpn;
int is_direct;
u8 port; /* for SQP and memfree use only */
u8 alt_port; /* for memfree use only */
u8 transport;
u8 state;
u8 atomic_rd_en;
u8 resp_depth;
struct mthca_mr mr;
struct mthca_wq rq;
struct mthca_wq sq;
enum ib_sig_type sq_policy;
int send_wqe_offset;
int max_inline_data;
u64 *wrid;
union mthca_buf queue;
wait_queue_head_t wait;
struct mutex mutex;
};
/*
 * A QP with extra state for a UD header and its buffer.  Embeds
 * struct mthca_qp (member 'qp') so that to_msqp() can recover it with
 * container_of().
 */
struct mthca_sqp {
struct mthca_qp qp;
int pkey_index;
u32 qkey;
u32 send_psn;
struct ib_ud_header ud_header;
int header_buf_size;
void *header_buf;
dma_addr_t header_dma;
};
/* Return the mthca_ucontext whose 'ibucontext' member is *ibucontext. */
static inline struct mthca_ucontext *to_mucontext(struct ib_ucontext *ibucontext)
{
return container_of(ibucontext, struct mthca_ucontext, ibucontext);
}
/* Return the mthca_fmr whose 'ibmr' member (a struct ib_fmr) is *ibmr. */
static inline struct mthca_fmr *to_mfmr(struct ib_fmr *ibmr)
{
return container_of(ibmr, struct mthca_fmr, ibmr);
}
/* Return the mthca_mr whose 'ibmr' member is *ibmr. */
static inline struct mthca_mr *to_mmr(struct ib_mr *ibmr)
{
return container_of(ibmr, struct mthca_mr, ibmr);
}
/* Return the mthca_pd whose 'ibpd' member is *ibpd. */
static inline struct mthca_pd *to_mpd(struct ib_pd *ibpd)
{
return container_of(ibpd, struct mthca_pd, ibpd);
}
/* Return the mthca_ah whose 'ibah' member is *ibah. */
static inline struct mthca_ah *to_mah(struct ib_ah *ibah)
{
return container_of(ibah, struct mthca_ah, ibah);
}
/* Return the mthca_cq whose 'ibcq' member is *ibcq. */
static inline struct mthca_cq *to_mcq(struct ib_cq *ibcq)
{
return container_of(ibcq, struct mthca_cq, ibcq);
}
/* Return the mthca_srq whose 'ibsrq' member is *ibsrq. */
static inline struct mthca_srq *to_msrq(struct ib_srq *ibsrq)
{
return container_of(ibsrq, struct mthca_srq, ibsrq);
}
/* Return the mthca_qp whose 'ibqp' member is *ibqp. */
static inline struct mthca_qp *to_mqp(struct ib_qp *ibqp)
{
return container_of(ibqp, struct mthca_qp, ibqp);
}
/*
 * Return the mthca_sqp whose 'qp' member is *qp.  Only meaningful
 * when *qp really is embedded in a struct mthca_sqp.
 */
static inline struct mthca_sqp *to_msqp(struct mthca_qp *qp)
{
return container_of(qp, struct mthca_sqp, qp);
}
#endif /* MTHCA_PROVIDER_H */

2311
sys/dev/mthca/mthca_qp.c Normal file

File diff suppressed because it is too large Load Diff

303
sys/dev/mthca/mthca_reset.c Normal file
View File

@ -0,0 +1,303 @@
/*
* Copyright (c) 2004 Topspin Communications. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/errno.h>
#include <linux/pci.h>
#include <linux/delay.h>
#include <linux/slab.h>
#include "mthca_dev.h"
#include "mthca_cmd.h"
/*
 * Reset the HCA by writing MTHCA_RESET_VALUE to its reset register,
 * then restore the PCI configuration header saved beforehand.
 *
 * Returns 0 on success, -ENOMEM when a header buffer or the reset
 * register mapping cannot be obtained, and -ENODEV when a config
 * space access fails or the device does not respond after the reset.
 *
 * When __linux__ is not defined, a device without MTHCA_FLAG_PCIE is
 * not reset at all: a warning is logged and 0 is returned.
 */
int mthca_reset(struct mthca_dev *mdev)
{
int i;
int err = 0;
u32 *hca_header = NULL;
#ifdef __linux__
u32 *bridge_header = NULL;
#endif
struct pci_dev *bridge = NULL;
#ifdef __linux__
int bridge_pcix_cap = 0;
#endif
int hca_pcie_cap = 0;
int hca_pcix_cap = 0;
u16 devctl;
u16 linkctl;
#define MTHCA_RESET_OFFSET 0xf0010
#define MTHCA_RESET_VALUE swab32(1)
/*
* Reset the chip. This is somewhat ugly because we have to
* save off the PCI header before reset and then restore it
* after the chip reboots. We skip config space offsets 22
* and 23 since those have a special meaning.
*
* To make matters worse, for Tavor (PCI-X HCA) we have to
* find the associated bridge device and save off its PCI
* header as well.
*/
if (!(mdev->mthca_flags & MTHCA_FLAG_PCIE)) {
/* Look for the bridge -- its device ID will be 2 more
than HCA's device ID. */
#ifdef __linux__
while ((bridge = pci_get_device(mdev->pdev->vendor,
mdev->pdev->device + 2,
bridge)) != NULL) {
if (bridge->hdr_type == PCI_HEADER_TYPE_BRIDGE &&
bridge->subordinate == mdev->pdev->bus) {
mthca_dbg(mdev, "Found bridge: %s\n",
pci_name(bridge));
break;
}
}
if (!bridge) {
/*
* Didn't find a bridge for a Tavor device --
* assume we're in no-bridge mode and hope for
* the best.
*/
mthca_warn(mdev, "No bridge found for %s\n",
pci_name(mdev->pdev));
}
#else
/* err is still 0 here, so the caller sees success although no reset happened. */
mthca_warn(mdev, "Reset on PCI-X is not supported.\n");
goto out;
#endif
}
/* For Arbel do we need to save off the full 4K PCI Express header?? */
/* 256 bytes = the 64 config dwords read below. */
hca_header = kmalloc(256, GFP_KERNEL);
if (!hca_header) {
err = -ENOMEM;
mthca_err(mdev, "Couldn't allocate memory to save HCA "
"PCI header, aborting.\n");
goto out;
}
/* Save the config dwords; indices 22 and 23 are skipped and stay uninitialised. */
for (i = 0; i < 64; ++i) {
if (i == 22 || i == 23)
continue;
if (pci_read_config_dword(mdev->pdev, i * 4, hca_header + i)) {
err = -ENODEV;
mthca_err(mdev, "Couldn't save HCA "
"PCI header, aborting.\n");
goto out;
}
}
/* Capability offsets (0 when absent) decide which registers are restored later. */
hca_pcix_cap = pci_find_capability(mdev->pdev, PCI_CAP_ID_PCIX);
hca_pcie_cap = pci_find_capability(mdev->pdev, PCI_CAP_ID_EXP);
#ifdef __linux__
if (bridge) {
bridge_header = kmalloc(256, GFP_KERNEL);
if (!bridge_header) {
err = -ENOMEM;
mthca_err(mdev, "Couldn't allocate memory to save HCA "
"bridge PCI header, aborting.\n");
goto out;
}
for (i = 0; i < 64; ++i) {
if (i == 22 || i == 23)
continue;
if (pci_read_config_dword(bridge, i * 4, bridge_header + i)) {
err = -ENODEV;
mthca_err(mdev, "Couldn't save HCA bridge "
"PCI header, aborting.\n");
goto out;
}
}
bridge_pcix_cap = pci_find_capability(bridge, PCI_CAP_ID_PCIX);
if (!bridge_pcix_cap) {
err = -ENODEV;
mthca_err(mdev, "Couldn't locate HCA bridge "
"PCI-X capability, aborting.\n");
goto out;
}
}
#endif
/* actually hit reset */
{
/* Map just the 4-byte reset register at MTHCA_RESET_OFFSET in PCI resource 0. */
void __iomem *reset = ioremap(pci_resource_start(mdev->pdev, 0) +
MTHCA_RESET_OFFSET, 4);
if (!reset) {
err = -ENOMEM;
mthca_err(mdev, "Couldn't map HCA reset register, "
"aborting.\n");
goto out;
}
writel(MTHCA_RESET_VALUE, reset);
iounmap(reset);
}
/* Docs say to wait one second before accessing device */
msleep(1000);
/* Now wait for PCI device to start responding again */
{
u32 v;
int c = 0;
/* Poll config dword 0 up to 100 times, 100 ms apart; all ones means no response yet. */
for (c = 0; c < 100; ++c) {
if (pci_read_config_dword(bridge ? bridge : mdev->pdev, 0, &v)) {
err = -ENODEV;
mthca_err(mdev, "Couldn't access HCA after reset, "
"aborting.\n");
goto out;
}
if (v != 0xffffffff)
goto good;
msleep(100);
}
err = -ENODEV;
mthca_err(mdev, "PCI device did not come back after reset, "
"aborting.\n");
goto out;
}
good:
#ifdef __linux__
/* Now restore the PCI headers */
if (bridge) {
if (pci_write_config_dword(bridge, bridge_pcix_cap + 0x8,
bridge_header[(bridge_pcix_cap + 0x8) / 4])) {
err = -ENODEV;
mthca_err(mdev, "Couldn't restore HCA bridge Upstream "
"split transaction control, aborting.\n");
goto out;
}
if (pci_write_config_dword(bridge, bridge_pcix_cap + 0xc,
bridge_header[(bridge_pcix_cap + 0xc) / 4])) {
err = -ENODEV;
mthca_err(mdev, "Couldn't restore HCA bridge Downstream "
"split transaction control, aborting.\n");
goto out;
}
/*
* Bridge control register is at 0x3e, so we'll
* naturally restore it last in this loop.
*/
for (i = 0; i < 16; ++i) {
if (i * 4 == PCI_COMMAND)
continue;
if (pci_write_config_dword(bridge, i * 4, bridge_header[i])) {
err = -ENODEV;
mthca_err(mdev, "Couldn't restore HCA bridge reg %x, "
"aborting.\n", i);
goto out;
}
}
if (pci_write_config_dword(bridge, PCI_COMMAND,
bridge_header[PCI_COMMAND / 4])) {
err = -ENODEV;
mthca_err(mdev, "Couldn't restore HCA bridge COMMAND, "
"aborting.\n");
goto out;
}
}
#endif
/* Restore the capability registers of the HCA before its standard header. */
if (hca_pcix_cap) {
if (pci_write_config_dword(mdev->pdev, hca_pcix_cap,
hca_header[hca_pcix_cap / 4])) {
err = -ENODEV;
mthca_err(mdev, "Couldn't restore HCA PCI-X "
"command register, aborting.\n");
goto out;
}
}
if (hca_pcie_cap) {
/* Assigning the saved dword to a u16 keeps its low 16 bits. */
devctl = hca_header[(hca_pcie_cap + PCI_EXP_DEVCTL) / 4];
if (pci_write_config_word(mdev->pdev, hca_pcie_cap + PCI_EXP_DEVCTL,
devctl)) {
err = -ENODEV;
mthca_err(mdev, "Couldn't restore HCA PCI Express "
"Device Control register, aborting.\n");
goto out;
}
linkctl = hca_header[(hca_pcie_cap + PCI_EXP_LNKCTL) / 4];
if (pci_write_config_word(mdev->pdev, hca_pcie_cap + PCI_EXP_LNKCTL,
linkctl)) {
err = -ENODEV;
mthca_err(mdev, "Couldn't restore HCA PCI Express "
"Link control register, aborting.\n");
goto out;
}
}
/* Restore the first 16 dwords, skipping the one at PCI_COMMAND, which is written last. */
for (i = 0; i < 16; ++i) {
if (i * 4 == PCI_COMMAND)
continue;
if (pci_write_config_dword(mdev->pdev, i * 4, hca_header[i])) {
err = -ENODEV;
mthca_err(mdev, "Couldn't restore HCA reg %x, "
"aborting.\n", i);
goto out;
}
}
if (pci_write_config_dword(mdev->pdev, PCI_COMMAND,
hca_header[PCI_COMMAND / 4])) {
err = -ENODEV;
mthca_err(mdev, "Couldn't restore HCA COMMAND, "
"aborting.\n");
goto out;
}
/* Common exit: the buffers are NULL when never allocated, and kfree() is called unconditionally. */
out:
#ifdef __linux__
if (bridge)
pci_dev_put(bridge);
kfree(bridge_header);
#endif
kfree(hca_header);
return err;
}

696
sys/dev/mthca/mthca_srq.c Normal file
View File

@ -0,0 +1,696 @@
/*
* Copyright (c) 2005 Cisco Systems. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/sched.h>
#include <asm/io.h>
#include "mthca_dev.h"
#include "mthca_cmd.h"
#include "mthca_memfree.h"
#include "mthca_wqe.h"
/*
 * Four pages.  Passed as the third argument of mthca_buf_alloc() when the
 * SRQ work-queue buffer is allocated in mthca_alloc_srq_buf().
 * NOTE(review): presumably the largest buffer that is still allocated as a
 * single "direct" region (cf. srq->is_direct) -- confirm against
 * mthca_buf_alloc().
 */
enum {
MTHCA_MAX_DIRECT_SRQ_SIZE = 4 * PAGE_SIZE
};
/*
 * SRQ context as laid out for non-memfree HCAs.
 *
 * mthca_tavor_init_srq_context() builds one of these in a mailbox buffer,
 * which mthca_alloc_srq() then passes to mthca_SW2HW_SRQ();
 * mthca_query_srq() reads limit_watermark back out of the mailbox filled
 * by mthca_QUERY_SRQ().  Multi-byte fields are big-endian.
 */
struct mthca_tavor_srq_context {
__be64 wqe_base_ds; /* low 6 bits is descriptor size */
/* written with the protection domain number (pd->pd_num) */
__be32 state_pd;
/* written with the lkey of the SRQ's memory region */
__be32 lkey;
/* written with the UAR index of the owning context or of the driver */
__be32 uar;
__be16 limit_watermark;
__be16 wqe_cnt;
u32 reserved[2];
};
/*
 * SRQ context as laid out for memfree HCAs.
 *
 * mthca_arbel_init_srq_context() builds one of these in a mailbox buffer,
 * which mthca_alloc_srq() then passes to mthca_SW2HW_SRQ();
 * mthca_query_srq() reads limit_watermark back out of the mailbox filled
 * by mthca_QUERY_SRQ().  Multi-byte fields are big-endian.
 */
struct mthca_arbel_srq_context {
/* written as (ilog2(srq->max) << 24) | srq->srqn */
__be32 state_logsize_srqn;
/* written with the lkey of the SRQ's memory region */
__be32 lkey;
/* written with srq->db_index */
__be32 db_index;
/* written as ((srq->wqe_shift - 4) << 29) | UAR index */
__be32 logstride_usrpage;
__be64 wqe_base;
/* written as (MTHCA_EQ_ASYNC << 24) | pd->pd_num */
__be32 eq_pd;
__be16 limit_watermark;
__be16 wqe_cnt;
u16 reserved1;
__be16 wqe_counter;
u32 reserved2[3];
};
/*
 * Return the address of WQE number n inside the SRQ buffer.
 *
 * WQEs are (1 << srq->wqe_shift) bytes apart.  When the buffer is direct
 * the byte offset is applied to the single buffer; otherwise it is split
 * into an index into srq->queue.page_list[] and an offset within that page.
 */
static void *get_wqe(struct mthca_srq *srq, int n)
{
	int offset = n << srq->wqe_shift;

	if (!srq->is_direct)
		return srq->queue.page_list[offset >> PAGE_SHIFT].buf +
		    (offset & (PAGE_SIZE - 1));

	return srq->queue.direct.buf + offset;
}
/*
 * Free-list link accessor.
 *
 * While a WQE sits on the free list, the index of the next free WQE is
 * stored in the imm field of its mthca_next_seg.  The imm field is used
 * because, in the Tavor case, posting a WQE may overwrite the next segment
 * of the previous WQE, whereas a receive WQE never touches imm.  That keeps
 * the free list intact even if the previous WQE has already completed and
 * been put back on the free list by the time the next WQE is posted.
 *
 * Returns a pointer to that link slot inside the given WQE.
 */
static inline int *wqe_to_link(void *wqe)
{
	void *link = wqe + offsetof(struct mthca_next_seg, imm);

	return link;
}
/*
 * Fill in a zeroed Tavor-format SRQ context for the given SRQ.
 *
 * The UAR index comes from the user context attached to the PD when the PD
 * has a uobject, and from the driver's own UAR otherwise.
 */
static void mthca_tavor_init_srq_context(struct mthca_dev *dev,
					 struct mthca_pd *pd,
					 struct mthca_srq *srq,
					 struct mthca_tavor_srq_context *context)
{
	memset(context, 0, sizeof(*context));

	context->wqe_base_ds = cpu_to_be64(1 << (srq->wqe_shift - 4));
	context->state_pd = cpu_to_be32(pd->pd_num);
	context->lkey = cpu_to_be32(srq->mr.ibmr.lkey);
	context->uar = pd->ibpd.uobject ?
	    cpu_to_be32(to_mucontext(pd->ibpd.uobject->context)->uar.index) :
	    cpu_to_be32(dev->driver_uar.index);
}
/*
 * Fill in a zeroed Arbel-format SRQ context for the given SRQ.
 *
 * The UAR index OR-ed into logstride_usrpage comes from the user context
 * attached to the PD when the PD has a uobject, and from the driver's own
 * UAR otherwise.
 */
static void mthca_arbel_init_srq_context(struct mthca_dev *dev,
					 struct mthca_pd *pd,
					 struct mthca_srq *srq,
					 struct mthca_arbel_srq_context *context)
{
	/*
	 * srq->max is deliberately copied into a temporary before calling
	 * ilog2(): this works around a gcc bug triggered by ilog2() on
	 * sparc64.  Do not fold the two lines together.
	 */
	int max = srq->max;
	int logsize = ilog2(max);

	memset(context, 0, sizeof(*context));

	context->state_logsize_srqn = cpu_to_be32(logsize << 24 | srq->srqn);
	context->lkey = cpu_to_be32(srq->mr.ibmr.lkey);
	context->db_index = cpu_to_be32(srq->db_index);
	context->logstride_usrpage =
	    cpu_to_be32((srq->wqe_shift - 4) << 29) |
	    (pd->ibpd.uobject ?
	     cpu_to_be32(to_mucontext(pd->ibpd.uobject->context)->uar.index) :
	     cpu_to_be32(dev->driver_uar.index));
	context->eq_pd = cpu_to_be32(MTHCA_EQ_ASYNC << 24 | pd->pd_num);
}
/*
 * Release what mthca_alloc_srq_buf() set up: the WQE buffer (together with
 * its memory region, via mthca_buf_free()) and the wrid array.
 */
static void mthca_free_srq_buf(struct mthca_dev *dev, struct mthca_srq *srq)
{
	int queue_bytes = srq->max << srq->wqe_shift;

	mthca_buf_free(dev, queue_bytes, &srq->queue, srq->is_direct,
		       &srq->mr);
	kfree(srq->wrid);
}
/*
 * Allocate and initialise the work-queue buffer and wrid array of an SRQ.
 *
 * Does nothing (and returns 0) when the PD has a uobject.  Otherwise
 * returns 0 on success, -ENOMEM if the wrid array cannot be allocated, or
 * the error returned by mthca_buf_alloc(); nothing stays allocated on
 * failure.
 */
static int mthca_alloc_srq_buf(struct mthca_dev *dev, struct mthca_pd *pd,
			       struct mthca_srq *srq)
{
	int ret;
	int idx;

	if (pd->ibpd.uobject)
		return 0;

	srq->wrid = kmalloc(srq->max * sizeof (u64), GFP_KERNEL);
	if (!srq->wrid)
		return -ENOMEM;

	ret = mthca_buf_alloc(dev, srq->max << srq->wqe_shift,
			      MTHCA_MAX_DIRECT_SRQ_SIZE,
			      &srq->queue, &srq->is_direct, pd, 1, &srq->mr);
	if (ret) {
		kfree(srq->wrid);
		return ret;
	}

	/*
	 * Chain every WQE into the free list (WQE i links to WQE i + 1, the
	 * final one is terminated with -1) and stamp every scatter entry
	 * with the sentry L_Key MTHCA_INVAL_LKEY.
	 */
	for (idx = 0; idx < srq->max; ++idx) {
		void *wqe = get_wqe(srq, idx);
		void *wqe_end = wqe + (1 << srq->wqe_shift);
		struct mthca_next_seg *next = wqe;
		struct mthca_data_seg *scatter;

		if (idx == srq->max - 1) {
			*wqe_to_link(wqe) = -1;
			next->nda_op = 0;
		} else {
			*wqe_to_link(wqe) = idx + 1;
			next->nda_op =
			    htonl(((idx + 1) << srq->wqe_shift) | 1);
		}

		scatter = wqe + sizeof (struct mthca_next_seg);
		while ((void *) scatter < wqe_end) {
			scatter->lkey = cpu_to_be32(MTHCA_INVAL_LKEY);
			++scatter;
		}
	}

	srq->last = get_wqe(srq, srq->max - 1);

	return 0;
}
/*
 * Create a shared receive queue.
 *
 * @dev:  HCA the SRQ is created on.
 * @pd:   protection domain; when it has a uobject no kernel WQE buffer or
 *        doorbell record is allocated here.
 * @attr: requested max_wr/max_sge on input; on success both are overwritten
 *        with the values actually provided.
 * @srq:  SRQ object to initialise.
 *
 * Returns 0 on success.  Returns -EINVAL if the request exceeds the device
 * limits or the resulting descriptor size is too large for a non-memfree
 * HCA, -ENOMEM if no SRQ number or doorbell record is available, or the
 * error reported by the failing helper otherwise.  On any failure every
 * resource acquired so far is released again.
 */
int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd,
		    struct ib_srq_attr *attr, struct mthca_srq *srq)
{
	struct mthca_mailbox *mailbox;
	int ds;
	int err;
	int hw_err;

	/* Sanity check SRQ size before proceeding */
	if (attr->max_wr  > dev->limits.max_srq_wqes ||
	    attr->max_sge > dev->limits.max_srq_sge)
		return -EINVAL;

	srq->max      = attr->max_wr;
	srq->max_gs   = attr->max_sge;
	srq->counter  = 0;

	/* One extra WQE is always allocated on top of the requested count. */
	if (mthca_is_memfree(dev))
		srq->max = roundup_pow_of_two(srq->max + 1);
	else
		srq->max = srq->max + 1;

	/* Descriptor size: next segment plus max_gs data segments, >= 64. */
	ds = max(64UL,
		 roundup_pow_of_two(sizeof (struct mthca_next_seg) +
				    srq->max_gs * sizeof (struct mthca_data_seg)));

	if (!mthca_is_memfree(dev) && (ds > dev->limits.max_desc_sz))
		return -EINVAL;

	srq->wqe_shift = ilog2(ds);

	srq->srqn = mthca_alloc(&dev->srq_table.alloc);
	if (srq->srqn == -1)
		return -ENOMEM;

	if (mthca_is_memfree(dev)) {
		err = mthca_table_get(dev, dev->srq_table.table, srq->srqn);
		if (err)
			goto err_out;

		if (!pd->ibpd.uobject) {
			srq->db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_SRQ,
						       srq->srqn, &srq->db);
			if (srq->db_index < 0) {
				err = -ENOMEM;
				goto err_out_icm;
			}
		}
	}

	mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
	if (IS_ERR(mailbox)) {
		err = PTR_ERR(mailbox);
		goto err_out_db;
	}

	err = mthca_alloc_srq_buf(dev, pd, srq);
	if (err)
		goto err_out_mailbox;

	spin_lock_init(&srq->lock);
	srq->refcount = 1;
	init_waitqueue_head(&srq->wait);
	mutex_init(&srq->mutex);

	if (mthca_is_memfree(dev))
		mthca_arbel_init_srq_context(dev, pd, srq, mailbox->buf);
	else
		mthca_tavor_init_srq_context(dev, pd, srq, mailbox->buf);

	err = mthca_SW2HW_SRQ(dev, mailbox, srq->srqn);
	if (err) {
		mthca_warn(dev, "SW2HW_SRQ failed (%d)\n", err);
		goto err_out_free_buf;
	}

	/*
	 * Keep the failure code of mthca_array_set() in err so that it is
	 * what the caller sees.  Previously err was still 0 here and was
	 * then overwritten by the HW2SW_SRQ status below, so a failed
	 * creation could be reported as success.
	 * NOTE(review): assumes mthca_array_set() returns a negative errno
	 * on failure, like the other helpers used here -- confirm against
	 * its definition.
	 */
	spin_lock_irq(&dev->srq_table.lock);
	err = mthca_array_set(&dev->srq_table.srq,
			      srq->srqn & (dev->limits.num_srqs - 1),
			      srq);
	spin_unlock_irq(&dev->srq_table.lock);
	if (err)
		goto err_out_free_srq;

	mthca_free_mailbox(dev, mailbox);

	srq->first_free = 0;
	srq->last_free  = srq->max - 1;

	/* Report what was actually provided back to the caller. */
	attr->max_wr    = srq->max - 1;
	attr->max_sge   = srq->max_gs;

	return 0;

err_out_free_srq:
	/* Best-effort hand-back to software; must not clobber err. */
	hw_err = mthca_HW2SW_SRQ(dev, mailbox, srq->srqn);
	if (hw_err)
		mthca_warn(dev, "HW2SW_SRQ failed (%d)\n", hw_err);

err_out_free_buf:
	if (!pd->ibpd.uobject)
		mthca_free_srq_buf(dev, srq);

err_out_mailbox:
	mthca_free_mailbox(dev, mailbox);

err_out_db:
	if (!pd->ibpd.uobject && mthca_is_memfree(dev))
		mthca_free_db(dev, MTHCA_DB_TYPE_SRQ, srq->db_index);

err_out_icm:
	mthca_table_put(dev, dev->srq_table.table, srq->srqn);

err_out:
	mthca_free(&dev->srq_table.alloc, srq->srqn);

	return err;
}
/*
 * Read srq->refcount while holding the SRQ table lock (taken with
 * interrupts disabled) and return the value seen.
 */
static inline int get_srq_refcount(struct mthca_dev *dev, struct mthca_srq *srq)
{
	int refs;

	spin_lock_irq(&dev->srq_table.lock);
	refs = srq->refcount;
	spin_unlock_irq(&dev->srq_table.lock);

	return refs;
}
/*
 * Destroy an SRQ created by mthca_alloc_srq().
 *
 * Hands the SRQ back from hardware, removes it from the SRQ table, drops
 * the creation reference and sleeps until every remaining reference (taken
 * by mthca_srq_event()) is gone before releasing the resources.
 *
 * If no mailbox can be allocated the function only logs a warning and
 * returns without releasing anything.
 */
void mthca_free_srq(struct mthca_dev *dev, struct mthca_srq *srq)
{
	struct mthca_mailbox *mbox;
	int status;

	mbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
	if (IS_ERR(mbox)) {
		mthca_warn(dev, "No memory for mailbox to free SRQ.\n");
		return;
	}

	status = mthca_HW2SW_SRQ(dev, mbox, srq->srqn);
	if (status)
		mthca_warn(dev, "HW2SW_SRQ failed (%d)\n", status);

	/* Unpublish the SRQ and drop the reference taken at creation. */
	spin_lock_irq(&dev->srq_table.lock);
	mthca_array_clear(&dev->srq_table.srq,
			  srq->srqn & (dev->limits.num_srqs - 1));
	--srq->refcount;
	spin_unlock_irq(&dev->srq_table.lock);

	wait_event(srq->wait, !get_srq_refcount(dev, srq));

	/* Buffer and doorbell record exist only for SRQs without a uobject. */
	if (!srq->ibsrq.uobject) {
		mthca_free_srq_buf(dev, srq);
		if (mthca_is_memfree(dev))
			mthca_free_db(dev, MTHCA_DB_TYPE_SRQ, srq->db_index);
	}

	mthca_table_put(dev, dev->srq_table.table, srq->srqn);
	mthca_free(&dev->srq_table.alloc, srq->srqn);
	mthca_free_mailbox(dev, mbox);
}
/*
 * Modify SRQ attributes.
 *
 * Resizing (IB_SRQ_MAX_WR) is not supported and yields -EINVAL.  With
 * IB_SRQ_LIMIT set, the SRQ is armed with attr->srq_limit under srq->mutex
 * and the result of mthca_ARM_SRQ() is returned; a limit above the allowed
 * maximum yields -EINVAL.  With neither bit set the call returns 0.
 *
 * NOTE(review): the allowed maximum is srq->max - 1 on memfree HCAs but
 * srq->max otherwise, whereas mthca_query_srq() reports srq->max - 1 as
 * max_wr for both -- confirm the non-memfree bound is intended.
 */
int mthca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
		     enum ib_srq_attr_mask attr_mask, struct ib_udata *udata)
{
	struct mthca_dev *dev = to_mdev(ibsrq->device);
	struct mthca_srq *srq = to_msrq(ibsrq);
	u32 limit_cap;
	int rc;

	/* We don't support resizing SRQs (yet?) */
	if (attr_mask & IB_SRQ_MAX_WR)
		return -EINVAL;

	if (!(attr_mask & IB_SRQ_LIMIT))
		return 0;

	limit_cap = mthca_is_memfree(dev) ? srq->max - 1 : srq->max;
	if (attr->srq_limit > limit_cap)
		return -EINVAL;

	mutex_lock(&srq->mutex);
	rc = mthca_ARM_SRQ(dev, srq->srqn, attr->srq_limit);
	mutex_unlock(&srq->mutex);

	return rc;
}
/*
 * Query the current attributes of an SRQ.
 *
 * srq_limit is read from the limit_watermark field of the context returned
 * by mthca_QUERY_SRQ() (interpreted in the Arbel or Tavor layout depending
 * on the HCA); max_wr and max_sge come from the driver's own bookkeeping.
 *
 * Returns 0 on success, the PTR_ERR() of a failed mailbox allocation, or
 * the error from mthca_QUERY_SRQ(), in which case srq_attr is untouched.
 */
int mthca_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr)
{
	struct mthca_dev *dev = to_mdev(ibsrq->device);
	struct mthca_srq *srq = to_msrq(ibsrq);
	struct mthca_mailbox *mailbox;
	struct mthca_arbel_srq_context *arbel_ctx;
	struct mthca_tavor_srq_context *tavor_ctx;
	int err;

	mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
	if (IS_ERR(mailbox))
		return PTR_ERR(mailbox);

	err = mthca_QUERY_SRQ(dev, srq->srqn, mailbox);
	if (!err) {
		if (mthca_is_memfree(dev)) {
			arbel_ctx = mailbox->buf;
			srq_attr->srq_limit =
			    be16_to_cpu(arbel_ctx->limit_watermark);
		} else {
			tavor_ctx = mailbox->buf;
			srq_attr->srq_limit =
			    be16_to_cpu(tavor_ctx->limit_watermark);
		}

		srq_attr->max_wr = srq->max - 1;
		srq_attr->max_sge = srq->max_gs;
	}

	mthca_free_mailbox(dev, mailbox);

	return err;
}
/*
 * Deliver an asynchronous event to the SRQ identified by srqn.
 *
 * The SRQ is looked up in the SRQ table and pinned with an extra reference
 * while its event handler (if any) runs; dropping the last reference wakes
 * up a waiter on srq->wait.  An srqn with no table entry is logged and
 * ignored.
 */
void mthca_srq_event(struct mthca_dev *dev, u32 srqn,
		     enum ib_event_type event_type)
{
	struct mthca_srq *srq;
	struct ib_event event;

	spin_lock(&dev->srq_table.lock);
	srq = mthca_array_get(&dev->srq_table.srq,
			      srqn & (dev->limits.num_srqs - 1));
	if (srq)
		++srq->refcount;
	spin_unlock(&dev->srq_table.lock);

	if (!srq) {
		mthca_warn(dev, "Async event for bogus SRQ %08x\n", srqn);
		return;
	}

	if (srq->ibsrq.event_handler) {
		event.device = &dev->ib_dev;
		event.event = event_type;
		event.element.srq = &srq->ibsrq;
		srq->ibsrq.event_handler(&event, srq->ibsrq.srq_context);
	}

	/* Drop our reference; wake the waiter if it was the last one. */
	spin_lock(&dev->srq_table.lock);
	if (!--srq->refcount)
		wake_up(&srq->wait);
	spin_unlock(&dev->srq_table.lock);
}
/*
 * Put the WQE at byte offset wqe_addr back at the tail of the SRQ free
 * list: the current tail is linked to it (both in the software link and in
 * nda_op), the WQE itself becomes the new list terminator, and
 * srq->last_free is updated, all under srq->lock.
 *
 * This function must be called with IRQs disabled.
 */
void mthca_free_srq_wqe(struct mthca_srq *srq, u32 wqe_addr)
{
	int freed = wqe_addr >> srq->wqe_shift;
	struct mthca_next_seg *tail;

	spin_lock(&srq->lock);

	tail = get_wqe(srq, srq->last_free);
	*wqe_to_link(tail) = freed;
	tail->nda_op = htonl((freed << srq->wqe_shift) | 1);
	*wqe_to_link(get_wqe(srq, freed)) = -1;
	srq->last_free = freed;

	spin_unlock(&srq->lock);
}
/*
 * Post a chain of receive work requests to an SRQ, ringing the receive
 * doorbell through dev->kar.
 *
 * Each request takes the WQE at srq->first_free, fills in its data
 * segments and advances first_free along the free list.  A doorbell is
 * written every MTHCA_TAVOR_MAX_WQES_PER_RECV_DB requests and once more at
 * the end for the remainder.
 *
 * Returns 0 on success.  On failure *bad_wr points at the first request
 * that was not posted and the return value is -ENOMEM (free list
 * exhausted) or -EINVAL (more than srq->max_gs scatter entries); requests
 * before the failing one are still handed to the hardware.
 */
int mthca_tavor_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
struct ib_recv_wr **bad_wr)
{
struct mthca_dev *dev = to_mdev(ibsrq->device);
struct mthca_srq *srq = to_msrq(ibsrq);
unsigned long flags;
int err = 0;
int first_ind;
int ind;
int next_ind;
int nreq;
int i;
void *wqe;
void *prev_wqe;
spin_lock_irqsave(&srq->lock, flags);
/* index of the first WQE covered by the next doorbell */
first_ind = srq->first_free;
for (nreq = 0; wr; wr = wr->next) {
ind = srq->first_free;
wqe = get_wqe(srq, ind);
next_ind = *wqe_to_link(wqe);
/* a negative link marks the last free WQE, which is never handed out */
if (unlikely(next_ind < 0)) {
mthca_err(dev, "SRQ %06x full\n", srq->srqn);
err = -ENOMEM;
*bad_wr = wr;
break;
}
/* remember the previously posted WQE; its ee_nds is updated below */
prev_wqe = srq->last;
srq->last = wqe;
((struct mthca_next_seg *) wqe)->ee_nds = 0;
/* flags field will always remain 0 */
wqe += sizeof (struct mthca_next_seg);
if (unlikely(wr->num_sge > srq->max_gs)) {
err = -EINVAL;
*bad_wr = wr;
/* undo the srq->last update made above for this rejected request */
srq->last = prev_wqe;
break;
}
for (i = 0; i < wr->num_sge; ++i) {
mthca_set_data_seg(wqe, wr->sg_list + i);
wqe += sizeof (struct mthca_data_seg);
}
/* terminate a short scatter list with an invalid-lkey entry */
if (i < srq->max_gs)
mthca_set_data_seg_inval(wqe);
((struct mthca_next_seg *) prev_wqe)->ee_nds =
cpu_to_be32(MTHCA_NEXT_DBD);
srq->wrid[ind] = wr->wr_id;
srq->first_free = next_ind;
++nreq;
/* batch is full: ring the doorbell now and start a new batch */
if (unlikely(nreq == MTHCA_TAVOR_MAX_WQES_PER_RECV_DB)) {
nreq = 0;
/*
* Make sure that descriptors are written
* before doorbell is rung.
*/
wmb();
mthca_write64(first_ind << srq->wqe_shift, srq->srqn << 8,
dev->kar + MTHCA_RECEIVE_DOORBELL,
MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
first_ind = srq->first_free;
}
}
/* ring the doorbell for the remaining requests; nreq goes in the low bits */
if (likely(nreq)) {
/*
* Make sure that descriptors are written before
* doorbell is rung.
*/
wmb();
mthca_write64(first_ind << srq->wqe_shift, (srq->srqn << 8) | nreq,
dev->kar + MTHCA_RECEIVE_DOORBELL,
MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
}
/*
* Make sure doorbells don't leak out of SRQ spinlock and
* reach the HCA out of order:
*/
mmiowb();
spin_unlock_irqrestore(&srq->lock, flags);
return err;
}
/*
 * Post a chain of receive work requests to an SRQ, publishing them through
 * the doorbell record at srq->db.
 *
 * Each request takes the WQE at srq->first_free, fills in its data
 * segments and advances first_free along the free list.  After the loop
 * srq->counter is advanced by the number of requests posted and written,
 * big-endian, to the doorbell record.
 *
 * Returns 0 on success.  On failure *bad_wr points at the first request
 * that was not posted and the return value is -ENOMEM (free list
 * exhausted) or -EINVAL (more than srq->max_gs scatter entries); requests
 * before the failing one are still published.
 */
int mthca_arbel_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
struct ib_recv_wr **bad_wr)
{
struct mthca_dev *dev = to_mdev(ibsrq->device);
struct mthca_srq *srq = to_msrq(ibsrq);
unsigned long flags;
int err = 0;
int ind;
int next_ind;
int nreq;
int i;
void *wqe;
spin_lock_irqsave(&srq->lock, flags);
/* nreq is only incremented for requests that complete an iteration */
for (nreq = 0; wr; ++nreq, wr = wr->next) {
ind = srq->first_free;
wqe = get_wqe(srq, ind);
next_ind = *wqe_to_link(wqe);
/* a negative link marks the last free WQE, which is never handed out */
if (unlikely(next_ind < 0)) {
mthca_err(dev, "SRQ %06x full\n", srq->srqn);
err = -ENOMEM;
*bad_wr = wr;
break;
}
((struct mthca_next_seg *) wqe)->ee_nds = 0;
/* flags field will always remain 0 */
wqe += sizeof (struct mthca_next_seg);
if (unlikely(wr->num_sge > srq->max_gs)) {
err = -EINVAL;
*bad_wr = wr;
break;
}
for (i = 0; i < wr->num_sge; ++i) {
mthca_set_data_seg(wqe, wr->sg_list + i);
wqe += sizeof (struct mthca_data_seg);
}
/* terminate a short scatter list with an invalid-lkey entry */
if (i < srq->max_gs)
mthca_set_data_seg_inval(wqe);
srq->wrid[ind] = wr->wr_id;
srq->first_free = next_ind;
}
if (likely(nreq)) {
srq->counter += nreq;
/*
* Make sure that descriptors are written before
* we write doorbell record.
*/
wmb();
*srq->db = cpu_to_be32(srq->counter);
}
spin_unlock_irqrestore(&srq->lock, flags);
return err;
}
int mthca_max_srq_sge(struct mthca_dev *dev)
{
if (mthca_is_memfree(dev))
return dev->limits.max_sg;
/*
* SRQ allocations are based on powers of 2 for Tavor,
* (although they only need to be multiples of 16 bytes).
*
* Therefore, we need to base the max number of sg entries on
* the largest power of 2 descriptor size that is <= to the
* actual max WQE descriptor size, rather than return the
* max_sg value given by the firmware (which is based on WQE
* sizes as multiples of 16, not powers of 2).
*
* If SRQ implementation is changed for Tavor to be based on
* multiples of 16, the calculation below can be deleted and
* the FW max_sg value returned.
*/
return min_t(int, dev->limits.max_sg,
((1 << (fls(dev->limits.max_desc_sz) - 1)) -
sizeof (struct mthca_next_seg)) /
sizeof (struct mthca_data_seg));
}
/*
 * Set up the SRQ number allocator and the SRQ lookup array.
 *
 * A no-op returning 0 when the device does not have MTHCA_FLAG_SRQ set.
 * Otherwise returns 0 on success or the error of the failing helper; the
 * allocator is torn down again if the array cannot be initialised.
 */
int mthca_init_srq_table(struct mthca_dev *dev)
{
	int rc;

	if (!(dev->mthca_flags & MTHCA_FLAG_SRQ))
		return 0;

	spin_lock_init(&dev->srq_table.lock);

	rc = mthca_alloc_init(&dev->srq_table.alloc,
			      dev->limits.num_srqs,
			      dev->limits.num_srqs - 1,
			      dev->limits.reserved_srqs);
	if (rc == 0) {
		rc = mthca_array_init(&dev->srq_table.srq,
				      dev->limits.num_srqs);
		if (rc != 0)
			mthca_alloc_cleanup(&dev->srq_table.alloc);
	}

	return rc;
}
/*
 * Undo mthca_init_srq_table(): release the SRQ lookup array and then the
 * SRQ number allocator.  Does nothing when the device does not have
 * MTHCA_FLAG_SRQ set.
 */
void mthca_cleanup_srq_table(struct mthca_dev *dev)
{
	if (dev->mthca_flags & MTHCA_FLAG_SRQ) {
		mthca_array_cleanup(&dev->srq_table.srq,
				    dev->limits.num_srqs);
		mthca_alloc_cleanup(&dev->srq_table.alloc);
	}
}

76
sys/dev/mthca/mthca_uar.c Normal file
View File

@ -0,0 +1,76 @@
/*
* Copyright (c) 2005 Topspin Communications. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "mthca_dev.h"
#include "mthca_memfree.h"
/*
 * Allocate a UAR index from the device's UAR table and derive uar->pfn
 * from it: the page frame of PCI resource 2 plus the index.
 *
 * Returns 0 on success, or -ENOMEM when mthca_alloc() returns -1.
 */
int mthca_uar_alloc(struct mthca_dev *dev, struct mthca_uar *uar)
{
	uar->index = mthca_alloc(&dev->uar_table.alloc);
	if (uar->index != -1) {
		uar->pfn = (pci_resource_start(dev->pdev, 2) >> PAGE_SHIFT) +
		    uar->index;
		return 0;
	}

	return -ENOMEM;
}
/*
 * Return the index obtained by mthca_uar_alloc() to the device's UAR
 * allocator.  uar->pfn is left as it is.
 */
void mthca_uar_free(struct mthca_dev *dev, struct mthca_uar *uar)
{
mthca_free(&dev->uar_table.alloc, uar->index);
}
/*
 * Set up the UAR index allocator and then the doorbell table.
 *
 * The allocator reserves one index more than dev->limits.reserved_uars.
 * Returns 0 on success or the error of the failing helper; the allocator
 * is torn down again if mthca_init_db_tab() fails.
 */
int mthca_init_uar_table(struct mthca_dev *dev)
{
	int rc;

	rc = mthca_alloc_init(&dev->uar_table.alloc,
			      dev->limits.num_uars,
			      dev->limits.num_uars - 1,
			      dev->limits.reserved_uars + 1);
	if (rc == 0) {
		rc = mthca_init_db_tab(dev);
		if (rc != 0)
			mthca_alloc_cleanup(&dev->uar_table.alloc);
	}

	return rc;
}
/*
 * Undo mthca_init_uar_table() in reverse order: the doorbell table first,
 * then the UAR index allocator.  No check is made for UARs that are still
 * allocated (see the XXX below).
 */
void mthca_cleanup_uar_table(struct mthca_dev *dev)
{
mthca_cleanup_db_tab(dev);
/* XXX check if any UARs are still allocated? */
mthca_alloc_cleanup(&dev->uar_table.alloc);
}

112
sys/dev/mthca/mthca_user.h Normal file
View File

@ -0,0 +1,112 @@
/*
* Copyright (c) 2005 Topspin Communications. All rights reserved.
* Copyright (c) 2005, 2006 Cisco Systems. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef MTHCA_USER_H
#define MTHCA_USER_H
#include <linux/types.h>
/*
* Increment this value if any changes that break userspace ABI
* compatibility are made.
*/
#define MTHCA_UVERBS_ABI_VERSION 1
/*
* Make sure that all structs defined in this file remain laid out so
* that they pack the same way on 32-bit and 64-bit architectures (to
* avoid incompatibility between 32-bit userspace and 64-bit kernels).
* In particular do not use pointer types -- pass pointers in __u64
* instead.
*/
/*
 * Two 32-bit fields, 8 bytes in total.
 * NOTE(review): presumably the driver-specific reply to the
 * allocate-ucontext request; the code that fills it is not in this header.
 */
struct mthca_alloc_ucontext_resp {
__u32 qp_tab_size;
__u32 uarc_size;
};
/*
 * Two 32-bit fields, 8 bytes in total.
 * NOTE(review): presumably the driver-specific reply to the allocate-PD
 * request, with pdn being the protection domain number -- confirm against
 * the code that fills it.
 */
struct mthca_alloc_pd_resp {
__u32 pdn;
/* second 32-bit word, bringing the struct to 8 bytes */
__u32 reserved;
};
/*
 * Two 32-bit fields, 8 bytes in total.
 * NOTE(review): presumably the driver-specific part of the register-MR
 * request; MTHCA_MR_DMASYNC is the one mr_attrs flag defined here.
 */
struct mthca_reg_mr {
/*
* Mark the memory region with a DMA attribute that causes
* in-flight DMA to be flushed when the region is written to:
*/
#define MTHCA_MR_DMASYNC 0x1
__u32 mr_attrs;
/* second 32-bit word, bringing the struct to 8 bytes */
__u32 reserved;
};
/*
 * 32 bytes: two 32-bit words, two 64-bit words, two 32-bit words, so the
 * 64-bit members sit on 8-byte offsets without padding.
 * NOTE(review): presumably the driver-specific part of the create-CQ
 * request; the *_db_page members are 64-bit because this header forbids
 * pointer types -- confirm their meaning against the consumer.
 */
struct mthca_create_cq {
__u32 lkey;
__u32 pdn;
__u64 arm_db_page;
__u64 set_db_page;
__u32 arm_db_index;
__u32 set_db_index;
};
/*
 * Two 32-bit fields, 8 bytes in total.
 * NOTE(review): presumably the driver-specific reply to the create-CQ
 * request, with cqn being the completion queue number.
 */
struct mthca_create_cq_resp {
__u32 cqn;
/* second 32-bit word, bringing the struct to 8 bytes */
__u32 reserved;
};
/*
 * Two 32-bit fields, 8 bytes in total.
 * NOTE(review): presumably the driver-specific part of the resize-CQ
 * request -- confirm against the consumer.
 */
struct mthca_resize_cq {
__u32 lkey;
/* second 32-bit word, bringing the struct to 8 bytes */
__u32 reserved;
};
/*
 * 16 bytes: two 32-bit words followed by one 64-bit word at offset 8.
 * NOTE(review): presumably the driver-specific part of the create-SRQ
 * request; db_page is 64-bit because this header forbids pointer types --
 * confirm its meaning against the consumer.
 */
struct mthca_create_srq {
__u32 lkey;
__u32 db_index;
__u64 db_page;
};
/*
 * Two 32-bit fields, 8 bytes in total.
 * NOTE(review): presumably the driver-specific reply to the create-SRQ
 * request, with srqn being the SRQ number.
 */
struct mthca_create_srq_resp {
__u32 srqn;
/* second 32-bit word, bringing the struct to 8 bytes */
__u32 reserved;
};
/*
 * 32 bytes: two 32-bit words, two 64-bit words, two 32-bit words, so the
 * 64-bit members sit on 8-byte offsets without padding.
 * NOTE(review): presumably the driver-specific part of the create-QP
 * request, with sq_/rq_ referring to the send and receive queues --
 * confirm against the consumer.
 */
struct mthca_create_qp {
__u32 lkey;
__u32 reserved;
__u64 sq_db_page;
__u64 rq_db_page;
__u32 sq_db_index;
__u32 rq_db_index;
};
#endif /* MTHCA_USER_H */

131
sys/dev/mthca/mthca_wqe.h Normal file
View File

@ -0,0 +1,131 @@
/*
* Copyright (c) 2005 Cisco Systems. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef MTHCA_WQE_H
#define MTHCA_WQE_H
#include <linux/types.h>
/*
 * Single-bit flag values for WQE control words.
 *
 * The bit positions of DBD (7) and FENCE (6) match the "[7] DBD [6] F"
 * layout documented on mthca_next_seg.ee_nds; CQ_UPDATE (3), EVENT_GEN (2)
 * and SOLICIT (1) match the layout documented on mthca_next_seg.flags;
 * MTHCA_MLX_VL15 (17) and MTHCA_MLX_SLR (16) match the layout documented
 * on mthca_mlx_seg.flags.
 * NOTE(review): which word the two *_CSUM bits belong to is not stated in
 * this header.
 */
enum {
MTHCA_NEXT_DBD = 1 << 7,
MTHCA_NEXT_FENCE = 1 << 6,
MTHCA_NEXT_CQ_UPDATE = 1 << 3,
MTHCA_NEXT_EVENT_GEN = 1 << 2,
MTHCA_NEXT_SOLICIT = 1 << 1,
MTHCA_NEXT_IP_CSUM = 1 << 4,
MTHCA_NEXT_TCP_UDP_CSUM = 1 << 5,
MTHCA_MLX_VL15 = 1 << 17,
MTHCA_MLX_SLR = 1 << 16
};
/*
 * MTHCA_INVAL_LKEY is the L_Key value written by
 * mthca_set_data_seg_inval() to mark a data segment as unused.
 * NOTE(review): the two MAX_WQES_PER_*_DB values are presumably the most
 * work requests a single doorbell may cover on the respective HCA family
 * -- confirm against the posting code.
 */
enum {
MTHCA_INVAL_LKEY = 0x100,
MTHCA_TAVOR_MAX_WQES_PER_RECV_DB = 256,
MTHCA_ARBEL_MAX_WQES_PER_SEND_DB = 255
};
/*
 * Leading control segment of a work queue entry: four big-endian 32-bit
 * words whose bit layouts are given on each field.
 */
struct mthca_next_seg {
__be32 nda_op; /* [31:6] next WQE [4:0] next opcode */
__be32 ee_nds; /* [31:8] next EE [7] DBD [6] F [5:0] next WQE size */
__be32 flags; /* [3] CQ [2] Event [1] Solicit */
__be32 imm; /* immediate data */
};
/*
 * NOTE(review): presumably the unreliable-datagram address segment of a
 * send WQE on Tavor HCAs, referencing the address vector by address
 * (av_addr) and naming the destination QP number and Q_Key -- inferred
 * from the names only; confirm against the code that fills it.
 */
struct mthca_tavor_ud_seg {
u32 reserved1;
__be32 lkey;
__be64 av_addr;
u32 reserved2[4];
__be32 dqpn;
__be32 qkey;
u32 reserved3[2];
};
/*
 * NOTE(review): presumably the unreliable-datagram address segment of a
 * send WQE on Arbel HCAs, carrying the address vector inline (av[8]) plus
 * the destination QP number and Q_Key -- inferred from the names only;
 * confirm against the code that fills it.
 */
struct mthca_arbel_ud_seg {
__be32 av[8];
__be32 dqpn;
__be32 qkey;
u32 reserved[2];
};
/*
 * NOTE(review): presumably the WQE segment describing a memory-window
 * bind (new R_Key, L_Key, address and length); the access bits of flags
 * are documented on the field.  Inferred from the names only.
 */
struct mthca_bind_seg {
__be32 flags; /* [31] Atomic [30] rem write [29] rem read */
u32 reserved;
__be32 new_rkey;
__be32 lkey;
__be64 addr;
__be64 length;
};
/*
 * NOTE(review): presumably the remote-address segment of an RDMA or
 * atomic WQE (remote virtual address and R_Key) -- inferred from the
 * names only.
 */
struct mthca_raddr_seg {
__be64 raddr;
__be32 rkey;
u32 reserved;
};
/*
 * NOTE(review): presumably the operand segment of an atomic WQE: the
 * swap or add value and the compare value -- inferred from the names
 * only.
 */
struct mthca_atomic_seg {
__be64 swap_add;
__be64 compare;
};
/*
 * One scatter/gather entry of a WQE, stored big-endian.
 * mthca_set_data_seg() fills it from a struct ib_sge (length, lkey, addr);
 * mthca_set_data_seg_inval() marks it unused with MTHCA_INVAL_LKEY.
 */
struct mthca_data_seg {
__be32 byte_count;
__be32 lkey;
__be64 addr;
};
/*
 * Control segment with the same first two words as mthca_next_seg
 * followed by a flags word (layout documented on the field) and two
 * 16-bit fields.
 * NOTE(review): presumably used for MLX-transport (special QP) sends --
 * inferred from the name; confirm against the code that fills it.
 */
struct mthca_mlx_seg {
__be32 nda_op;
__be32 nds;
__be32 flags; /* [17] VL15 [16] SLR [14:12] static rate
[11:8] SL [3] C [2] E */
__be16 rlid;
__be16 vcrc;
};
static __always_inline void mthca_set_data_seg(struct mthca_data_seg *dseg,
struct ib_sge *sg)
{
dseg->byte_count = cpu_to_be32(sg->length);
dseg->lkey = cpu_to_be32(sg->lkey);
dseg->addr = cpu_to_be64(sg->addr);
}
/*
 * Mark a WQE data segment as unused: zero length and address, and the
 * sentinel L_Key MTHCA_INVAL_LKEY (stored big-endian).
 */
static __always_inline void mthca_set_data_seg_inval(struct mthca_data_seg *dseg)
{
dseg->byte_count = 0;
dseg->lkey = cpu_to_be32(MTHCA_INVAL_LKEY);
dseg->addr = 0;
}
#endif /* MTHCA_WQE_H */

View File

@ -534,6 +534,7 @@ hint.mse.0.irq="5"
# iwn: Intel Wireless WiFi Link 1000/105/135/2000/4965/5000/6000/6050 abgn
# 802.11 network adapters
# Requires the iwn firmware module
# mthca: Mellanox HCA InfiniBand
# mlx4ib: Mellanox ConnectX HCA InfiniBand
# mlx4en: Mellanox ConnectX HCA Ethernet
# nfe: nVidia nForce MCP on-board Ethernet Networking (BSD open source)
@ -572,6 +573,7 @@ hint.le.0.at="isa"
hint.le.0.port="0x280"
hint.le.0.irq="10"
hint.le.0.drq="0"
device mthca # Mellanox HCA InfiniBand
device mlx4 # Shared code module between IB and Ethernet
device mlx4ib # Mellanox ConnectX HCA InfiniBand
device mlx4en # Mellanox ConnectX HCA Ethernet

View File

@ -265,6 +265,7 @@ SUBDIR= \
msdosfs_iconv \
${_mse} \
msk \
${_mthca} \
mvs \
mwl \
${_mwlfw} \
@ -675,6 +676,7 @@ _mlx4en= mlx4en
_mlx5en= mlx5en
.endif
.if ${MK_OFED} != "no" || defined(ALL_MODULES)
_mthca= mthca
_mlx4ib= mlx4ib
_mlx5ib= mlx5ib
.endif

View File

@ -0,0 +1,35 @@
# $FreeBSD$
# Builds the mthca kernel module from the sources in sys/dev/mthca.
.PATH: ${SRCTOP}/sys/dev/mthca
KMOD= mthca
# Driver sources first, then the *_if.h and opt_*.h headers listed as
# sources of the module.
SRCS= \
mthca_allocator.c \
mthca_av.c \
mthca_catas.c \
mthca_cmd.c \
mthca_cq.c \
mthca_eq.c \
mthca_mad.c \
mthca_main.c \
mthca_mcg.c \
mthca_memfree.c \
mthca_mr.c \
mthca_pd.c \
mthca_profile.c \
mthca_provider.c \
mthca_qp.c \
mthca_reset.c \
mthca_srq.c \
mthca_uar.c \
device_if.h bus_if.h vnode_if.h pci_if.h \
opt_inet.h opt_inet6.h
# Include paths for the OFED headers and the LinuxKPI compatibility headers.
CFLAGS+= -I${SRCTOP}/sys/ofed/include
CFLAGS+= -I${SRCTOP}/sys/ofed/include/uapi
CFLAGS+= -I${SRCTOP}/sys/compat/linuxkpi/common/include
# Compile-time feature defines.
CFLAGS+= -DCONFIG_INFINIBAND_USER_MEM
CFLAGS+= -DINET6 -DINET
.include <bsd.kmod.mk>
# Appended after bsd.kmod.mk is included: turns off the cast-qual and
# pointer-arith warnings for this module.
CFLAGS+= -Wno-cast-qual -Wno-pointer-arith

View File

@ -0,0 +1,111 @@
/*
* Copyright (c) 2005 Topspin Communications. All rights reserved.
* Copyright (c) 2005, 2006 Cisco Systems. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef MTHCA_ABI_USER_H
#define MTHCA_ABI_USER_H
#include <linux/types.h>
/*
* Increment this value if any changes that break userspace ABI
* compatibility are made.
*/
#define MTHCA_UVERBS_ABI_VERSION 1
/*
* Make sure that all structs defined in this file remain laid out so
* that they pack the same way on 32-bit and 64-bit architectures (to
* avoid incompatibility between 32-bit userspace and 64-bit kernels).
* In particular do not use pointer types -- pass pointers in __u64
* instead.
*/
/*
 * Two 32-bit fields, 8 bytes in total.
 * NOTE(review): presumably the driver-specific reply to the
 * allocate-ucontext request; the code that fills it is not in this header.
 */
struct mthca_alloc_ucontext_resp {
__u32 qp_tab_size;
__u32 uarc_size;
};
/*
 * Two 32-bit fields, 8 bytes in total.
 * NOTE(review): presumably the driver-specific reply to the allocate-PD
 * request, with pdn being the protection domain number -- confirm against
 * the code that fills it.
 */
struct mthca_alloc_pd_resp {
__u32 pdn;
/* second 32-bit word, bringing the struct to 8 bytes */
__u32 reserved;
};
/*
* Mark the memory region with a DMA attribute that causes
* in-flight DMA to be flushed when the region is written to:
*/
#define MTHCA_MR_DMASYNC 0x1
/*
 * Two 32-bit fields, 8 bytes in total.
 * NOTE(review): presumably the driver-specific part of the register-MR
 * request, with mr_attrs taking MTHCA_MR_* flag bits -- confirm against
 * the consumer.
 */
struct mthca_reg_mr {
__u32 mr_attrs;
/* second 32-bit word, bringing the struct to 8 bytes */
__u32 reserved;
};
/*
 * 32 bytes: two 32-bit words, two 64-bit words, two 32-bit words, so the
 * 64-bit members sit on 8-byte offsets without padding.
 * NOTE(review): presumably the driver-specific part of the create-CQ
 * request; the *_db_page members are 64-bit because this header forbids
 * pointer types -- confirm their meaning against the consumer.
 */
struct mthca_create_cq {
__u32 lkey;
__u32 pdn;
__u64 arm_db_page;
__u64 set_db_page;
__u32 arm_db_index;
__u32 set_db_index;
};
/*
 * Two 32-bit fields, 8 bytes in total.
 * NOTE(review): presumably the driver-specific reply to the create-CQ
 * request, with cqn being the completion queue number.
 */
struct mthca_create_cq_resp {
__u32 cqn;
/* second 32-bit word, bringing the struct to 8 bytes */
__u32 reserved;
};
/*
 * Two 32-bit fields, 8 bytes in total.
 * NOTE(review): presumably the driver-specific part of the resize-CQ
 * request -- confirm against the consumer.
 */
struct mthca_resize_cq {
__u32 lkey;
/* second 32-bit word, bringing the struct to 8 bytes */
__u32 reserved;
};
/*
 * 16 bytes: two 32-bit words followed by one 64-bit word at offset 8.
 * NOTE(review): presumably the driver-specific part of the create-SRQ
 * request; db_page is 64-bit because this header forbids pointer types --
 * confirm its meaning against the consumer.
 */
struct mthca_create_srq {
__u32 lkey;
__u32 db_index;
__u64 db_page;
};
/*
 * Two 32-bit fields, 8 bytes in total.
 * NOTE(review): presumably the driver-specific reply to the create-SRQ
 * request, with srqn being the SRQ number.
 */
struct mthca_create_srq_resp {
__u32 srqn;
/* second 32-bit word, bringing the struct to 8 bytes */
__u32 reserved;
};
/*
 * 32 bytes: two 32-bit words, two 64-bit words, two 32-bit words, so the
 * 64-bit members sit on 8-byte offsets without padding.
 * NOTE(review): presumably the driver-specific part of the create-QP
 * request, with sq_/rq_ referring to the send and receive queues --
 * confirm against the consumer.
 */
struct mthca_create_qp {
__u32 lkey;
__u32 reserved;
__u64 sq_db_page;
__u64 rq_db_page;
__u32 sq_db_index;
__u32 rq_db_index;
};
#endif /* MTHCA_ABI_USER_H */