From 73547eeae90e3d79444105ea1ff83d8a2ec27d5b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-S=C3%A9bastien=20P=C3%A9dron?= Date: Wed, 17 Sep 2014 08:28:50 +0000 Subject: [PATCH] drm/i915: Add HW context support This feature is required by Mesa 9.2+. Without this, a GL application crashes with the following message: # glxinfo name of display: :0.0 Gen6+ requires Kernel 3.6 or later. Assertion failed: (ctx->Version > 0), function handle_first_current, file ../../src/mesa/main/context.c, line 1498. Abort (core dumped) Now, Mesa 10.2.4 and 10.3-rc3 work fine: # glxinfo name of display: :0 display: :0 screen: 0 direct rendering: Yes ... OpenGL renderer string: Mesa DRI Intel(R) 965GM OpenGL version string: 2.1 Mesa 10.2.4 ... The code was imported from Linux 3.8.13. Reviewed by: kib@ Tested by: kwm@, danfe@, Henry Hu, Johannes Lundberg, Johannes Dieterich, Lutz Bichler MFC after: 3 days Relnotes: yes --- sys/dev/drm2/drm_gem_names.c | 18 + sys/dev/drm2/drm_gem_names.h | 1 + sys/dev/drm2/i915/i915_dma.c | 6 + sys/dev/drm2/i915/i915_drm.h | 23 +- sys/dev/drm2/i915/i915_drv.c | 72 ++-- sys/dev/drm2/i915/i915_drv.h | 38 +- sys/dev/drm2/i915/i915_gem.c | 10 +- sys/dev/drm2/i915/i915_gem_context.c | 549 ++++++++++++++++++++++++ sys/dev/drm2/i915/i915_gem_execbuffer.c | 16 + sys/dev/drm2/i915/i915_gem_gtt.c | 5 + sys/dev/drm2/i915/i915_irq.c | 2 +- sys/dev/drm2/i915/i915_reg.h | 30 ++ sys/dev/drm2/i915/intel_ringbuffer.c | 7 + sys/dev/drm2/i915/intel_ringbuffer.h | 13 + sys/modules/drm2/i915kms/Makefile | 1 + sys/sys/param.h | 2 +- 16 files changed, 763 insertions(+), 30 deletions(-) create mode 100644 sys/dev/drm2/i915/i915_gem_context.c diff --git a/sys/dev/drm2/drm_gem_names.c b/sys/dev/drm2/drm_gem_names.c index 084d50f04784..03ef2c807183 100644 --- a/sys/dev/drm2/drm_gem_names.c +++ b/sys/dev/drm2/drm_gem_names.c @@ -127,6 +127,24 @@ drm_gem_find_name(struct drm_gem_names *names, void *ptr) return (arg.res); } +void * +drm_gem_find_ptr(struct drm_gem_names *names, uint32_t name) +{ + struct drm_gem_name *n; + void *res; + + mtx_lock(&names->lock); + LIST_FOREACH(n, gem_name_hash_index(names, name), link) { + if (n->name == name) { + res = n->ptr; + mtx_unlock(&names->lock); + return (res); + } + } + mtx_unlock(&names->lock); + return (NULL); +} + int drm_gem_name_create(struct drm_gem_names *names, void *p, uint32_t *name) { diff --git a/sys/dev/drm2/drm_gem_names.h b/sys/dev/drm2/drm_gem_names.h index 0fe4edd99212..08f6fa470786 100644 --- a/sys/dev/drm2/drm_gem_names.h +++ b/sys/dev/drm2/drm_gem_names.h @@ -54,6 +54,7 @@ struct drm_gem_names { void drm_gem_names_init(struct drm_gem_names *names); void drm_gem_names_fini(struct drm_gem_names *names); uint32_t drm_gem_find_name(struct drm_gem_names *names, void *ptr); +void *drm_gem_find_ptr(struct drm_gem_names *names, uint32_t name); void *drm_gem_name_ref(struct drm_gem_names *names, uint32_t name, void (*ref)(void *)); int drm_gem_name_create(struct drm_gem_names *names, void *obj, uint32_t *name); diff --git a/sys/dev/drm2/i915/i915_dma.c b/sys/dev/drm2/i915/i915_dma.c index 24cd2cd18346..3fceaadb433d 100644 --- a/sys/dev/drm2/i915/i915_dma.c +++ b/sys/dev/drm2/i915/i915_dma.c @@ -1357,6 +1357,7 @@ i915_driver_unload_int(struct drm_device *dev, bool locked) DRM_LOCK(dev); i915_gem_free_all_phys_object(dev); i915_gem_cleanup_ringbuffer(dev); + i915_gem_context_fini(dev); if (!locked) DRM_UNLOCK(dev); i915_gem_cleanup_aliasing_ppgtt(dev); @@ -1413,6 +1414,8 @@ i915_driver_open(struct drm_device *dev, struct drm_file *file_priv)
INIT_LIST_HEAD(&i915_file_priv->mm.request_list); file_priv->driver_priv = i915_file_priv; + drm_gem_names_init(&i915_file_priv->context_idr); + return (0); } @@ -1437,6 +1440,7 @@ i915_driver_lastclose(struct drm_device * dev) void i915_driver_preclose(struct drm_device * dev, struct drm_file *file_priv) { + i915_gem_context_close(dev, file_priv); i915_gem_release(dev, file_priv); } @@ -1491,6 +1495,8 @@ struct drm_ioctl_desc i915_ioctls[] = { DRM_IOCTL_DEF(DRM_I915_OVERLAY_ATTRS, intel_overlay_attrs, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED), DRM_IOCTL_DEF(DRM_I915_SET_SPRITE_COLORKEY, intel_sprite_set_colorkey, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED), DRM_IOCTL_DEF(DRM_I915_GET_SPRITE_COLORKEY, intel_sprite_get_colorkey, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_I915_GEM_CONTEXT_CREATE, i915_gem_context_create_ioctl, DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_I915_GEM_CONTEXT_DESTROY, i915_gem_context_destroy_ioctl, DRM_UNLOCKED), }; #ifdef COMPAT_FREEBSD32 diff --git a/sys/dev/drm2/i915/i915_drm.h b/sys/dev/drm2/i915/i915_drm.h index d4b0ae8386d9..cf5227f20be2 100644 --- a/sys/dev/drm2/i915/i915_drm.h +++ b/sys/dev/drm2/i915/i915_drm.h @@ -204,6 +204,8 @@ typedef struct drm_i915_sarea { #define DRM_I915_GEM_EXECBUFFER2 0x29 #define DRM_I915_GET_SPRITE_COLORKEY 0x2a #define DRM_I915_SET_SPRITE_COLORKEY 0x2b +#define DRM_I915_GEM_CONTEXT_CREATE 0x2d +#define DRM_I915_GEM_CONTEXT_DESTROY 0x2e #define DRM_IOCTL_I915_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t) #define DRM_IOCTL_I915_FLUSH DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH) @@ -248,6 +250,8 @@ typedef struct drm_i915_sarea { #define DRM_IOCTL_I915_OVERLAY_ATTRS DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_OVERLAY_ATTRS, struct drm_intel_overlay_attrs) #define DRM_IOCTL_I915_SET_SPRITE_COLORKEY DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_SET_SPRITE_COLORKEY, struct drm_intel_sprite_colorkey) #define DRM_IOCTL_I915_GET_SPRITE_COLORKEY DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_SET_SPRITE_COLORKEY, struct drm_intel_sprite_colorkey) +#define DRM_IOCTL_I915_GEM_CONTEXT_CREATE DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_CREATE, struct drm_i915_gem_context_create) +#define DRM_IOCTL_I915_GEM_CONTEXT_DESTROY DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_DESTROY, struct drm_i915_gem_context_destroy) /* Asynchronous page flipping: */ @@ -702,7 +706,7 @@ struct drm_i915_gem_exec_object2 { #define EXEC_OBJECT_NEEDS_FENCE (1<<0) uint64_t flags; - uint64_t rsvd1; + uint64_t rsvd1; /* now used for context info */ uint64_t rsvd2; }; @@ -746,6 +750,12 @@ struct drm_i915_gem_execbuffer2 { /** Resets the SO write offset registers for transform feedback on gen7. */ #define I915_EXEC_GEN7_SOL_RESET (1<<8) +#define I915_EXEC_CONTEXT_ID_MASK (0xffffffff) +#define i915_execbuffer2_set_context_id(eb2, context) \ + (eb2).rsvd1 = context & I915_EXEC_CONTEXT_ID_MASK +#define i915_execbuffer2_get_context_id(eb2) \ + ((eb2).rsvd1 & I915_EXEC_CONTEXT_ID_MASK) + struct drm_i915_gem_pin { /** Handle of the buffer to be pinned. 
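(Illustration, not part of the patch: how userspace is expected to use the context-id macros added above. It assumes the updated i915_drm.h definitions are visible; the helper name tag_submission is invented.)

#include <assert.h>
#include <stdint.h>

/* Sketch: tag an execbuffer2 submission with a HW context id. */
static void
tag_submission(struct drm_i915_gem_execbuffer2 *eb2, uint32_t ctx_id)
{
	/* The id rides in the previously reserved rsvd1 field. */
	i915_execbuffer2_set_context_id(*eb2, ctx_id);
	assert(i915_execbuffer2_get_context_id(*eb2) ==
	    (ctx_id & I915_EXEC_CONTEXT_ID_MASK));
}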
*/ uint32_t handle; @@ -968,4 +978,15 @@ struct drm_intel_sprite_colorkey { uint32_t flags; }; +struct drm_i915_gem_context_create { + /* output: id of new context*/ + uint32_t ctx_id; + uint32_t pad; +}; + +struct drm_i915_gem_context_destroy { + uint32_t ctx_id; + uint32_t pad; +}; + #endif /* _I915_DRM_H_ */ diff --git a/sys/dev/drm2/i915/i915_drv.c b/sys/dev/drm2/i915/i915_drv.c index 85ac10e0f97c..2380d237f0fa 100644 --- a/sys/dev/drm2/i915/i915_drv.c +++ b/sys/dev/drm2/i915/i915_drv.c @@ -612,7 +612,7 @@ __gen6_gt_wait_for_fifo(struct drm_i915_private *dev_priv) } static int -i8xx_do_reset(struct drm_device *dev, u8 flags) +i8xx_do_reset(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; int onems; @@ -657,7 +657,7 @@ i965_reset_complete(struct drm_device *dev) } static int -i965_do_reset(struct drm_device *dev, u8 flags) +i965_do_reset(struct drm_device *dev) { u8 gdrst; @@ -667,28 +667,30 @@ i965_do_reset(struct drm_device *dev, u8 flags) * triggers the reset; when done, the hardware will clear it. */ gdrst = pci_read_config(dev->device, I965_GDRST, 1); - pci_write_config(dev->device, I965_GDRST, gdrst | flags | 0x1, 1); + pci_write_config(dev->device, I965_GDRST, + gdrst | GRDOM_RENDER | GRDOM_RESET_ENABLE, 1); return (_intel_wait_for(dev, i965_reset_complete(dev), 500, 1, "915rst")); } static int -ironlake_do_reset(struct drm_device *dev, u8 flags) +ironlake_do_reset(struct drm_device *dev) { struct drm_i915_private *dev_priv; u32 gdrst; dev_priv = dev->dev_private; gdrst = I915_READ(MCHBAR_MIRROR_BASE + ILK_GDSR); - I915_WRITE(MCHBAR_MIRROR_BASE + ILK_GDSR, gdrst | flags | 0x1); + I915_WRITE(MCHBAR_MIRROR_BASE + ILK_GDSR, + gdrst | GRDOM_RENDER | GRDOM_RESET_ENABLE); return (_intel_wait_for(dev, (I915_READ(MCHBAR_MIRROR_BASE + ILK_GDSR) & 0x1) != 0, 500, 1, "915rst")); } static int -gen6_do_reset(struct drm_device *dev, u8 flags) +gen6_do_reset(struct drm_device *dev) { struct drm_i915_private *dev_priv; int ret; @@ -726,8 +728,43 @@ gen6_do_reset(struct drm_device *dev, u8 flags) return (ret); } +int intel_gpu_reset(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + int ret = -ENODEV; + + switch (INTEL_INFO(dev)->gen) { + case 7: + case 6: + ret = gen6_do_reset(dev); + break; + case 5: + ret = ironlake_do_reset(dev); + break; + case 4: + ret = i965_do_reset(dev); + break; + case 2: + ret = i8xx_do_reset(dev); + break; + } + + /* Also reset the gpu hangman. 
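(Illustration, not part of the patch: a minimal userspace round-trip through the two new ioctls, assuming the updated i915_drm.h is on the include path and a hypothetical /dev/dri/card0 node.)

#include <sys/ioctl.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
	struct drm_i915_gem_context_create create;
	struct drm_i915_gem_context_destroy destroy;
	int fd = open("/dev/dri/card0", O_RDWR);	/* hypothetical node */

	if (fd < 0)
		return (1);
	memset(&create, 0, sizeof(create));
	if (ioctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &create) != 0) {
		/* Fails with ENODEV when HW contexts are disabled. */
		close(fd);
		return (1);
	}
	printf("created HW context %u\n", create.ctx_id);
	memset(&destroy, 0, sizeof(destroy));
	destroy.ctx_id = create.ctx_id;
	ioctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_DESTROY, &destroy);
	close(fd);
	return (0);
}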
*/ + if (dev_priv->stop_rings) { + DRM_DEBUG("Simulated gpu hang, resetting stop_rings\n"); + dev_priv->stop_rings = 0; + if (ret == -ENODEV) { + DRM_ERROR("Reset not implemented, but ignoring " + "error for simulated gpu hangs\n"); + ret = 0; + } + } + + return ret; +} + int -i915_reset(struct drm_device *dev, u8 flags) +i915_reset(struct drm_device *dev) { drm_i915_private_t *dev_priv = dev->dev_private; /* @@ -748,23 +785,9 @@ i915_reset(struct drm_device *dev, u8 flags) ret = -ENODEV; if (time_second - dev_priv->last_gpu_reset < 5) { DRM_ERROR("GPU hanging too fast, declaring wedged!\n"); - } else { - switch (INTEL_INFO(dev)->gen) { - case 7: - case 6: - ret = gen6_do_reset(dev, flags); - break; - case 5: - ret = ironlake_do_reset(dev, flags); - break; - case 4: - ret = i965_do_reset(dev, flags); - break; - case 2: - ret = i8xx_do_reset(dev, flags); - break; - } - } + } else + ret = intel_gpu_reset(dev); + dev_priv->last_gpu_reset = time_second; if (ret) { DRM_ERROR("Failed to reset chip.\n"); @@ -784,6 +807,7 @@ i915_reset(struct drm_device *dev, u8 flags) if (HAS_BLT(dev)) dev_priv->rings[BCS].init(&dev_priv->rings[BCS]); + i915_gem_context_init(dev); i915_gem_init_ppgtt(dev); drm_irq_uninstall(dev); diff --git a/sys/dev/drm2/i915/i915_drv.h b/sys/dev/drm2/i915/i915_drv.h index c332f838804d..b84f4f39a74d 100644 --- a/sys/dev/drm2/i915/i915_drv.h +++ b/sys/dev/drm2/i915/i915_drv.h @@ -174,6 +174,17 @@ struct i915_hw_ppgtt { vm_paddr_t scratch_page_dma_addr; }; + +/* This must match up with the value previously used for execbuf2.rsvd1. */ +#define DEFAULT_CONTEXT_ID 0 +struct i915_hw_context { + uint32_t id; + bool is_initialized; + struct drm_i915_file_private *file_priv; + struct intel_ring_buffer *ring; + struct drm_i915_gem_object *obj; +}; + enum no_fbc_reason { FBC_NO_OUTPUT, /* no outputs enabled to compress */ FBC_STOLEN_TOO_SMALL, /* not enough space to hold compressed buffers */ @@ -700,6 +711,8 @@ typedef struct drm_i915_private { enum no_fbc_reason no_fbc_reason; + unsigned int stop_rings; + unsigned long cfb_size; unsigned int cfb_fb; int cfb_plane; @@ -723,8 +736,16 @@ typedef struct drm_i915_private { struct drm_property *broadcast_rgb_property; struct drm_property *force_audio_property; + + bool hw_contexts_disabled; + uint32_t hw_context_size; } drm_i915_private_t; +/* Iterate over initialised rings */ +#define for_each_ring(ring__, dev_priv__, i__) \ + for ((i__) = 0; (i__) < I915_NUM_RINGS; (i__)++) \ + if (((ring__) = &(dev_priv__)->rings[(i__)]), intel_ring_initialized((ring__))) + enum hdmi_force_audio { HDMI_AUDIO_OFF_DVI = -2, /* no aux data for HDMI-DVI converter */ HDMI_AUDIO_OFF, /* force turn off HDMI audio */ @@ -832,6 +853,7 @@ struct drm_i915_gem_object { unsigned int cache_level:2; unsigned int has_aliasing_ppgtt_mapping:1; + unsigned int has_global_gtt_mapping:1; vm_page_t *pages; @@ -927,6 +949,7 @@ struct drm_i915_file_private { struct list_head request_list; struct mtx lck; } mm; + struct drm_gem_names context_idr; }; struct drm_i915_error_state { @@ -1026,7 +1049,8 @@ extern int i915_enable_hangcheck; const struct intel_device_info *i915_get_device_id(int device); -int i915_reset(struct drm_device *dev, u8 flags); +extern int intel_gpu_reset(struct drm_device *dev); +int i915_reset(struct drm_device *dev); /* i915_debug.c */ int i915_sysctl_init(struct drm_device *dev, struct sysctl_ctx_list *ctx, @@ -1205,6 +1229,17 @@ void i915_gem_release(struct drm_device *dev, struct drm_file *file); int i915_gem_object_set_cache_level(struct 
drm_i915_gem_object *obj, enum i915_cache_level cache_level); +/* i915_gem_context.c */ +void i915_gem_context_init(struct drm_device *dev); +void i915_gem_context_fini(struct drm_device *dev); +void i915_gem_context_close(struct drm_device *dev, struct drm_file *file); +int i915_switch_context(struct intel_ring_buffer *ring, + struct drm_file *file, int to_id); +int i915_gem_context_create_ioctl(struct drm_device *dev, void *data, + struct drm_file *file); +int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data, + struct drm_file *file); + void i915_gem_free_all_phys_object(struct drm_device *dev); void i915_gem_detach_phys_object(struct drm_device *dev, struct drm_i915_gem_object *obj); @@ -1444,6 +1479,7 @@ __i915_write(64, 64) #define HAS_LLC(dev) (INTEL_INFO(dev)->has_llc) #define I915_NEED_GFX_HWS(dev) (INTEL_INFO(dev)->need_gfx_hws) +#define HAS_HW_CONTEXTS(dev) (INTEL_INFO(dev)->gen >= 6) #define HAS_ALIASING_PPGTT(dev) (INTEL_INFO(dev)->gen >=6) #define HAS_OVERLAY(dev) (INTEL_INFO(dev)->has_overlay) diff --git a/sys/dev/drm2/i915/i915_gem.c b/sys/dev/drm2/i915/i915_gem.c index 6d462071d7b9..45e770d30f3e 100644 --- a/sys/dev/drm2/i915/i915_gem.c +++ b/sys/dev/drm2/i915/i915_gem.c @@ -477,6 +477,7 @@ i915_gem_init_hw(struct drm_device *dev) } dev_priv->next_seqno = 1; + i915_gem_context_init(dev); i915_gem_init_ppgtt(dev); return (0); @@ -2586,11 +2587,16 @@ int i915_gpu_idle(struct drm_device *dev, bool do_retire) { drm_i915_private_t *dev_priv = dev->dev_private; + struct intel_ring_buffer *ring; int ret, i; /* Flush everything onto the inactive list. */ - for (i = 0; i < I915_NUM_RINGS; i++) { - ret = i915_ring_idle(&dev_priv->rings[i], do_retire); + for_each_ring(ring, dev_priv, i) { + ret = i915_switch_context(ring, NULL, DEFAULT_CONTEXT_ID); + if (ret) + return ret; + + ret = i915_ring_idle(ring, do_retire); if (ret) return ret; } diff --git a/sys/dev/drm2/i915/i915_gem_context.c b/sys/dev/drm2/i915/i915_gem_context.c new file mode 100644 index 000000000000..07d7a66d09e2 --- /dev/null +++ b/sys/dev/drm2/i915/i915_gem_context.c @@ -0,0 +1,549 @@ +/* + * Copyright © 2011-2012 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Ben Widawsky + * + */ + +/* + * This file implements HW context support. On gen5+ a HW context consists of an + * opaque GPU object which is referenced at times of context saves and restores. 
+ * With RC6 enabled, the context is also referenced as the GPU enters and exits + * from RC6 (the GPU has its own internal power context, except on gen5). Though + * something like a context does exist for the media ring, the code only + * supports contexts for the render ring. + * + * In software, there is a distinction between contexts created by the user, + * and the default HW context. The default HW context is used by GPU clients + * that do not request setup of their own hardware context. The default + * context's state is never restored to help prevent programming errors. This + * would happen if a client ran and piggy-backed off another client's GPU state. + * The default context only exists to give the GPU some offset to load as the + * current to invoke a save of the context we actually care about. In fact, the + * code could likely be constructed, albeit in a more complicated fashion, to + * never use the default context, though that limits the driver's ability to + * swap out and/or destroy other contexts. + * + * All other contexts are created at the request of a GPU client. These contexts + * store GPU state, and thus allow GPU clients to not re-emit state (and + * potentially query certain state) at any time. The kernel driver makes + * certain that the appropriate commands are inserted. + * + * The context life cycle is semi-complicated in that context BOs may live + * longer than the context itself because of the way the hardware and object + * tracking work. Below is a very crude representation of the state machine + * describing the context life. + * refcount pincount active + * S0: initial state 0 0 0 + * S1: context created 1 0 0 + * S2: context is currently running 2 1 X + * S3: GPU referenced, but not current 2 0 1 + * S4: context is current, but destroyed 1 1 0 + * S5: like S3, but destroyed 1 0 1 + * + * The most common (but not all) transitions: + * S0->S1: client creates a context + * S1->S2: client submits execbuf with context + * S2->S3: another client submits execbuf with context + * S3->S1: context object was retired + * S3->S2: client submits another execbuf + * S2->S4: context destroy called with current context + * S3->S5->S0: destroy path + * S4->S5->S0: destroy path on current context + * + * There are two confusing terms used above: + * The "current context" means the context which is currently running on the + * GPU. The GPU has loaded its state already and has stored away the gtt + * offset of the BO. The GPU is not actively referencing the data at this + * offset, but it will on the next context switch. The only way to avoid this + * is to do a GPU reset. + * + * An "active context" is one which was previously the "current context" and is + * on the active list waiting for the next context switch to occur. Until this + * happens, the object must remain at the same gtt offset. It is therefore + * possible to destroy a context that is still active. + * + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <dev/drm2/drmP.h> +#include <dev/drm2/i915/i915_drm.h> +#include "i915_drv.h" + +/* This is a HW constraint. The value below is the largest known requirement + * I've seen in a spec to date, and that was a workaround for a non-shipping + * part. It should be safe to decrease this, but it is more future-proof as is.
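(Illustrative only: the S0-S5 table above restated as C data. The driver itself keeps no such table; this merely re-encodes the refcount/pincount/active triples from the comment, with -1 standing in for the "X" don't-care entry.)

static const struct {
	const char *state;
	int refcount, pincount, active;
} ctx_lifecycle[] = {
	{ "S0: initial state",                0, 0,  0 },
	{ "S1: context created",              1, 0,  0 },
	{ "S2: context is currently running", 2, 1, -1 },
	{ "S3: GPU referenced, not current",  2, 0,  1 },
	{ "S4: current, but destroyed",       1, 1,  0 },
	{ "S5: like S3, but destroyed",       1, 0,  1 },
};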
*/ +#define CONTEXT_ALIGN (64<<10) + +static struct i915_hw_context * +i915_gem_context_get(struct drm_i915_file_private *file_priv, u32 id); +static int do_switch(struct i915_hw_context *to); + +static int get_context_size(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + int ret; + u32 reg; + + switch (INTEL_INFO(dev)->gen) { + case 6: + reg = I915_READ(CXT_SIZE); + ret = GEN6_CXT_TOTAL_SIZE(reg) * 64; + break; + case 7: + reg = I915_READ(GEN7_CXT_SIZE); +#ifdef FREEBSD_WIP + if (IS_HASWELL(dev)) + ret = HSW_CXT_TOTAL_SIZE(reg) * 64; + else +#endif + ret = GEN7_CXT_TOTAL_SIZE(reg) * 64; + break; + default: + panic("i915_gem_context: Unsupported Intel GPU generation %d", + INTEL_INFO(dev)->gen); + } + + return ret; +} + +static void do_destroy(struct i915_hw_context *ctx) +{ +#if defined(INVARIANTS) + struct drm_device *dev = ctx->obj->base.dev; + struct drm_i915_private *dev_priv = dev->dev_private; +#endif + + if (ctx->file_priv) + drm_gem_names_remove(&ctx->file_priv->context_idr, ctx->id); + else + KASSERT(ctx == dev_priv->rings[RCS].default_context, + ("i915_gem_context: ctx != default_context")); + + drm_gem_object_unreference(&ctx->obj->base); + free(ctx, DRM_I915_GEM); +} + +static int +create_hw_context(struct drm_device *dev, + struct drm_i915_file_private *file_priv, + struct i915_hw_context **ret_ctx) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + struct i915_hw_context *ctx; + int ret, id; + + ctx = malloc(sizeof(*ctx), DRM_I915_GEM, M_NOWAIT | M_ZERO); + if (ctx == NULL) + return (-ENOMEM); + + ctx->obj = i915_gem_alloc_object(dev, dev_priv->hw_context_size); + if (ctx->obj == NULL) { + free(ctx, DRM_I915_GEM); + DRM_DEBUG_DRIVER("Context object allocation failed\n"); + return (-ENOMEM); + } + + if (INTEL_INFO(dev)->gen >= 7) { + ret = i915_gem_object_set_cache_level(ctx->obj, + I915_CACHE_LLC_MLC); + if (ret) + goto err_out; + } + + /* The ring associated with the context object is handled by the normal + * object tracking code. We give an initial ring value simply to pass an + * assertion in the context switch code. + */ + ctx->ring = &dev_priv->rings[RCS]; + + /* Default context will never have a file_priv */ + if (file_priv == NULL) { + *ret_ctx = ctx; + return (0); + } + + ctx->file_priv = file_priv; + +again: + id = 0; + ret = drm_gem_name_create(&file_priv->context_idr, ctx, &id); + if (ret == 0) + ctx->id = id; + + if (ret == -EAGAIN) + goto again; + else if (ret) + goto err_out; + + *ret_ctx = ctx; + return (0); + +err_out: + do_destroy(ctx); + return (ret); +} + +static inline bool is_default_context(struct i915_hw_context *ctx) +{ + return (ctx == ctx->ring->default_context); +} + +/** + * The default context needs to exist per ring that uses contexts. It stores the + * context state of the GPU for applications that don't utilize HW contexts, as + * well as for the idle case. + */ +static int create_default_context(struct drm_i915_private *dev_priv) +{ + struct i915_hw_context *ctx; + int ret; + + DRM_LOCK_ASSERT(dev_priv->dev); + + ret = create_hw_context(dev_priv->dev, NULL, &ctx); + if (ret != 0) + return (ret); + + /* We may need to do things with the shrinker which require us to + * immediately switch back to the default context. This can cause a + * problem as pinning the default context also requires GTT space which + * may not be available. To avoid this we always pin the + * default context.
+ */ + dev_priv->rings[RCS].default_context = ctx; + ret = i915_gem_object_pin(ctx->obj, CONTEXT_ALIGN, false); + if (ret) + goto err_destroy; + + ret = do_switch(ctx); + if (ret) + goto err_unpin; + + DRM_DEBUG_DRIVER("Default HW context loaded\n"); + return 0; + +err_unpin: + i915_gem_object_unpin(ctx->obj); +err_destroy: + do_destroy(ctx); + return ret; +} + +void i915_gem_context_init(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + uint32_t ctx_size; + + if (!HAS_HW_CONTEXTS(dev)) { + dev_priv->hw_contexts_disabled = true; + return; + } + + /* If called from reset, or thaw... we've been here already */ + if (dev_priv->hw_contexts_disabled || + dev_priv->rings[RCS].default_context) + return; + + ctx_size = get_context_size(dev); + dev_priv->hw_context_size = get_context_size(dev); + dev_priv->hw_context_size = roundup(dev_priv->hw_context_size, 4096); + + if (ctx_size <= 0 || ctx_size > (1<<20)) { + dev_priv->hw_contexts_disabled = true; + return; + } + + if (create_default_context(dev_priv)) { + dev_priv->hw_contexts_disabled = true; + return; + } + + DRM_DEBUG_DRIVER("HW context support initialized\n"); +} + +void i915_gem_context_fini(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + + if (dev_priv->hw_contexts_disabled) + return; + + /* The only known way to stop the gpu from accessing the hw context is + * to reset it. Do this as the very last operation to avoid confusing + * other code, leading to spurious errors. */ + intel_gpu_reset(dev); + + i915_gem_object_unpin(dev_priv->rings[RCS].default_context->obj); + + do_destroy(dev_priv->rings[RCS].default_context); +} + +static int context_idr_cleanup(uint32_t id, void *p, void *data) +{ + struct i915_hw_context *ctx = p; + + KASSERT(id != DEFAULT_CONTEXT_ID, ("i915_gem_context: id == DEFAULT_CONTEXT_ID in cleanup")); + + do_destroy(ctx); + + return 0; +} + +void i915_gem_context_close(struct drm_device *dev, struct drm_file *file) +{ + struct drm_i915_file_private *file_priv = file->driver_priv; + + //DRM_LOCK(dev); /* Called from preclose(), the lock is already owned. */ + drm_gem_names_foreach(&file_priv->context_idr, context_idr_cleanup, NULL); + drm_gem_names_fini(&file_priv->context_idr); + //DRM_UNLOCK(dev); +} + +static struct i915_hw_context * +i915_gem_context_get(struct drm_i915_file_private *file_priv, u32 id) +{ + return (struct i915_hw_context *)drm_gem_find_ptr(&file_priv->context_idr, id); +} + +static inline int +mi_set_context(struct intel_ring_buffer *ring, + struct i915_hw_context *new_context, + u32 hw_flags) +{ + int ret; + + /* w/a: If Flush TLB Invalidation Mode is enabled, driver must do a TLB + * invalidation prior to MI_SET_CONTEXT. On GEN6 we don't set the value + * explicitly, so we rely on the value at ring init, stored in + * itlb_before_ctx_switch. 
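(Worked example with an invented register value: how get_context_size() and the roundup in i915_gem_context_init() turn the gen6 CXT_SIZE fields into an allocation size. Standalone; the macros are reproduced from the i915_reg.h additions in this patch.)

#include <stdint.h>
#include <stdio.h>

#define GEN6_CXT_POWER_SIZE(r)		(((r) >> 24) & 0x3f)
#define GEN6_CXT_RING_SIZE(r)		(((r) >> 18) & 0x3f)
#define GEN6_CXT_RENDER_SIZE(r)		(((r) >> 12) & 0x3f)
#define GEN6_CXT_EXTENDED_SIZE(r)	(((r) >> 6) & 0x3f)
#define GEN6_CXT_PIPELINE_SIZE(r)	(((r) >> 0) & 0x3f)
#define GEN6_CXT_TOTAL_SIZE(r)		(GEN6_CXT_POWER_SIZE(r) + \
					 GEN6_CXT_RING_SIZE(r) + \
					 GEN6_CXT_RENDER_SIZE(r) + \
					 GEN6_CXT_EXTENDED_SIZE(r) + \
					 GEN6_CXT_PIPELINE_SIZE(r))

int
main(void)
{
	/* Invented CXT_SIZE value: fields sum to 70 64-byte units. */
	uint32_t reg = (10u << 24) | (20u << 18) | (30u << 12) | (5u << 6) | 5u;
	uint32_t bytes = GEN6_CXT_TOTAL_SIZE(reg) * 64;	/* 70 * 64 = 4480 */
	uint32_t alloc = (bytes + 4095) & ~4095u;	/* roundup(, 4096) = 8192 */

	printf("context object: %u bytes -> %u allocated\n", bytes, alloc);
	return (0);
}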
+ */ + if (IS_GEN6(ring->dev) && ring->itlb_before_ctx_switch) { + ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, 0); + if (ret) + return ret; + } + + ret = intel_ring_begin(ring, 6); + if (ret) + return ret; + + if (IS_GEN7(ring->dev)) + intel_ring_emit(ring, MI_ARB_ON_OFF | MI_ARB_DISABLE); + else + intel_ring_emit(ring, MI_NOOP); + + intel_ring_emit(ring, MI_NOOP); + intel_ring_emit(ring, MI_SET_CONTEXT); + intel_ring_emit(ring, new_context->obj->gtt_offset | + MI_MM_SPACE_GTT | + MI_SAVE_EXT_STATE_EN | + MI_RESTORE_EXT_STATE_EN | + hw_flags); + /* w/a: MI_SET_CONTEXT must always be followed by MI_NOOP */ + intel_ring_emit(ring, MI_NOOP); + + if (IS_GEN7(ring->dev)) + intel_ring_emit(ring, MI_ARB_ON_OFF | MI_ARB_ENABLE); + else + intel_ring_emit(ring, MI_NOOP); + + intel_ring_advance(ring); + + return ret; +} + +static int do_switch(struct i915_hw_context *to) +{ + struct intel_ring_buffer *ring = to->ring; + struct drm_i915_gem_object *from_obj = ring->last_context_obj; + u32 hw_flags = 0; + int ret; + + KASSERT(!(from_obj != NULL && from_obj->pin_count == 0), + ("i915_gem_context: invalid \"from\" context")); + + if (from_obj == to->obj) + return 0; + + ret = i915_gem_object_pin(to->obj, CONTEXT_ALIGN, false); + if (ret) + return ret; + + /* Clear this page out of any CPU caches for coherent swap-in/out. Note + * that thanks to write = false in this call and us not setting any gpu + * write domains when putting a context object onto the active list + * (when switching away from it), this won't block. + * XXX: We need a real interface to do this instead of trickery. */ + ret = i915_gem_object_set_to_gtt_domain(to->obj, false); + if (ret) { + i915_gem_object_unpin(to->obj); + return ret; + } + + if (!to->obj->has_global_gtt_mapping) + i915_gem_gtt_bind_object(to->obj); + + if (!to->is_initialized || is_default_context(to)) + hw_flags |= MI_RESTORE_INHIBIT; + else if (from_obj == to->obj) /* not yet expected */ + hw_flags |= MI_FORCE_RESTORE; + + ret = mi_set_context(ring, to, hw_flags); + if (ret) { + i915_gem_object_unpin(to->obj); + return ret; + } + + /* The backing object for the context is done after switching to the + * *next* context. Therefore we cannot retire the previous context until + * the next context has already started running. In fact, the below code + * is a bit suboptimal because the retiring can occur simply after the + * MI_SET_CONTEXT instead of when the next seqno has completed. + */ + if (from_obj != NULL) { + from_obj->base.read_domains = I915_GEM_DOMAIN_INSTRUCTION; + i915_gem_object_move_to_active(from_obj, ring, + i915_gem_next_request_seqno(ring)); + /* As long as MI_SET_CONTEXT is serializing, ie. it flushes the + * whole damn pipeline, we don't need to explicitly mark the + * object dirty. The only exception is that the context must be + * correct in case the object gets swapped out. Ideally we'd be + * able to defer doing this until we know the object would be + * swapped, but there is no way to do that yet. + */ + from_obj->dirty = 1; + KASSERT(from_obj->ring == ring, ("i915_gem_context: from_ring != ring")); + i915_gem_object_unpin(from_obj); + + drm_gem_object_unreference(&from_obj->base); + } + + drm_gem_object_reference(&to->obj->base); + ring->last_context_obj = to->obj; + to->is_initialized = true; + + return 0; +} + +/** + * i915_switch_context() - perform a GPU context switch. 
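(Worked example, standalone and with an invented GTT offset: the target dword mi_set_context() emits. The MI_* bit values are reproduced here as assumptions matching i915_reg.h; CONTEXT_ALIGN's 64 KiB alignment is what keeps the low bits free for the flags.)

#include <assert.h>
#include <stdint.h>

#define MI_MM_SPACE_GTT		(1 << 8)
#define MI_SAVE_EXT_STATE_EN	(1 << 3)
#define MI_RESTORE_EXT_STATE_EN	(1 << 2)
#define MI_RESTORE_INHIBIT	(1 << 0)

int
main(void)
{
	uint32_t gtt_offset = 0x10000;	/* invented; CONTEXT_ALIGN-aligned */
	uint32_t hw_flags = MI_RESTORE_INHIBIT;	/* first switch to a context */
	uint32_t dw = gtt_offset | MI_MM_SPACE_GTT | MI_SAVE_EXT_STATE_EN |
	    MI_RESTORE_EXT_STATE_EN | hw_flags;

	/* 64 KiB alignment guarantees the flag bits never collide. */
	assert((gtt_offset & ((64 << 10) - 1)) == 0);
	assert(dw == 0x1010d);
	return (0);
}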
@ring: ring for which we'll execute the context switch + * @file: file associated with the context, may be NULL + * @to_id: context id number + * + * The context life cycle is simple. The context refcount is incremented and + * decremented by 1 on create and destroy. If the context is in use by the GPU, + * it will have a refcount > 1. This allows us to destroy the context abstract + * object while letting the normal object tracking destroy the backing BO. + */ +int i915_switch_context(struct intel_ring_buffer *ring, + struct drm_file *file, + int to_id) +{ + struct drm_i915_private *dev_priv = ring->dev->dev_private; + struct i915_hw_context *to; + + if (dev_priv->hw_contexts_disabled) + return 0; + + if (ring != &dev_priv->rings[RCS]) + return 0; + + if (to_id == DEFAULT_CONTEXT_ID) { + to = ring->default_context; + } else { + if (file == NULL) + return -EINVAL; + + to = i915_gem_context_get(file->driver_priv, to_id); + if (to == NULL) + return -ENOENT; + } + + return do_switch(to); +} + +int i915_gem_context_create_ioctl(struct drm_device *dev, void *data, + struct drm_file *file) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_i915_gem_context_create *args = data; + struct drm_i915_file_private *file_priv = file->driver_priv; + struct i915_hw_context *ctx; + int ret; + + if (!(dev->driver->driver_features & DRIVER_GEM)) + return -ENODEV; + + if (dev_priv->hw_contexts_disabled) + return -ENODEV; + + ret = i915_mutex_lock_interruptible(dev); + if (ret) + return ret; + + ret = create_hw_context(dev, file_priv, &ctx); + DRM_UNLOCK(dev); + if (ret != 0) + return (ret); + + args->ctx_id = ctx->id; + DRM_DEBUG_DRIVER("HW context %d created\n", args->ctx_id); + + return 0; +} + +int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data, + struct drm_file *file) +{ + struct drm_i915_gem_context_destroy *args = data; + struct drm_i915_file_private *file_priv = file->driver_priv; + struct i915_hw_context *ctx; + int ret; + + if (!(dev->driver->driver_features & DRIVER_GEM)) + return -ENODEV; + + ret = i915_mutex_lock_interruptible(dev); + if (ret) + return ret; + + ctx = i915_gem_context_get(file_priv, args->ctx_id); + if (!ctx) { + DRM_UNLOCK(dev); + return -ENOENT; + } + + do_destroy(ctx); + + DRM_UNLOCK(dev); + + DRM_DEBUG_DRIVER("HW context %d destroyed\n", args->ctx_id); + return 0; +} diff --git a/sys/dev/drm2/i915/i915_gem_execbuffer.c b/sys/dev/drm2/i915/i915_gem_execbuffer.c index 21e331f1b9f1..e47141a29aa0 100644 --- a/sys/dev/drm2/i915/i915_gem_execbuffer.c +++ b/sys/dev/drm2/i915/i915_gem_execbuffer.c @@ -1130,6 +1130,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, struct drm_clip_rect *cliprects = NULL; struct intel_ring_buffer *ring; vm_page_t **relocs_ma; + u32 ctx_id = i915_execbuffer2_get_context_id(*args); u32 exec_start, exec_len; u32 seqno; u32 mask; @@ -1158,6 +1159,11 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, return -EINVAL; } ring = &dev_priv->rings[VCS]; + if (ctx_id != 0) { + DRM_DEBUG("Ring %s doesn't support contexts\n", + ring->name); + return -EPERM; + } break; case I915_EXEC_BLT: if (!HAS_BLT(dev)) { return -EINVAL; } ring = &dev_priv->rings[BCS]; + if (ctx_id != 0) { + DRM_DEBUG("Ring %s doesn't support contexts\n", + ring->name); + return -EPERM; + } break; default: DRM_DEBUG("execbuf with unknown ring: %d\n", @@
-1306,6 +1317,10 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, if (ret) goto err; + ret = i915_switch_context(ring, file, ctx_id); + if (ret) + goto err; + seqno = i915_gem_next_request_seqno(ring); for (i = 0; i < I915_NUM_RINGS - 1; i++) { if (seqno < ring->sync_seqno[i]) { @@ -1461,6 +1476,7 @@ i915_gem_execbuffer(struct drm_device *dev, void *data, exec2.num_cliprects = args->num_cliprects; exec2.cliprects_ptr = args->cliprects_ptr; exec2.flags = I915_EXEC_RENDER; + i915_execbuffer2_set_context_id(exec2, 0); ret = i915_gem_do_execbuffer(dev, data, file, &exec2, exec2_list); if (!ret) { diff --git a/sys/dev/drm2/i915/i915_gem_gtt.c b/sys/dev/drm2/i915/i915_gem_gtt.c index a4ca76fce7ee..6204c06349cb 100644 --- a/sys/dev/drm2/i915/i915_gem_gtt.c +++ b/sys/dev/drm2/i915/i915_gem_gtt.c @@ -291,6 +291,9 @@ i915_gem_gtt_bind_object(struct drm_i915_gem_object *obj) agp_type = cache_level_to_agp_type(obj->base.dev, obj->cache_level); intel_gtt_insert_pages(obj->gtt_space->start >> PAGE_SHIFT, obj->base.size >> PAGE_SHIFT, obj->pages, agp_type); + + obj->has_global_gtt_mapping = 1; + return (0); } @@ -308,6 +311,8 @@ i915_gem_gtt_rebind_object(struct drm_i915_gem_object *obj, intel_gtt_insert_pages(obj->gtt_space->start >> PAGE_SHIFT, obj->base.size >> PAGE_SHIFT, obj->pages, agp_type); + + obj->has_global_gtt_mapping = 0; } void diff --git a/sys/dev/drm2/i915/i915_irq.c b/sys/dev/drm2/i915/i915_irq.c index 16afb25ca6c7..15d0a613d101 100644 --- a/sys/dev/drm2/i915/i915_irq.c +++ b/sys/dev/drm2/i915/i915_irq.c @@ -717,7 +717,7 @@ i915_error_work_func(void *context, int pending) if (atomic_load_acq_int(&dev_priv->mm.wedged)) { DRM_DEBUG("i915: resetting chip\n"); /* kobject_uevent_env(&dev->primary->kdev.kobj, KOBJ_CHANGE, reset_event); */ - if (!i915_reset(dev, GRDOM_RENDER)) { + if (!i915_reset(dev)) { atomic_store_rel_int(&dev_priv->mm.wedged, 0); /* kobject_uevent_env(&dev->primary->kdev.kobj, KOBJ_CHANGE, reset_done_event); */ } diff --git a/sys/dev/drm2/i915/i915_reg.h b/sys/dev/drm2/i915/i915_reg.h index 754e535f78fc..492fac480429 100644 --- a/sys/dev/drm2/i915/i915_reg.h +++ b/sys/dev/drm2/i915/i915_reg.h @@ -80,6 +80,7 @@ __FBSDID("$FreeBSD$"); #define GRDOM_FULL (0<<2) #define GRDOM_RENDER (1<<2) #define GRDOM_MEDIA (3<<2) +#define GRDOM_RESET_ENABLE (1<<0) #define GEN6_MBCUNIT_SNPCR 0x900c /* for LLC config */ #define GEN6_MBC_SNPCR_SHIFT 21 @@ -200,6 +201,10 @@ __FBSDID("$FreeBSD$"); #define MI_DISPLAY_FLIP MI_INSTR(0x14, 2) #define MI_DISPLAY_FLIP_I915 MI_INSTR(0x14, 1) #define MI_DISPLAY_FLIP_PLANE(n) ((n) << 20) +#define MI_ARB_ON_OFF MI_INSTR(0x08, 0) +#define MI_ARB_ENABLE (1<<0) +#define MI_ARB_DISABLE (0<<0) + #define MI_SET_CONTEXT MI_INSTR(0x18, 0) #define MI_MM_SPACE_GTT (1<<8) #define MI_MM_SPACE_PHYSICAL (0<<8) @@ -1361,6 +1366,31 @@ __FBSDID("$FreeBSD$"); */ #define CCID 0x2180 #define CCID_EN (1<<0) +#define CXT_SIZE 0x21a0 +#define GEN6_CXT_POWER_SIZE(cxt_reg) ((cxt_reg >> 24) & 0x3f) +#define GEN6_CXT_RING_SIZE(cxt_reg) ((cxt_reg >> 18) & 0x3f) +#define GEN6_CXT_RENDER_SIZE(cxt_reg) ((cxt_reg >> 12) & 0x3f) +#define GEN6_CXT_EXTENDED_SIZE(cxt_reg) ((cxt_reg >> 6) & 0x3f) +#define GEN6_CXT_PIPELINE_SIZE(cxt_reg) ((cxt_reg >> 0) & 0x3f) +#define GEN6_CXT_TOTAL_SIZE(cxt_reg) (GEN6_CXT_POWER_SIZE(cxt_reg) + \ + GEN6_CXT_RING_SIZE(cxt_reg) + \ + GEN6_CXT_RENDER_SIZE(cxt_reg) + \ + GEN6_CXT_EXTENDED_SIZE(cxt_reg) + \ + GEN6_CXT_PIPELINE_SIZE(cxt_reg)) +#define GEN7_CXT_SIZE 0x21a8 +#define GEN7_CXT_POWER_SIZE(ctx_reg) ((ctx_reg >> 25) & 0x7f) +#define 
GEN7_CXT_RING_SIZE(ctx_reg) ((ctx_reg >> 22) & 0x7) +#define GEN7_CXT_RENDER_SIZE(ctx_reg) ((ctx_reg >> 16) & 0x3f) +#define GEN7_CXT_EXTENDED_SIZE(ctx_reg) ((ctx_reg >> 9) & 0x7f) +#define GEN7_CXT_GT1_SIZE(ctx_reg) ((ctx_reg >> 6) & 0x7) +#define GEN7_CXT_VFSTATE_SIZE(ctx_reg) ((ctx_reg >> 0) & 0x3f) +#define GEN7_CXT_TOTAL_SIZE(ctx_reg) (GEN7_CXT_POWER_SIZE(ctx_reg) + \ + GEN7_CXT_RING_SIZE(ctx_reg) + \ + GEN7_CXT_RENDER_SIZE(ctx_reg) + \ + GEN7_CXT_EXTENDED_SIZE(ctx_reg) + \ + GEN7_CXT_GT1_SIZE(ctx_reg) + \ + GEN7_CXT_VFSTATE_SIZE(ctx_reg)) + /* * Overlay regs */ diff --git a/sys/dev/drm2/i915/intel_ringbuffer.c b/sys/dev/drm2/i915/intel_ringbuffer.c index 7d6bd947b906..26bc6957c412 100644 --- a/sys/dev/drm2/i915/intel_ringbuffer.c +++ b/sys/dev/drm2/i915/intel_ringbuffer.c @@ -430,6 +430,13 @@ static int init_render_ring(struct intel_ring_buffer *ring) */ I915_WRITE(CACHE_MODE_0, CM0_STC_EVICT_DISABLE_LRA_SNB << CM0_MASK_SHIFT); + + /* This is not explicitly set for GEN6, so read the register. + * see intel_ring_mi_set_context() for why we care. + * TODO: consider explicitly setting the bit for GEN5 + */ + ring->itlb_before_ctx_switch = + !!(I915_READ(GFX_MODE) & GFX_TLB_INVALIDATE_ALWAYS); } if (INTEL_INFO(dev)->gen >= 6) { diff --git a/sys/dev/drm2/i915/intel_ringbuffer.h b/sys/dev/drm2/i915/intel_ringbuffer.h index c20777fc9869..c391b12c9f9f 100644 --- a/sys/dev/drm2/i915/intel_ringbuffer.h +++ b/sys/dev/drm2/i915/intel_ringbuffer.h @@ -122,11 +122,24 @@ struct intel_ring_buffer { */ uint32_t outstanding_lazy_request; + /** + * Do an explicit TLB flush before MI_SET_CONTEXT + */ + bool itlb_before_ctx_switch; + struct i915_hw_context *default_context; + struct drm_i915_gem_object *last_context_obj; + drm_local_map_t map; void *private; }; +static inline bool +intel_ring_initialized(struct intel_ring_buffer *ring) +{ + return ring->obj != NULL; +} + static inline unsigned intel_ring_flag(struct intel_ring_buffer *ring) { diff --git a/sys/modules/drm2/i915kms/Makefile b/sys/modules/drm2/i915kms/Makefile index 75f08d24182b..61657c61661c 100644 --- a/sys/modules/drm2/i915kms/Makefile +++ b/sys/modules/drm2/i915kms/Makefile @@ -7,6 +7,7 @@ SRCS = \ i915_dma.c \ i915_drv.c \ i915_gem.c \ + i915_gem_context.c \ i915_gem_execbuffer.c \ i915_gem_evict.c \ i915_gem_gtt.c \ diff --git a/sys/sys/param.h b/sys/sys/param.h index dd7cb83c676a..e85fcd923668 100644 --- a/sys/sys/param.h +++ b/sys/sys/param.h @@ -58,7 +58,7 @@ * in the range 5 to 9. */ #undef __FreeBSD_version -#define __FreeBSD_version 1100034 /* Master, propagated to newvers */ +#define __FreeBSD_version 1100035 /* Master, propagated to newvers */ /* * __FreeBSD_kernel__ indicates that this system uses the kernel of FreeBSD,
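(A minimal sketch for out-of-tree consumers, assuming a FreeBSD build environment: the __FreeBSD_version bump above can be used to detect the presence of the new context ioctls, as noted by "Relnotes: yes".)

#include <sys/param.h>

#if __FreeBSD_version >= 1100035
/* The i915 HW context ioctls (DRM_IOCTL_I915_GEM_CONTEXT_*) exist. */
#define HAVE_I915_HW_CONTEXTS 1
#else
#define HAVE_I915_HW_CONTEXTS 0
#endif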