2024-05-10 cde9070d9970eef1f7ec2360586c802a16230ad8
--- a/kernel/drivers/gpu/drm/i915/gvt/cmd_parser.c
+++ b/kernel/drivers/gpu/drm/i915/gvt/cmd_parser.c
@@ -35,7 +35,9 @@
  */
 
 #include <linux/slab.h>
+
 #include "i915_drv.h"
+#include "gt/intel_ring.h"
 #include "gvt.h"
 #include "i915_pvinfo.h"
 #include "trace.h"
@@ -55,10 +57,10 @@
 	int low;
 };
 struct decode_info {
-	char *name;
+	const char *name;
 	int op_len;
 	int nr_sub_op;
-	struct sub_op_bits *sub_op;
+	const struct sub_op_bits *sub_op;
 };
 
 #define MAX_CMD_BUDGET	0x7fffffff
@@ -162,6 +164,7 @@
 #define OP_STATE_BASE_ADDRESS	OP_3D_MEDIA(0x0, 0x1, 0x01)
 #define OP_STATE_SIP		OP_3D_MEDIA(0x0, 0x1, 0x02)
 #define OP_3D_MEDIA_0_1_4	OP_3D_MEDIA(0x0, 0x1, 0x04)
+#define OP_SWTESS_BASE_ADDRESS	OP_3D_MEDIA(0x0, 0x1, 0x03)
 
 #define OP_3DSTATE_VF_STATISTICS_GM45	OP_3D_MEDIA(0x1, 0x0, 0x0B)
 
@@ -374,35 +377,51 @@
 #define ADDR_FIX_4(x1, x2, x3, x4)	(ADDR_FIX_1(x1) | ADDR_FIX_3(x2, x3, x4))
 #define ADDR_FIX_5(x1, x2, x3, x4, x5)	(ADDR_FIX_1(x1) | ADDR_FIX_4(x2, x3, x4, x5))
 
+#define DWORD_FIELD(dword, end, start) \
+	FIELD_GET(GENMASK(end, start), cmd_val(s, dword))
+
+#define OP_LENGTH_BIAS 2
+#define CMD_LEN(value) (value + OP_LENGTH_BIAS)
+
+static int gvt_check_valid_cmd_length(int len, int valid_len)
+{
+	if (valid_len != len) {
+		gvt_err("len is not valid: len=%u valid_len=%u\n",
+			len, valid_len);
+		return -EFAULT;
+	}
+	return 0;
+}
+
 struct cmd_info {
-	char *name;
+	const char *name;
 	u32 opcode;
 
-#define F_LEN_MASK	(1U<<0)
+#define F_LEN_MASK	3U
 #define F_LEN_CONST	1U
 #define F_LEN_VAR	0U
+/* value is const although LEN maybe variable */
+#define F_LEN_VAR_FIXED	(1<<1)
 
 /*
  * command has its own ip advance logic
  * e.g. MI_BATCH_START, MI_BATCH_END
  */
-#define F_IP_ADVANCE_CUSTOM (1<<1)
-
-#define F_POST_HANDLE	(1<<2)
+#define F_IP_ADVANCE_CUSTOM (1<<2)
 	u32 flag;
 
-#define	R_RCS	(1 << RCS)
-#define	R_VCS1	(1 << VCS)
-#define	R_VCS2	(1 << VCS2)
+#define	R_RCS	BIT(RCS0)
+#define	R_VCS1	BIT(VCS0)
+#define	R_VCS2	BIT(VCS1)
 #define	R_VCS	(R_VCS1 | R_VCS2)
-#define R_BCS	(1 << BCS)
-#define R_VECS	(1 << VECS)
+#define R_BCS	BIT(BCS0)
+#define R_VECS	BIT(VECS0)
 #define R_ALL (R_RCS | R_VCS | R_BCS | R_VECS)
 /* rings that support this cmd: BLT/RCS/VCS/VECS */
-	uint16_t rings;
+	u16 rings;
 
 /* devices that support this cmd: SNB/IVB/HSW/... */
-	uint16_t devices;
+	u16 devices;
 
 /* which DWords are address that need fix up.
  * bit 0 means a 32-bit non address operand in command
@@ -412,20 +431,23 @@
  * No matter the address length, each address only takes
  * one bit in the bitmap.
  */
-	uint16_t addr_bitmap;
+	u16 addr_bitmap;
 
 /* flag == F_LEN_CONST : command length
 * flag == F_LEN_VAR : length bias bits
 * Note: length is in DWord
 */
-	uint8_t len;
+	u32 len;
 
 	parser_cmd_handler handler;
+
+	/* valid length in DWord */
+	u32 valid_len;
};
 
 struct cmd_entry {
 	struct hlist_node hlist;
-	struct cmd_info *info;
+	const struct cmd_info *info;
 };
 
 enum {
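
The helpers introduced above carry the GPU command-length convention the rest of this patch relies on: a variable-length command stores (DWord count - OP_LENGTH_BIAS) in its header, so CMD_LEN(n) converts a header field value back into a real DWord count, and DWORD_FIELD() pulls an arbitrary bit range out of a command DWord via the kernel's FIELD_GET()/GENMASK() helpers. A minimal sketch of how a handler combines them with gvt_check_valid_cmd_length() (hypothetical handler name; the MI_DISPLAY_FLIP hunk further down uses exactly this pattern):

	/* Hypothetical handler sketch: reject a variable-length command whose
	 * actual DWord count does not match what its header fields promise.
	 */
	static int cmd_handler_example(struct parser_exec_state *s)
	{
		u32 valid_len = CMD_LEN(1);	/* header value 1 -> 3 DWords */

		/* one extra DWord when bits 1:0 of DWord 2 decode to 2 */
		if (DWORD_FIELD(2, 1, 0) == 2)
			valid_len++;

		return gvt_check_valid_cmd_length(cmd_length(s), valid_len);
	}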
@@ -441,7 +463,7 @@
 
 struct parser_exec_state {
 	struct intel_vgpu *vgpu;
-	int ring_id;
+	const struct intel_engine_cs *engine;
 
 	int buf_type;
 
@@ -474,7 +496,7 @@
 	int saved_buf_addr_type;
 	bool is_ctx_wa;
 
-	struct cmd_info *info;
+	const struct cmd_info *info;
 
 	struct intel_vgpu_workload *workload;
 };
@@ -485,12 +507,12 @@
 static unsigned long bypass_scan_mask = 0;
 
 /* ring ALL, type = 0 */
-static struct sub_op_bits sub_op_mi[] = {
+static const struct sub_op_bits sub_op_mi[] = {
 	{31, 29},
 	{28, 23},
 };
 
-static struct decode_info decode_info_mi = {
+static const struct decode_info decode_info_mi = {
 	"MI",
 	OP_LEN_MI,
 	ARRAY_SIZE(sub_op_mi),
@@ -498,12 +520,12 @@
 };
 
 /* ring RCS, command type 2 */
-static struct sub_op_bits sub_op_2d[] = {
+static const struct sub_op_bits sub_op_2d[] = {
 	{31, 29},
 	{28, 22},
 };
 
-static struct decode_info decode_info_2d = {
+static const struct decode_info decode_info_2d = {
 	"2D",
 	OP_LEN_2D,
 	ARRAY_SIZE(sub_op_2d),
@@ -511,14 +533,14 @@
 };
 
 /* ring RCS, command type 3 */
-static struct sub_op_bits sub_op_3d_media[] = {
+static const struct sub_op_bits sub_op_3d_media[] = {
 	{31, 29},
 	{28, 27},
 	{26, 24},
 	{23, 16},
 };
 
-static struct decode_info decode_info_3d_media = {
+static const struct decode_info decode_info_3d_media = {
 	"3D_Media",
 	OP_LEN_3D_MEDIA,
 	ARRAY_SIZE(sub_op_3d_media),
@@ -526,7 +548,7 @@
 };
 
 /* ring VCS, command type 3 */
-static struct sub_op_bits sub_op_mfx_vc[] = {
+static const struct sub_op_bits sub_op_mfx_vc[] = {
 	{31, 29},
 	{28, 27},
 	{26, 24},
@@ -534,7 +556,7 @@
 	{20, 16},
 };
 
-static struct decode_info decode_info_mfx_vc = {
+static const struct decode_info decode_info_mfx_vc = {
 	"MFX_VC",
 	OP_LEN_MFX_VC,
 	ARRAY_SIZE(sub_op_mfx_vc),
@@ -542,7 +564,7 @@
 };
 
 /* ring VECS, command type 3 */
-static struct sub_op_bits sub_op_vebox[] = {
+static const struct sub_op_bits sub_op_vebox[] = {
 	{31, 29},
 	{28, 27},
 	{26, 24},
@@ -550,15 +572,15 @@
 	{20, 16},
 };
 
-static struct decode_info decode_info_vebox = {
+static const struct decode_info decode_info_vebox = {
 	"VEBOX",
 	OP_LEN_VEBOX,
 	ARRAY_SIZE(sub_op_vebox),
 	sub_op_vebox,
 };
 
-static struct decode_info *ring_decode_info[I915_NUM_ENGINES][8] = {
-	[RCS] = {
+static const struct decode_info *ring_decode_info[I915_NUM_ENGINES][8] = {
+	[RCS0] = {
 		&decode_info_mi,
 		NULL,
 		NULL,
@@ -569,7 +591,7 @@
 		NULL,
 	},
 
-	[VCS] = {
+	[VCS0] = {
 		&decode_info_mi,
 		NULL,
 		NULL,
@@ -580,7 +602,7 @@
 		NULL,
 	},
 
-	[BCS] = {
+	[BCS0] = {
 		&decode_info_mi,
 		NULL,
 		&decode_info_2d,
@@ -591,7 +613,7 @@
 		NULL,
 	},
 
-	[VECS] = {
+	[VECS0] = {
 		&decode_info_mi,
 		NULL,
 		NULL,
@@ -602,7 +624,7 @@
 		NULL,
 	},
 
-	[VCS2] = {
+	[VCS1] = {
 		&decode_info_mi,
 		NULL,
 		NULL,
@@ -614,40 +636,42 @@
 	},
 };
 
-static inline u32 get_opcode(u32 cmd, int ring_id)
+static inline u32 get_opcode(u32 cmd, const struct intel_engine_cs *engine)
 {
-	struct decode_info *d_info;
+	const struct decode_info *d_info;
 
-	d_info = ring_decode_info[ring_id][CMD_TYPE(cmd)];
+	d_info = ring_decode_info[engine->id][CMD_TYPE(cmd)];
 	if (d_info == NULL)
 		return INVALID_OP;
 
 	return cmd >> (32 - d_info->op_len);
 }
 
-static inline struct cmd_info *find_cmd_entry(struct intel_gvt *gvt,
-		unsigned int opcode, int ring_id)
+static inline const struct cmd_info *
+find_cmd_entry(struct intel_gvt *gvt, unsigned int opcode,
+	       const struct intel_engine_cs *engine)
 {
 	struct cmd_entry *e;
 
 	hash_for_each_possible(gvt->cmd_table, e, hlist, opcode) {
-		if ((opcode == e->info->opcode) &&
-				(e->info->rings & (1 << ring_id)))
+		if (opcode == e->info->opcode &&
+		    e->info->rings & engine->mask)
 			return e->info;
 	}
 	return NULL;
 }
 
-static inline struct cmd_info *get_cmd_info(struct intel_gvt *gvt,
-		u32 cmd, int ring_id)
+static inline const struct cmd_info *
+get_cmd_info(struct intel_gvt *gvt, u32 cmd,
+	     const struct intel_engine_cs *engine)
 {
 	u32 opcode;
 
-	opcode = get_opcode(cmd, ring_id);
+	opcode = get_opcode(cmd, engine);
 	if (opcode == INVALID_OP)
 		return NULL;
 
-	return find_cmd_entry(gvt, opcode, ring_id);
+	return find_cmd_entry(gvt, opcode, engine);
 }
 
 static inline u32 sub_op_val(u32 cmd, u32 hi, u32 low)
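
For context on the decode path just changed: an opcode is simply the top op_len bits of the first command DWord, with op_len supplied by the per-ring decode_info table, and the hash-table walk now filters candidates with e->info->rings & engine->mask instead of the open-coded 1 << ring_id shift. A worked example, assuming OP_LEN_3D_MEDIA is 16 as defined earlier in this file:

	/* Sketch: decoding a PIPE_CONTROL header on RCS (illustrative values) */
	u32 cmd = 0x7a000004;		/* GFX_OP_PIPE_CONTROL(6) */
	u32 opcode = cmd >> (32 - 16);	/* top 16 bits -> 0x7a00 */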
@@ -655,12 +679,12 @@
 	return (cmd >> low) & ((1U << (hi - low + 1)) - 1);
 }
 
-static inline void print_opcode(u32 cmd, int ring_id)
+static inline void print_opcode(u32 cmd, const struct intel_engine_cs *engine)
 {
-	struct decode_info *d_info;
+	const struct decode_info *d_info;
 	int i;
 
-	d_info = ring_decode_info[ring_id][CMD_TYPE(cmd)];
+	d_info = ring_decode_info[engine->id][CMD_TYPE(cmd)];
 	if (d_info == NULL)
 		return;
 
@@ -689,10 +713,11 @@
 	int cnt = 0;
 	int i;
 
-	gvt_dbg_cmd("  vgpu%d RING%d: ring_start(%08lx) ring_end(%08lx)"
-			" ring_head(%08lx) ring_tail(%08lx)\n", s->vgpu->id,
-			s->ring_id, s->ring_start, s->ring_start + s->ring_size,
-			s->ring_head, s->ring_tail);
+	gvt_dbg_cmd("  vgpu%d RING%s: ring_start(%08lx) ring_end(%08lx)"
+		    " ring_head(%08lx) ring_tail(%08lx)\n",
+		    s->vgpu->id, s->engine->name,
+		    s->ring_start, s->ring_start + s->ring_size,
+		    s->ring_head, s->ring_tail);
 
 	gvt_dbg_cmd("  %s %s ip_gma(%08lx) ",
 			s->buf_type == RING_BUFFER_INSTRUCTION ?
@@ -709,7 +734,7 @@
 			s->ip_va, cmd_val(s, 0), cmd_val(s, 1),
 			cmd_val(s, 2), cmd_val(s, 3));
 
-	print_opcode(cmd_val(s, 0), s->ring_id);
+	print_opcode(cmd_val(s, 0), s->engine);
 
 	s->ip_va = (u32 *)((((u64)s->ip_va) >> 12) << 12);
 
@@ -776,7 +801,7 @@
 	return 0;
 }
 
-static inline int get_cmd_length(struct cmd_info *info, u32 cmd)
+static inline int get_cmd_length(const struct cmd_info *info, u32 cmd)
 {
 	if ((info->flag & F_LEN_MASK) == F_LEN_CONST)
 		return info->len;
@@ -820,7 +845,6 @@
 	unsigned int data;
 	u32 ring_base;
 	u32 nopid;
-	struct drm_i915_private *dev_priv = s->vgpu->gvt->dev_priv;
 
 	if (!strcmp(cmd, "lri"))
 		data = cmd_val(s, index + 1);
@@ -830,7 +854,7 @@
 		return -EINVAL;
 	}
 
-	ring_base = dev_priv->engine[s->ring_id]->mmio_base;
+	ring_base = s->engine->mmio_base;
 	nopid = i915_mmio_reg_offset(RING_NOPID(ring_base));
 
 	if (!intel_gvt_in_force_nonpriv_whitelist(gvt, data) &&
@@ -858,6 +882,47 @@
 	return 0;
 }
 
+static int is_cmd_update_pdps(unsigned int offset,
+			      struct parser_exec_state *s)
+{
+	u32 base = s->workload->engine->mmio_base;
+	return i915_mmio_reg_equal(_MMIO(offset), GEN8_RING_PDP_UDW(base, 0));
+}
+
+static int cmd_pdp_mmio_update_handler(struct parser_exec_state *s,
+				       unsigned int offset, unsigned int index)
+{
+	struct intel_vgpu *vgpu = s->vgpu;
+	struct intel_vgpu_mm *shadow_mm = s->workload->shadow_mm;
+	struct intel_vgpu_mm *mm;
+	u64 pdps[GEN8_3LVL_PDPES];
+
+	if (shadow_mm->ppgtt_mm.root_entry_type ==
+	    GTT_TYPE_PPGTT_ROOT_L4_ENTRY) {
+		pdps[0] = (u64)cmd_val(s, 2) << 32;
+		pdps[0] |= cmd_val(s, 4);
+
+		mm = intel_vgpu_find_ppgtt_mm(vgpu, pdps);
+		if (!mm) {
+			gvt_vgpu_err("failed to get the 4-level shadow vm\n");
+			return -EINVAL;
+		}
+		intel_vgpu_mm_get(mm);
+		list_add_tail(&mm->ppgtt_mm.link,
+			      &s->workload->lri_shadow_mm);
+		*cmd_ptr(s, 2) = upper_32_bits(mm->ppgtt_mm.shadow_pdps[0]);
+		*cmd_ptr(s, 4) = lower_32_bits(mm->ppgtt_mm.shadow_pdps[0]);
+	} else {
+		/* Currently all guests use PML4 table and now can't
+		 * have a guest with 3-level table but uses LRI for
+		 * PPGTT update. So this is simply un-testable. */
+		GEM_BUG_ON(1);
+		gvt_vgpu_err("invalid shared shadow vm type\n");
+		return -EINVAL;
+	}
+	return 0;
+}
+
 static int cmd_reg_handler(struct parser_exec_state *s,
 		unsigned int offset, unsigned int index, char *cmd)
 {
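
The two functions added above catch a guest reprogramming its PPGTT via an LRI to the ring's PDP0 upper-DWord register: the guest page-directory pointer is reassembled from the LRI payload, matched to a shadow mm, and patched in place with the shadow PDP. How the 64-bit value is put back together (DWord 2 carries the UDW write, DWord 4 the LDW write, per the handler above):

	/* Reassembly as done in cmd_pdp_mmio_update_handler() */
	u64 pdps[GEN8_3LVL_PDPES];

	pdps[0] = (u64)cmd_val(s, 2) << 32;	/* PDP0 upper DWord */
	pdps[0] |= cmd_val(s, 4);		/* PDP0 lower DWord */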
@@ -871,7 +936,7 @@
 		return -EFAULT;
 	}
 
-	if (!intel_gvt_mmio_is_cmd_access(gvt, offset)) {
+	if (!intel_gvt_mmio_is_cmd_accessible(gvt, offset)) {
 		gvt_vgpu_err("%s access to non-render register (%x)\n",
 				cmd, offset);
 		return -EBADRQC;
@@ -896,14 +961,23 @@
 		patch_value(s, cmd_ptr(s, index), VGT_PVINFO_PAGE);
 	}
 
+	if (is_cmd_update_pdps(offset, s) &&
+	    cmd_pdp_mmio_update_handler(s, offset, index))
+		return -EINVAL;
+
 	/* TODO
-	 * Right now only scan LRI command on KBL and in inhibit context.
-	 * It's good enough to support initializing mmio by lri command in
-	 * vgpu inhibit context on KBL.
+	 * In order to let workload with inhibit context to generate
+	 * correct image data into memory, vregs values will be loaded to
+	 * hw via LRIs in the workload with inhibit context. But as
+	 * indirect context is loaded prior to LRIs in workload, we don't
+	 * want reg values specified in indirect context overwritten by
+	 * LRIs in workloads. So, when scanning an indirect context, we
+	 * update reg values in it into vregs, so LRIs in workload with
+	 * inhibit context will restore with correct values
 	 */
-	if (IS_KABYLAKE(s->vgpu->gvt->dev_priv) &&
-	    intel_gvt_mmio_is_in_ctx(gvt, offset) &&
-	    !strncmp(cmd, "lri", 3)) {
+	if (IS_GEN(s->engine->i915, 9) &&
+	    intel_gvt_mmio_is_sr_in_ctx(gvt, offset) &&
+	    !strncmp(cmd, "lri", 3)) {
 		intel_gvt_hypervisor_read_gpa(s->vgpu,
 			s->workload->ring_context_gpa + 12, &ctx_sr_ctl, 4);
 		/* check inhibit context */
@@ -918,8 +992,6 @@
 		}
 	}
 
-	/* TODO: Update the global mask if this MMIO is a masked-MMIO */
-	intel_gvt_mmio_set_cmd_accessed(gvt, offset);
 	return 0;
 }
 
@@ -939,18 +1011,14 @@
 {
 	int i, ret = 0;
 	int cmd_len = cmd_length(s);
-	struct intel_gvt *gvt = s->vgpu->gvt;
 
 	for (i = 1; i < cmd_len; i += 2) {
-		if (IS_BROADWELL(gvt->dev_priv) &&
-		    (s->ring_id != RCS)) {
-			if (s->ring_id == BCS &&
-			    cmd_reg(s, i) ==
-				i915_mmio_reg_offset(DERRMR))
+		if (IS_BROADWELL(s->engine->i915) && s->engine->id != RCS0) {
+			if (s->engine->id == BCS0 &&
+			    cmd_reg(s, i) == i915_mmio_reg_offset(DERRMR))
 				ret |= 0;
 			else
-				ret |= (cmd_reg_inhibit(s, i)) ?
-					-EBADRQC : 0;
+				ret |= cmd_reg_inhibit(s, i) ? -EBADRQC : 0;
 		}
 		if (ret)
 			break;
@@ -967,9 +1035,9 @@
 	int cmd_len = cmd_length(s);
 
 	for (i = 1; i < cmd_len; i += 2) {
-		if (IS_BROADWELL(s->vgpu->gvt->dev_priv))
+		if (IS_BROADWELL(s->engine->i915))
 			ret |= ((cmd_reg_inhibit(s, i) ||
-					(cmd_reg_inhibit(s, i + 1)))) ?
+				 (cmd_reg_inhibit(s, i + 1)))) ?
 				-EBADRQC : 0;
 		if (ret)
 			break;
@@ -995,7 +1063,7 @@
 	int cmd_len = cmd_length(s);
 
 	for (i = 1; i < cmd_len;) {
-		if (IS_BROADWELL(gvt->dev_priv))
+		if (IS_BROADWELL(s->engine->i915))
 			ret |= (cmd_reg_inhibit(s, i)) ? -EBADRQC : 0;
 		if (ret)
 			break;
@@ -1046,27 +1114,27 @@
 };
 
 static struct cmd_interrupt_event cmd_interrupt_events[] = {
-	[RCS] = {
+	[RCS0] = {
 		.pipe_control_notify = RCS_PIPE_CONTROL,
 		.mi_flush_dw = INTEL_GVT_EVENT_RESERVED,
 		.mi_user_interrupt = RCS_MI_USER_INTERRUPT,
 	},
-	[BCS] = {
+	[BCS0] = {
 		.pipe_control_notify = INTEL_GVT_EVENT_RESERVED,
 		.mi_flush_dw = BCS_MI_FLUSH_DW,
 		.mi_user_interrupt = BCS_MI_USER_INTERRUPT,
 	},
-	[VCS] = {
+	[VCS0] = {
 		.pipe_control_notify = INTEL_GVT_EVENT_RESERVED,
 		.mi_flush_dw = VCS_MI_FLUSH_DW,
 		.mi_user_interrupt = VCS_MI_USER_INTERRUPT,
 	},
-	[VCS2] = {
+	[VCS1] = {
 		.pipe_control_notify = INTEL_GVT_EVENT_RESERVED,
 		.mi_flush_dw = VCS2_MI_FLUSH_DW,
 		.mi_user_interrupt = VCS2_MI_USER_INTERRUPT,
 	},
-	[VECS] = {
+	[VECS0] = {
 		.pipe_control_notify = INTEL_GVT_EVENT_RESERVED,
 		.mi_flush_dw = VECS_MI_FLUSH_DW,
 		.mi_user_interrupt = VECS_MI_USER_INTERRUPT,
10801148 bool index_mode = false;
10811149 unsigned int post_sync;
10821150 int ret = 0;
1151
+ u32 hws_pga, val;
10831152
10841153 post_sync = (cmd_val(s, 1) & PIPE_CONTROL_POST_SYNC_OP_MASK) >> 14;
10851154
....@@ -1103,6 +1172,15 @@
11031172 index_mode = true;
11041173 ret |= cmd_address_audit(s, gma, sizeof(u64),
11051174 index_mode);
1175
+ if (ret)
1176
+ return ret;
1177
+ if (index_mode) {
1178
+ hws_pga = s->vgpu->hws_pga[s->engine->id];
1179
+ gma = hws_pga + gma;
1180
+ patch_value(s, cmd_ptr(s, 2), gma);
1181
+ val = cmd_val(s, 1) & (~(1 << 21));
1182
+ patch_value(s, cmd_ptr(s, 1), val);
1183
+ }
11061184 }
11071185 }
11081186 }
@@ -1111,15 +1189,15 @@
 		return ret;
 
 	if (cmd_val(s, 1) & PIPE_CONTROL_NOTIFY)
-		set_bit(cmd_interrupt_events[s->ring_id].pipe_control_notify,
-				s->workload->pending_events);
+		set_bit(cmd_interrupt_events[s->engine->id].pipe_control_notify,
+			s->workload->pending_events);
 	return 0;
 }
 
 static int cmd_handler_mi_user_interrupt(struct parser_exec_state *s)
 {
-	set_bit(cmd_interrupt_events[s->ring_id].mi_user_interrupt,
-			s->workload->pending_events);
+	set_bit(cmd_interrupt_events[s->engine->id].mi_user_interrupt,
+		s->workload->pending_events);
 	patch_value(s, cmd_ptr(s, 0), MI_NOOP);
 	return 0;
 }
@@ -1169,7 +1247,7 @@
 static int gen8_decode_mi_display_flip(struct parser_exec_state *s,
 		struct mi_display_flip_command_info *info)
 {
-	struct drm_i915_private *dev_priv = s->vgpu->gvt->dev_priv;
+	struct drm_i915_private *dev_priv = s->engine->i915;
 	struct plane_code_mapping gen8_plane_code[] = {
 		[0] = {PIPE_A, PLANE_A, PRIMARY_A_FLIP_DONE},
 		[1] = {PIPE_B, PLANE_A, PRIMARY_B_FLIP_DONE},
@@ -1186,7 +1264,7 @@
 	dword2 = cmd_val(s, 2);
 
 	v = (dword0 & GENMASK(21, 19)) >> 19;
-	if (WARN_ON(v >= ARRAY_SIZE(gen8_plane_code)))
+	if (drm_WARN_ON(&dev_priv->drm, v >= ARRAY_SIZE(gen8_plane_code)))
 		return -EBADRQC;
 
 	info->pipe = gen8_plane_code[v].pipe;
@@ -1206,7 +1284,7 @@
 		info->stride_reg = SPRSTRIDE(info->pipe);
 		info->surf_reg = SPRSURF(info->pipe);
 	} else {
-		WARN_ON(1);
+		drm_WARN_ON(&dev_priv->drm, 1);
 		return -EBADRQC;
 	}
 	return 0;
@@ -1215,7 +1293,7 @@
 static int skl_decode_mi_display_flip(struct parser_exec_state *s,
 		struct mi_display_flip_command_info *info)
 {
-	struct drm_i915_private *dev_priv = s->vgpu->gvt->dev_priv;
+	struct drm_i915_private *dev_priv = s->engine->i915;
 	struct intel_vgpu *vgpu = s->vgpu;
 	u32 dword0 = cmd_val(s, 0);
 	u32 dword1 = cmd_val(s, 1);
@@ -1274,15 +1352,12 @@
 static int gen8_check_mi_display_flip(struct parser_exec_state *s,
 		struct mi_display_flip_command_info *info)
 {
-	struct drm_i915_private *dev_priv = s->vgpu->gvt->dev_priv;
 	u32 stride, tile;
 
 	if (!info->async_flip)
 		return 0;
 
-	if (IS_SKYLAKE(dev_priv)
-		|| IS_KABYLAKE(dev_priv)
-		|| IS_BROXTON(dev_priv)) {
+	if (INTEL_GEN(s->engine->i915) >= 9) {
 		stride = vgpu_vreg_t(s->vgpu, info->stride_reg) & GENMASK(9, 0);
 		tile = (vgpu_vreg_t(s->vgpu, info->ctrl_reg) &
 				GENMASK(12, 10)) >> 10;
@@ -1305,14 +1380,12 @@
 		struct parser_exec_state *s,
 		struct mi_display_flip_command_info *info)
 {
-	struct drm_i915_private *dev_priv = s->vgpu->gvt->dev_priv;
+	struct drm_i915_private *dev_priv = s->engine->i915;
 	struct intel_vgpu *vgpu = s->vgpu;
 
 	set_mask_bits(&vgpu_vreg_t(vgpu, info->surf_reg), GENMASK(31, 12),
 		      info->surf_val << 12);
-	if (IS_SKYLAKE(dev_priv)
-		|| IS_KABYLAKE(dev_priv)
-		|| IS_BROXTON(dev_priv)) {
+	if (INTEL_GEN(dev_priv) >= 9) {
 		set_mask_bits(&vgpu_vreg_t(vgpu, info->stride_reg), GENMASK(9, 0),
 			      info->stride_val);
 		set_mask_bits(&vgpu_vreg_t(vgpu, info->ctrl_reg), GENMASK(12, 10),
@@ -1324,21 +1397,23 @@
 			      info->tile_val << 10);
 	}
 
-	vgpu_vreg_t(vgpu, PIPE_FRMCOUNT_G4X(info->pipe))++;
-	intel_vgpu_trigger_virtual_event(vgpu, info->event);
+	if (info->plane == PLANE_PRIMARY)
+		vgpu_vreg_t(vgpu, PIPE_FLIPCOUNT_G4X(info->pipe))++;
+
+	if (info->async_flip)
+		intel_vgpu_trigger_virtual_event(vgpu, info->event);
+	else
+		set_bit(info->event, vgpu->irq.flip_done_event[info->pipe]);
+
 	return 0;
 }
 
 static int decode_mi_display_flip(struct parser_exec_state *s,
 		struct mi_display_flip_command_info *info)
 {
-	struct drm_i915_private *dev_priv = s->vgpu->gvt->dev_priv;
-
-	if (IS_BROADWELL(dev_priv))
+	if (IS_BROADWELL(s->engine->i915))
 		return gen8_decode_mi_display_flip(s, info);
-	if (IS_SKYLAKE(dev_priv)
-		|| IS_KABYLAKE(dev_priv)
-		|| IS_BROXTON(dev_priv))
+	if (INTEL_GEN(s->engine->i915) >= 9)
 		return skl_decode_mi_display_flip(s, info);
 
 	return -ENODEV;
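
The repeated IS_SKYLAKE || IS_KABYLAKE || IS_BROXTON chains collapse into one generation test here and in the two hunks above; all three platforms are Gen9, so a single check also covers later Gen9+ parts that the old open-coded list would have rejected:

	/* the platform set matched by the consolidated test */
	if (INTEL_GEN(s->engine->i915) >= 9)	/* SKL, KBL, BXT, CFL, ... */
		return skl_decode_mi_display_flip(s, info);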
@@ -1364,6 +1439,15 @@
 	int ret;
 	int i;
 	int len = cmd_length(s);
+	u32 valid_len = CMD_LEN(1);
+
+	/* Flip Type == Stereo 3D Flip */
+	if (DWORD_FIELD(2, 1, 0) == 2)
+		valid_len++;
+	ret = gvt_check_valid_cmd_length(cmd_length(s),
+					 valid_len);
+	if (ret)
+		return ret;
 
 	ret = decode_mi_display_flip(s, &info);
 	if (ret) {
@@ -1483,11 +1567,20 @@
 	int op_size = (cmd_length(s) - 3) * sizeof(u32);
 	int core_id = (cmd_val(s, 2) & (1 << 0)) ? 1 : 0;
 	unsigned long gma, gma_low, gma_high;
+	u32 valid_len = CMD_LEN(2);
 	int ret = 0;
 
 	/* check ppggt */
 	if (!(cmd_val(s, 0) & (1 << 22)))
 		return 0;
+
+	/* check if QWORD */
+	if (DWORD_FIELD(0, 21, 21))
+		valid_len++;
+	ret = gvt_check_valid_cmd_length(cmd_length(s),
+					 valid_len);
+	if (ret)
+		return ret;
 
 	gma = cmd_val(s, 2) & GENMASK(31, 2);
 
@@ -1531,9 +1624,18 @@
 	int op_size = (1 << ((cmd_val(s, 0) & GENMASK(20, 19)) >> 19)) *
 			sizeof(u32);
 	unsigned long gma, gma_high;
+	u32 valid_len = CMD_LEN(1);
 	int ret = 0;
 
 	if (!(cmd_val(s, 0) & (1 << 22)))
+		return ret;
+
+	/* check inline data */
+	if (cmd_val(s, 0) & BIT(18))
+		valid_len = CMD_LEN(9);
+	ret = gvt_check_valid_cmd_length(cmd_length(s),
+					 valid_len);
+	if (ret)
 		return ret;
 
 	gma = cmd_val(s, 1) & GENMASK(31, 2);
@@ -1572,6 +1674,17 @@
 	unsigned long gma;
 	bool index_mode = false;
 	int ret = 0;
+	u32 hws_pga, val;
+	u32 valid_len = CMD_LEN(2);
+
+	ret = gvt_check_valid_cmd_length(cmd_length(s),
+					 valid_len);
+	if (ret) {
+		/* Check again for Qword */
+		ret = gvt_check_valid_cmd_length(cmd_length(s),
+						 ++valid_len);
+		return ret;
+	}
 
 	/* Check post-sync and ppgtt bit */
 	if (((cmd_val(s, 0) >> 14) & 0x3) && (cmd_val(s, 1) & (1 << 2))) {
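
MI_FLUSH_DW is the one command here with two legal sizes that a single valid_len cannot express: its post-sync write may be a DWord or a QWord. The hunk above therefore runs the length check twice, retrying with ++valid_len before giving up:

	/* lengths accepted for MI_FLUSH_DW by the check above:
	 *   CMD_LEN(2) == 4 DWords - DWord post-sync write
	 *   CMD_LEN(3) == 5 DWords - QWord post-sync write
	 */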
@@ -1582,11 +1695,20 @@
 		if (cmd_val(s, 0) & (1 << 21))
 			index_mode = true;
 		ret = cmd_address_audit(s, gma, sizeof(u64), index_mode);
+		if (ret)
+			return ret;
+		if (index_mode) {
+			hws_pga = s->vgpu->hws_pga[s->engine->id];
+			gma = hws_pga + gma;
+			patch_value(s, cmd_ptr(s, 1), gma);
+			val = cmd_val(s, 0) & (~(1 << 21));
+			patch_value(s, cmd_ptr(s, 0), val);
+		}
 	}
 	/* Check notify bit */
 	if ((cmd_val(s, 0) & (1 << 8)))
-		set_bit(cmd_interrupt_events[s->ring_id].mi_flush_dw,
-				s->workload->pending_events);
+		set_bit(cmd_interrupt_events[s->engine->id].mi_flush_dw,
+			s->workload->pending_events);
 	return ret;
 }
 
@@ -1634,17 +1756,25 @@
 static int batch_buffer_needs_scan(struct parser_exec_state *s)
 {
 	/* Decide privilege based on address space */
-	if (cmd_val(s, 0) & (1 << 8) &&
-			!(s->vgpu->scan_nonprivbb & (1 << s->ring_id)))
+	if (cmd_val(s, 0) & BIT(8) &&
+	    !(s->vgpu->scan_nonprivbb & s->engine->mask))
 		return 0;
+
 	return 1;
 }
 
-static int find_bb_size(struct parser_exec_state *s, unsigned long *bb_size)
+static const char *repr_addr_type(unsigned int type)
+{
+	return type == PPGTT_BUFFER ? "ppgtt" : "ggtt";
+}
+
+static int find_bb_size(struct parser_exec_state *s,
+			unsigned long *bb_size,
+			unsigned long *bb_end_cmd_offset)
 {
 	unsigned long gma = 0;
-	struct cmd_info *info;
-	uint32_t cmd_len = 0;
+	const struct cmd_info *info;
+	u32 cmd_len = 0;
 	bool bb_end = false;
 	struct intel_vgpu *vgpu = s->vgpu;
 	u32 cmd;
@@ -1652,6 +1782,7 @@
 		s->vgpu->gtt.ggtt_mm : s->workload->shadow_mm;
 
 	*bb_size = 0;
+	*bb_end_cmd_offset = 0;
 
 	/* get the start gm address of the batch buffer */
 	gma = get_gma_bb_from_cmd(s, 1);
@@ -1659,24 +1790,24 @@
 		return -EFAULT;
 
 	cmd = cmd_val(s, 0);
-	info = get_cmd_info(s->vgpu->gvt, cmd, s->ring_id);
+	info = get_cmd_info(s->vgpu->gvt, cmd, s->engine);
 	if (info == NULL) {
-		gvt_vgpu_err("unknown cmd 0x%x, opcode=0x%x, addr_type=%s, ring %d, workload=%p\n",
-				cmd, get_opcode(cmd, s->ring_id),
-				(s->buf_addr_type == PPGTT_BUFFER) ?
-				"ppgtt" : "ggtt", s->ring_id, s->workload);
+		gvt_vgpu_err("unknown cmd 0x%x, opcode=0x%x, addr_type=%s, ring %s, workload=%p\n",
+			     cmd, get_opcode(cmd, s->engine),
+			     repr_addr_type(s->buf_addr_type),
+			     s->engine->name, s->workload);
 		return -EBADRQC;
 	}
 	do {
 		if (copy_gma_to_hva(s->vgpu, mm,
-				gma, gma + 4, &cmd) < 0)
+				    gma, gma + 4, &cmd) < 0)
 			return -EFAULT;
-		info = get_cmd_info(s->vgpu->gvt, cmd, s->ring_id);
+		info = get_cmd_info(s->vgpu->gvt, cmd, s->engine);
 		if (info == NULL) {
-			gvt_vgpu_err("unknown cmd 0x%x, opcode=0x%x, addr_type=%s, ring %d, workload=%p\n",
-				cmd, get_opcode(cmd, s->ring_id),
-				(s->buf_addr_type == PPGTT_BUFFER) ?
-				"ppgtt" : "ggtt", s->ring_id, s->workload);
+			gvt_vgpu_err("unknown cmd 0x%x, opcode=0x%x, addr_type=%s, ring %s, workload=%p\n",
+				     cmd, get_opcode(cmd, s->engine),
+				     repr_addr_type(s->buf_addr_type),
+				     s->engine->name, s->workload);
 			return -EBADRQC;
 		}
 
@@ -1687,6 +1818,10 @@
 			/* chained batch buffer */
 			bb_end = true;
 		}
+
+		if (bb_end)
+			*bb_end_cmd_offset = *bb_size;
+
 		cmd_len = get_cmd_length(info, cmd) << 2;
 		*bb_size += cmd_len;
 		gma += cmd_len;
@@ -1695,23 +1830,47 @@
 	return 0;
 }
 
+static int audit_bb_end(struct parser_exec_state *s, void *va)
+{
+	struct intel_vgpu *vgpu = s->vgpu;
+	u32 cmd = *(u32 *)va;
+	const struct cmd_info *info;
+
+	info = get_cmd_info(s->vgpu->gvt, cmd, s->engine);
+	if (info == NULL) {
+		gvt_vgpu_err("unknown cmd 0x%x, opcode=0x%x, addr_type=%s, ring %s, workload=%p\n",
+			     cmd, get_opcode(cmd, s->engine),
+			     repr_addr_type(s->buf_addr_type),
+			     s->engine->name, s->workload);
+		return -EBADRQC;
+	}
+
+	if ((info->opcode == OP_MI_BATCH_BUFFER_END) ||
+	    ((info->opcode == OP_MI_BATCH_BUFFER_START) &&
+	     (BATCH_BUFFER_2ND_LEVEL_BIT(cmd) == 0)))
+		return 0;
+
+	return -EBADRQC;
+}
+
 static int perform_bb_shadow(struct parser_exec_state *s)
 {
 	struct intel_vgpu *vgpu = s->vgpu;
 	struct intel_vgpu_shadow_bb *bb;
 	unsigned long gma = 0;
 	unsigned long bb_size;
+	unsigned long bb_end_cmd_offset;
 	int ret = 0;
 	struct intel_vgpu_mm *mm = (s->buf_addr_type == GTT_BUFFER) ?
 		s->vgpu->gtt.ggtt_mm : s->workload->shadow_mm;
-	unsigned long gma_start_offset = 0;
+	unsigned long start_offset = 0;
 
 	/* get the start gm address of the batch buffer */
 	gma = get_gma_bb_from_cmd(s, 1);
 	if (gma == INTEL_GVT_INVALID_ADDR)
 		return -EFAULT;
 
-	ret = find_bb_size(s, &bb_size);
+	ret = find_bb_size(s, &bb_size, &bb_end_cmd_offset);
 	if (ret)
 		return ret;
 
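
audit_bb_end() exists because find_bb_size() sizes the batch buffer by reading guest memory that the guest can still rewrite: between the sizing pass and the copy into the shadow buffer, the terminating command could be swapped for something else. Re-checking the command at bb_end_cmd_offset inside the already-copied shadow bytes closes that window, since only the copy is ever executed. Per the next hunk, the audited location is:

	/* where perform_bb_shadow() points audit_bb_end() */
	u32 *end_cmd = (u32 *)(bb->va + start_offset + bb_end_cmd_offset);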
@@ -1721,7 +1880,7 @@
 
 	bb->ppgtt = (s->buf_addr_type == GTT_BUFFER) ? false : true;
 
-	/* the gma_start_offset stores the batch buffer's start gma's
+	/* the start_offset stores the batch buffer's start gma's
 	 * offset relative to page boundary. so for non-privileged batch
 	 * buffer, the shadowed gem object holds exactly the same page
 	 * layout as original gem object. This is for the convience of
@@ -1733,43 +1892,39 @@
 	 * that of shadowed page.
 	 */
 	if (bb->ppgtt)
-		gma_start_offset = gma & ~I915_GTT_PAGE_MASK;
+		start_offset = gma & ~I915_GTT_PAGE_MASK;
 
-	bb->obj = i915_gem_object_create(s->vgpu->gvt->dev_priv,
-			 roundup(bb_size + gma_start_offset, PAGE_SIZE));
+	bb->obj = i915_gem_object_create_shmem(s->engine->i915,
+					       round_up(bb_size + start_offset,
+							PAGE_SIZE));
 	if (IS_ERR(bb->obj)) {
 		ret = PTR_ERR(bb->obj);
 		goto err_free_bb;
 	}
 
-	ret = i915_gem_obj_prepare_shmem_write(bb->obj, &bb->clflush);
-	if (ret)
-		goto err_free_obj;
-
 	bb->va = i915_gem_object_pin_map(bb->obj, I915_MAP_WB);
 	if (IS_ERR(bb->va)) {
 		ret = PTR_ERR(bb->va);
-		goto err_finish_shmem_access;
-	}
-
-	if (bb->clflush & CLFLUSH_BEFORE) {
-		drm_clflush_virt_range(bb->va, bb->obj->base.size);
-		bb->clflush &= ~CLFLUSH_BEFORE;
+		goto err_free_obj;
 	}
 
 	ret = copy_gma_to_hva(s->vgpu, mm,
 			      gma, gma + bb_size,
-			      bb->va + gma_start_offset);
+			      bb->va + start_offset);
 	if (ret < 0) {
 		gvt_vgpu_err("fail to copy guest ring buffer\n");
 		ret = -EFAULT;
 		goto err_unmap;
 	}
 
+	ret = audit_bb_end(s, bb->va + start_offset + bb_end_cmd_offset);
+	if (ret)
+		goto err_unmap;
+
+	i915_gem_object_unlock(bb->obj);
 	INIT_LIST_HEAD(&bb->list);
 	list_add(&bb->list, &s->workload->shadow_bb);
 
-	bb->accessing = true;
 	bb->bb_start_cmd_va = s->ip_va;
 
 	if ((s->buf_type == BATCH_BUFFER_INSTRUCTION) && (!s->is_ctx_wa))
@@ -1785,13 +1940,11 @@
 	 * buffer's gma in pair. After all, we don't want to pin the shadow
 	 * buffer here (too early).
 	 */
-	s->ip_va = bb->va + gma_start_offset;
+	s->ip_va = bb->va + start_offset;
 	s->ip_gma = gma;
 	return 0;
 err_unmap:
 	i915_gem_object_unpin_map(bb->obj);
-err_finish_shmem_access:
-	i915_gem_obj_finish_shmem_access(bb->obj);
 err_free_obj:
 	i915_gem_object_put(bb->obj);
 err_free_bb:
@@ -1840,7 +1993,9 @@
 	return ret;
 }
 
-static struct cmd_info cmd_info[] = {
+static int mi_noop_index;
+
+static const struct cmd_info cmd_info[] = {
 	{"MI_NOOP", OP_MI_NOOP, F_LEN_CONST, R_ALL, D_ALL, 0, 1, NULL},
 
 	{"MI_SET_PREDICATE", OP_MI_SET_PREDICATE, F_LEN_CONST, R_ALL, D_ALL,
@@ -1888,21 +2043,24 @@
 	{"MI_RS_CONTEXT", OP_MI_RS_CONTEXT, F_LEN_CONST, R_RCS, D_ALL, 0, 1,
 		NULL},
 
-	{"MI_DISPLAY_FLIP", OP_MI_DISPLAY_FLIP, F_LEN_VAR | F_POST_HANDLE,
+	{"MI_DISPLAY_FLIP", OP_MI_DISPLAY_FLIP, F_LEN_VAR,
 		R_RCS | R_BCS, D_ALL, 0, 8, cmd_handler_mi_display_flip},
 
-	{"MI_SEMAPHORE_MBOX", OP_MI_SEMAPHORE_MBOX, F_LEN_VAR, R_ALL, D_ALL,
-		0, 8, NULL},
+	{"MI_SEMAPHORE_MBOX", OP_MI_SEMAPHORE_MBOX, F_LEN_VAR | F_LEN_VAR_FIXED,
+		R_ALL, D_ALL, 0, 8, NULL, CMD_LEN(1)},
 
 	{"MI_MATH", OP_MI_MATH, F_LEN_VAR, R_ALL, D_ALL, 0, 8, NULL},
 
-	{"MI_URB_CLEAR", OP_MI_URB_CLEAR, F_LEN_VAR, R_RCS, D_ALL, 0, 8, NULL},
+	{"MI_URB_CLEAR", OP_MI_URB_CLEAR, F_LEN_VAR | F_LEN_VAR_FIXED, R_RCS,
+		D_ALL, 0, 8, NULL, CMD_LEN(0)},
 
-	{"ME_SEMAPHORE_SIGNAL", OP_MI_SEMAPHORE_SIGNAL, F_LEN_VAR, R_ALL,
-		D_BDW_PLUS, 0, 8, NULL},
+	{"MI_SEMAPHORE_SIGNAL", OP_MI_SEMAPHORE_SIGNAL,
+		F_LEN_VAR | F_LEN_VAR_FIXED, R_ALL, D_BDW_PLUS, 0, 8,
+		NULL, CMD_LEN(0)},
 
-	{"ME_SEMAPHORE_WAIT", OP_MI_SEMAPHORE_WAIT, F_LEN_VAR, R_ALL, D_BDW_PLUS,
-		ADDR_FIX_1(2), 8, cmd_handler_mi_semaphore_wait},
+	{"MI_SEMAPHORE_WAIT", OP_MI_SEMAPHORE_WAIT,
+		F_LEN_VAR | F_LEN_VAR_FIXED, R_ALL, D_BDW_PLUS, ADDR_FIX_1(2),
+		8, cmd_handler_mi_semaphore_wait, CMD_LEN(2)},
 
 	{"MI_STORE_DATA_IMM", OP_MI_STORE_DATA_IMM, F_LEN_VAR, R_ALL, D_BDW_PLUS,
 		ADDR_FIX_1(1), 10, cmd_handler_mi_store_data_imm},
@@ -1916,8 +2074,9 @@
 	{"MI_UPDATE_GTT", OP_MI_UPDATE_GTT, F_LEN_VAR, R_ALL, D_BDW_PLUS, 0, 10,
 		cmd_handler_mi_update_gtt},
 
-	{"MI_STORE_REGISTER_MEM", OP_MI_STORE_REGISTER_MEM, F_LEN_VAR, R_ALL,
-		D_ALL, ADDR_FIX_1(2), 8, cmd_handler_srm},
+	{"MI_STORE_REGISTER_MEM", OP_MI_STORE_REGISTER_MEM,
+		F_LEN_VAR | F_LEN_VAR_FIXED, R_ALL, D_ALL, ADDR_FIX_1(2), 8,
+		cmd_handler_srm, CMD_LEN(2)},
 
 	{"MI_FLUSH_DW", OP_MI_FLUSH_DW, F_LEN_VAR, R_ALL, D_ALL, 0, 6,
 		cmd_handler_mi_flush_dw},
@@ -1925,26 +2084,30 @@
 	{"MI_CLFLUSH", OP_MI_CLFLUSH, F_LEN_VAR, R_ALL, D_ALL, ADDR_FIX_1(1),
 		10, cmd_handler_mi_clflush},
 
-	{"MI_REPORT_PERF_COUNT", OP_MI_REPORT_PERF_COUNT, F_LEN_VAR, R_ALL,
-		D_ALL, ADDR_FIX_1(1), 6, cmd_handler_mi_report_perf_count},
+	{"MI_REPORT_PERF_COUNT", OP_MI_REPORT_PERF_COUNT,
+		F_LEN_VAR | F_LEN_VAR_FIXED, R_ALL, D_ALL, ADDR_FIX_1(1), 6,
+		cmd_handler_mi_report_perf_count, CMD_LEN(2)},
 
-	{"MI_LOAD_REGISTER_MEM", OP_MI_LOAD_REGISTER_MEM, F_LEN_VAR, R_ALL,
-		D_ALL, ADDR_FIX_1(2), 8, cmd_handler_lrm},
+	{"MI_LOAD_REGISTER_MEM", OP_MI_LOAD_REGISTER_MEM,
+		F_LEN_VAR | F_LEN_VAR_FIXED, R_ALL, D_ALL, ADDR_FIX_1(2), 8,
+		cmd_handler_lrm, CMD_LEN(2)},
 
-	{"MI_LOAD_REGISTER_REG", OP_MI_LOAD_REGISTER_REG, F_LEN_VAR, R_ALL,
-		D_ALL, 0, 8, cmd_handler_lrr},
+	{"MI_LOAD_REGISTER_REG", OP_MI_LOAD_REGISTER_REG,
+		F_LEN_VAR | F_LEN_VAR_FIXED, R_ALL, D_ALL, 0, 8,
+		cmd_handler_lrr, CMD_LEN(1)},
 
-	{"MI_RS_STORE_DATA_IMM", OP_MI_RS_STORE_DATA_IMM, F_LEN_VAR, R_RCS,
-		D_ALL, 0, 8, NULL},
+	{"MI_RS_STORE_DATA_IMM", OP_MI_RS_STORE_DATA_IMM,
+		F_LEN_VAR | F_LEN_VAR_FIXED, R_RCS, D_ALL, 0,
+		8, NULL, CMD_LEN(2)},
 
-	{"MI_LOAD_URB_MEM", OP_MI_LOAD_URB_MEM, F_LEN_VAR, R_RCS, D_ALL,
-		ADDR_FIX_1(2), 8, NULL},
+	{"MI_LOAD_URB_MEM", OP_MI_LOAD_URB_MEM, F_LEN_VAR | F_LEN_VAR_FIXED,
+		R_RCS, D_ALL, ADDR_FIX_1(2), 8, NULL, CMD_LEN(2)},
 
 	{"MI_STORE_URM_MEM", OP_MI_STORE_URM_MEM, F_LEN_VAR, R_RCS, D_ALL,
 		ADDR_FIX_1(2), 8, NULL},
 
-	{"MI_OP_2E", OP_MI_2E, F_LEN_VAR, R_ALL, D_BDW_PLUS, ADDR_FIX_2(1, 2),
-		8, cmd_handler_mi_op_2e},
+	{"MI_OP_2E", OP_MI_2E, F_LEN_VAR | F_LEN_VAR_FIXED, R_ALL, D_BDW_PLUS,
+		ADDR_FIX_2(1, 2), 8, cmd_handler_mi_op_2e, CMD_LEN(3)},
 
 	{"MI_OP_2F", OP_MI_2F, F_LEN_VAR, R_ALL, D_BDW_PLUS, ADDR_FIX_1(1),
 		8, cmd_handler_mi_op_2f},
@@ -1954,8 +2117,8 @@
 		cmd_handler_mi_batch_buffer_start},
 
 	{"MI_CONDITIONAL_BATCH_BUFFER_END", OP_MI_CONDITIONAL_BATCH_BUFFER_END,
-		F_LEN_VAR, R_ALL, D_ALL, ADDR_FIX_1(2), 8,
-		cmd_handler_mi_conditional_batch_buffer_end},
+		F_LEN_VAR | F_LEN_VAR_FIXED, R_ALL, D_ALL, ADDR_FIX_1(2), 8,
+		cmd_handler_mi_conditional_batch_buffer_end, CMD_LEN(2)},
 
 	{"MI_LOAD_SCAN_LINES_INCL", OP_MI_LOAD_SCAN_LINES_INCL, F_LEN_CONST,
 		R_RCS | R_BCS, D_ALL, 0, 2, NULL},
@@ -2343,6 +2506,9 @@
 	{"OP_3D_MEDIA_0_1_4", OP_3D_MEDIA_0_1_4, F_LEN_VAR, R_RCS, D_ALL,
 		ADDR_FIX_1(1), 8, NULL},
 
+	{"OP_SWTESS_BASE_ADDRESS", OP_SWTESS_BASE_ADDRESS,
+		F_LEN_VAR, R_RCS, D_ALL, ADDR_FIX_2(1, 2), 3, NULL},
+
 	{"3DSTATE_VS", OP_3DSTATE_VS, F_LEN_VAR, R_RCS, D_ALL, 0, 8, NULL},
 
 	{"3DSTATE_SF", OP_3DSTATE_SF, F_LEN_VAR, R_RCS, D_ALL, 0, 8, NULL},
@@ -2507,7 +2673,7 @@
 		0, 12, NULL},
 
 	{"VEB_DI_IECP", OP_VEB_DNDI_IECP_STATE, F_LEN_VAR, R_VECS, D_BDW_PLUS,
-		0, 20, NULL},
+		0, 12, NULL},
 };
 
 static void add_cmd_entry(struct intel_gvt *gvt, struct cmd_entry *e)
@@ -2519,26 +2685,38 @@
 static int cmd_parser_exec(struct parser_exec_state *s)
 {
 	struct intel_vgpu *vgpu = s->vgpu;
-	struct cmd_info *info;
+	const struct cmd_info *info;
 	u32 cmd;
 	int ret = 0;
 
 	cmd = cmd_val(s, 0);
 
-	info = get_cmd_info(s->vgpu->gvt, cmd, s->ring_id);
+	/* fastpath for MI_NOOP */
+	if (cmd == MI_NOOP)
+		info = &cmd_info[mi_noop_index];
+	else
+		info = get_cmd_info(s->vgpu->gvt, cmd, s->engine);
+
 	if (info == NULL) {
-		gvt_vgpu_err("unknown cmd 0x%x, opcode=0x%x, addr_type=%s, ring %d, workload=%p\n",
-				cmd, get_opcode(cmd, s->ring_id),
-				(s->buf_addr_type == PPGTT_BUFFER) ?
-				"ppgtt" : "ggtt", s->ring_id, s->workload);
+		gvt_vgpu_err("unknown cmd 0x%x, opcode=0x%x, addr_type=%s, ring %s, workload=%p\n",
+			     cmd, get_opcode(cmd, s->engine),
+			     repr_addr_type(s->buf_addr_type),
+			     s->engine->name, s->workload);
 		return -EBADRQC;
 	}
 
 	s->info = info;
 
-	trace_gvt_command(vgpu->id, s->ring_id, s->ip_gma, s->ip_va,
+	trace_gvt_command(vgpu->id, s->engine->id, s->ip_gma, s->ip_va,
 			  cmd_length(s), s->buf_type, s->buf_addr_type,
 			  s->workload, info->name);
+
+	if ((info->flag & F_LEN_MASK) == F_LEN_VAR_FIXED) {
+		ret = gvt_check_valid_cmd_length(cmd_length(s),
+						 info->valid_len);
+		if (ret)
+			return ret;
+	}
 
 	if (info->handler) {
 		ret = info->handler(s);
26322810 s.buf_type = RING_BUFFER_INSTRUCTION;
26332811 s.buf_addr_type = GTT_BUFFER;
26342812 s.vgpu = workload->vgpu;
2635
- s.ring_id = workload->ring_id;
2813
+ s.engine = workload->engine;
26362814 s.ring_start = workload->rb_start;
26372815 s.ring_size = _RING_CTL_BUF_SIZE(workload->rb_ctl);
26382816 s.ring_head = gma_head;
....@@ -2641,14 +2819,8 @@
26412819 s.workload = workload;
26422820 s.is_ctx_wa = false;
26432821
2644
- if ((bypass_scan_mask & (1 << workload->ring_id)) ||
2645
- gma_head == gma_tail)
2822
+ if (bypass_scan_mask & workload->engine->mask || gma_head == gma_tail)
26462823 return 0;
2647
-
2648
- if (!intel_gvt_ggtt_validate_range(s.vgpu, s.ring_start, s.ring_size)) {
2649
- ret = -EINVAL;
2650
- goto out;
2651
- }
26522824
26532825 ret = ip_gma_set(&s, gma_head);
26542826 if (ret)
@@ -2676,7 +2848,7 @@
 			I915_GTT_PAGE_SIZE)))
 		return -EINVAL;
 
-	ring_tail = wa_ctx->indirect_ctx.size + 3 * sizeof(uint32_t);
+	ring_tail = wa_ctx->indirect_ctx.size + 3 * sizeof(u32);
 	ring_size = round_up(wa_ctx->indirect_ctx.size + CACHELINE_BYTES,
 			PAGE_SIZE);
 	gma_head = wa_ctx->indirect_ctx.guest_gma;
@@ -2686,7 +2858,7 @@
 	s.buf_type = RING_BUFFER_INSTRUCTION;
 	s.buf_addr_type = GTT_BUFFER;
 	s.vgpu = workload->vgpu;
-	s.ring_id = workload->ring_id;
+	s.engine = workload->engine;
 	s.ring_start = wa_ctx->indirect_ctx.guest_gma;
 	s.ring_size = ring_size;
 	s.ring_head = gma_head;
@@ -2694,11 +2866,6 @@
 	s.rb_va = wa_ctx->indirect_ctx.shadow_va;
 	s.workload = workload;
 	s.is_ctx_wa = true;
-
-	if (!intel_gvt_ggtt_validate_range(s.vgpu, s.ring_start, s.ring_size)) {
-		ret = -EINVAL;
-		goto out;
-	}
 
 	ret = ip_gma_set(&s, gma_head);
 	if (ret)
@@ -2716,7 +2883,6 @@
 	struct intel_vgpu_submission *s = &vgpu->submission;
 	unsigned long gma_head, gma_tail, gma_top, guest_rb_size;
 	void *shadow_ring_buffer_va;
-	int ring_id = workload->ring_id;
 	int ret;
 
 	guest_rb_size = _RING_CTL_BUF_SIZE(workload->rb_ctl);
@@ -2729,21 +2895,21 @@
 	gma_tail = workload->rb_start + workload->rb_tail;
 	gma_top = workload->rb_start + guest_rb_size;
 
-	if (workload->rb_len > s->ring_scan_buffer_size[ring_id]) {
+	if (workload->rb_len > s->ring_scan_buffer_size[workload->engine->id]) {
 		void *p;
 
 		/* realloc the new ring buffer if needed */
-		p = krealloc(s->ring_scan_buffer[ring_id], workload->rb_len,
-			GFP_KERNEL);
+		p = krealloc(s->ring_scan_buffer[workload->engine->id],
+			     workload->rb_len, GFP_KERNEL);
 		if (!p) {
 			gvt_vgpu_err("fail to re-alloc ring scan buffer\n");
 			return -ENOMEM;
 		}
-		s->ring_scan_buffer[ring_id] = p;
-		s->ring_scan_buffer_size[ring_id] = workload->rb_len;
+		s->ring_scan_buffer[workload->engine->id] = p;
+		s->ring_scan_buffer_size[workload->engine->id] = workload->rb_len;
 	}
 
-	shadow_ring_buffer_va = s->ring_scan_buffer[ring_id];
+	shadow_ring_buffer_va = s->ring_scan_buffer[workload->engine->id];
 
 	/* get shadow ring buffer va */
 	workload->shadow_ring_buffer_va = shadow_ring_buffer_va;
@@ -2801,9 +2967,9 @@
 	int ret = 0;
 	void *map;
 
-	obj = i915_gem_object_create(workload->vgpu->gvt->dev_priv,
-				     roundup(ctx_size + CACHELINE_BYTES,
-					     PAGE_SIZE));
+	obj = i915_gem_object_create_shmem(workload->engine->i915,
+					   roundup(ctx_size + CACHELINE_BYTES,
+						   PAGE_SIZE));
 	if (IS_ERR(obj))
 		return PTR_ERR(obj);
 
@@ -2815,7 +2981,9 @@
 		goto put_obj;
 	}
 
+	i915_gem_object_lock(obj, NULL);
 	ret = i915_gem_object_set_to_cpu_domain(obj, false);
+	i915_gem_object_unlock(obj);
 	if (ret) {
 		gvt_vgpu_err("failed to set shadow indirect ctx to CPU\n");
 		goto unmap_src;
@@ -2843,7 +3011,7 @@
 
 static int combine_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx)
 {
-	uint32_t per_ctx_start[CACHELINE_DWORDS] = {0};
+	u32 per_ctx_start[CACHELINE_DWORDS] = {0};
 	unsigned char *bb_start_sva;
 
 	if (!wa_ctx->per_ctx.valid)
@@ -2888,30 +3056,14 @@
 	return 0;
 }
 
-static struct cmd_info *find_cmd_entry_any_ring(struct intel_gvt *gvt,
-		unsigned int opcode, unsigned long rings)
-{
-	struct cmd_info *info = NULL;
-	unsigned int ring;
-
-	for_each_set_bit(ring, &rings, I915_NUM_ENGINES) {
-		info = find_cmd_entry(gvt, opcode, ring);
-		if (info)
-			break;
-	}
-	return info;
-}
-
 static int init_cmd_table(struct intel_gvt *gvt)
 {
+	unsigned int gen_type = intel_gvt_get_device_type(gvt);
 	int i;
-	struct cmd_entry *e;
-	struct cmd_info *info;
-	unsigned int gen_type;
-
-	gen_type = intel_gvt_get_device_type(gvt);
 
 	for (i = 0; i < ARRAY_SIZE(cmd_info); i++) {
+		struct cmd_entry *e;
+
 		if (!(cmd_info[i].devices & gen_type))
 			continue;
 
@@ -2920,21 +3072,16 @@
 			return -ENOMEM;
 
 		e->info = &cmd_info[i];
-		info = find_cmd_entry_any_ring(gvt,
-				e->info->opcode, e->info->rings);
-		if (info) {
-			gvt_err("%s %s duplicated\n", e->info->name,
-					info->name);
-			kfree(e);
-			return -EEXIST;
-		}
+		if (cmd_info[i].opcode == OP_MI_NOOP)
+			mi_noop_index = i;
 
 		INIT_HLIST_NODE(&e->hlist);
 		add_cmd_entry(gvt, e);
 		gvt_dbg_cmd("add %-30s op %04x flag %x devs %02x rings %02x\n",
-				e->info->name, e->info->opcode, e->info->flag,
-				e->info->devices, e->info->rings);
+			    e->info->name, e->info->opcode, e->info->flag,
+			    e->info->devices, e->info->rings);
 	}
+
 	return 0;
 }
 