summaryrefslogtreecommitdiff
path: root/drivers/misc/habanalabs
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/misc/habanalabs')
-rw-r--r--drivers/misc/habanalabs/common/command_submission.c43
-rw-r--r--drivers/misc/habanalabs/common/habanalabs.h8
-rw-r--r--drivers/misc/habanalabs/gaudi2/gaudi2.c72
-rw-r--r--drivers/misc/habanalabs/include/gaudi2/asic_reg/gaudi2_regs.h1
4 files changed, 117 insertions, 7 deletions
diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c
index 304e4f3b0e7e..cf4118515678 100644
--- a/drivers/misc/habanalabs/common/command_submission.c
+++ b/drivers/misc/habanalabs/common/command_submission.c
@@ -13,7 +13,7 @@
#define HL_CS_FLAGS_TYPE_MASK (HL_CS_FLAGS_SIGNAL | HL_CS_FLAGS_WAIT | \
HL_CS_FLAGS_COLLECTIVE_WAIT | HL_CS_FLAGS_RESERVE_SIGNALS_ONLY | \
- HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY)
+ HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY | HL_CS_FLAGS_ENGINE_CORE_COMMAND)
#define MAX_TS_ITER_NUM 10
@@ -1244,6 +1244,8 @@ static enum hl_cs_type hl_cs_get_cs_type(u32 cs_type_flags)
return CS_RESERVE_SIGNALS;
else if (cs_type_flags & HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY)
return CS_UNRESERVE_SIGNALS;
+ else if (cs_type_flags & HL_CS_FLAGS_ENGINE_CORE_COMMAND)
+ return CS_TYPE_ENGINE_CORE;
else
return CS_TYPE_DEFAULT;
}
@@ -2355,6 +2357,41 @@ out:
return rc;
}
+static int cs_ioctl_engine_cores(struct hl_fpriv *hpriv, u64 engine_cores,
+ u32 num_engine_cores, u32 core_command)
+{
+ int rc;
+ struct hl_device *hdev = hpriv->hdev;
+ void __user *engine_cores_arr;
+ u32 *cores;
+
+ if (!num_engine_cores || num_engine_cores > hdev->asic_prop.num_engine_cores) {
+ dev_err(hdev->dev, "Number of engine cores %d is invalid\n", num_engine_cores);
+ return -EINVAL;
+ }
+
+ if (core_command != HL_ENGINE_CORE_RUN && core_command != HL_ENGINE_CORE_HALT) {
+ dev_err(hdev->dev, "Engine core command is invalid\n");
+ return -EINVAL;
+ }
+
+ engine_cores_arr = (void __user *) (uintptr_t) engine_cores;
+ cores = kmalloc_array(num_engine_cores, sizeof(u32), GFP_KERNEL);
+ if (!cores)
+ return -ENOMEM;
+
+ if (copy_from_user(cores, engine_cores_arr, num_engine_cores * sizeof(u32))) {
+ dev_err(hdev->dev, "Failed to copy core-ids array from user\n");
+ kfree(cores);
+ return -EFAULT;
+ }
+
+ rc = hdev->asic_funcs->set_engine_cores(hdev, cores, num_engine_cores, core_command);
+ kfree(cores);
+
+ return rc;
+}
+
int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
{
union hl_cs_args *args = data;
@@ -2407,6 +2444,10 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
rc = cs_ioctl_unreserve_signals(hpriv,
args->in.encaps_sig_handle_id);
break;
+ case CS_TYPE_ENGINE_CORE:
+ rc = cs_ioctl_engine_cores(hpriv, args->in.engine_cores,
+ args->in.num_engine_cores, args->in.core_command);
+ break;
default:
rc = cs_ioctl_default(hpriv, chunks, num_chunks, &cs_seq,
args->in.cs_flags,
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index 8d9e96c6092a..ae3f5832fe58 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -345,7 +345,8 @@ enum hl_cs_type {
CS_TYPE_WAIT,
CS_TYPE_COLLECTIVE_WAIT,
CS_RESERVE_SIGNALS,
- CS_UNRESERVE_SIGNALS
+ CS_UNRESERVE_SIGNALS,
+ CS_TYPE_ENGINE_CORE
};
/*
@@ -617,6 +618,7 @@ struct hl_hints_range {
* which the property supports_user_set_page_size is true
* (i.e. the DRAM supports multiple page sizes), otherwise
* it will shall be equal to dram_page_size.
+ * @num_engine_cores: number of engine cpu cores
* @collective_first_sob: first sync object available for collective use
* @collective_first_mon: first monitor available for collective use
* @sync_stream_first_sob: first sync object available for sync stream use
@@ -737,6 +739,7 @@ struct asic_fixed_properties {
u32 faulty_dram_cluster_map;
u32 xbar_edge_enabled_mask;
u32 device_mem_alloc_default_page_size;
+ u32 num_engine_cores;
u16 collective_first_sob;
u16 collective_first_mon;
u16 sync_stream_first_sob;
@@ -1511,6 +1514,7 @@ struct engines_data {
* @check_if_razwi_happened: check if there was a razwi due to RR violation.
* @access_dev_mem: access device memory
* @set_dram_bar_base: set the base of the DRAM BAR
+ * @set_engine_cores: set a config command to enigne cores
*/
struct hl_asic_funcs {
int (*early_init)(struct hl_device *hdev);
@@ -1645,6 +1649,8 @@ struct hl_asic_funcs {
int (*access_dev_mem)(struct hl_device *hdev, enum pci_region region_type,
u64 addr, u64 *val, enum debugfs_access_type acc_type);
u64 (*set_dram_bar_base)(struct hl_device *hdev, u64 addr);
+ int (*set_engine_cores)(struct hl_device *hdev, u32 *core_ids,
+ u32 num_cores, u32 core_command);
};
diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2.c b/drivers/misc/habanalabs/gaudi2/gaudi2.c
index 9ccde0258eca..676419961f86 100644
--- a/drivers/misc/habanalabs/gaudi2/gaudi2.c
+++ b/drivers/misc/habanalabs/gaudi2/gaudi2.c
@@ -1989,6 +1989,7 @@ static int gaudi2_set_fixed_properties(struct hl_device *hdev)
prop->pmmu_huge.end_addr = VA_HOST_SPACE_HPAGE_END;
}
+ prop->num_engine_cores = CPU_ID_MAX;
prop->cfg_size = CFG_SIZE;
prop->max_asid = MAX_ASID;
prop->num_of_events = GAUDI2_EVENT_SIZE;
@@ -3751,14 +3752,16 @@ static void gaudi2_stop_dec(struct hl_device *hdev)
gaudi2_stop_pcie_dec(hdev);
}
-static void gaudi2_halt_arc(struct hl_device *hdev, u32 cpu_id)
+static void gaudi2_set_arc_running_mode(struct hl_device *hdev, u32 cpu_id, u32 run_mode)
{
u32 reg_base, reg_val;
reg_base = gaudi2_arc_blocks_bases[cpu_id];
+ if (run_mode == HL_ENGINE_CORE_RUN)
+ reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_RUN_HALT_REQ_RUN_REQ_MASK, 1);
+ else
+ reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_RUN_HALT_REQ_HALT_REQ_MASK, 1);
- /* Halt ARC */
- reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_RUN_HALT_REQ_HALT_REQ_MASK, 1);
WREG32(reg_base + ARC_HALT_REQ_OFFSET, reg_val);
}
@@ -3768,10 +3771,37 @@ static void gaudi2_halt_arcs(struct hl_device *hdev)
for (arc_id = CPU_ID_SCHED_ARC0; arc_id < CPU_ID_MAX; arc_id++) {
if (gaudi2_is_arc_enabled(hdev, arc_id))
- gaudi2_halt_arc(hdev, arc_id);
+ gaudi2_set_arc_running_mode(hdev, arc_id, HL_ENGINE_CORE_HALT);
}
}
+static int gaudi2_verify_arc_running_mode(struct hl_device *hdev, u32 cpu_id, u32 run_mode)
+{
+ int rc;
+ u32 reg_base, val, ack_mask, timeout_usec = 100000;
+
+ if (hdev->pldm)
+ timeout_usec *= 100;
+
+ reg_base = gaudi2_arc_blocks_bases[cpu_id];
+ if (run_mode == HL_ENGINE_CORE_RUN)
+ ack_mask = ARC_FARM_ARC0_AUX_RUN_HALT_ACK_RUN_ACK_MASK;
+ else
+ ack_mask = ARC_FARM_ARC0_AUX_RUN_HALT_ACK_HALT_ACK_MASK;
+
+ rc = hl_poll_timeout(hdev, reg_base + ARC_HALT_ACK_OFFSET,
+ val, ((val & ack_mask) == ack_mask),
+ 1000, timeout_usec);
+
+ if (!rc) {
+ /* Clear */
+ val = FIELD_PREP(ARC_FARM_ARC0_AUX_RUN_HALT_REQ_RUN_REQ_MASK, 0);
+ WREG32(reg_base + ARC_HALT_REQ_OFFSET, val);
+ }
+
+ return rc;
+}
+
static void gaudi2_reset_arcs(struct hl_device *hdev)
{
struct gaudi2_device *gaudi2 = hdev->asic_specific;
@@ -3796,8 +3826,39 @@ static void gaudi2_nic_qmans_manual_flush(struct hl_device *hdev)
queue_id = GAUDI2_QUEUE_ID_NIC_0_0;
- for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++, queue_id += NUM_OF_PQ_PER_QMAN)
+ for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++, queue_id += NUM_OF_PQ_PER_QMAN) {
+ if (!(hdev->nic_ports_mask & BIT(i)))
+ continue;
+
gaudi2_qman_manual_flush_common(hdev, queue_id);
+ }
+}
+
+static int gaudi2_set_engine_cores(struct hl_device *hdev, u32 *core_ids,
+ u32 num_cores, u32 core_command)
+{
+ int i, rc;
+
+
+ for (i = 0 ; i < num_cores ; i++) {
+ if (gaudi2_is_arc_enabled(hdev, core_ids[i]))
+ gaudi2_set_arc_running_mode(hdev, core_ids[i], core_command);
+ }
+
+ for (i = 0 ; i < num_cores ; i++) {
+ if (gaudi2_is_arc_enabled(hdev, core_ids[i])) {
+ rc = gaudi2_verify_arc_running_mode(hdev, core_ids[i], core_command);
+
+ if (rc) {
+ dev_err(hdev->dev, "failed to %s arc: %d\n",
+ (core_command == HL_ENGINE_CORE_HALT) ?
+ "HALT" : "RUN", core_ids[i]);
+ return -1;
+ }
+ }
+ }
+
+ return 0;
}
static void gaudi2_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset)
@@ -9968,6 +10029,7 @@ static const struct hl_asic_funcs gaudi2_funcs = {
.mmu_get_real_page_size = gaudi2_mmu_get_real_page_size,
.access_dev_mem = hl_access_dev_mem,
.set_dram_bar_base = gaudi2_set_hbm_bar_base,
+ .set_engine_cores = gaudi2_set_engine_cores,
};
void gaudi2_set_asic_funcs(struct hl_device *hdev)
diff --git a/drivers/misc/habanalabs/include/gaudi2/asic_reg/gaudi2_regs.h b/drivers/misc/habanalabs/include/gaudi2/asic_reg/gaudi2_regs.h
index d0e2c68a639f..bfda4223bdc8 100644
--- a/drivers/misc/habanalabs/include/gaudi2/asic_reg/gaudi2_regs.h
+++ b/drivers/misc/habanalabs/include/gaudi2/asic_reg/gaudi2_regs.h
@@ -239,6 +239,7 @@
#define SFT_IF_RTR_OFFSET (mmSFT0_HBW_RTR_IF1_RTR_H3_BASE - mmSFT0_HBW_RTR_IF0_RTR_H3_BASE)
#define ARC_HALT_REQ_OFFSET (mmARC_FARM_ARC0_AUX_RUN_HALT_REQ - mmARC_FARM_ARC0_AUX_BASE)
+#define ARC_HALT_ACK_OFFSET (mmARC_FARM_ARC0_AUX_RUN_HALT_ACK - mmARC_FARM_ARC0_AUX_BASE)
#define ARC_REGION_CFG_OFFSET(region) \
(mmARC_FARM_ARC0_AUX_ARC_REGION_CFG_0 + (region * 4) - mmARC_FARM_ARC0_AUX_BASE)