summary refs log tree commit diff
path: root/drivers/gpu/drm/radeon/r600_cs.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/radeon/r600_cs.c')
-rw-r--r-- drivers/gpu/drm/radeon/r600_cs.c 731
1 files changed, 427 insertions, 304 deletions
diff --git a/drivers/gpu/drm/radeon/r600_cs.c b/drivers/gpu/drm/radeon/r600_cs.c
index 211c40252fe0..01a3ec83f284 100644
--- a/drivers/gpu/drm/radeon/r600_cs.c
+++ b/drivers/gpu/drm/radeon/r600_cs.c
@@ -31,12 +31,7 @@
#include "r600d.h"
#include "r600_reg_safe.h"
-static int r600_cs_packet_next_reloc_mm(struct radeon_cs_parser *p,
- struct radeon_cs_reloc **cs_reloc);
-static int r600_cs_packet_next_reloc_nomm(struct radeon_cs_parser *p,
- struct radeon_cs_reloc **cs_reloc);
-typedef int (*next_reloc_t)(struct radeon_cs_parser*, struct radeon_cs_reloc**);
-static next_reloc_t r600_cs_packet_next_reloc = &r600_cs_packet_next_reloc_mm;
+static int r600_nomm;
extern void r600_cs_legacy_get_tiling_conf(struct drm_device *dev, u32 *npipes, u32 *nbanks, u32 *group_size);
@@ -657,87 +652,30 @@ static int r600_cs_track_validate_db(struct radeon_cs_parser *p)
/* nby is npipes htiles aligned == npipes * 8 pixel aligned */
nby = round_up(nby, track->npipes * 8);
} else {
- /* htile widht & nby (8 or 4) make 2 bits number */
- tmp = track->htile_surface & 3;
+ /* always assume 8x8 htile */
/* align is htile align * 8, htile align vary according to
* number of pipe and tile width and nby
*/
switch (track->npipes) {
case 8:
- switch (tmp) {
- case 3: /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
- nbx = round_up(nbx, 64 * 8);
- nby = round_up(nby, 64 * 8);
- break;
- case 2: /* HTILE_WIDTH = 4 & HTILE_HEIGHT = 8*/
- case 1: /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 4*/
- nbx = round_up(nbx, 64 * 8);
- nby = round_up(nby, 32 * 8);
- break;
- case 0: /* HTILE_WIDTH = 4 & HTILE_HEIGHT = 4*/
- nbx = round_up(nbx, 32 * 8);
- nby = round_up(nby, 32 * 8);
- break;
- default:
- return -EINVAL;
- }
+ /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
+ nbx = round_up(nbx, 64 * 8);
+ nby = round_up(nby, 64 * 8);
break;
case 4:
- switch (tmp) {
- case 3: /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
- nbx = round_up(nbx, 64 * 8);
- nby = round_up(nby, 32 * 8);
- break;
- case 2: /* HTILE_WIDTH = 4 & HTILE_HEIGHT = 8*/
- case 1: /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 4*/
- nbx = round_up(nbx, 32 * 8);
- nby = round_up(nby, 32 * 8);
- break;
- case 0: /* HTILE_WIDTH = 4 & HTILE_HEIGHT = 4*/
- nbx = round_up(nbx, 32 * 8);
- nby = round_up(nby, 16 * 8);
- break;
- default:
- return -EINVAL;
- }
+ /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
+ nbx = round_up(nbx, 64 * 8);
+ nby = round_up(nby, 32 * 8);
break;
case 2:
- switch (tmp) {
- case 3: /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
- nbx = round_up(nbx, 32 * 8);
- nby = round_up(nby, 32 * 8);
- break;
- case 2: /* HTILE_WIDTH = 4 & HTILE_HEIGHT = 8*/
- case 1: /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 4*/
- nbx = round_up(nbx, 32 * 8);
- nby = round_up(nby, 16 * 8);
- break;
- case 0: /* HTILE_WIDTH = 4 & HTILE_HEIGHT = 4*/
- nbx = round_up(nbx, 16 * 8);
- nby = round_up(nby, 16 * 8);
- break;
- default:
- return -EINVAL;
- }
+ /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
+ nbx = round_up(nbx, 32 * 8);
+ nby = round_up(nby, 32 * 8);
break;
case 1:
- switch (tmp) {
- case 3: /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
- nbx = round_up(nbx, 32 * 8);
- nby = round_up(nby, 16 * 8);
- break;
- case 2: /* HTILE_WIDTH = 4 & HTILE_HEIGHT = 8*/
- case 1: /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 4*/
- nbx = round_up(nbx, 16 * 8);
- nby = round_up(nby, 16 * 8);
- break;
- case 0: /* HTILE_WIDTH = 4 & HTILE_HEIGHT = 4*/
- nbx = round_up(nbx, 16 * 8);
- nby = round_up(nby, 8 * 8);
- break;
- default:
- return -EINVAL;
- }
+ /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
+ nbx = round_up(nbx, 32 * 8);
+ nby = round_up(nby, 16 * 8);
break;
default:
dev_warn(p->dev, "%s:%d invalid num pipes %d\n",
@@ -746,9 +684,10 @@ static int r600_cs_track_validate_db(struct radeon_cs_parser *p)
}
}
/* compute number of htile */
- nbx = G_028D24_HTILE_WIDTH(track->htile_surface) ? nbx / 8 : nbx / 4;
- nby = G_028D24_HTILE_HEIGHT(track->htile_surface) ? nby / 8 : nby / 4;
- size = nbx * nby * 4;
+ nbx = nbx >> 3;
+ nby = nby >> 3;
+ /* size must be aligned on npipes * 2K boundary */
+ size = roundup(nbx * nby * 4, track->npipes * (2 << 10));
size += track->htile_offset;
if (size > radeon_bo_size(track->htile_bo)) {
@@ -840,170 +779,29 @@ static int r600_cs_track_check(struct radeon_cs_parser *p)
}
/**
- * r600_cs_packet_parse() - parse cp packet and point ib index to next packet
- * @parser: parser structure holding parsing context.
- * @pkt: where to store packet informations
- *
- * Assume that chunk_ib_index is properly set. Will return -EINVAL
- * if packet is bigger than remaining ib size. or if packets is unknown.
- **/
-static int r600_cs_packet_parse(struct radeon_cs_parser *p,
- struct radeon_cs_packet *pkt,
- unsigned idx)
-{
- struct radeon_cs_chunk *ib_chunk = &p->chunks[p->chunk_ib_idx];
- uint32_t header;
-
- if (idx >= ib_chunk->length_dw) {
- DRM_ERROR("Can not parse packet at %d after CS end %d !\n",
- idx, ib_chunk->length_dw);
- return -EINVAL;
- }
- header = radeon_get_ib_value(p, idx);
- pkt->idx = idx;
- pkt->type = CP_PACKET_GET_TYPE(header);
- pkt->count = CP_PACKET_GET_COUNT(header);
- pkt->one_reg_wr = 0;
- switch (pkt->type) {
- case PACKET_TYPE0:
- pkt->reg = CP_PACKET0_GET_REG(header);
- break;
- case PACKET_TYPE3:
- pkt->opcode = CP_PACKET3_GET_OPCODE(header);
- break;
- case PACKET_TYPE2:
- pkt->count = -1;
- break;
- default:
- DRM_ERROR("Unknown packet type %d at %d !\n", pkt->type, idx);
- return -EINVAL;
- }
- if ((pkt->count + 1 + pkt->idx) >= ib_chunk->length_dw) {
- DRM_ERROR("Packet (%d:%d:%d) end after CS buffer (%d) !\n",
- pkt->idx, pkt->type, pkt->count, ib_chunk->length_dw);
- return -EINVAL;
- }
- return 0;
-}
-
-/**
- * r600_cs_packet_next_reloc_mm() - parse next packet which should be reloc packet3
+ * r600_cs_packet_parse_vline() - parse userspace VLINE packet
* @parser: parser structure holding parsing context.
- * @data: pointer to relocation data
- * @offset_start: starting offset
- * @offset_mask: offset mask (to align start offset on)
- * @reloc: reloc informations
*
- * Check next packet is relocation packet3, do bo validation and compute
- * GPU offset using the provided start.
- **/
-static int r600_cs_packet_next_reloc_mm(struct radeon_cs_parser *p,
- struct radeon_cs_reloc **cs_reloc)
-{
- struct radeon_cs_chunk *relocs_chunk;
- struct radeon_cs_packet p3reloc;
- unsigned idx;
- int r;
-
- if (p->chunk_relocs_idx == -1) {
- DRM_ERROR("No relocation chunk !\n");
- return -EINVAL;
- }
- *cs_reloc = NULL;
- relocs_chunk = &p->chunks[p->chunk_relocs_idx];
- r = r600_cs_packet_parse(p, &p3reloc, p->idx);
- if (r) {
- return r;
- }
- p->idx += p3reloc.count + 2;
- if (p3reloc.type != PACKET_TYPE3 || p3reloc.opcode != PACKET3_NOP) {
- DRM_ERROR("No packet3 for relocation for packet at %d.\n",
- p3reloc.idx);
- return -EINVAL;
- }
- idx = radeon_get_ib_value(p, p3reloc.idx + 1);
- if (idx >= relocs_chunk->length_dw) {
- DRM_ERROR("Relocs at %d after relocations chunk end %d !\n",
- idx, relocs_chunk->length_dw);
- return -EINVAL;
- }
- /* FIXME: we assume reloc size is 4 dwords */
- *cs_reloc = p->relocs_ptr[(idx / 4)];
- return 0;
-}
-
-/**
- * r600_cs_packet_next_reloc_nomm() - parse next packet which should be reloc packet3
- * @parser: parser structure holding parsing context.
- * @data: pointer to relocation data
- * @offset_start: starting offset
- * @offset_mask: offset mask (to align start offset on)
- * @reloc: reloc informations
- *
- * Check next packet is relocation packet3, do bo validation and compute
- * GPU offset using the provided start.
- **/
-static int r600_cs_packet_next_reloc_nomm(struct radeon_cs_parser *p,
- struct radeon_cs_reloc **cs_reloc)
-{
- struct radeon_cs_chunk *relocs_chunk;
- struct radeon_cs_packet p3reloc;
- unsigned idx;
- int r;
-
- if (p->chunk_relocs_idx == -1) {
- DRM_ERROR("No relocation chunk !\n");
- return -EINVAL;
- }
- *cs_reloc = NULL;
- relocs_chunk = &p->chunks[p->chunk_relocs_idx];
- r = r600_cs_packet_parse(p, &p3reloc, p->idx);
- if (r) {
- return r;
- }
- p->idx += p3reloc.count + 2;
- if (p3reloc.type != PACKET_TYPE3 || p3reloc.opcode != PACKET3_NOP) {
- DRM_ERROR("No packet3 for relocation for packet at %d.\n",
- p3reloc.idx);
- return -EINVAL;
- }
- idx = radeon_get_ib_value(p, p3reloc.idx + 1);
- if (idx >= relocs_chunk->length_dw) {
- DRM_ERROR("Relocs at %d after relocations chunk end %d !\n",
- idx, relocs_chunk->length_dw);
- return -EINVAL;
- }
- *cs_reloc = p->relocs;
- (*cs_reloc)->lobj.gpu_offset = (u64)relocs_chunk->kdata[idx + 3] << 32;
- (*cs_reloc)->lobj.gpu_offset |= relocs_chunk->kdata[idx + 0];
- return 0;
-}
-
-/**
- * r600_cs_packet_next_is_pkt3_nop() - test if next packet is packet3 nop for reloc
- * @parser: parser structure holding parsing context.
- *
- * Check next packet is relocation packet3, do bo validation and compute
- * GPU offset using the provided start.
- **/
-static int r600_cs_packet_next_is_pkt3_nop(struct radeon_cs_parser *p)
+ * This is an R600-specific function for parsing VLINE packets.
+ * Real work is done by r600_cs_common_vline_parse function.
+ * Here we just set up ASIC-specific register table and call
+ * the common implementation function.
+ */
+static int r600_cs_packet_parse_vline(struct radeon_cs_parser *p)
{
- struct radeon_cs_packet p3reloc;
- int r;
+ static uint32_t vline_start_end[2] = {AVIVO_D1MODE_VLINE_START_END,
+ AVIVO_D2MODE_VLINE_START_END};
+ static uint32_t vline_status[2] = {AVIVO_D1MODE_VLINE_STATUS,
+ AVIVO_D2MODE_VLINE_STATUS};
- r = r600_cs_packet_parse(p, &p3reloc, p->idx);
- if (r) {
- return 0;
- }
- if (p3reloc.type != PACKET_TYPE3 || p3reloc.opcode != PACKET3_NOP) {
- return 0;
- }
- return 1;
+ return r600_cs_common_vline_parse(p, vline_start_end, vline_status);
}
/**
- * r600_cs_packet_next_vline() - parse userspace VLINE packet
+ * r600_cs_common_vline_parse() - common vline parser
* @parser: parser structure holding parsing context.
+ * @vline_start_end: table of vline_start_end registers
+ * @vline_status: table of vline_status registers
*
* Userspace sends a special sequence for VLINE waits.
* PACKET0 - VLINE_START_END + value
@@ -1013,9 +811,16 @@ static int r600_cs_packet_next_is_pkt3_nop(struct radeon_cs_parser *p)
* This function parses this and relocates the VLINE START END
* and WAIT_REG_MEM packets to the correct crtc.
* It also detects a switched off crtc and nulls out the
- * wait in that case.
+ * wait in that case. This function is common for all ASICs that
+ * are R600 and newer. The parsing algorithm is the same, and only
+ * differs in which registers are used.
+ *
+ * Caller is the ASIC-specific function which passes the parser
+ * context and ASIC-specific register table
*/
-static int r600_cs_packet_parse_vline(struct radeon_cs_parser *p)
+int r600_cs_common_vline_parse(struct radeon_cs_parser *p,
+ uint32_t *vline_start_end,
+ uint32_t *vline_status)
{
struct drm_mode_object *obj;
struct drm_crtc *crtc;
@@ -1029,12 +834,12 @@ static int r600_cs_packet_parse_vline(struct radeon_cs_parser *p)
ib = p->ib.ptr;
/* parse the WAIT_REG_MEM */
- r = r600_cs_packet_parse(p, &wait_reg_mem, p->idx);
+ r = radeon_cs_packet_parse(p, &wait_reg_mem, p->idx);
if (r)
return r;
/* check its a WAIT_REG_MEM */
- if (wait_reg_mem.type != PACKET_TYPE3 ||
+ if (wait_reg_mem.type != RADEON_PACKET_TYPE3 ||
wait_reg_mem.opcode != PACKET3_WAIT_REG_MEM) {
DRM_ERROR("vline wait missing WAIT_REG_MEM segment\n");
return -EINVAL;
@@ -1043,7 +848,12 @@ static int r600_cs_packet_parse_vline(struct radeon_cs_parser *p)
wait_reg_mem_info = radeon_get_ib_value(p, wait_reg_mem.idx + 1);
/* bit 4 is reg (0) or mem (1) */
if (wait_reg_mem_info & 0x10) {
- DRM_ERROR("vline WAIT_REG_MEM waiting on MEM rather than REG\n");
+ DRM_ERROR("vline WAIT_REG_MEM waiting on MEM instead of REG\n");
+ return -EINVAL;
+ }
+ /* bit 8 is me (0) or pfp (1) */
+ if (wait_reg_mem_info & 0x100) {
+ DRM_ERROR("vline WAIT_REG_MEM waiting on PFP instead of ME\n");
return -EINVAL;
}
/* waiting for value to be equal */
@@ -1051,18 +861,18 @@ static int r600_cs_packet_parse_vline(struct radeon_cs_parser *p)
DRM_ERROR("vline WAIT_REG_MEM function not equal\n");
return -EINVAL;
}
- if ((radeon_get_ib_value(p, wait_reg_mem.idx + 2) << 2) != AVIVO_D1MODE_VLINE_STATUS) {
+ if ((radeon_get_ib_value(p, wait_reg_mem.idx + 2) << 2) != vline_status[0]) {
DRM_ERROR("vline WAIT_REG_MEM bad reg\n");
return -EINVAL;
}
- if (radeon_get_ib_value(p, wait_reg_mem.idx + 5) != AVIVO_D1MODE_VLINE_STAT) {
+ if (radeon_get_ib_value(p, wait_reg_mem.idx + 5) != RADEON_VLINE_STAT) {
DRM_ERROR("vline WAIT_REG_MEM bad bit mask\n");
return -EINVAL;
}
/* jump over the NOP */
- r = r600_cs_packet_parse(p, &p3reloc, p->idx + wait_reg_mem.count + 2);
+ r = radeon_cs_packet_parse(p, &p3reloc, p->idx + wait_reg_mem.count + 2);
if (r)
return r;
@@ -1072,7 +882,7 @@ static int r600_cs_packet_parse_vline(struct radeon_cs_parser *p)
header = radeon_get_ib_value(p, h_idx);
crtc_id = radeon_get_ib_value(p, h_idx + 2 + 7 + 1);
- reg = CP_PACKET0_GET_REG(header);
+ reg = R600_CP_PACKET0_GET_REG(header);
obj = drm_mode_object_find(p->rdev->ddev, crtc_id, DRM_MODE_OBJECT_CRTC);
if (!obj) {
@@ -1084,7 +894,7 @@ static int r600_cs_packet_parse_vline(struct radeon_cs_parser *p)
crtc_id = radeon_crtc->crtc_id;
if (!crtc->enabled) {
- /* if the CRTC isn't enabled - we need to nop out the WAIT_REG_MEM */
+ /* CRTC isn't enabled - we need to nop out the WAIT_REG_MEM */
ib[h_idx + 2] = PACKET2(0);
ib[h_idx + 3] = PACKET2(0);
ib[h_idx + 4] = PACKET2(0);
@@ -1092,20 +902,15 @@ static int r600_cs_packet_parse_vline(struct radeon_cs_parser *p)
ib[h_idx + 6] = PACKET2(0);
ib[h_idx + 7] = PACKET2(0);
ib[h_idx + 8] = PACKET2(0);
- } else if (crtc_id == 1) {
- switch (reg) {
- case AVIVO_D1MODE_VLINE_START_END:
- header &= ~R600_CP_PACKET0_REG_MASK;
- header |= AVIVO_D2MODE_VLINE_START_END >> 2;
- break;
- default:
- DRM_ERROR("unknown crtc reloc\n");
- return -EINVAL;
- }
+ } else if (reg == vline_start_end[0]) {
+ header &= ~R600_CP_PACKET0_REG_MASK;
+ header |= vline_start_end[crtc_id] >> 2;
ib[h_idx] = header;
- ib[h_idx + 4] = AVIVO_D2MODE_VLINE_STATUS >> 2;
+ ib[h_idx + 4] = vline_status[crtc_id] >> 2;
+ } else {
+ DRM_ERROR("unknown crtc reloc\n");
+ return -EINVAL;
}
-
return 0;
}
@@ -1211,8 +1016,8 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
break;
case R_028010_DB_DEPTH_INFO:
if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS) &&
- r600_cs_packet_next_is_pkt3_nop(p)) {
- r = r600_cs_packet_next_reloc(p, &reloc);
+ radeon_cs_packet_next_is_pkt3_nop(p)) {
+ r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm);
if (r) {
dev_warn(p->dev, "bad SET_CONTEXT_REG "
"0x%04X\n", reg);
@@ -1254,7 +1059,7 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
case VGT_STRMOUT_BUFFER_BASE_1:
case VGT_STRMOUT_BUFFER_BASE_2:
case VGT_STRMOUT_BUFFER_BASE_3:
- r = r600_cs_packet_next_reloc(p, &reloc);
+ r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm);
if (r) {
dev_warn(p->dev, "bad SET_CONTEXT_REG "
"0x%04X\n", reg);
@@ -1277,7 +1082,7 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
track->streamout_dirty = true;
break;
case CP_COHER_BASE:
- r = r600_cs_packet_next_reloc(p, &reloc);
+ r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm);
if (r) {
dev_warn(p->dev, "missing reloc for CP_COHER_BASE "
"0x%04X\n", reg);
@@ -1312,8 +1117,8 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
case R_0280B8_CB_COLOR6_INFO:
case R_0280BC_CB_COLOR7_INFO:
if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS) &&
- r600_cs_packet_next_is_pkt3_nop(p)) {
- r = r600_cs_packet_next_reloc(p, &reloc);
+ radeon_cs_packet_next_is_pkt3_nop(p)) {
+ r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm);
if (r) {
dev_err(p->dev, "bad SET_CONTEXT_REG 0x%04X\n", reg);
return -EINVAL;
@@ -1376,7 +1181,7 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
case R_0280F8_CB_COLOR6_FRAG:
case R_0280FC_CB_COLOR7_FRAG:
tmp = (reg - R_0280E0_CB_COLOR0_FRAG) / 4;
- if (!r600_cs_packet_next_is_pkt3_nop(p)) {
+ if (!radeon_cs_packet_next_is_pkt3_nop(p)) {
if (!track->cb_color_base_last[tmp]) {
dev_err(p->dev, "Broken old userspace ? no cb_color0_base supplied before trying to write 0x%08X\n", reg);
return -EINVAL;
@@ -1385,7 +1190,7 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
track->cb_color_frag_offset[tmp] = track->cb_color_bo_offset[tmp];
ib[idx] = track->cb_color_base_last[tmp];
} else {
- r = r600_cs_packet_next_reloc(p, &reloc);
+ r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm);
if (r) {
dev_err(p->dev, "bad SET_CONTEXT_REG 0x%04X\n", reg);
return -EINVAL;
@@ -1407,7 +1212,7 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
case R_0280D8_CB_COLOR6_TILE:
case R_0280DC_CB_COLOR7_TILE:
tmp = (reg - R_0280C0_CB_COLOR0_TILE) / 4;
- if (!r600_cs_packet_next_is_pkt3_nop(p)) {
+ if (!radeon_cs_packet_next_is_pkt3_nop(p)) {
if (!track->cb_color_base_last[tmp]) {
dev_err(p->dev, "Broken old userspace ? no cb_color0_base supplied before trying to write 0x%08X\n", reg);
return -EINVAL;
@@ -1416,7 +1221,7 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
track->cb_color_tile_offset[tmp] = track->cb_color_bo_offset[tmp];
ib[idx] = track->cb_color_base_last[tmp];
} else {
- r = r600_cs_packet_next_reloc(p, &reloc);
+ r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm);
if (r) {
dev_err(p->dev, "bad SET_CONTEXT_REG 0x%04X\n", reg);
return -EINVAL;
@@ -1451,7 +1256,7 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
case CB_COLOR5_BASE:
case CB_COLOR6_BASE:
case CB_COLOR7_BASE:
- r = r600_cs_packet_next_reloc(p, &reloc);
+ r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm);
if (r) {
dev_warn(p->dev, "bad SET_CONTEXT_REG "
"0x%04X\n", reg);
@@ -1466,7 +1271,7 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
track->cb_dirty = true;
break;
case DB_DEPTH_BASE:
- r = r600_cs_packet_next_reloc(p, &reloc);
+ r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm);
if (r) {
dev_warn(p->dev, "bad SET_CONTEXT_REG "
"0x%04X\n", reg);
@@ -1479,7 +1284,7 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
track->db_dirty = true;
break;
case DB_HTILE_DATA_BASE:
- r = r600_cs_packet_next_reloc(p, &reloc);
+ r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm);
if (r) {
dev_warn(p->dev, "bad SET_CONTEXT_REG "
"0x%04X\n", reg);
@@ -1492,6 +1297,8 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
break;
case DB_HTILE_SURFACE:
track->htile_surface = radeon_get_ib_value(p, idx);
+ /* force 8x8 htile width and height */
+ ib[idx] |= 3;
track->db_dirty = true;
break;
case SQ_PGM_START_FS:
@@ -1547,7 +1354,7 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
case SQ_ALU_CONST_CACHE_VS_13:
case SQ_ALU_CONST_CACHE_VS_14:
case SQ_ALU_CONST_CACHE_VS_15:
- r = r600_cs_packet_next_reloc(p, &reloc);
+ r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm);
if (r) {
dev_warn(p->dev, "bad SET_CONTEXT_REG "
"0x%04X\n", reg);
@@ -1556,7 +1363,7 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
break;
case SX_MEMORY_EXPORT_BASE:
- r = r600_cs_packet_next_reloc(p, &reloc);
+ r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm);
if (r) {
dev_warn(p->dev, "bad SET_CONFIG_REG "
"0x%04X\n", reg);
@@ -1842,7 +1649,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p,
return -EINVAL;
}
- r = r600_cs_packet_next_reloc(p, &reloc);
+ r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm);
if (r) {
DRM_ERROR("bad SET PREDICATION\n");
return -EINVAL;
@@ -1883,7 +1690,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p,
DRM_ERROR("bad DRAW_INDEX\n");
return -EINVAL;
}
- r = r600_cs_packet_next_reloc(p, &reloc);
+ r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm);
if (r) {
DRM_ERROR("bad DRAW_INDEX\n");
return -EINVAL;
@@ -1935,7 +1742,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p,
if (idx_value & 0x10) {
uint64_t offset;
- r = r600_cs_packet_next_reloc(p, &reloc);
+ r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm);
if (r) {
DRM_ERROR("bad WAIT_REG_MEM\n");
return -EINVAL;
@@ -1947,8 +1754,83 @@ static int r600_packet3_check(struct radeon_cs_parser *p,
ib[idx+1] = (ib[idx+1] & 0x3) | (offset & 0xfffffff0);
ib[idx+2] = upper_32_bits(offset) & 0xff;
+ } else if (idx_value & 0x100) {
+ DRM_ERROR("cannot use PFP on REG wait\n");
+ return -EINVAL;
}
break;
+ case PACKET3_CP_DMA:
+ {
+ u32 command, size;
+ u64 offset, tmp;
+ if (pkt->count != 4) {
+ DRM_ERROR("bad CP DMA\n");
+ return -EINVAL;
+ }
+ command = radeon_get_ib_value(p, idx+4);
+ size = command & 0x1fffff;
+ if (command & PACKET3_CP_DMA_CMD_SAS) {
+ /* src address space is register */
+ DRM_ERROR("CP DMA SAS not supported\n");
+ return -EINVAL;
+ } else {
+ if (command & PACKET3_CP_DMA_CMD_SAIC) {
+ DRM_ERROR("CP DMA SAIC only supported for registers\n");
+ return -EINVAL;
+ }
+ /* src address space is memory */
+ r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm);
+ if (r) {
+ DRM_ERROR("bad CP DMA SRC\n");
+ return -EINVAL;
+ }
+
+ tmp = radeon_get_ib_value(p, idx) +
+ ((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32);
+
+ offset = reloc->lobj.gpu_offset + tmp;
+
+ if ((tmp + size) > radeon_bo_size(reloc->robj)) {
+ dev_warn(p->dev, "CP DMA src buffer too small (%llu %lu)\n",
+ tmp + size, radeon_bo_size(reloc->robj));
+ return -EINVAL;
+ }
+
+ ib[idx] = offset;
+ ib[idx+1] = (ib[idx+1] & 0xffffff00) | (upper_32_bits(offset) & 0xff);
+ }
+ if (command & PACKET3_CP_DMA_CMD_DAS) {
+ /* dst address space is register */
+ DRM_ERROR("CP DMA DAS not supported\n");
+ return -EINVAL;
+ } else {
+ /* dst address space is memory */
+ if (command & PACKET3_CP_DMA_CMD_DAIC) {
+ DRM_ERROR("CP DMA DAIC only supported for registers\n");
+ return -EINVAL;
+ }
+ r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm);
+ if (r) {
+ DRM_ERROR("bad CP DMA DST\n");
+ return -EINVAL;
+ }
+
+ tmp = radeon_get_ib_value(p, idx+2) +
+ ((u64)(radeon_get_ib_value(p, idx+3) & 0xff) << 32);
+
+ offset = reloc->lobj.gpu_offset + tmp;
+
+ if ((tmp + size) > radeon_bo_size(reloc->robj)) {
+ dev_warn(p->dev, "CP DMA dst buffer too small (%llu %lu)\n",
+ tmp + size, radeon_bo_size(reloc->robj));
+ return -EINVAL;
+ }
+
+ ib[idx+2] = offset;
+ ib[idx+3] = upper_32_bits(offset) & 0xff;
+ }
+ break;
+ }
case PACKET3_SURFACE_SYNC:
if (pkt->count != 3) {
DRM_ERROR("bad SURFACE_SYNC\n");
@@ -1957,7 +1839,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p,
/* 0xffffffff/0x0 is flush all cache flag */
if (radeon_get_ib_value(p, idx + 1) != 0xffffffff ||
radeon_get_ib_value(p, idx + 2) != 0) {
- r = r600_cs_packet_next_reloc(p, &reloc);
+ r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm);
if (r) {
DRM_ERROR("bad SURFACE_SYNC\n");
return -EINVAL;
@@ -1973,7 +1855,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p,
if (pkt->count) {
uint64_t offset;
- r = r600_cs_packet_next_reloc(p, &reloc);
+ r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm);
if (r) {
DRM_ERROR("bad EVENT_WRITE\n");
return -EINVAL;
@@ -1994,7 +1876,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p,
DRM_ERROR("bad EVENT_WRITE_EOP\n");
return -EINVAL;
}
- r = r600_cs_packet_next_reloc(p, &reloc);
+ r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm);
if (r) {
DRM_ERROR("bad EVENT_WRITE\n");
return -EINVAL;
@@ -2060,7 +1942,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p,
switch (G__SQ_VTX_CONSTANT_TYPE(radeon_get_ib_value(p, idx+(i*7)+6+1))) {
case SQ_TEX_VTX_VALID_TEXTURE:
/* tex base */
- r = r600_cs_packet_next_reloc(p, &reloc);
+ r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm);
if (r) {
DRM_ERROR("bad SET_RESOURCE\n");
return -EINVAL;
@@ -2074,7 +1956,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p,
}
texture = reloc->robj;
/* tex mip base */
- r = r600_cs_packet_next_reloc(p, &reloc);
+ r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm);
if (r) {
DRM_ERROR("bad SET_RESOURCE\n");
return -EINVAL;
@@ -2095,7 +1977,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p,
{
uint64_t offset64;
/* vtx base */
- r = r600_cs_packet_next_reloc(p, &reloc);
+ r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm);
if (r) {
DRM_ERROR("bad SET_RESOURCE\n");
return -EINVAL;
@@ -2196,7 +2078,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p,
{
u64 offset;
- r = r600_cs_packet_next_reloc(p, &reloc);
+ r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm);
if (r) {
DRM_ERROR("bad STRMOUT_BASE_UPDATE reloc\n");
return -EINVAL;
@@ -2240,7 +2122,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p,
/* Updating memory at DST_ADDRESS. */
if (idx_value & 0x1) {
u64 offset;
- r = r600_cs_packet_next_reloc(p, &reloc);
+ r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm);
if (r) {
DRM_ERROR("bad STRMOUT_BUFFER_UPDATE (missing dst reloc)\n");
return -EINVAL;
@@ -2259,7 +2141,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p,
/* Reading data from SRC_ADDRESS. */
if (((idx_value >> 1) & 0x3) == 2) {
u64 offset;
- r = r600_cs_packet_next_reloc(p, &reloc);
+ r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm);
if (r) {
DRM_ERROR("bad STRMOUT_BUFFER_UPDATE (missing src reloc)\n");
return -EINVAL;
@@ -2276,6 +2158,35 @@ static int r600_packet3_check(struct radeon_cs_parser *p,
ib[idx+4] = upper_32_bits(offset) & 0xff;
}
break;
+ case PACKET3_MEM_WRITE:
+ {
+ u64 offset;
+
+ if (pkt->count != 3) {
+ DRM_ERROR("bad MEM_WRITE (invalid count)\n");
+ return -EINVAL;
+ }
+ r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm);
+ if (r) {
+ DRM_ERROR("bad MEM_WRITE (missing reloc)\n");
+ return -EINVAL;
+ }
+ offset = radeon_get_ib_value(p, idx+0);
+ offset += ((u64)(radeon_get_ib_value(p, idx+1) & 0xff)) << 32UL;
+ if (offset & 0x7) {
+ DRM_ERROR("bad MEM_WRITE (address not qwords aligned)\n");
+ return -EINVAL;
+ }
+ if ((offset + 8) > radeon_bo_size(reloc->robj)) {
+ DRM_ERROR("bad MEM_WRITE bo too small: 0x%llx, 0x%lx\n",
+ offset + 8, radeon_bo_size(reloc->robj));
+ return -EINVAL;
+ }
+ offset += reloc->lobj.gpu_offset;
+ ib[idx+0] = offset;
+ ib[idx+1] = upper_32_bits(offset) & 0xff;
+ break;
+ }
case PACKET3_COPY_DW:
if (pkt->count != 4) {
DRM_ERROR("bad COPY_DW (invalid count)\n");
@@ -2284,7 +2195,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p,
if (idx_value & 0x1) {
u64 offset;
/* SRC is memory. */
- r = r600_cs_packet_next_reloc(p, &reloc);
+ r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm);
if (r) {
DRM_ERROR("bad COPY_DW (missing src reloc)\n");
return -EINVAL;
@@ -2308,7 +2219,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p,
if (idx_value & 0x2) {
u64 offset;
/* DST is memory. */
- r = r600_cs_packet_next_reloc(p, &reloc);
+ r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm);
if (r) {
DRM_ERROR("bad COPY_DW (missing dst reloc)\n");
return -EINVAL;
@@ -2363,7 +2274,7 @@ int r600_cs_parse(struct radeon_cs_parser *p)
p->track = track;
}
do {
- r = r600_cs_packet_parse(p, &pkt, p->idx);
+ r = radeon_cs_packet_parse(p, &pkt, p->idx);
if (r) {
kfree(p->track);
p->track = NULL;
@@ -2371,12 +2282,12 @@ int r600_cs_parse(struct radeon_cs_parser *p)
}
p->idx += pkt.count + 2;
switch (pkt.type) {
- case PACKET_TYPE0:
+ case RADEON_PACKET_TYPE0:
r = r600_cs_parse_packet0(p, &pkt);
break;
- case PACKET_TYPE2:
+ case RADEON_PACKET_TYPE2:
break;
- case PACKET_TYPE3:
+ case RADEON_PACKET_TYPE3:
r = r600_packet3_check(p, &pkt);
break;
default:
@@ -2402,17 +2313,7 @@ int r600_cs_parse(struct radeon_cs_parser *p)
return 0;
}
-static int r600_cs_parser_relocs_legacy(struct radeon_cs_parser *p)
-{
- if (p->chunk_relocs_idx == -1) {
- return 0;
- }
- p->relocs = kzalloc(sizeof(struct radeon_cs_reloc), GFP_KERNEL);
- if (p->relocs == NULL) {
- return -ENOMEM;
- }
- return 0;
-}
+#ifdef CONFIG_DRM_RADEON_UMS
/**
* cs_parser_fini() - clean parser states
@@ -2429,13 +2330,27 @@ static void r600_cs_parser_fini(struct radeon_cs_parser *parser, int error)
kfree(parser->relocs);
for (i = 0; i < parser->nchunks; i++) {
kfree(parser->chunks[i].kdata);
- kfree(parser->chunks[i].kpage[0]);
- kfree(parser->chunks[i].kpage[1]);
+ if (parser->rdev && (parser->rdev->flags & RADEON_IS_AGP)) {
+ kfree(parser->chunks[i].kpage[0]);
+ kfree(parser->chunks[i].kpage[1]);
+ }
}
kfree(parser->chunks);
kfree(parser->chunks_array);
}
+/* Legacy CS: allocate one zeroed reloc slot in p->relocs when relocs are used. */
+static int r600_cs_parser_relocs_legacy(struct radeon_cs_parser *p)
+{
+	/* no relocation chunk was submitted, so there is nothing to allocate */
+	if (p->chunk_relocs_idx == -1)
+		return 0;
+
+	p->relocs = kzalloc(sizeof(struct radeon_cs_reloc), GFP_KERNEL);
+
+	return p->relocs ? 0 : -ENOMEM;
+}
+
int r600_cs_legacy(struct drm_device *dev, void *data, struct drm_file *filp,
unsigned family, u32 *ib, int *l)
{
@@ -2494,5 +2409,213 @@ int r600_cs_legacy(struct drm_device *dev, void *data, struct drm_file *filp,
void r600_cs_legacy_init(void)
{
- r600_cs_packet_next_reloc = &r600_cs_packet_next_reloc_nomm;
+ r600_nomm = 1;
+}
+
+#endif
+
+/*
+ * DMA
+ */
+/**
+ * r600_dma_cs_next_reloc() - fetch the next DMA relocation
+ * @p: parser structure holding parsing context.
+ * @cs_reloc: filled with the next reloc; set to NULL on failure
+ *
+ * DMA relocs are consumed strictly in order: this hands out
+ * p->relocs_ptr[p->dma_reloc_idx] and advances p->dma_reloc_idx.
+ * Returns 0 on success, -EINVAL when there is no relocation chunk
+ * or when all p->nrelocs entries have already been handed out.
+ **/
+int r600_dma_cs_next_reloc(struct radeon_cs_parser *p,
+			   struct radeon_cs_reloc **cs_reloc)
+{
+	unsigned idx;
+
+	*cs_reloc = NULL;
+	if (p->chunk_relocs_idx == -1) {
+		DRM_ERROR("No relocation chunk !\n");
+		return -EINVAL;
+	}
+	idx = p->dma_reloc_idx;
+	if (idx >= p->nrelocs) {
+		DRM_ERROR("Relocs at %d after relocations chunk end %d !\n",
+			  idx, p->nrelocs);
+		return -EINVAL;
+	}
+	*cs_reloc = p->relocs_ptr[idx];
+	p->dma_reloc_idx++;
+	return 0;
+}
+
+#define GET_DMA_CMD(h) (((h) & 0xf0000000) >> 28) /* header bits 31:28: packet command */
+#define GET_DMA_COUNT(h) ((h) & 0x0000ffff) /* header bits 15:0: count field */
+#define GET_DMA_T(h) (((h) & 0x00800000) >> 23) /* header bit 23: tiled flag */
+
+/**
+ * r600_dma_cs_parse() - parse the DMA IB
+ * @p: parser structure holding parsing context.
+ *
+ * Walks every DMA packet of the IB chunk (R6xx-R7xx), adds the GPU
+ * address of each reloc to the addresses in the IB and checks that
+ * count dwords at the userspace offset fit in the relocated BO.
+ * Returns 0 for success and -EINVAL on failure.
+ **/
+int r600_dma_cs_parse(struct radeon_cs_parser *p)
+{
+	struct radeon_cs_chunk *ib_chunk = &p->chunks[p->chunk_ib_idx];
+	struct radeon_cs_reloc *src_reloc, *dst_reloc;
+	u32 header, cmd, count, tiled;
+	volatile u32 *ib = p->ib.ptr;
+	u32 idx, idx_value;
+	u64 src_offset, dst_offset;
+	int r;
+
+	do {
+		if (p->idx >= ib_chunk->length_dw) {
+			DRM_ERROR("Can not parse packet at %d after CS end %d !\n",
+				  p->idx, ib_chunk->length_dw);
+			return -EINVAL;
+		}
+		idx = p->idx;
+		header = radeon_get_ib_value(p, idx);
+		cmd = GET_DMA_CMD(header);
+		count = GET_DMA_COUNT(header);
+		tiled = GET_DMA_T(header);
+		/* each case below advances p->idx past the packet it handled */
+		switch (cmd) {
+		case DMA_PACKET_WRITE:
+			r = r600_dma_cs_next_reloc(p, &dst_reloc);
+			if (r) {
+				DRM_ERROR("bad DMA_PACKET_WRITE\n");
+				return -EINVAL;
+			}
+			if (tiled) {
+				dst_offset = radeon_get_ib_value(p, idx+1);
+				dst_offset <<= 8;
+				/* tiled address is stored shifted right by 8 bits */
+				ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
+				p->idx += count + 5;
+			} else {
+				dst_offset = radeon_get_ib_value(p, idx+1);
+				dst_offset |= ((u64)(radeon_get_ib_value(p, idx+2) & 0xff)) << 32;
+				/* linear address: low dword plus 8 high bits in idx+2 */
+				ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
+				ib[idx+2] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
+				p->idx += count + 3;
+			}
+			if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
+				dev_warn(p->dev, "DMA write buffer too small (%llu %lu)\n",
+					 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
+				return -EINVAL;
+			}
+			break;
+		case DMA_PACKET_COPY:
+			r = r600_dma_cs_next_reloc(p, &src_reloc);
+			if (r) {
+				DRM_ERROR("bad DMA_PACKET_COPY\n");
+				return -EINVAL;
+			}
+			r = r600_dma_cs_next_reloc(p, &dst_reloc);
+			if (r) {
+				DRM_ERROR("bad DMA_PACKET_COPY\n");
+				return -EINVAL;
+			}
+			if (tiled) {
+				idx_value = radeon_get_ib_value(p, idx + 2);
+				/* detile bit */
+				if (idx_value & (1U << 31)) {
+					/* tiled src, linear dst */
+					src_offset = radeon_get_ib_value(p, idx+1);
+					src_offset <<= 8;
+					ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8);
+
+					dst_offset = radeon_get_ib_value(p, idx+5);
+					dst_offset |= ((u64)(radeon_get_ib_value(p, idx+6) & 0xff)) << 32;
+					ib[idx+5] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
+					ib[idx+6] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
+				} else {
+					/* linear src, tiled dst */
+					src_offset = radeon_get_ib_value(p, idx+5);
+					src_offset |= ((u64)(radeon_get_ib_value(p, idx+6) & 0xff)) << 32;
+					ib[idx+5] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
+					ib[idx+6] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+
+					dst_offset = radeon_get_ib_value(p, idx+1);
+					dst_offset <<= 8;
+					ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
+				}
+				p->idx += 7;
+			} else {
+				if (p->family >= CHIP_RV770) {
+					/* layout: dst lo, src lo, dst hi, src hi at idx+1..idx+4 */
+					src_offset = radeon_get_ib_value(p, idx+2);
+					src_offset |= ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32;
+					dst_offset = radeon_get_ib_value(p, idx+1);
+					dst_offset |= ((u64)(radeon_get_ib_value(p, idx+3) & 0xff)) << 32;
+					ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
+					ib[idx+2] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
+					ib[idx+3] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
+					ib[idx+4] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+					p->idx += 5;
+				} else {
+					/* idx+3 packs src hi in bits 7:0 and dst hi in bits 23:16 */
+					src_offset = radeon_get_ib_value(p, idx+2);
+					src_offset |= ((u64)(radeon_get_ib_value(p, idx+3) & 0xff)) << 32;
+					dst_offset = radeon_get_ib_value(p, idx+1);
+					dst_offset |= ((u64)(radeon_get_ib_value(p, idx+3) & 0xff0000)) << 16;
+					ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
+					ib[idx+2] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
+					ib[idx+3] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+					ib[idx+3] += (upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff) << 16;
+					p->idx += 4;
+				}
+			}
+			if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
+				dev_warn(p->dev, "DMA copy src buffer too small (%llu %lu)\n",
+					 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
+				return -EINVAL;
+			}
+			if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
+				dev_warn(p->dev, "DMA copy dst buffer too small (%llu %lu)\n",
+					 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
+				return -EINVAL;
+			}
+			break;
+		case DMA_PACKET_CONSTANT_FILL:
+			if (p->family < CHIP_RV770) {
+				DRM_ERROR("Constant Fill is 7xx only !\n");
+				return -EINVAL;
+			}
+			r = r600_dma_cs_next_reloc(p, &dst_reloc);
+			if (r) {
+				DRM_ERROR("bad DMA_PACKET_CONSTANT_FILL\n");
+				return -EINVAL;
+			}
+			dst_offset = radeon_get_ib_value(p, idx+1);
+			dst_offset |= ((u64)(radeon_get_ib_value(p, idx+3) & 0x00ff0000)) << 16;
+			if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
+				dev_warn(p->dev, "DMA constant fill buffer too small (%llu %lu)\n",
+					 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
+				return -EINVAL;
+			}
+			ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
+			ib[idx+3] += (upper_32_bits(dst_reloc->lobj.gpu_offset) << 16) & 0x00ff0000;
+			p->idx += 4;
+			break;
+		case DMA_PACKET_NOP:
+			p->idx += 1;
+			break;
+		default:
+			DRM_ERROR("Unknown packet type %d at %d !\n", cmd, idx);
+			return -EINVAL;
+		}
+	} while (p->idx < ib_chunk->length_dw);
+#if 0
+	for (r = 0; r < p->ib.length_dw; r++) {
+		printk(KERN_INFO "%05d 0x%08X\n", r, p->ib.ptr[r]);
+		mdelay(1);
+	}
+#endif
+	return 0;
+}