summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDave Airlie <airlied@redhat.com>2012-04-03 16:23:41 +0100
committerDave Airlie <airlied@redhat.com>2012-04-12 09:42:54 +0100
commit6a7068b4ef17dfb9de3191321f1adc91fa1659ca (patch)
tree555cc6a57df84d95a24faafd796868101ee1943a
parent2099810f903caa1920f3ef6014fb7f36e4786490 (diff)
downloadlinux-6a7068b4ef17dfb9de3191321f1adc91fa1659ca.tar.bz2
drm/radeon/kms: attempt to avoid copying data twice on coherent cards. (v3)
On coherent systems (not-AGP) the IB should be in cached memory so should be just as fast, so we can avoid copying to temporary pages and just use it directly. provides minor speedups on rv530: gears ~1820->1860, ipers: 29.9->30.6, but always good to use less CPU if we can. v3: cleanup unneeded bits. Reviewed-by: Alex Deucher <alexander.deucher@amd.com> Signed-off-by: Dave Airlie <airlied@redhat.com>
-rw-r--r--drivers/gpu/drm/radeon/radeon_cs.c37
1 files changed, 24 insertions, 13 deletions
diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
index 5cac83278338..e7b0b5d51bc3 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -278,11 +278,16 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)
p->chunks[p->chunk_ib_idx].length_dw);
return -EINVAL;
}
- p->chunks[p->chunk_ib_idx].kpage[0] = kmalloc(PAGE_SIZE, GFP_KERNEL);
- p->chunks[p->chunk_ib_idx].kpage[1] = kmalloc(PAGE_SIZE, GFP_KERNEL);
- if (p->chunks[p->chunk_ib_idx].kpage[0] == NULL ||
- p->chunks[p->chunk_ib_idx].kpage[1] == NULL)
- return -ENOMEM;
+ if ((p->rdev->flags & RADEON_IS_AGP)) {
+ p->chunks[p->chunk_ib_idx].kpage[0] = kmalloc(PAGE_SIZE, GFP_KERNEL);
+ p->chunks[p->chunk_ib_idx].kpage[1] = kmalloc(PAGE_SIZE, GFP_KERNEL);
+ if (p->chunks[p->chunk_ib_idx].kpage[0] == NULL ||
+ p->chunks[p->chunk_ib_idx].kpage[1] == NULL) {
+ kfree(p->chunks[i].kpage[0]);
+ kfree(p->chunks[i].kpage[1]);
+ return -ENOMEM;
+ }
+ }
p->chunks[p->chunk_ib_idx].kpage_idx[0] = -1;
p->chunks[p->chunk_ib_idx].kpage_idx[1] = -1;
p->chunks[p->chunk_ib_idx].last_copied_page = -1;
@@ -323,8 +328,10 @@ static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error)
kfree(parser->relocs_ptr);
for (i = 0; i < parser->nchunks; i++) {
kfree(parser->chunks[i].kdata);
- kfree(parser->chunks[i].kpage[0]);
- kfree(parser->chunks[i].kpage[1]);
+ if ((parser->rdev->flags & RADEON_IS_AGP)) {
+ kfree(parser->chunks[i].kpage[0]);
+ kfree(parser->chunks[i].kpage[1]);
+ }
}
kfree(parser->chunks);
kfree(parser->chunks_array);
@@ -573,6 +580,7 @@ int radeon_cs_update_pages(struct radeon_cs_parser *p, int pg_idx)
struct radeon_cs_chunk *ibc = &p->chunks[p->chunk_ib_idx];
int i;
int size = PAGE_SIZE;
+ bool copy1 = (p->rdev->flags & RADEON_IS_AGP) ? false : true;
for (i = ibc->last_copied_page + 1; i < pg_idx; i++) {
if (DRM_COPY_FROM_USER(p->ib->ptr + (i * (PAGE_SIZE/4)),
@@ -583,14 +591,16 @@ int radeon_cs_update_pages(struct radeon_cs_parser *p, int pg_idx)
}
}
- new_page = ibc->kpage_idx[0] < ibc->kpage_idx[1] ? 0 : 1;
-
if (pg_idx == ibc->last_page_index) {
size = (ibc->length_dw * 4) % PAGE_SIZE;
- if (size == 0)
- size = PAGE_SIZE;
+ if (size == 0)
+ size = PAGE_SIZE;
}
+ new_page = ibc->kpage_idx[0] < ibc->kpage_idx[1] ? 0 : 1;
+ if (copy1)
+ ibc->kpage[new_page] = p->ib->ptr + (pg_idx * (PAGE_SIZE / 4));
+
if (DRM_COPY_FROM_USER(ibc->kpage[new_page],
ibc->user_ptr + (pg_idx * PAGE_SIZE),
size)) {
@@ -598,8 +608,9 @@ int radeon_cs_update_pages(struct radeon_cs_parser *p, int pg_idx)
return 0;
}
- /* copy to IB here */
- memcpy((void *)(p->ib->ptr+(pg_idx*(PAGE_SIZE/4))), ibc->kpage[new_page], size);
+ /* copy to IB for non single case */
+ if (!copy1)
+ memcpy((void *)(p->ib->ptr+(pg_idx*(PAGE_SIZE/4))), ibc->kpage[new_page], size);
ibc->last_copied_page = pg_idx;
ibc->kpage_idx[new_page] = pg_idx;