diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2015-08-01 12:47:04 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2015-08-01 12:47:04 -0700 |
commit | 3f6d9e0896b325c95e5155ee8e7bcb47443ad413 (patch) | |
tree | 1d81c52daca19cd4e1f08b37324eebf3a1e4e75d | |
parent | 3270c8eacc81f000e2f5f03e36f80d6617e2f5c4 (diff) | |
parent | 8c8fe97b2b8a216523e2faf1ccca66ddab634e3e (diff) | |
download | linux-3f6d9e0896b325c95e5155ee8e7bcb47443ad413.tar.bz2 |
Merge tag 'dmaengine-fix-4.2-rc5' of git://git.infradead.org/users/vkoul/slave-dma
Pull dmaengine fixes from Vinod Koul:
"We had a regression due to reuse of descriptor so we have reverted
that.
The rest are driver fixes:
- at_hdmac and at_xdmac for residue, trannfer width, and channel config
- pl330 final fix for dma fails and overflow issue
- xgene resouce map fix
- mv_xor big endian op fix"
* tag 'dmaengine-fix-4.2-rc5' of git://git.infradead.org/users/vkoul/slave-dma:
Revert "dmaengine: virt-dma: don't always free descriptor upon completion"
dmaengine: mv_xor: fix big endian operation in register mode
dmaengine: xgene-dma: Fix the resource map to handle overlapping
dmaengine: at_xdmac: fix transfer data width in at_xdmac_prep_slave_sg()
dmaengine: at_hdmac: fix residue computation
dmaengine: at_xdmac: fix bug about channel configuration
dmaengine: pl330: Really fix choppy sound because of wrong residue calculation
dmaengine: pl330: Fix overflow when reporting residue in memcpy
-rw-r--r-- | Documentation/devicetree/bindings/dma/apm-xgene-dma.txt | 2 | ||||
-rw-r--r-- | arch/arm64/boot/dts/apm/apm-storm.dtsi | 2 | ||||
-rw-r--r-- | drivers/dma/at_hdmac.c | 132 | ||||
-rw-r--r-- | drivers/dma/at_hdmac_regs.h | 3 | ||||
-rw-r--r-- | drivers/dma/at_xdmac.c | 26 | ||||
-rw-r--r-- | drivers/dma/mv_xor.c | 9 | ||||
-rw-r--r-- | drivers/dma/pl330.c | 3 | ||||
-rw-r--r-- | drivers/dma/virt-dma.c | 19 | ||||
-rw-r--r-- | drivers/dma/virt-dma.h | 13 | ||||
-rw-r--r-- | drivers/dma/xgene-dma.c | 3 |
10 files changed, 121 insertions, 91 deletions
diff --git a/Documentation/devicetree/bindings/dma/apm-xgene-dma.txt b/Documentation/devicetree/bindings/dma/apm-xgene-dma.txt index d3058768b23d..c53e0b08032f 100644 --- a/Documentation/devicetree/bindings/dma/apm-xgene-dma.txt +++ b/Documentation/devicetree/bindings/dma/apm-xgene-dma.txt @@ -35,7 +35,7 @@ Example: device_type = "dma"; reg = <0x0 0x1f270000 0x0 0x10000>, <0x0 0x1f200000 0x0 0x10000>, - <0x0 0x1b008000 0x0 0x2000>, + <0x0 0x1b000000 0x0 0x400000>, <0x0 0x1054a000 0x0 0x100>; interrupts = <0x0 0x82 0x4>, <0x0 0xb8 0x4>, diff --git a/arch/arm64/boot/dts/apm/apm-storm.dtsi b/arch/arm64/boot/dts/apm/apm-storm.dtsi index 0689c3fb56e3..58093edeea2e 100644 --- a/arch/arm64/boot/dts/apm/apm-storm.dtsi +++ b/arch/arm64/boot/dts/apm/apm-storm.dtsi @@ -823,7 +823,7 @@ device_type = "dma"; reg = <0x0 0x1f270000 0x0 0x10000>, <0x0 0x1f200000 0x0 0x10000>, - <0x0 0x1b008000 0x0 0x2000>, + <0x0 0x1b000000 0x0 0x400000>, <0x0 0x1054a000 0x0 0x100>; interrupts = <0x0 0x82 0x4>, <0x0 0xb8 0x4>, diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index 59892126d175..d3629b7482dd 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -48,6 +48,8 @@ BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) |\ BIT(DMA_SLAVE_BUSWIDTH_4_BYTES)) +#define ATC_MAX_DSCR_TRIALS 10 + /* * Initial number of descriptors to allocate for each channel. This could * be increased during dma usage. @@ -285,28 +287,19 @@ static struct at_desc *atc_get_desc_by_cookie(struct at_dma_chan *atchan, * * @current_len: the number of bytes left before reading CTRLA * @ctrla: the value of CTRLA - * @desc: the descriptor containing the transfer width */ -static inline int atc_calc_bytes_left(int current_len, u32 ctrla, - struct at_desc *desc) +static inline int atc_calc_bytes_left(int current_len, u32 ctrla) { - return current_len - ((ctrla & ATC_BTSIZE_MAX) << desc->tx_width); -} + u32 btsize = (ctrla & ATC_BTSIZE_MAX); + u32 src_width = ATC_REG_TO_SRC_WIDTH(ctrla); -/** - * atc_calc_bytes_left_from_reg - calculates the number of bytes left according - * to the current value of CTRLA. - * - * @current_len: the number of bytes left before reading CTRLA - * @atchan: the channel to read CTRLA for - * @desc: the descriptor containing the transfer width - */ -static inline int atc_calc_bytes_left_from_reg(int current_len, - struct at_dma_chan *atchan, struct at_desc *desc) -{ - u32 ctrla = channel_readl(atchan, CTRLA); - - return atc_calc_bytes_left(current_len, ctrla, desc); + /* + * According to the datasheet, when reading the Control A Register + * (ctrla), the Buffer Transfer Size (btsize) bitfield refers to the + * number of transfers completed on the Source Interface. + * So btsize is always a number of source width transfers. + */ + return current_len - (btsize << src_width); } /** @@ -320,7 +313,7 @@ static int atc_get_bytes_left(struct dma_chan *chan, dma_cookie_t cookie) struct at_desc *desc_first = atc_first_active(atchan); struct at_desc *desc; int ret; - u32 ctrla, dscr; + u32 ctrla, dscr, trials; /* * If the cookie doesn't match to the currently running transfer then @@ -346,15 +339,82 @@ static int atc_get_bytes_left(struct dma_chan *chan, dma_cookie_t cookie) * the channel's DSCR register and compare it against the value * of the hardware linked list structure of each child * descriptor. + * + * The CTRLA register provides us with the amount of data + * already read from the source for the current child + * descriptor. So we can compute a more accurate residue by also + * removing the number of bytes corresponding to this amount of + * data. + * + * However, the DSCR and CTRLA registers cannot be read both + * atomically. Hence a race condition may occur: the first read + * register may refer to one child descriptor whereas the second + * read may refer to a later child descriptor in the list + * because of the DMA transfer progression inbetween the two + * reads. + * + * One solution could have been to pause the DMA transfer, read + * the DSCR and CTRLA then resume the DMA transfer. Nonetheless, + * this approach presents some drawbacks: + * - If the DMA transfer is paused, RX overruns or TX underruns + * are more likey to occur depending on the system latency. + * Taking the USART driver as an example, it uses a cyclic DMA + * transfer to read data from the Receive Holding Register + * (RHR) to avoid RX overruns since the RHR is not protected + * by any FIFO on most Atmel SoCs. So pausing the DMA transfer + * to compute the residue would break the USART driver design. + * - The atc_pause() function masks interrupts but we'd rather + * avoid to do so for system latency purpose. + * + * Then we'd rather use another solution: the DSCR is read a + * first time, the CTRLA is read in turn, next the DSCR is read + * a second time. If the two consecutive read values of the DSCR + * are the same then we assume both refers to the very same + * child descriptor as well as the CTRLA value read inbetween + * does. For cyclic tranfers, the assumption is that a full loop + * is "not so fast". + * If the two DSCR values are different, we read again the CTRLA + * then the DSCR till two consecutive read values from DSCR are + * equal or till the maxium trials is reach. + * This algorithm is very unlikely not to find a stable value for + * DSCR. */ - ctrla = channel_readl(atchan, CTRLA); - rmb(); /* ensure CTRLA is read before DSCR */ dscr = channel_readl(atchan, DSCR); + rmb(); /* ensure DSCR is read before CTRLA */ + ctrla = channel_readl(atchan, CTRLA); + for (trials = 0; trials < ATC_MAX_DSCR_TRIALS; ++trials) { + u32 new_dscr; + + rmb(); /* ensure DSCR is read after CTRLA */ + new_dscr = channel_readl(atchan, DSCR); + + /* + * If the DSCR register value has not changed inside the + * DMA controller since the previous read, we assume + * that both the dscr and ctrla values refers to the + * very same descriptor. + */ + if (likely(new_dscr == dscr)) + break; + + /* + * DSCR has changed inside the DMA controller, so the + * previouly read value of CTRLA may refer to an already + * processed descriptor hence could be outdated. + * We need to update ctrla to match the current + * descriptor. + */ + dscr = new_dscr; + rmb(); /* ensure DSCR is read before CTRLA */ + ctrla = channel_readl(atchan, CTRLA); + } + if (unlikely(trials >= ATC_MAX_DSCR_TRIALS)) + return -ETIMEDOUT; /* for the first descriptor we can be more accurate */ if (desc_first->lli.dscr == dscr) - return atc_calc_bytes_left(ret, ctrla, desc_first); + return atc_calc_bytes_left(ret, ctrla); ret -= desc_first->len; list_for_each_entry(desc, &desc_first->tx_list, desc_node) { @@ -365,16 +425,14 @@ static int atc_get_bytes_left(struct dma_chan *chan, dma_cookie_t cookie) } /* - * For the last descriptor in the chain we can calculate + * For the current descriptor in the chain we can calculate * the remaining bytes using the channel's register. - * Note that the transfer width of the first and last - * descriptor may differ. */ - if (!desc->lli.dscr) - ret = atc_calc_bytes_left_from_reg(ret, atchan, desc); + ret = atc_calc_bytes_left(ret, ctrla); } else { /* single transfer */ - ret = atc_calc_bytes_left_from_reg(ret, atchan, desc_first); + ctrla = channel_readl(atchan, CTRLA); + ret = atc_calc_bytes_left(ret, ctrla); } return ret; @@ -726,7 +784,6 @@ atc_prep_dma_interleaved(struct dma_chan *chan, desc->txd.cookie = -EBUSY; desc->total_len = desc->len = len; - desc->tx_width = dwidth; /* set end-of-link to the last link descriptor of list*/ set_desc_eol(desc); @@ -804,10 +861,6 @@ atc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, first->txd.cookie = -EBUSY; first->total_len = len; - /* set transfer width for the calculation of the residue */ - first->tx_width = src_width; - prev->tx_width = src_width; - /* set end-of-link to the last link descriptor of list*/ set_desc_eol(desc); @@ -956,10 +1009,6 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, first->txd.cookie = -EBUSY; first->total_len = total_len; - /* set transfer width for the calculation of the residue */ - first->tx_width = reg_width; - prev->tx_width = reg_width; - /* first link descriptor of list is responsible of flags */ first->txd.flags = flags; /* client is in control of this ack */ @@ -1077,12 +1126,6 @@ atc_prep_dma_sg(struct dma_chan *chan, desc->txd.cookie = 0; desc->len = len; - /* - * Although we only need the transfer width for the first and - * the last descriptor, its easier to set it to all descriptors. - */ - desc->tx_width = src_width; - atc_desc_chain(&first, &prev, desc); /* update the lengths and addresses for the next loop cycle */ @@ -1256,7 +1299,6 @@ atc_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len, /* First descriptor of the chain embedds additional information */ first->txd.cookie = -EBUSY; first->total_len = buf_len; - first->tx_width = reg_width; return &first->txd; diff --git a/drivers/dma/at_hdmac_regs.h b/drivers/dma/at_hdmac_regs.h index bc8d5ebedd19..7f5a08230f76 100644 --- a/drivers/dma/at_hdmac_regs.h +++ b/drivers/dma/at_hdmac_regs.h @@ -112,6 +112,7 @@ #define ATC_SRC_WIDTH_BYTE (0x0 << 24) #define ATC_SRC_WIDTH_HALFWORD (0x1 << 24) #define ATC_SRC_WIDTH_WORD (0x2 << 24) +#define ATC_REG_TO_SRC_WIDTH(r) (((r) >> 24) & 0x3) #define ATC_DST_WIDTH_MASK (0x3 << 28) /* Destination Single Transfer Size */ #define ATC_DST_WIDTH(x) ((x) << 28) #define ATC_DST_WIDTH_BYTE (0x0 << 28) @@ -182,7 +183,6 @@ struct at_lli { * @txd: support for the async_tx api * @desc_node: node on the channed descriptors list * @len: descriptor byte count - * @tx_width: transfer width * @total_len: total transaction byte count */ struct at_desc { @@ -194,7 +194,6 @@ struct at_desc { struct dma_async_tx_descriptor txd; struct list_head desc_node; size_t len; - u32 tx_width; size_t total_len; /* Interleaved data */ diff --git a/drivers/dma/at_xdmac.c b/drivers/dma/at_xdmac.c index cf1213de7865..40afa2a16cfc 100644 --- a/drivers/dma/at_xdmac.c +++ b/drivers/dma/at_xdmac.c @@ -359,18 +359,19 @@ static void at_xdmac_start_xfer(struct at_xdmac_chan *atchan, * descriptor view 2 since some fields of the configuration register * depend on transfer size and src/dest addresses. */ - if (at_xdmac_chan_is_cyclic(atchan)) { + if (at_xdmac_chan_is_cyclic(atchan)) reg = AT_XDMAC_CNDC_NDVIEW_NDV1; - at_xdmac_chan_write(atchan, AT_XDMAC_CC, first->lld.mbr_cfg); - } else if (first->lld.mbr_ubc & AT_XDMAC_MBR_UBC_NDV3) { + else if (first->lld.mbr_ubc & AT_XDMAC_MBR_UBC_NDV3) reg = AT_XDMAC_CNDC_NDVIEW_NDV3; - } else { - /* - * No need to write AT_XDMAC_CC reg, it will be done when the - * descriptor is fecthed. - */ + else reg = AT_XDMAC_CNDC_NDVIEW_NDV2; - } + /* + * Even if the register will be updated from the configuration in the + * descriptor when using view 2 or higher, the PROT bit won't be set + * properly. This bit can be modified only by using the channel + * configuration register. + */ + at_xdmac_chan_write(atchan, AT_XDMAC_CC, first->lld.mbr_cfg); reg |= AT_XDMAC_CNDC_NDDUP | AT_XDMAC_CNDC_NDSUP @@ -681,15 +682,16 @@ at_xdmac_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, desc->lld.mbr_sa = mem; desc->lld.mbr_da = atchan->sconfig.dst_addr; } - desc->lld.mbr_cfg = atchan->cfg; - dwidth = at_xdmac_get_dwidth(desc->lld.mbr_cfg); + dwidth = at_xdmac_get_dwidth(atchan->cfg); fixed_dwidth = IS_ALIGNED(len, 1 << dwidth) - ? at_xdmac_get_dwidth(desc->lld.mbr_cfg) + ? dwidth : AT_XDMAC_CC_DWIDTH_BYTE; desc->lld.mbr_ubc = AT_XDMAC_MBR_UBC_NDV2 /* next descriptor view */ | AT_XDMAC_MBR_UBC_NDEN /* next descriptor dst parameter update */ | AT_XDMAC_MBR_UBC_NSEN /* next descriptor src parameter update */ | (len >> fixed_dwidth); /* microblock length */ + desc->lld.mbr_cfg = (atchan->cfg & ~AT_XDMAC_CC_DWIDTH_MASK) | + AT_XDMAC_CC_DWIDTH(fixed_dwidth); dev_dbg(chan2dev(chan), "%s: lld: mbr_sa=%pad, mbr_da=%pad, mbr_ubc=0x%08x\n", __func__, &desc->lld.mbr_sa, &desc->lld.mbr_da, desc->lld.mbr_ubc); diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c index fbaf1ead2597..f1325f62563e 100644 --- a/drivers/dma/mv_xor.c +++ b/drivers/dma/mv_xor.c @@ -162,10 +162,11 @@ static void mv_chan_set_mode(struct mv_xor_chan *chan, config &= ~0x7; config |= op_mode; - if (IS_ENABLED(__BIG_ENDIAN)) - config |= XOR_DESCRIPTOR_SWAP; - else - config &= ~XOR_DESCRIPTOR_SWAP; +#if defined(__BIG_ENDIAN) + config |= XOR_DESCRIPTOR_SWAP; +#else + config &= ~XOR_DESCRIPTOR_SWAP; +#endif writel_relaxed(config, XOR_CONFIG(chan)); chan->current_type = type; diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c index f513f77b1d85..ecab4ea059b4 100644 --- a/drivers/dma/pl330.c +++ b/drivers/dma/pl330.c @@ -2328,7 +2328,7 @@ static dma_cookie_t pl330_tx_submit(struct dma_async_tx_descriptor *tx) desc->txd.callback = last->txd.callback; desc->txd.callback_param = last->txd.callback_param; } - last->last = false; + desc->last = false; dma_cookie_assign(&desc->txd); @@ -2623,6 +2623,7 @@ pl330_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dst, desc->rqcfg.brst_len = 1; desc->rqcfg.brst_len = get_burst_len(desc, len); + desc->bytes_requested = len; desc->txd.flags = flags; diff --git a/drivers/dma/virt-dma.c b/drivers/dma/virt-dma.c index 7d2c17d8d30f..6f80432a3f0a 100644 --- a/drivers/dma/virt-dma.c +++ b/drivers/dma/virt-dma.c @@ -29,7 +29,7 @@ dma_cookie_t vchan_tx_submit(struct dma_async_tx_descriptor *tx) spin_lock_irqsave(&vc->lock, flags); cookie = dma_cookie_assign(tx); - list_move_tail(&vd->node, &vc->desc_submitted); + list_add_tail(&vd->node, &vc->desc_submitted); spin_unlock_irqrestore(&vc->lock, flags); dev_dbg(vc->chan.device->dev, "vchan %p: txd %p[%x]: submitted\n", @@ -83,10 +83,8 @@ static void vchan_complete(unsigned long arg) cb_data = vd->tx.callback_param; list_del(&vd->node); - if (async_tx_test_ack(&vd->tx)) - list_add(&vd->node, &vc->desc_allocated); - else - vc->desc_free(vd); + + vc->desc_free(vd); if (cb) cb(cb_data); @@ -98,13 +96,9 @@ void vchan_dma_desc_free_list(struct virt_dma_chan *vc, struct list_head *head) while (!list_empty(head)) { struct virt_dma_desc *vd = list_first_entry(head, struct virt_dma_desc, node); - if (async_tx_test_ack(&vd->tx)) { - list_move_tail(&vd->node, &vc->desc_allocated); - } else { - dev_dbg(vc->chan.device->dev, "txd %p: freeing\n", vd); - list_del(&vd->node); - vc->desc_free(vd); - } + list_del(&vd->node); + dev_dbg(vc->chan.device->dev, "txd %p: freeing\n", vd); + vc->desc_free(vd); } } EXPORT_SYMBOL_GPL(vchan_dma_desc_free_list); @@ -114,7 +108,6 @@ void vchan_init(struct virt_dma_chan *vc, struct dma_device *dmadev) dma_cookie_init(&vc->chan); spin_lock_init(&vc->lock); - INIT_LIST_HEAD(&vc->desc_allocated); INIT_LIST_HEAD(&vc->desc_submitted); INIT_LIST_HEAD(&vc->desc_issued); INIT_LIST_HEAD(&vc->desc_completed); diff --git a/drivers/dma/virt-dma.h b/drivers/dma/virt-dma.h index 189e75dbcb15..181b95267866 100644 --- a/drivers/dma/virt-dma.h +++ b/drivers/dma/virt-dma.h @@ -29,7 +29,6 @@ struct virt_dma_chan { spinlock_t lock; /* protected by vc.lock */ - struct list_head desc_allocated; struct list_head desc_submitted; struct list_head desc_issued; struct list_head desc_completed; @@ -56,16 +55,11 @@ static inline struct dma_async_tx_descriptor *vchan_tx_prep(struct virt_dma_chan struct virt_dma_desc *vd, unsigned long tx_flags) { extern dma_cookie_t vchan_tx_submit(struct dma_async_tx_descriptor *); - unsigned long flags; dma_async_tx_descriptor_init(&vd->tx, &vc->chan); vd->tx.flags = tx_flags; vd->tx.tx_submit = vchan_tx_submit; - spin_lock_irqsave(&vc->lock, flags); - list_add_tail(&vd->node, &vc->desc_allocated); - spin_unlock_irqrestore(&vc->lock, flags); - return &vd->tx; } @@ -128,8 +122,7 @@ static inline struct virt_dma_desc *vchan_next_desc(struct virt_dma_chan *vc) } /** - * vchan_get_all_descriptors - obtain all allocated, submitted and issued - * descriptors + * vchan_get_all_descriptors - obtain all submitted and issued descriptors * vc: virtual channel to get descriptors from * head: list of descriptors found * @@ -141,7 +134,6 @@ static inline struct virt_dma_desc *vchan_next_desc(struct virt_dma_chan *vc) static inline void vchan_get_all_descriptors(struct virt_dma_chan *vc, struct list_head *head) { - list_splice_tail_init(&vc->desc_allocated, head); list_splice_tail_init(&vc->desc_submitted, head); list_splice_tail_init(&vc->desc_issued, head); list_splice_tail_init(&vc->desc_completed, head); @@ -149,14 +141,11 @@ static inline void vchan_get_all_descriptors(struct virt_dma_chan *vc, static inline void vchan_free_chan_resources(struct virt_dma_chan *vc) { - struct virt_dma_desc *vd; unsigned long flags; LIST_HEAD(head); spin_lock_irqsave(&vc->lock, flags); vchan_get_all_descriptors(vc, &head); - list_for_each_entry(vd, &head, node) - async_tx_clear_ack(&vd->tx); spin_unlock_irqrestore(&vc->lock, flags); vchan_dma_desc_free_list(vc, &head); diff --git a/drivers/dma/xgene-dma.c b/drivers/dma/xgene-dma.c index 620fd55ec766..dff22ab01851 100644 --- a/drivers/dma/xgene-dma.c +++ b/drivers/dma/xgene-dma.c @@ -111,6 +111,7 @@ #define XGENE_DMA_MEM_RAM_SHUTDOWN 0xD070 #define XGENE_DMA_BLK_MEM_RDY 0xD074 #define XGENE_DMA_BLK_MEM_RDY_VAL 0xFFFFFFFF +#define XGENE_DMA_RING_CMD_SM_OFFSET 0x8000 /* X-Gene SoC EFUSE csr register and bit defination */ #define XGENE_SOC_JTAG1_SHADOW 0x18 @@ -1887,6 +1888,8 @@ static int xgene_dma_get_resources(struct platform_device *pdev, return -ENOMEM; } + pdma->csr_ring_cmd += XGENE_DMA_RING_CMD_SM_OFFSET; + /* Get efuse csr region */ res = platform_get_resource(pdev, IORESOURCE_MEM, 3); if (!res) { |