/* * Rockchip VPU codec driver * * Copyright (C) 2014 Google, Inc. * Tomasz Figa * * This software is licensed under the terms of the GNU General Public * License version 2, as published by the Free Software Foundation, and * may be copied, distributed, and modified under those terms. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. */ #include "rockchip_vpu_common.h" #include #include #include #include #include #include #include #include #include #include #include #include /* Various parameters specific to VP8 encoder. */ #define VP8_KEY_FRAME_HDR_SIZE 10 #define VP8_INTER_FRAME_HDR_SIZE 3 #define VP8_FRAME_TAG_KEY_FRAME_BIT BIT(0) #define VP8_FRAME_TAG_LENGTH_SHIFT 5 #define VP8_FRAME_TAG_LENGTH_MASK (0x7ffff << 5) /** * struct rockchip_vpu_codec_ops - codec mode specific operations * * @codec_mode: Codec mode related to this format. See * enum rockchip_vpu_codec_mode. * @init: Prepare for streaming. Called from VB2 .start_streaming() * when streaming from both queues is being enabled. * @exit: Clean-up after streaming. Called from VB2 .stop_streaming() * when streaming from first of both enabled queues is being * disabled. * @irq: Handle {en,de}code irq. Check and clear interrupt. * @run: Start single {en,de)coding run. Called from non-atomic context * to indicate that a pair of buffers is ready and the hardware * should be programmed and started. * @done: Read back processing results and additional data from hardware. * @reset: Reset the hardware in case of a timeout. 
 */
struct rockchip_vpu_codec_ops {
	enum rockchip_vpu_codec_mode codec_mode;	/* selects this ops entry in mode_ops[] */

	int (*init)(struct rockchip_vpu_ctx *);
	void (*exit)(struct rockchip_vpu_ctx *);

	int (*irq)(int, struct rockchip_vpu_dev *);	/* returns 0 when the run completed */
	void (*run)(struct rockchip_vpu_ctx *);
	void (*done)(struct rockchip_vpu_ctx *, enum vb2_buffer_state);
	void (*reset)(struct rockchip_vpu_ctx *);
};

/*
 * Hardware control routines.
 */

void rockchip_vpu_power_on(struct rockchip_vpu_dev *vpu)
{
	vpu_debug_enter();

	/* TODO: Clock gating. */

	pm_runtime_get_sync(vpu->dev);

	vpu_debug_leave();
}

static void rockchip_vpu_power_off(struct rockchip_vpu_dev *vpu)
{
	vpu_debug_enter();

	/* Let autosuspend take the device down after the grace period. */
	pm_runtime_mark_last_busy(vpu->dev);
	pm_runtime_put_autosuspend(vpu->dev);

	/* TODO: Clock gating. */

	vpu_debug_leave();
}

/*
 * Interrupt handlers.
 */

/*
 * Threaded IRQ handler for the encoder block. When the codec irq callback
 * reports completion (returns 0), power down, stop the watchdog and hand
 * the finished buffers back through the codec done callback.
 */
static irqreturn_t vepu_irq(int irq, void *dev_id)
{
	struct rockchip_vpu_dev *vpu = dev_id;
	struct rockchip_vpu_ctx *ctx = vpu->current_ctx;

	if (!ctx->hw.codec_ops->irq(irq, vpu)) {
		rockchip_vpu_power_off(vpu);
		cancel_delayed_work(&vpu->watchdog_work);
		ctx->hw.codec_ops->done(ctx, VB2_BUF_STATE_DONE);
	}

	return IRQ_HANDLED;
}

/* Threaded IRQ handler for the decoder block; mirrors vepu_irq(). */
static irqreturn_t vdpu_irq(int irq, void *dev_id)
{
	struct rockchip_vpu_dev *vpu = dev_id;
	struct rockchip_vpu_ctx *ctx = vpu->current_ctx;

	if (!ctx->hw.codec_ops->irq(irq, vpu)) {
		rockchip_vpu_power_off(vpu);
		cancel_delayed_work(&vpu->watchdog_work);
		ctx->hw.codec_ops->done(ctx, VB2_BUF_STATE_DONE);
	}

	return IRQ_HANDLED;
}

/*
 * Watchdog work: fires when a {en,de}code run did not complete in time.
 * Resets the hardware (under irqlock, to serialize against the IRQ path),
 * powers off and completes the run with an error state.
 */
static void rockchip_vpu_watchdog(struct work_struct *work)
{
	struct rockchip_vpu_dev *vpu = container_of(to_delayed_work(work),
					struct rockchip_vpu_dev, watchdog_work);
	struct rockchip_vpu_ctx *ctx = vpu->current_ctx;
	unsigned long flags;

	spin_lock_irqsave(&vpu->irqlock, flags);
	ctx->hw.codec_ops->reset(ctx);
	spin_unlock_irqrestore(&vpu->irqlock, flags);

	vpu_err("frame processing timed out!\n");

	rockchip_vpu_power_off(vpu);
	ctx->hw.codec_ops->done(ctx, VB2_BUF_STATE_ERROR);
}

/*
 * Initialization/clean-up.
*/ #if defined(CONFIG_ROCKCHIP_IOMMU) static int rockchip_vpu_iommu_init(struct rockchip_vpu_dev *vpu) { int ret; vpu->dev->dma_parms = devm_kzalloc(vpu->dev, sizeof(*vpu->dev->dma_parms), GFP_KERNEL); if (!vpu->dev->dma_parms) return -ENOMEM; vpu->domain = iommu_domain_alloc(vpu->dev->bus); if (!vpu->domain) { ret = -ENOMEM; goto err_free_parms; } ret = iommu_get_dma_cookie(vpu->domain); if (ret) goto err_free_domain; ret = dma_set_coherent_mask(vpu->dev, DMA_BIT_MASK(32)); if (ret) goto err_put_cookie; dma_set_max_seg_size(vpu->dev, DMA_BIT_MASK(32)); ret = iommu_attach_device(vpu->domain, vpu->dev); if (ret) goto err_put_cookie; common_iommu_setup_dma_ops(vpu->dev, 0x10000000, SZ_2G, vpu->domain->ops); return 0; err_put_cookie: iommu_put_dma_cookie(vpu->domain); err_free_domain: iommu_domain_free(vpu->domain); err_free_parms: return ret; } static void rockchip_vpu_iommu_cleanup(struct rockchip_vpu_dev *vpu) { iommu_detach_device(vpu->domain, vpu->dev); iommu_put_dma_cookie(vpu->domain); iommu_domain_free(vpu->domain); } #else /* CONFIG_ROCKCHIP_IOMMU */ static inline int rockchip_vpu_iommu_init(struct rockchip_vpu_dev *vpu) { return 0; } static inline void rockchip_vpu_iommu_cleanup(struct rockchip_vpu_dev *vpu) { } #endif /* CONFIG_ROCKCHIP_IOMMU */ int rockchip_vpu_hw_probe(struct rockchip_vpu_dev *vpu) { struct resource *res; int irq_enc, irq_dec; int ret; pr_info("probe device %s\n", dev_name(vpu->dev)); INIT_DELAYED_WORK(&vpu->watchdog_work, rockchip_vpu_watchdog); vpu->aclk = devm_clk_get(vpu->dev, "aclk"); if (IS_ERR(vpu->aclk)) { dev_err(vpu->dev, "failed to get aclk\n"); return PTR_ERR(vpu->aclk); } vpu->hclk = devm_clk_get(vpu->dev, "hclk"); if (IS_ERR(vpu->hclk)) { dev_err(vpu->dev, "failed to get hclk\n"); return PTR_ERR(vpu->hclk); } res = platform_get_resource(vpu->pdev, IORESOURCE_MEM, 0); vpu->base = devm_ioremap_resource(vpu->dev, res); if (IS_ERR(vpu->base)) return PTR_ERR(vpu->base); clk_prepare_enable(vpu->aclk); 
clk_prepare_enable(vpu->hclk); vpu->enc_base = vpu->base + vpu->variant->enc_offset; vpu->dec_base = vpu->base + vpu->variant->dec_offset; ret = dma_set_coherent_mask(vpu->dev, DMA_BIT_MASK(32)); if (ret) { dev_err(vpu->dev, "could not set DMA coherent mask\n"); goto err_power; } ret = rockchip_vpu_iommu_init(vpu); if (ret) goto err_power; irq_enc = platform_get_irq_byname(vpu->pdev, "vepu"); if (irq_enc <= 0) { dev_err(vpu->dev, "could not get vepu IRQ\n"); ret = -ENXIO; goto err_iommu; } ret = devm_request_threaded_irq(vpu->dev, irq_enc, NULL, vepu_irq, IRQF_ONESHOT, dev_name(vpu->dev), vpu); if (ret) { dev_err(vpu->dev, "could not request vepu IRQ\n"); goto err_iommu; } irq_dec = platform_get_irq_byname(vpu->pdev, "vdpu"); if (irq_dec <= 0) { dev_err(vpu->dev, "could not get vdpu IRQ\n"); ret = -ENXIO; goto err_iommu; } ret = devm_request_threaded_irq(vpu->dev, irq_dec, NULL, vdpu_irq, IRQF_ONESHOT, dev_name(vpu->dev), vpu); if (ret) { dev_err(vpu->dev, "could not request vdpu IRQ\n"); goto err_iommu; } pm_runtime_set_autosuspend_delay(vpu->dev, 100); pm_runtime_use_autosuspend(vpu->dev); pm_runtime_enable(vpu->dev); return 0; err_iommu: rockchip_vpu_iommu_cleanup(vpu); err_power: clk_disable_unprepare(vpu->hclk); clk_disable_unprepare(vpu->aclk); return ret; } void rockchip_vpu_hw_remove(struct rockchip_vpu_dev *vpu) { rockchip_vpu_iommu_cleanup(vpu); pm_runtime_disable(vpu->dev); clk_disable_unprepare(vpu->hclk); clk_disable_unprepare(vpu->aclk); } static const struct rockchip_vpu_codec_ops mode_ops[] = { { .codec_mode = RK3288_VPU_CODEC_VP8E, .init = rk3288_vpu_vp8e_init, .exit = rk3288_vpu_vp8e_exit, .irq = rk3288_vpu_enc_irq, .run = rk3288_vpu_vp8e_run, .done = rk3288_vpu_vp8e_done, .reset = rk3288_vpu_enc_reset, }, { .codec_mode = RK3288_VPU_CODEC_VP8D, .init = rk3288_vpu_vp8d_init, .exit = rk3288_vpu_vp8d_exit, .irq = rk3288_vpu_dec_irq, .run = rk3288_vpu_vp8d_run, .done = rockchip_vpu_run_done, .reset = rk3288_vpu_dec_reset, }, { .codec_mode = 
RK3288_VPU_CODEC_H264E, .init = rk3288_vpu_h264e_init, .exit = rk3288_vpu_h264e_exit, .irq = rk3288_vpu_enc_irq, .run = rk3288_vpu_h264e_run, .done = rk3288_vpu_h264e_done, .reset = rk3288_vpu_enc_reset, }, { .codec_mode = RK3288_VPU_CODEC_H264D, .init = rk3288_vpu_h264d_init, .exit = rk3288_vpu_h264d_exit, .irq = rk3288_vpu_dec_irq, .run = rk3288_vpu_h264d_run, .done = rockchip_vpu_run_done, .reset = rk3288_vpu_dec_reset, }, }; void rockchip_vpu_run(struct rockchip_vpu_ctx *ctx) { ctx->hw.codec_ops->run(ctx); } int rockchip_vpu_init(struct rockchip_vpu_ctx *ctx) { enum rockchip_vpu_codec_mode codec_mode; int i; if (rockchip_vpu_ctx_is_encoder(ctx)) codec_mode = ctx->vpu_dst_fmt->codec_mode; /* Encoder */ else codec_mode = ctx->vpu_src_fmt->codec_mode; /* Decoder */ for (i = 0; i < ARRAY_SIZE(mode_ops); i++) { if (mode_ops[i].codec_mode == codec_mode) { ctx->hw.codec_ops = &mode_ops[i]; break; } } if (!ctx->hw.codec_ops) return -1; return ctx->hw.codec_ops->init(ctx); } void rockchip_vpu_deinit(struct rockchip_vpu_ctx *ctx) { ctx->hw.codec_ops->exit(ctx); } /* * The hardware takes care only of ext hdr and dct partition. The software * must take care of frame header. * * Buffer layout as received from hardware: * |<--gap-->|<--ext hdr-->|<-gap->|<---dct part--- * |<-------dct part offset------->| * * Required buffer layout: * |<--hdr-->|<--ext hdr-->|<---dct part--- */ void rockchip_vpu_vp8e_assemble_bitstream(struct rockchip_vpu_ctx *ctx, struct rockchip_vpu_buf *dst_buf) { struct vb2_v4l2_buffer *vb2_dst = to_vb2_v4l2_buffer(&dst_buf->vb.vb2_buf); size_t ext_hdr_size = dst_buf->vp8e.ext_hdr_size; size_t dct_size = dst_buf->vp8e.dct_size; size_t hdr_size = dst_buf->vp8e.hdr_size; size_t dst_size; size_t tag_size; void *dst; u32 *tag; dst_size = vb2_plane_size(&dst_buf->vb.vb2_buf, 0); dst = vb2_plane_vaddr(&dst_buf->vb.vb2_buf, 0); tag = dst; /* To access frame tag words. 
*/ if (WARN_ON(hdr_size + ext_hdr_size + dct_size > dst_size)) return; if (WARN_ON(dst_buf->vp8e.dct_offset + dct_size > dst_size)) return; vpu_debug(1, "%s: hdr_size = %d, ext_hdr_size = %d, dct_size = %d\n", __func__, hdr_size, ext_hdr_size, dct_size); memmove(dst + hdr_size + ext_hdr_size, dst + dst_buf->vp8e.dct_offset, dct_size); memcpy(dst, dst_buf->vp8e.header, hdr_size); /* Patch frame tag at first 32-bit word of the frame. */ if (vb2_dst->flags & V4L2_BUF_FLAG_KEYFRAME) { tag_size = VP8_KEY_FRAME_HDR_SIZE; tag[0] &= ~VP8_FRAME_TAG_KEY_FRAME_BIT; } else { tag_size = VP8_INTER_FRAME_HDR_SIZE; tag[0] |= VP8_FRAME_TAG_KEY_FRAME_BIT; } tag[0] &= ~VP8_FRAME_TAG_LENGTH_MASK; tag[0] |= (hdr_size + ext_hdr_size - tag_size) << VP8_FRAME_TAG_LENGTH_SHIFT; vb2_set_plane_payload(&dst_buf->vb.vb2_buf, 0, hdr_size + ext_hdr_size + dct_size); } void rockchip_vpu_h264e_assemble_bitstream(struct rockchip_vpu_ctx *ctx, struct rockchip_vpu_buf *dst_buf) { size_t sps_size = dst_buf->h264e.sps_size; size_t pps_size = dst_buf->h264e.pps_size; size_t slices_size = dst_buf->h264e.slices_size; size_t dst_size; void *dst; struct stream_s *sps = &ctx->run.h264e.sps; struct stream_s *pps = &ctx->run.h264e.pps; dst_size = vb2_plane_size(&dst_buf->vb.vb2_buf, 0); dst = vb2_plane_vaddr(&dst_buf->vb.vb2_buf, 0); if (WARN_ON(sps_size + pps_size + slices_size > dst_size)) return; vpu_debug(1, "%s: sps_size = %u, pps_size = %u, slices_size = %u\n", __func__, sps_size, pps_size, slices_size); memcpy(dst, sps->buffer, sps_size); memcpy(dst + sps_size, pps->buffer, pps_size); vb2_set_plane_payload(&dst_buf->vb.vb2_buf, 0, sps_size + pps_size + slices_size); }