1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55
| static Execution* _createUnit(const Tensor* input, const Tensor* output, Backend* backend, const Convolution2D* conv2d, const float* originWeight, size_t originWeightSize, const float* bias, size_t biasSize, std::shared_ptr<ConvolutionCommon::Int8Common> weightQuantInfo, bool supportSparse) { auto cpuBackend = (CPUBackend*)backend; #ifdef MNN_LOW_MEMORY bool lowMemory = cpuBackend->memoryMode() == BackendConfig::Memory_Low; #else bool lowMemory = false; #endif auto common = conv2d->common(); #ifdef MNN_USE_ONEDNN return OneDNN::createConvolution(common, backend, originWeight, originWeightSize, bias, biasSize); #endif
#ifdef MNN_USE_SPARSE_COMPUTE if (conv2d->sparseParameter() && nullptr != weightQuantInfo.get()) { if (supportSparse) { return new SparseConvolutionTiledExecutor(common, backend, weightQuantInfo->quan, conv2d->sparseParameter(), bias, biasSize); } } #endif bool fastWay = common->kernelY() == 1 && common->kernelX() == 1 && output->width() == input->width() && output->height() == input->height() && common->strideX() == 1 && common->strideY() == 1;
if (lowMemory) { if (fastWay && nullptr != weightQuantInfo.get()) { return new ConvolutionHybrid(common, backend, originWeight, originWeightSize, bias, biasSize, weightQuantInfo); } else { return new DenseConvolutionTiledExecutor(common, backend, originWeight, originWeightSize, bias, biasSize, weightQuantInfo); } } if (fastWay) { return new Convolution1x1Strassen(common, backend, originWeight, originWeightSize, bias, biasSize, weightQuantInfo); } if (originWeightSize == 0) { return new DenseConvolutionTiledExecutor(common, backend, originWeight, originWeightSize, bias, biasSize, weightQuantInfo); } if (!ConvolutionWinogradBridge::canUseWinograd(common)) { return new DenseConvolutionTiledExecutor(common, backend, originWeight, originWeightSize, bias, biasSize, nullptr); } PerfConfig convPerfconfig = DenseConvolutionTiledExecutor::bestTileConvolutionConfig(common, input, output, cpuBackend->threadNumber(), backend); auto winogradConfig = ConvolutionWinogradBridge::bestWinogradUnit(common, input, output, cpuBackend->threadNumber(), backend, convPerfconfig); if (winogradConfig.unit <= 1) { return new DenseConvolutionTiledExecutor(common, backend, originWeight, originWeightSize, bias, biasSize, nullptr); } return ConvolutionWinogradBridge::createWinogradImpl(common, input, output, backend, originWeight, originWeightSize, bias, biasSize, winogradConfig); }
|