/******************************************************************************* * * MIT License * * Copyright (c) 2017 Advanced Micro Devices, Inc. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * *******************************************************************************/ #include #include #include #include #include #include #include #include #include #include #include // MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_ROCM_PRECOMPILED_BINARIES) // MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_ASM_KERNELS_PERF_FILTERING) // Disable specific warnings #define MIO_RNN_DEBUG 0 #define MIOPEN_RNN_SYNCH 0 #define MIO_RNN_CPP_PROF 0 namespace miopen { void profileRNNkernels(Handle& handle, unsigned char select, float& ctime) { float ktime = 0.; assert((select < 3) && "profileSequence case incorrect"); switch(select) { case 0: if(handle.IsProfilingEnabled()) { handle.ResetKernelTime(); ctime = 0.; ktime = handle.GetKernelTime(); ctime = ktime; #if(MIO_RNN_CPP_PROF == 1) printf("init ktime: %f\n", ktime); printf("init ctime: %f\n", ctime); #endif } #if(MIOPEN_RNN_SYNCH == 1) else { handle.Finish(); } #endif break; case 1: if(handle.IsProfilingEnabled()) { ktime = handle.GetKernelTime(); ctime += ktime; #if(MIO_RNN_CPP_PROF == 1) printf("intermediate ktime: %f\n", ktime); printf("intermediate ctime: %f\n", ctime); #endif } #if(MIOPEN_RNN_SYNCH == 1) else { handle.Finish(); } #endif break; case 2: if(handle.IsProfilingEnabled()) { #if(MIO_RNN_CPP_PROF == 1) ktime = handle.GetKernelTime(); printf("Final time: %f\n", ktime + ctime); handle.AccumKernelTime(ctime); #else handle.GetKernelTime(); handle.AccumKernelTime(ctime); #endif } break; default: assert(false); } } size_t RNNDescriptor::biasOffsetCalculation(const TensorDescriptor& /*xDesc*/, const int layer, const int biasID) { if(biasMode == miopenRNNNoBias) { return 0; } size_t layerJump = 0; if(dirMode != 0u) { layerJump += (hsize * 2) * nHiddenTensorsPerLayer * (layer / 2) * 2; if(biasID >= nHiddenTensorsPerLayer) { layerJump += hsize * nHiddenTensorsPerLayer; } layerJump += (layer % 2 == 1) ? nHiddenTensorsPerLayer * hsize : 0; layerJump += hsize * biasID; } else { layerJump += (hsize * 2) * nHiddenTensorsPerLayer * layer; layerJump += hsize * biasID; } return layerJump; } size_t RNNDescriptor::paramsOffsetCalculation(const TensorDescriptor& xDesc, const int layer, const int paramID) { auto inputVectorLen = xDesc.GetLengths()[1]; if(inputMode == miopenRNNskip) { inputVectorLen = 0; } size_t layerJump = 0; if(dirMode != 0u) { if(layer > 1) { layerJump += (inputVectorLen * hsize + hsize * hsize) * nHiddenTensorsPerLayer * 2; layerJump += (hsize * hsize * 2 + hsize * hsize) * nHiddenTensorsPerLayer * (layer / 2 - 1) * 2; if(paramID >= nHiddenTensorsPerLayer) { layerJump += hsize * hsize * 2 * nHiddenTensorsPerLayer * 2; layerJump += (layer % 2 == 1) ? nHiddenTensorsPerLayer * (hsize * hsize) : 0; layerJump += (hsize * hsize) * (paramID - nHiddenTensorsPerLayer); } else { layerJump += (layer % 2 == 1) ? nHiddenTensorsPerLayer * (2 * hsize * hsize) : 0; layerJump += (2 * hsize * hsize) * paramID; } } else { if(paramID >= nHiddenTensorsPerLayer) { if(isNotRNNskip()) { layerJump += (inputVectorLen * hsize) * nHiddenTensorsPerLayer * 2; } layerJump += (layer == 1) ? nHiddenTensorsPerLayer * (hsize * hsize) : 0; layerJump += (hsize * hsize) * (paramID - nHiddenTensorsPerLayer); } else { layerJump += (layer == 1) ? nHiddenTensorsPerLayer * (inputVectorLen * hsize) : 0; layerJump += (inputVectorLen * hsize) * paramID; } } } else { if(layer > 0) { layerJump += (inputVectorLen * hsize + hsize * hsize) * nHiddenTensorsPerLayer; layerJump += (hsize * hsize * 2) * nHiddenTensorsPerLayer * (layer - 1); layerJump += (hsize * hsize) * paramID; } else { if(paramID >= nHiddenTensorsPerLayer) { if(isNotRNNskip()) { layerJump += (inputVectorLen * hsize) * nHiddenTensorsPerLayer; } layerJump += (hsize * hsize) * (paramID - nHiddenTensorsPerLayer); } else { layerJump += (inputVectorLen * hsize) * paramID; } } } return layerJump; } std::vector RNNDescriptor::pTensorLengthsCalculation(const TensorDescriptor& xDesc, const int layer, const int paramID) { auto inputVectorLen = xDesc.GetLengths()[1]; if(inputMode == miopenRNNskip) { inputVectorLen = 0; } std::vector tdim(2, 0); if(dirMode != 0u) { if(layer > 1) // NOT the input layer { if(paramID >= nHiddenTensorsPerLayer) { tdim[0] = tdim[1] = hsize; } else { tdim[0] = hsize; tdim[1] = hsize * 2; } } else // IS the input layer { if(paramID >= nHiddenTensorsPerLayer) { tdim[0] = tdim[1] = hsize; } else { tdim[0] = hsize; tdim[1] = inputVectorLen; } } } else { if(layer > 0) // NOT the input layer { tdim[0] = tdim[1] = hsize; } else { if(paramID >= nHiddenTensorsPerLayer) { tdim[0] = tdim[1] = hsize; } else { tdim[0] = hsize; tdim[1] = inputVectorLen; } } } return tdim; } RNNDescriptor::RNNDescriptor() { nLayers = 1; hsize = 0; nHiddenTensorsPerLayer = 0; rnnMode = miopenRNNTANH; dirMode = miopenRNNunidirection; biasMode = miopenRNNNoBias; algoMode = miopenRNNdefault; inputMode = miopenRNNlinear; dataType = miopenFloat; typeSize = 4; workspaceScale = 1; } RNNDescriptor::RNNDescriptor(int hsz, int layers, miopenRNNMode_t rmode, miopenRNNInputMode_t inMode, miopenRNNDirectionMode_t bidir, miopenRNNBiasMode_t bmode, miopenRNNAlgo_t amode, miopenDataType_t dType) { if(hsz < 0 || layers < 0) { MIOPEN_THROW(miopenStatusBadParm, "Parameter to RNN must be a positive integer."); } if(!(rmode == miopenRNNRELU || rmode == miopenRNNTANH || rmode == miopenLSTM || rmode == miopenGRU)) { MIOPEN_THROW(miopenStatusBadParm, "RNN mode not supported"); } if(bidir != 0 && bidir != 1) { MIOPEN_THROW(miopenStatusBadParm, "Parameters to RNN directional type not supported"); } if(bmode != 0 && bmode != 1) { MIOPEN_THROW(miopenStatusBadParm, "Parameters to RNN bias type not supported"); } if(dType != miopenFloat && dType != miopenHalf) { MIOPEN_THROW(miopenStatusBadParm, "Parameters to RNN datatype is not supported"); } else { typeSize = 4; } hsize = hsz; nLayers = layers; inputMode = inMode; dirMode = bidir; rnnMode = rmode; algoMode = amode; biasMode = bmode; dataType = dType; switch(rmode) { case 0: // RNN vanilla case 1: // RNN vanilla nHiddenTensorsPerLayer = 1; workspaceScale = 1; break; case 2: // LSTM nHiddenTensorsPerLayer = 4; workspaceScale = 6; break; case 3: // GRU nHiddenTensorsPerLayer = 3; workspaceScale = 4; break; } } size_t RNNDescriptor::GetWorkspaceSize(Handle& /* handle */, const int seqLength, c_array_view xDesc) const { if(xDesc[0].GetType() != dataType) { MIOPEN_THROW(miopenStatusBadParm, "Data type mismatch between descriptors"); } std::size_t inputBatchLenSum = 0; inputBatchLenSum = std::accumulate( xDesc.data, xDesc.data + seqLength, 0, [](size_t x, miopenTensorDescriptor_t y) { return x + deref(y).GetLengths()[0]; }); auto x = workspaceScale * nLayers * inputBatchLenSum * hsize * typeSize; return size_t(dirMode == miopenRNNbidirection ? 2 * x : x); } size_t RNNDescriptor::GetReserveSize(Handle& /* handle */, const int seqLength, c_array_view xDesc) const { if(xDesc[0].GetType() != dataType) { MIOPEN_THROW(miopenStatusBadParm, "Data type mismatch between descriptors"); } std::size_t inputBatchLenSum = 0; inputBatchLenSum = std::accumulate( xDesc.data, xDesc.data + seqLength, 0, [](size_t x, miopenTensorDescriptor_t y) { return x + deref(y).GetLengths()[0]; }); auto x = 2 * workspaceScale * nLayers * inputBatchLenSum * hsize * typeSize; return size_t(dirMode == miopenRNNbidirection ? 2 * x : x); } size_t RNNDescriptor::GetParamsSize(Handle& /* handle */, const TensorDescriptor& xDesc, miopenDataType_t dtype) { if(xDesc.GetType() != dataType || dtype != dataType) { MIOPEN_THROW(miopenStatusBadParm, "Data type mismatch."); } assert(xDesc.GetLengths().size() > 1); auto inputVectorLen = xDesc.GetLengths()[1]; if(inputMode == miopenRNNskip) inputVectorLen = 0; int bi = dirMode == miopenRNNbidirection ? 2 : 1; auto sz = nHiddenTensorsPerLayer * hsize * bi * (inputVectorLen + hsize + (nLayers - 1) * (bi + 1) * hsize); #if(MIO_RNN_DEBUG == 1) fprintf(stderr, "weight size: %lu\n", sz); #endif if(biasMode == miopenRNNwithBias) { sz += nLayers * 2 * nHiddenTensorsPerLayer * hsize * bi; } return size_t(typeSize * sz); } size_t RNNDescriptor::GetRNNInputSuperTensorSize(Handle& /* handle */, const int seqLength, c_array_view xDesc) { if(xDesc[0].GetType() != dataType) { MIOPEN_THROW(miopenStatusBadParm, "Data type mismatch between descriptors"); } std::size_t inputBatchLenSum = 0; inputBatchLenSum = std::accumulate( xDesc.data, xDesc.data + seqLength, 0, [](size_t x, miopenTensorDescriptor_t y) { return x + deref(y).GetLengths()[0]; }); auto x = inputBatchLenSum * xDesc[0].GetLengths()[1] * typeSize; return size_t(x); } size_t RNNDescriptor::GetRNNHiddenSuperTensorSize(Handle& /* handle */, c_array_view xDesc) { if(xDesc[0].GetType() != dataType) { MIOPEN_THROW(miopenStatusBadParm, "Data type mismatch between descriptors"); } auto x = xDesc[0].GetLengths()[0] * hsize * nLayers * typeSize; return size_t(dirMode == miopenRNNbidirection ? 2 * x : x); } void RNNDescriptor::GetParamsDescriptor(Handle& /* handle */, const TensorDescriptor& xDesc, TensorDescriptor& wDesc, miopenDataType_t dtype) { if(dtype != dataType) { MIOPEN_THROW(miopenStatusBadParm, "Data type mismatch."); } auto inputVectorLen = xDesc.GetLengths()[1]; // input vector size if(inputMode == miopenRNNskip) inputVectorLen = 0; // Create weight super tensor descriptor int bi = (dirMode == miopenRNNbidirection) ? 2 : 1; std::vector weight_lens(2, 0); weight_lens[0] = inputVectorLen + ((nLayers - 1) * (bi + 1) + 1) * hsize; weight_lens[1] = bi * hsize * nHiddenTensorsPerLayer; if(biasMode == miopenRNNwithBias) { weight_lens[0] += (nLayers * 2); } wDesc = miopen::TensorDescriptor(dtype, weight_lens.data(), 2); } std::size_t RNNDescriptor::GetLayerParamSize(Handle& /*handle*/, int layer, const TensorDescriptor& xDesc, int paramID) { if(xDesc.GetType() != dataType) { MIOPEN_THROW(miopenStatusBadParm, "Data type mismatch."); } auto inputVectorLen = xDesc.GetLengths()[1]; // input vector size inputVectorLen = (inputMode == miopenRNNskip) ? 0 : inputVectorLen; // Assuming Djikstra counting if((((dirMode != 0u) && layer <= 1) || ((dirMode == 0u) && layer < 1))) { if(paramID >= nHiddenTensorsPerLayer) return size_t(typeSize * hsize * hsize); else if(isNotRNNskip()) return size_t(typeSize * inputVectorLen * hsize); else return 0; } else if((dirMode != 0u) && paramID < nHiddenTensorsPerLayer) { return size_t(typeSize * hsize * hsize * 2); } else { return size_t(typeSize * hsize * hsize); } } std::size_t RNNDescriptor::GetLayerBiasSize(Handle& /* handle */, int /*layer*/, int /*biasID*/) { return size_t(typeSize * hsize); // is ther more needed here? } void RNNDescriptor::GetLayerParam(Handle& handle, const int layer, const TensorDescriptor& xDesc, const TensorDescriptor& /* wDesc */, ConstData_t w, const int paramID, TensorDescriptor& paramDesc, Data_t param) { if(!isNotRNNskip() && (((dirMode != 0u) && layer <= 1 && paramID < nHiddenTensorsPerLayer) || ((dirMode == 0u) && layer < 1 && paramID < nHiddenTensorsPerLayer))) { MIOPEN_THROW(miopenStatusBadParm, "Parameter of input layer is null in input skip mode"); } // Get the dimensions of the parameter matrix auto pDims = pTensorLengthsCalculation(xDesc, layer, paramID); paramDesc = miopen::TensorDescriptor(dataType, pDims.data(), 2); if(param == nullptr) { return; } // Calculate the location of the matrix via paramID, bidirection setting, and params auto poffset = paramsOffsetCalculation(xDesc, layer, paramID); #if(MIO_RNN_DEBUG == 1) fprintf(stderr, "GetLayerParam layer: %d layerID: %d offset: %lu size: %lu\n", layer, paramID, poffset, paramDesc.GetElementSize()); #endif // Copy over data to previously allocated param tensor miopen::CopyTensor(handle, paramDesc, w, paramDesc, param, poffset, 0); } void RNNDescriptor::GetLayerBias(Handle& handle, const int layer, const TensorDescriptor& xDesc, const TensorDescriptor& /* wDesc */, ConstData_t w, const int biasID, TensorDescriptor& biasDesc, Data_t bias) { if(biasMode == miopenRNNNoBias) { return; } // Get the dimensions of the parameter matrix auto bdim = int(hsize); biasDesc = miopen::TensorDescriptor(dataType, &bdim, 1); if(bias == nullptr) { return; } // Calculate the location of the matrix via layerID, bidirection setting, and params int x = static_cast((dirMode == miopenRNNbidirection) ? nLayers * 2 : nLayers); auto poffset = paramsOffsetCalculation(xDesc, x, 0); auto boffset = biasOffsetCalculation(xDesc, layer, biasID) + poffset; #if(MIO_RNN_DEBUG == 1) fprintf(stderr, "GetLayerBias layer: %d layerID: %d offset: %lu = %lu + %lu size: %lu\n", layer, biasID, boffset, poffset, boffset - poffset, biasDesc.GetElementSize()); #endif // Copy over data to previously allocated param tensor miopen::CopyTensor(handle, biasDesc, w, biasDesc, bias, boffset, 0); } void RNNDescriptor::SetLayerParam(Handle& handle, const int layer, const TensorDescriptor& xDesc, const TensorDescriptor& /* wDesc */, Data_t w, const int paramID, const TensorDescriptor& paramDesc, ConstData_t param) { if(!isNotRNNskip() && (((dirMode != 0u) && layer <= 1 && paramID < nHiddenTensorsPerLayer) || ((dirMode == 0u) && layer < 1 && paramID < nHiddenTensorsPerLayer))) { MIOPEN_THROW(miopenStatusBadParm, "Parameter of input layer is null in input skip mode"); } // TODO dlowell: Need guard checks here, or have them caught at the copy call? if(param == nullptr) { MIOPEN_THROW(miopenStatusBadParm, "param data cannot be null"); } // 1. Calculate the location of the matrix via paramID, bidirection setting, and params auto poffset = paramsOffsetCalculation(xDesc, layer, paramID); // 2. Calculate the strides for the matrix std::vector pstride(2, 1); pstride[1] = paramDesc.GetLengths()[0]; std::vector intLens(paramDesc.GetLengths().begin(), paramDesc.GetLengths().end()); // 3. Construct descriptor to access into w auto paramSrc = miopen::TensorDescriptor(dataType, intLens.data(), pstride.data(), 2); if(paramSrc.GetLengths() != paramDesc.GetLengths()) { MIOPEN_THROW(miopenStatusBadParm, "mismatch between descriptors"); } #if(MIO_RNN_DEBUG == 1) fprintf(stderr, "SetLayerParam layer: %d layerID: %d offset: %lu size: %lu\n", layer, paramID, poffset, paramDesc.GetElementSize()); #endif // 4. Copy over data to previously allocated param tensor miopen::CopyTensor(handle, paramDesc, param, paramSrc, w, 0, poffset); } void RNNDescriptor::SetLayerBias(Handle& handle, const int layer, const TensorDescriptor& xDesc, const TensorDescriptor& /* wDesc */, Data_t w, const int biasID, const TensorDescriptor& biasDesc, ConstData_t bias) { if(biasMode == miopenRNNNoBias) { return; } // TODO dlowell: Need guard checks here, or have them caught at the copy call? if(bias == nullptr) { MIOPEN_THROW(miopenStatusBadParm, "bias data cannot be null"); } // 1. Calculate the location of the matrix via layerID, bidirection setting, and params int x = static_cast((dirMode == miopenRNNbidirection) ? nLayers * 2 : nLayers); auto poffset = paramsOffsetCalculation(xDesc, x, 0); auto boffset = biasOffsetCalculation(xDesc, layer, biasID) + poffset; // 2. Calculate the strides for the matrix std::vector bstride(1, 1); std::vector intLens(biasDesc.GetLengths().begin(), biasDesc.GetLengths().end()); // 3. Construct descriptor to access into w auto biasSrc = miopen::TensorDescriptor(dataType, intLens.data(), bstride.data(), 1); if(biasSrc.GetLengths() != biasDesc.GetLengths()) { MIOPEN_THROW(miopenStatusBadParm, "mismatch between descriptors"); } #if(MIO_RNN_DEBUG == 1) fprintf(stderr, "SetLayerBias layer: %d layerID: %d offset: %lu = %lu + %lu size: %lu\n", layer, biasID, boffset, poffset, boffset - poffset, biasSrc.GetElementSize()); #endif // 4. Copy over data to previously allocated param tensor miopen::CopyTensor(handle, biasSrc, bias, biasDesc, w, 0, boffset); } void RNNDescriptor::GetLayerParamOffset(const int layer, const TensorDescriptor& xDesc, const int paramID, TensorDescriptor& paramDesc, size_t* paramOffset) { if(!isNotRNNskip() && (((dirMode != 0u) && layer <= 1 && paramID < nHiddenTensorsPerLayer) || ((dirMode == 0u) && layer < 1 && paramID < nHiddenTensorsPerLayer))) { MIOPEN_THROW(miopenStatusBadParm, "Parameter of input layer is null in input skip mode"); } // Get the dimensions of the parameter matrix auto pDims = pTensorLengthsCalculation(xDesc, layer, paramID); paramDesc = miopen::TensorDescriptor(dataType, pDims.data(), 2); if(paramOffset == nullptr) { return; } // Calculate the location of the matrix via paramID, bidirection setting, and params *paramOffset = paramsOffsetCalculation(xDesc, layer, paramID); #if(MIO_RNN_DEBUG == 1) fprintf(stderr, "GetLayerParamOffset layer: %d layerID: %d offset: %lu size: %lu\n", layer, paramID, *paramOffset, paramDesc.GetElementSize()); #endif } void RNNDescriptor::GetLayerBiasOffset(const int layer, const TensorDescriptor& xDesc, const int biasID, TensorDescriptor& biasDesc, size_t* biasOffset) { // Get the dimensions of the parameter matrix if(biasMode == miopenRNNNoBias) { return; } auto bdim = int(hsize); biasDesc = miopen::TensorDescriptor(dataType, &bdim, 1); if(biasOffset == nullptr) { return; } int x = static_cast((dirMode == miopenRNNbidirection) ? nLayers * 2 : nLayers); auto poffset = paramsOffsetCalculation(xDesc, x, 0); *biasOffset = biasOffsetCalculation(xDesc, layer, biasID) + poffset; #if(MIO_RNN_DEBUG == 1) fprintf(stderr, "GetLayerBiasOffset layer: %d layerID: %d offset: %lu = %lu + %lu size: %lu\n", layer, biasID, *biasOffset, poffset, *biasOffset - poffset, biasDesc.GetElementSize()); #endif } std::ostream& operator<<(std::ostream& stream, const RNNDescriptor& r) { stream << r.hsize << ", "; stream << r.nLayers << ", "; stream << r.nHiddenTensorsPerLayer << ", "; stream << r.workspaceScale << ", "; stream << r.rnnMode << ", "; stream << r.dirMode << ", "; stream << r.algoMode << ", "; stream << r.inputMode << ", "; stream << r.biasMode << ", "; return stream; } } // namespace miopen