arm_mat_trans_f32.c 7.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285
  1. /* ----------------------------------------------------------------------
  2. * Project: CMSIS DSP Library
  3. * Title: arm_mat_trans_f32.c
  4. * Description: Floating-point matrix transpose
  5. *
  6. * $Date: 18. March 2019
  7. * $Revision: V1.6.0
  8. *
  9. * Target Processor: Cortex-M cores
  10. * -------------------------------------------------------------------- */
  11. /*
  12. * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
  13. *
  14. * SPDX-License-Identifier: Apache-2.0
  15. *
  16. * Licensed under the Apache License, Version 2.0 (the License); you may
  17. * not use this file except in compliance with the License.
  18. * You may obtain a copy of the License at
  19. *
  20. * www.apache.org/licenses/LICENSE-2.0
  21. *
  22. * Unless required by applicable law or agreed to in writing, software
  23. * distributed under the License is distributed on an AS IS BASIS, WITHOUT
  24. * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  25. * See the License for the specific language governing permissions and
  26. * limitations under the License.
  27. */
  28. #include "arm_math.h"
  29. /**
  30. @ingroup groupMatrix
  31. */
  32. /**
  33. @defgroup MatrixTrans Matrix Transpose
  34. Transposes a matrix.
  35. Transposing an <code>M x N</code> matrix flips it around the center diagonal and results in an <code>N x M</code> matrix.
  36. \image html MatrixTranspose.gif "Transpose of a 3 x 3 matrix"
  37. */
  38. /**
  39. @addtogroup MatrixTrans
  40. @{
  41. */
  42. /**
  43. @brief Floating-point matrix transpose.
  44. @param[in] pSrc points to input matrix
  45. @param[out] pDst points to output matrix
  46. @return execution status
  47. - \ref ARM_MATH_SUCCESS : Operation successful
  48. - \ref ARM_MATH_SIZE_MISMATCH : Matrix size check failed
  49. */
  50. #if defined(ARM_MATH_NEON)
  51. arm_status arm_mat_trans_f32(
  52. const arm_matrix_instance_f32 * pSrc,
  53. arm_matrix_instance_f32 * pDst)
  54. {
  55. float32_t *pIn = pSrc->pData; /* input data matrix pointer */
  56. float32_t *pOut = pDst->pData; /* output data matrix pointer */
  57. float32_t *px; /* Temporary output data matrix pointer */
  58. uint16_t nRows = pSrc->numRows; /* number of rows */
  59. uint16_t nColumns = pSrc->numCols; /* number of columns */
  60. uint16_t blkCnt, rowCnt, i = 0U, row = nRows; /* loop counters */
  61. arm_status status; /* status of matrix transpose */
  62. #ifdef ARM_MATH_MATRIX_CHECK
  63. /* Check for matrix mismatch condition */
  64. if ((pSrc->numRows != pDst->numCols) || (pSrc->numCols != pDst->numRows))
  65. {
  66. /* Set status as ARM_MATH_SIZE_MISMATCH */
  67. status = ARM_MATH_SIZE_MISMATCH;
  68. }
  69. else
  70. #endif /* #ifdef ARM_MATH_MATRIX_CHECK */
  71. {
  72. /* Matrix transpose by exchanging the rows with columns */
  73. /* Row loop */
  74. rowCnt = row >> 2;
  75. while (rowCnt > 0U)
  76. {
  77. float32x4_t row0V,row1V,row2V,row3V;
  78. float32x4x2_t ra0,ra1,rb0,rb1;
  79. blkCnt = nColumns >> 2;
  80. /* The pointer px is set to starting address of the column being processed */
  81. px = pOut + i;
  82. /* Compute 4 outputs at a time.
  83. ** a second loop below computes the remaining 1 to 3 samples. */
  84. while (blkCnt > 0U) /* Column loop */
  85. {
  86. row0V = vld1q_f32(pIn);
  87. row1V = vld1q_f32(pIn + 1 * nColumns);
  88. row2V = vld1q_f32(pIn + 2 * nColumns);
  89. row3V = vld1q_f32(pIn + 3 * nColumns);
  90. pIn += 4;
  91. ra0 = vzipq_f32(row0V,row2V);
  92. ra1 = vzipq_f32(row1V,row3V);
  93. rb0 = vzipq_f32(ra0.val[0],ra1.val[0]);
  94. rb1 = vzipq_f32(ra0.val[1],ra1.val[1]);
  95. vst1q_f32(px,rb0.val[0]);
  96. px += nRows;
  97. vst1q_f32(px,rb0.val[1]);
  98. px += nRows;
  99. vst1q_f32(px,rb1.val[0]);
  100. px += nRows;
  101. vst1q_f32(px,rb1.val[1]);
  102. px += nRows;
  103. /* Decrement the column loop counter */
  104. blkCnt--;
  105. }
  106. /* Perform matrix transpose for last 3 samples here. */
  107. blkCnt = nColumns % 0x4U;
  108. while (blkCnt > 0U)
  109. {
  110. /* Read and store the input element in the destination */
  111. *px++ = *pIn;
  112. *px++ = *(pIn + 1 * nColumns);
  113. *px++ = *(pIn + 2 * nColumns);
  114. *px++ = *(pIn + 3 * nColumns);
  115. px += (nRows - 4);
  116. pIn++;
  117. /* Decrement the column loop counter */
  118. blkCnt--;
  119. }
  120. i += 4;
  121. pIn += 3 * nColumns;
  122. /* Decrement the row loop counter */
  123. rowCnt--;
  124. } /* Row loop end */
  125. rowCnt = row & 3;
  126. while (rowCnt > 0U)
  127. {
  128. blkCnt = nColumns ;
  129. /* The pointer px is set to starting address of the column being processed */
  130. px = pOut + i;
  131. while (blkCnt > 0U)
  132. {
  133. /* Read and store the input element in the destination */
  134. *px = *pIn++;
  135. /* Update the pointer px to point to the next row of the transposed matrix */
  136. px += nRows;
  137. /* Decrement the column loop counter */
  138. blkCnt--;
  139. }
  140. i++;
  141. rowCnt -- ;
  142. }
  143. /* Set status as ARM_MATH_SUCCESS */
  144. status = ARM_MATH_SUCCESS;
  145. }
  146. /* Return to application */
  147. return (status);
  148. }
  149. #else
  150. arm_status arm_mat_trans_f32(
  151. const arm_matrix_instance_f32 * pSrc,
  152. arm_matrix_instance_f32 * pDst)
  153. {
  154. float32_t *pIn = pSrc->pData; /* input data matrix pointer */
  155. float32_t *pOut = pDst->pData; /* output data matrix pointer */
  156. float32_t *px; /* Temporary output data matrix pointer */
  157. uint16_t nRows = pSrc->numRows; /* number of rows */
  158. uint16_t nCols = pSrc->numCols; /* number of columns */
  159. uint32_t col, row = nRows, i = 0U; /* Loop counters */
  160. arm_status status; /* status of matrix transpose */
  161. #ifdef ARM_MATH_MATRIX_CHECK
  162. /* Check for matrix mismatch condition */
  163. if ((pSrc->numRows != pDst->numCols) ||
  164. (pSrc->numCols != pDst->numRows) )
  165. {
  166. /* Set status as ARM_MATH_SIZE_MISMATCH */
  167. status = ARM_MATH_SIZE_MISMATCH;
  168. }
  169. else
  170. #endif /* #ifdef ARM_MATH_MATRIX_CHECK */
  171. {
  172. /* Matrix transpose by exchanging the rows with columns */
  173. /* row loop */
  174. do
  175. {
  176. /* Pointer px is set to starting address of column being processed */
  177. px = pOut + i;
  178. #if defined (ARM_MATH_LOOPUNROLL)
  179. /* Loop unrolling: Compute 4 outputs at a time */
  180. col = nCols >> 2U;
  181. while (col > 0U) /* column loop */
  182. {
  183. /* Read and store input element in destination */
  184. *px = *pIn++;
  185. /* Update pointer px to point to next row of transposed matrix */
  186. px += nRows;
  187. *px = *pIn++;
  188. px += nRows;
  189. *px = *pIn++;
  190. px += nRows;
  191. *px = *pIn++;
  192. px += nRows;
  193. /* Decrement column loop counter */
  194. col--;
  195. }
  196. /* Loop unrolling: Compute remaining outputs */
  197. col = nCols % 0x4U;
  198. #else
  199. /* Initialize col with number of samples */
  200. col = nCols;
  201. #endif /* #if defined (ARM_MATH_LOOPUNROLL) */
  202. while (col > 0U)
  203. {
  204. /* Read and store input element in destination */
  205. *px = *pIn++;
  206. /* Update pointer px to point to next row of transposed matrix */
  207. px += nRows;
  208. /* Decrement column loop counter */
  209. col--;
  210. }
  211. i++;
  212. /* Decrement row loop counter */
  213. row--;
  214. } while (row > 0U); /* row loop end */
  215. /* Set status as ARM_MATH_SUCCESS */
  216. status = ARM_MATH_SUCCESS;
  217. }
  218. /* Return to application */
  219. return (status);
  220. }
  221. #endif /* #if defined(ARM_MATH_NEON) */
  222. /**
  223. * @} end of MatrixTrans group
  224. */