arm_cfft_radix4_f32.c 33 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201
  1. /* ----------------------------------------------------------------------
  2. * Project: CMSIS DSP Library
  3. * Title: arm_cfft_radix4_f32.c
  4. * Description: Radix-4 Decimation in Frequency CFFT & CIFFT Floating point processing function
  5. *
  6. * $Date: 18. March 2019
  7. * $Revision: V1.6.0
  8. *
  9. * Target Processor: Cortex-M cores
  10. * -------------------------------------------------------------------- */
  11. /*
  12. * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
  13. *
  14. * SPDX-License-Identifier: Apache-2.0
  15. *
  16. * Licensed under the Apache License, Version 2.0 (the License); you may
  17. * not use this file except in compliance with the License.
  18. * You may obtain a copy of the License at
  19. *
  20. * www.apache.org/licenses/LICENSE-2.0
  21. *
  22. * Unless required by applicable law or agreed to in writing, software
  23. * distributed under the License is distributed on an AS IS BASIS, WITHOUT
  24. * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  25. * See the License for the specific language governing permissions and
  26. * limitations under the License.
  27. */
  28. #include "arm_math.h"
  29. extern void arm_bitreversal_f32(
  30. float32_t * pSrc,
  31. uint16_t fftSize,
  32. uint16_t bitRevFactor,
  33. const uint16_t * pBitRevTab);
  34. void arm_radix4_butterfly_f32(
  35. float32_t * pSrc,
  36. uint16_t fftLen,
  37. const float32_t * pCoef,
  38. uint16_t twidCoefModifier);
  39. void arm_radix4_butterfly_inverse_f32(
  40. float32_t * pSrc,
  41. uint16_t fftLen,
  42. const float32_t * pCoef,
  43. uint16_t twidCoefModifier,
  44. float32_t onebyfftLen);
  45. /**
  46. @ingroup groupTransforms
  47. */
  48. /**
  49. @addtogroup ComplexFFT
  50. @{
  51. */
  52. /**
  53. @brief Processing function for the floating-point Radix-4 CFFT/CIFFT.
  54. @deprecated Do not use this function. It has been superseded by \ref arm_cfft_f32 and will be removed in the future.
  55. @param[in] S points to an instance of the floating-point Radix-4 CFFT/CIFFT structure
  56. @param[in,out] pSrc points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place
  57. @return none
  58. */
  59. void arm_cfft_radix4_f32(
  60. const arm_cfft_radix4_instance_f32 * S,
  61. float32_t * pSrc)
  62. {
  63. if (S->ifftFlag == 1U)
  64. {
  65. /* Complex IFFT radix-4 */
  66. arm_radix4_butterfly_inverse_f32(pSrc, S->fftLen, S->pTwiddle, S->twidCoefModifier, S->onebyfftLen);
  67. }
  68. else
  69. {
  70. /* Complex FFT radix-4 */
  71. arm_radix4_butterfly_f32(pSrc, S->fftLen, S->pTwiddle, S->twidCoefModifier);
  72. }
  73. if (S->bitReverseFlag == 1U)
  74. {
  75. /* Bit Reversal */
  76. arm_bitreversal_f32(pSrc, S->fftLen, S->bitRevFactor, S->pBitRevTable);
  77. }
  78. }
  79. /**
  80. @} end of ComplexFFT group
  81. */
  82. /* ----------------------------------------------------------------------
  83. * Internal helper function used by the FFTs
  84. * ---------------------------------------------------------------------- */
  85. /**
  86. brief Core function for the floating-point CFFT butterfly process.
  87. param[in,out] pSrc points to the in-place buffer of floating-point data type
  88. param[in] fftLen length of the FFT
  89. param[in] pCoef points to the twiddle coefficient buffer
  90. param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table
  91. return none
  92. */
  93. void arm_radix4_butterfly_f32(
  94. float32_t * pSrc,
  95. uint16_t fftLen,
  96. const float32_t * pCoef,
  97. uint16_t twidCoefModifier)
  98. {
  99. float32_t co1, co2, co3, si1, si2, si3;
  100. uint32_t ia1, ia2, ia3;
  101. uint32_t i0, i1, i2, i3;
  102. uint32_t n1, n2, j, k;
  103. #if defined (ARM_MATH_LOOPUNROLL)
  104. float32_t xaIn, yaIn, xbIn, ybIn, xcIn, ycIn, xdIn, ydIn;
  105. float32_t Xaplusc, Xbplusd, Yaplusc, Ybplusd, Xaminusc, Xbminusd, Yaminusc,
  106. Ybminusd;
  107. float32_t Xb12C_out, Yb12C_out, Xc12C_out, Yc12C_out, Xd12C_out, Yd12C_out;
  108. float32_t Xb12_out, Yb12_out, Xc12_out, Yc12_out, Xd12_out, Yd12_out;
  109. float32_t *ptr1;
  110. float32_t p0,p1,p2,p3,p4,p5;
  111. float32_t a0,a1,a2,a3,a4,a5,a6,a7;
  112. /* Initializations for the first stage */
  113. n2 = fftLen;
  114. n1 = n2;
  115. /* n2 = fftLen/4 */
  116. n2 >>= 2U;
  117. i0 = 0U;
  118. ia1 = 0U;
  119. j = n2;
  120. /* Calculation of first stage */
  121. do
  122. {
  123. /* index calculation for the input as, */
  124. /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
  125. i1 = i0 + n2;
  126. i2 = i1 + n2;
  127. i3 = i2 + n2;
  128. xaIn = pSrc[(2U * i0)];
  129. yaIn = pSrc[(2U * i0) + 1U];
  130. xbIn = pSrc[(2U * i1)];
  131. ybIn = pSrc[(2U * i1) + 1U];
  132. xcIn = pSrc[(2U * i2)];
  133. ycIn = pSrc[(2U * i2) + 1U];
  134. xdIn = pSrc[(2U * i3)];
  135. ydIn = pSrc[(2U * i3) + 1U];
  136. /* xa + xc */
  137. Xaplusc = xaIn + xcIn;
  138. /* xb + xd */
  139. Xbplusd = xbIn + xdIn;
  140. /* ya + yc */
  141. Yaplusc = yaIn + ycIn;
  142. /* yb + yd */
  143. Ybplusd = ybIn + ydIn;
  144. /* index calculation for the coefficients */
  145. ia2 = ia1 + ia1;
  146. co2 = pCoef[ia2 * 2U];
  147. si2 = pCoef[(ia2 * 2U) + 1U];
  148. /* xa - xc */
  149. Xaminusc = xaIn - xcIn;
  150. /* xb - xd */
  151. Xbminusd = xbIn - xdIn;
  152. /* ya - yc */
  153. Yaminusc = yaIn - ycIn;
  154. /* yb - yd */
  155. Ybminusd = ybIn - ydIn;
  156. /* xa' = xa + xb + xc + xd */
  157. pSrc[(2U * i0)] = Xaplusc + Xbplusd;
  158. /* ya' = ya + yb + yc + yd */
  159. pSrc[(2U * i0) + 1U] = Yaplusc + Ybplusd;
  160. /* (xa - xc) + (yb - yd) */
  161. Xb12C_out = (Xaminusc + Ybminusd);
  162. /* (ya - yc) + (xb - xd) */
  163. Yb12C_out = (Yaminusc - Xbminusd);
  164. /* (xa + xc) - (xb + xd) */
  165. Xc12C_out = (Xaplusc - Xbplusd);
  166. /* (ya + yc) - (yb + yd) */
  167. Yc12C_out = (Yaplusc - Ybplusd);
  168. /* (xa - xc) - (yb - yd) */
  169. Xd12C_out = (Xaminusc - Ybminusd);
  170. /* (ya - yc) + (xb - xd) */
  171. Yd12C_out = (Xbminusd + Yaminusc);
  172. co1 = pCoef[ia1 * 2U];
  173. si1 = pCoef[(ia1 * 2U) + 1U];
  174. /* index calculation for the coefficients */
  175. ia3 = ia2 + ia1;
  176. co3 = pCoef[ia3 * 2U];
  177. si3 = pCoef[(ia3 * 2U) + 1U];
  178. Xb12_out = Xb12C_out * co1;
  179. Yb12_out = Yb12C_out * co1;
  180. Xc12_out = Xc12C_out * co2;
  181. Yc12_out = Yc12C_out * co2;
  182. Xd12_out = Xd12C_out * co3;
  183. Yd12_out = Yd12C_out * co3;
  184. /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
  185. //Xb12_out -= Yb12C_out * si1;
  186. p0 = Yb12C_out * si1;
  187. /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
  188. //Yb12_out += Xb12C_out * si1;
  189. p1 = Xb12C_out * si1;
  190. /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
  191. //Xc12_out -= Yc12C_out * si2;
  192. p2 = Yc12C_out * si2;
  193. /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
  194. //Yc12_out += Xc12C_out * si2;
  195. p3 = Xc12C_out * si2;
  196. /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
  197. //Xd12_out -= Yd12C_out * si3;
  198. p4 = Yd12C_out * si3;
  199. /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
  200. //Yd12_out += Xd12C_out * si3;
  201. p5 = Xd12C_out * si3;
  202. Xb12_out += p0;
  203. Yb12_out -= p1;
  204. Xc12_out += p2;
  205. Yc12_out -= p3;
  206. Xd12_out += p4;
  207. Yd12_out -= p5;
  208. /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */
  209. pSrc[2U * i1] = Xc12_out;
  210. /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
  211. pSrc[(2U * i1) + 1U] = Yc12_out;
  212. /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */
  213. pSrc[2U * i2] = Xb12_out;
  214. /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
  215. pSrc[(2U * i2) + 1U] = Yb12_out;
  216. /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */
  217. pSrc[2U * i3] = Xd12_out;
  218. /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
  219. pSrc[(2U * i3) + 1U] = Yd12_out;
  220. /* Twiddle coefficients index modifier */
  221. ia1 += twidCoefModifier;
  222. /* Updating input index */
  223. i0++;
  224. }
  225. while (--j);
  226. twidCoefModifier <<= 2U;
  227. /* Calculation of second stage to excluding last stage */
  228. for (k = fftLen >> 2U; k > 4U; k >>= 2U)
  229. {
  230. /* Initializations for the first stage */
  231. n1 = n2;
  232. n2 >>= 2U;
  233. ia1 = 0U;
  234. /* Calculation of first stage */
  235. j = 0;
  236. do
  237. {
  238. /* index calculation for the coefficients */
  239. ia2 = ia1 + ia1;
  240. ia3 = ia2 + ia1;
  241. co1 = pCoef[(ia1 * 2U)];
  242. si1 = pCoef[(ia1 * 2U) + 1U];
  243. co2 = pCoef[(ia2 * 2U)];
  244. si2 = pCoef[(ia2 * 2U) + 1U];
  245. co3 = pCoef[(ia3 * 2U)];
  246. si3 = pCoef[(ia3 * 2U) + 1U];
  247. /* Twiddle coefficients index modifier */
  248. ia1 += twidCoefModifier;
  249. i0 = j;
  250. do
  251. {
  252. /* index calculation for the input as, */
  253. /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
  254. i1 = i0 + n2;
  255. i2 = i1 + n2;
  256. i3 = i2 + n2;
  257. xaIn = pSrc[(2U * i0)];
  258. yaIn = pSrc[(2U * i0) + 1U];
  259. xbIn = pSrc[(2U * i1)];
  260. ybIn = pSrc[(2U * i1) + 1U];
  261. xcIn = pSrc[(2U * i2)];
  262. ycIn = pSrc[(2U * i2) + 1U];
  263. xdIn = pSrc[(2U * i3)];
  264. ydIn = pSrc[(2U * i3) + 1U];
  265. /* xa - xc */
  266. Xaminusc = xaIn - xcIn;
  267. /* (xb - xd) */
  268. Xbminusd = xbIn - xdIn;
  269. /* ya - yc */
  270. Yaminusc = yaIn - ycIn;
  271. /* (yb - yd) */
  272. Ybminusd = ybIn - ydIn;
  273. /* xa + xc */
  274. Xaplusc = xaIn + xcIn;
  275. /* xb + xd */
  276. Xbplusd = xbIn + xdIn;
  277. /* ya + yc */
  278. Yaplusc = yaIn + ycIn;
  279. /* yb + yd */
  280. Ybplusd = ybIn + ydIn;
  281. /* (xa - xc) + (yb - yd) */
  282. Xb12C_out = (Xaminusc + Ybminusd);
  283. /* (ya - yc) - (xb - xd) */
  284. Yb12C_out = (Yaminusc - Xbminusd);
  285. /* xa + xc -(xb + xd) */
  286. Xc12C_out = (Xaplusc - Xbplusd);
  287. /* (ya + yc) - (yb + yd) */
  288. Yc12C_out = (Yaplusc - Ybplusd);
  289. /* (xa - xc) - (yb - yd) */
  290. Xd12C_out = (Xaminusc - Ybminusd);
  291. /* (ya - yc) + (xb - xd) */
  292. Yd12C_out = (Xbminusd + Yaminusc);
  293. pSrc[(2U * i0)] = Xaplusc + Xbplusd;
  294. pSrc[(2U * i0) + 1U] = Yaplusc + Ybplusd;
  295. Xb12_out = Xb12C_out * co1;
  296. Yb12_out = Yb12C_out * co1;
  297. Xc12_out = Xc12C_out * co2;
  298. Yc12_out = Yc12C_out * co2;
  299. Xd12_out = Xd12C_out * co3;
  300. Yd12_out = Yd12C_out * co3;
  301. /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
  302. //Xb12_out -= Yb12C_out * si1;
  303. p0 = Yb12C_out * si1;
  304. /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
  305. //Yb12_out += Xb12C_out * si1;
  306. p1 = Xb12C_out * si1;
  307. /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
  308. //Xc12_out -= Yc12C_out * si2;
  309. p2 = Yc12C_out * si2;
  310. /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
  311. //Yc12_out += Xc12C_out * si2;
  312. p3 = Xc12C_out * si2;
  313. /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
  314. //Xd12_out -= Yd12C_out * si3;
  315. p4 = Yd12C_out * si3;
  316. /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
  317. //Yd12_out += Xd12C_out * si3;
  318. p5 = Xd12C_out * si3;
  319. Xb12_out += p0;
  320. Yb12_out -= p1;
  321. Xc12_out += p2;
  322. Yc12_out -= p3;
  323. Xd12_out += p4;
  324. Yd12_out -= p5;
  325. /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */
  326. pSrc[2U * i1] = Xc12_out;
  327. /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
  328. pSrc[(2U * i1) + 1U] = Yc12_out;
  329. /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */
  330. pSrc[2U * i2] = Xb12_out;
  331. /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
  332. pSrc[(2U * i2) + 1U] = Yb12_out;
  333. /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */
  334. pSrc[2U * i3] = Xd12_out;
  335. /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
  336. pSrc[(2U * i3) + 1U] = Yd12_out;
  337. i0 += n1;
  338. } while (i0 < fftLen);
  339. j++;
  340. } while (j <= (n2 - 1U));
  341. twidCoefModifier <<= 2U;
  342. }
  343. j = fftLen >> 2;
  344. ptr1 = &pSrc[0];
  345. /* Calculations of last stage */
  346. do
  347. {
  348. xaIn = ptr1[0];
  349. yaIn = ptr1[1];
  350. xbIn = ptr1[2];
  351. ybIn = ptr1[3];
  352. xcIn = ptr1[4];
  353. ycIn = ptr1[5];
  354. xdIn = ptr1[6];
  355. ydIn = ptr1[7];
  356. /* xa + xc */
  357. Xaplusc = xaIn + xcIn;
  358. /* xa - xc */
  359. Xaminusc = xaIn - xcIn;
  360. /* ya + yc */
  361. Yaplusc = yaIn + ycIn;
  362. /* ya - yc */
  363. Yaminusc = yaIn - ycIn;
  364. /* xb + xd */
  365. Xbplusd = xbIn + xdIn;
  366. /* yb + yd */
  367. Ybplusd = ybIn + ydIn;
  368. /* (xb-xd) */
  369. Xbminusd = xbIn - xdIn;
  370. /* (yb-yd) */
  371. Ybminusd = ybIn - ydIn;
  372. /* xa' = xa + xb + xc + xd */
  373. a0 = (Xaplusc + Xbplusd);
  374. /* ya' = ya + yb + yc + yd */
  375. a1 = (Yaplusc + Ybplusd);
  376. /* xc' = (xa-xb+xc-xd) */
  377. a2 = (Xaplusc - Xbplusd);
  378. /* yc' = (ya-yb+yc-yd) */
  379. a3 = (Yaplusc - Ybplusd);
  380. /* xb' = (xa+yb-xc-yd) */
  381. a4 = (Xaminusc + Ybminusd);
  382. /* yb' = (ya-xb-yc+xd) */
  383. a5 = (Yaminusc - Xbminusd);
  384. /* xd' = (xa-yb-xc+yd)) */
  385. a6 = (Xaminusc - Ybminusd);
  386. /* yd' = (ya+xb-yc-xd) */
  387. a7 = (Xbminusd + Yaminusc);
  388. ptr1[0] = a0;
  389. ptr1[1] = a1;
  390. ptr1[2] = a2;
  391. ptr1[3] = a3;
  392. ptr1[4] = a4;
  393. ptr1[5] = a5;
  394. ptr1[6] = a6;
  395. ptr1[7] = a7;
  396. /* increment pointer by 8 */
  397. ptr1 += 8U;
  398. } while (--j);
  399. #else
  400. float32_t t1, t2, r1, r2, s1, s2;
  401. /* Initializations for the fft calculation */
  402. n2 = fftLen;
  403. n1 = n2;
  404. for (k = fftLen; k > 1U; k >>= 2U)
  405. {
  406. /* Initializations for the fft calculation */
  407. n1 = n2;
  408. n2 >>= 2U;
  409. ia1 = 0U;
  410. /* FFT Calculation */
  411. j = 0;
  412. do
  413. {
  414. /* index calculation for the coefficients */
  415. ia2 = ia1 + ia1;
  416. ia3 = ia2 + ia1;
  417. co1 = pCoef[ia1 * 2U];
  418. si1 = pCoef[(ia1 * 2U) + 1U];
  419. co2 = pCoef[ia2 * 2U];
  420. si2 = pCoef[(ia2 * 2U) + 1U];
  421. co3 = pCoef[ia3 * 2U];
  422. si3 = pCoef[(ia3 * 2U) + 1U];
  423. /* Twiddle coefficients index modifier */
  424. ia1 = ia1 + twidCoefModifier;
  425. i0 = j;
  426. do
  427. {
  428. /* index calculation for the input as, */
  429. /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
  430. i1 = i0 + n2;
  431. i2 = i1 + n2;
  432. i3 = i2 + n2;
  433. /* xa + xc */
  434. r1 = pSrc[(2U * i0)] + pSrc[(2U * i2)];
  435. /* xa - xc */
  436. r2 = pSrc[(2U * i0)] - pSrc[(2U * i2)];
  437. /* ya + yc */
  438. s1 = pSrc[(2U * i0) + 1U] + pSrc[(2U * i2) + 1U];
  439. /* ya - yc */
  440. s2 = pSrc[(2U * i0) + 1U] - pSrc[(2U * i2) + 1U];
  441. /* xb + xd */
  442. t1 = pSrc[2U * i1] + pSrc[2U * i3];
  443. /* xa' = xa + xb + xc + xd */
  444. pSrc[2U * i0] = r1 + t1;
  445. /* xa + xc -(xb + xd) */
  446. r1 = r1 - t1;
  447. /* yb + yd */
  448. t2 = pSrc[(2U * i1) + 1U] + pSrc[(2U * i3) + 1U];
  449. /* ya' = ya + yb + yc + yd */
  450. pSrc[(2U * i0) + 1U] = s1 + t2;
  451. /* (ya + yc) - (yb + yd) */
  452. s1 = s1 - t2;
  453. /* (yb - yd) */
  454. t1 = pSrc[(2U * i1) + 1U] - pSrc[(2U * i3) + 1U];
  455. /* (xb - xd) */
  456. t2 = pSrc[2U * i1] - pSrc[2U * i3];
  457. /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */
  458. pSrc[2U * i1] = (r1 * co2) + (s1 * si2);
  459. /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
  460. pSrc[(2U * i1) + 1U] = (s1 * co2) - (r1 * si2);
  461. /* (xa - xc) + (yb - yd) */
  462. r1 = r2 + t1;
  463. /* (xa - xc) - (yb - yd) */
  464. r2 = r2 - t1;
  465. /* (ya - yc) - (xb - xd) */
  466. s1 = s2 - t2;
  467. /* (ya - yc) + (xb - xd) */
  468. s2 = s2 + t2;
  469. /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */
  470. pSrc[2U * i2] = (r1 * co1) + (s1 * si1);
  471. /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
  472. pSrc[(2U * i2) + 1U] = (s1 * co1) - (r1 * si1);
  473. /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */
  474. pSrc[2U * i3] = (r2 * co3) + (s2 * si3);
  475. /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
  476. pSrc[(2U * i3) + 1U] = (s2 * co3) - (r2 * si3);
  477. i0 += n1;
  478. } while ( i0 < fftLen);
  479. j++;
  480. } while (j <= (n2 - 1U));
  481. twidCoefModifier <<= 2U;
  482. }
  483. #endif /* #if defined (ARM_MATH_LOOPUNROLL) */
  484. }
  485. /**
  486. brief Core function for the floating-point CIFFT butterfly process.
  487. param[in,out] pSrc points to the in-place buffer of floating-point data type
  488. param[in] fftLen length of the FFT
  489. param[in] pCoef points to twiddle coefficient buffer
  490. param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
  491. param[in] onebyfftLen value of 1/fftLen
  492. return none
  493. */
  494. void arm_radix4_butterfly_inverse_f32(
  495. float32_t * pSrc,
  496. uint16_t fftLen,
  497. const float32_t * pCoef,
  498. uint16_t twidCoefModifier,
  499. float32_t onebyfftLen)
  500. {
  501. float32_t co1, co2, co3, si1, si2, si3;
  502. uint32_t ia1, ia2, ia3;
  503. uint32_t i0, i1, i2, i3;
  504. uint32_t n1, n2, j, k;
  505. #if defined (ARM_MATH_LOOPUNROLL)
  506. float32_t xaIn, yaIn, xbIn, ybIn, xcIn, ycIn, xdIn, ydIn;
  507. float32_t Xaplusc, Xbplusd, Yaplusc, Ybplusd, Xaminusc, Xbminusd, Yaminusc,
  508. Ybminusd;
  509. float32_t Xb12C_out, Yb12C_out, Xc12C_out, Yc12C_out, Xd12C_out, Yd12C_out;
  510. float32_t Xb12_out, Yb12_out, Xc12_out, Yc12_out, Xd12_out, Yd12_out;
  511. float32_t *ptr1;
  512. float32_t p0,p1,p2,p3,p4,p5,p6,p7;
  513. float32_t a0,a1,a2,a3,a4,a5,a6,a7;
  514. /* Initializations for the first stage */
  515. n2 = fftLen;
  516. n1 = n2;
  517. /* n2 = fftLen/4 */
  518. n2 >>= 2U;
  519. i0 = 0U;
  520. ia1 = 0U;
  521. j = n2;
  522. /* Calculation of first stage */
  523. do
  524. {
  525. /* index calculation for the input as, */
  526. /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
  527. i1 = i0 + n2;
  528. i2 = i1 + n2;
  529. i3 = i2 + n2;
  530. /* Butterfly implementation */
  531. xaIn = pSrc[(2U * i0)];
  532. yaIn = pSrc[(2U * i0) + 1U];
  533. xcIn = pSrc[(2U * i2)];
  534. ycIn = pSrc[(2U * i2) + 1U];
  535. xbIn = pSrc[(2U * i1)];
  536. ybIn = pSrc[(2U * i1) + 1U];
  537. xdIn = pSrc[(2U * i3)];
  538. ydIn = pSrc[(2U * i3) + 1U];
  539. /* xa + xc */
  540. Xaplusc = xaIn + xcIn;
  541. /* xb + xd */
  542. Xbplusd = xbIn + xdIn;
  543. /* ya + yc */
  544. Yaplusc = yaIn + ycIn;
  545. /* yb + yd */
  546. Ybplusd = ybIn + ydIn;
  547. /* index calculation for the coefficients */
  548. ia2 = ia1 + ia1;
  549. co2 = pCoef[ia2 * 2U];
  550. si2 = pCoef[(ia2 * 2U) + 1U];
  551. /* xa - xc */
  552. Xaminusc = xaIn - xcIn;
  553. /* xb - xd */
  554. Xbminusd = xbIn - xdIn;
  555. /* ya - yc */
  556. Yaminusc = yaIn - ycIn;
  557. /* yb - yd */
  558. Ybminusd = ybIn - ydIn;
  559. /* xa' = xa + xb + xc + xd */
  560. pSrc[(2U * i0)] = Xaplusc + Xbplusd;
  561. /* ya' = ya + yb + yc + yd */
  562. pSrc[(2U * i0) + 1U] = Yaplusc + Ybplusd;
  563. /* (xa - xc) - (yb - yd) */
  564. Xb12C_out = (Xaminusc - Ybminusd);
  565. /* (ya - yc) + (xb - xd) */
  566. Yb12C_out = (Yaminusc + Xbminusd);
  567. /* (xa + xc) - (xb + xd) */
  568. Xc12C_out = (Xaplusc - Xbplusd);
  569. /* (ya + yc) - (yb + yd) */
  570. Yc12C_out = (Yaplusc - Ybplusd);
  571. /* (xa - xc) + (yb - yd) */
  572. Xd12C_out = (Xaminusc + Ybminusd);
  573. /* (ya - yc) - (xb - xd) */
  574. Yd12C_out = (Yaminusc - Xbminusd);
  575. co1 = pCoef[ia1 * 2U];
  576. si1 = pCoef[(ia1 * 2U) + 1U];
  577. /* index calculation for the coefficients */
  578. ia3 = ia2 + ia1;
  579. co3 = pCoef[ia3 * 2U];
  580. si3 = pCoef[(ia3 * 2U) + 1U];
  581. Xb12_out = Xb12C_out * co1;
  582. Yb12_out = Yb12C_out * co1;
  583. Xc12_out = Xc12C_out * co2;
  584. Yc12_out = Yc12C_out * co2;
  585. Xd12_out = Xd12C_out * co3;
  586. Yd12_out = Yd12C_out * co3;
  587. /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
  588. //Xb12_out -= Yb12C_out * si1;
  589. p0 = Yb12C_out * si1;
  590. /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
  591. //Yb12_out += Xb12C_out * si1;
  592. p1 = Xb12C_out * si1;
  593. /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
  594. //Xc12_out -= Yc12C_out * si2;
  595. p2 = Yc12C_out * si2;
  596. /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
  597. //Yc12_out += Xc12C_out * si2;
  598. p3 = Xc12C_out * si2;
  599. /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
  600. //Xd12_out -= Yd12C_out * si3;
  601. p4 = Yd12C_out * si3;
  602. /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
  603. //Yd12_out += Xd12C_out * si3;
  604. p5 = Xd12C_out * si3;
  605. Xb12_out -= p0;
  606. Yb12_out += p1;
  607. Xc12_out -= p2;
  608. Yc12_out += p3;
  609. Xd12_out -= p4;
  610. Yd12_out += p5;
  611. /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
  612. pSrc[2U * i1] = Xc12_out;
  613. /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
  614. pSrc[(2U * i1) + 1U] = Yc12_out;
  615. /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
  616. pSrc[2U * i2] = Xb12_out;
  617. /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
  618. pSrc[(2U * i2) + 1U] = Yb12_out;
  619. /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
  620. pSrc[2U * i3] = Xd12_out;
  621. /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
  622. pSrc[(2U * i3) + 1U] = Yd12_out;
  623. /* Twiddle coefficients index modifier */
  624. ia1 = ia1 + twidCoefModifier;
  625. /* Updating input index */
  626. i0 = i0 + 1U;
  627. } while (--j);
  628. twidCoefModifier <<= 2U;
  629. /* Calculation of second stage to excluding last stage */
  630. for (k = fftLen >> 2U; k > 4U; k >>= 2U)
  631. {
  632. /* Initializations for the first stage */
  633. n1 = n2;
  634. n2 >>= 2U;
  635. ia1 = 0U;
  636. /* Calculation of first stage */
  637. j = 0;
  638. do
  639. {
  640. /* index calculation for the coefficients */
  641. ia2 = ia1 + ia1;
  642. ia3 = ia2 + ia1;
  643. co1 = pCoef[ia1 * 2U];
  644. si1 = pCoef[(ia1 * 2U) + 1U];
  645. co2 = pCoef[ia2 * 2U];
  646. si2 = pCoef[(ia2 * 2U) + 1U];
  647. co3 = pCoef[ia3 * 2U];
  648. si3 = pCoef[(ia3 * 2U) + 1U];
  649. /* Twiddle coefficients index modifier */
  650. ia1 = ia1 + twidCoefModifier;
  651. i0 = j;
  652. do
  653. {
  654. /* index calculation for the input as, */
  655. /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
  656. i1 = i0 + n2;
  657. i2 = i1 + n2;
  658. i3 = i2 + n2;
  659. xaIn = pSrc[(2U * i0)];
  660. yaIn = pSrc[(2U * i0) + 1U];
  661. xbIn = pSrc[(2U * i1)];
  662. ybIn = pSrc[(2U * i1) + 1U];
  663. xcIn = pSrc[(2U * i2)];
  664. ycIn = pSrc[(2U * i2) + 1U];
  665. xdIn = pSrc[(2U * i3)];
  666. ydIn = pSrc[(2U * i3) + 1U];
  667. /* xa - xc */
  668. Xaminusc = xaIn - xcIn;
  669. /* (xb - xd) */
  670. Xbminusd = xbIn - xdIn;
  671. /* ya - yc */
  672. Yaminusc = yaIn - ycIn;
  673. /* (yb - yd) */
  674. Ybminusd = ybIn - ydIn;
  675. /* xa + xc */
  676. Xaplusc = xaIn + xcIn;
  677. /* xb + xd */
  678. Xbplusd = xbIn + xdIn;
  679. /* ya + yc */
  680. Yaplusc = yaIn + ycIn;
  681. /* yb + yd */
  682. Ybplusd = ybIn + ydIn;
  683. /* (xa - xc) - (yb - yd) */
  684. Xb12C_out = (Xaminusc - Ybminusd);
  685. /* (ya - yc) + (xb - xd) */
  686. Yb12C_out = (Yaminusc + Xbminusd);
  687. /* xa + xc -(xb + xd) */
  688. Xc12C_out = (Xaplusc - Xbplusd);
  689. /* (ya + yc) - (yb + yd) */
  690. Yc12C_out = (Yaplusc - Ybplusd);
  691. /* (xa - xc) + (yb - yd) */
  692. Xd12C_out = (Xaminusc + Ybminusd);
  693. /* (ya - yc) - (xb - xd) */
  694. Yd12C_out = (Yaminusc - Xbminusd);
  695. pSrc[(2U * i0)] = Xaplusc + Xbplusd;
  696. pSrc[(2U * i0) + 1U] = Yaplusc + Ybplusd;
  697. Xb12_out = Xb12C_out * co1;
  698. Yb12_out = Yb12C_out * co1;
  699. Xc12_out = Xc12C_out * co2;
  700. Yc12_out = Yc12C_out * co2;
  701. Xd12_out = Xd12C_out * co3;
  702. Yd12_out = Yd12C_out * co3;
  703. /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
  704. //Xb12_out -= Yb12C_out * si1;
  705. p0 = Yb12C_out * si1;
  706. /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
  707. //Yb12_out += Xb12C_out * si1;
  708. p1 = Xb12C_out * si1;
  709. /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
  710. //Xc12_out -= Yc12C_out * si2;
  711. p2 = Yc12C_out * si2;
  712. /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
  713. //Yc12_out += Xc12C_out * si2;
  714. p3 = Xc12C_out * si2;
  715. /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
  716. //Xd12_out -= Yd12C_out * si3;
  717. p4 = Yd12C_out * si3;
  718. /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
  719. //Yd12_out += Xd12C_out * si3;
  720. p5 = Xd12C_out * si3;
  721. Xb12_out -= p0;
  722. Yb12_out += p1;
  723. Xc12_out -= p2;
  724. Yc12_out += p3;
  725. Xd12_out -= p4;
  726. Yd12_out += p5;
  727. /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
  728. pSrc[2U * i1] = Xc12_out;
  729. /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
  730. pSrc[(2U * i1) + 1U] = Yc12_out;
  731. /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
  732. pSrc[2U * i2] = Xb12_out;
  733. /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
  734. pSrc[(2U * i2) + 1U] = Yb12_out;
  735. /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
  736. pSrc[2U * i3] = Xd12_out;
  737. /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
  738. pSrc[(2U * i3) + 1U] = Yd12_out;
  739. i0 += n1;
  740. } while (i0 < fftLen);
  741. j++;
  742. } while (j <= (n2 - 1U));
  743. twidCoefModifier <<= 2U;
  744. }
  745. /* Initializations of last stage */
  746. j = fftLen >> 2;
  747. ptr1 = &pSrc[0];
  748. /* Calculations of last stage */
  749. do
  750. {
  751. xaIn = ptr1[0];
  752. yaIn = ptr1[1];
  753. xbIn = ptr1[2];
  754. ybIn = ptr1[3];
  755. xcIn = ptr1[4];
  756. ycIn = ptr1[5];
  757. xdIn = ptr1[6];
  758. ydIn = ptr1[7];
  759. /* Butterfly implementation */
  760. /* xa + xc */
  761. Xaplusc = xaIn + xcIn;
  762. /* xa - xc */
  763. Xaminusc = xaIn - xcIn;
  764. /* ya + yc */
  765. Yaplusc = yaIn + ycIn;
  766. /* ya - yc */
  767. Yaminusc = yaIn - ycIn;
  768. /* xb + xd */
  769. Xbplusd = xbIn + xdIn;
  770. /* yb + yd */
  771. Ybplusd = ybIn + ydIn;
  772. /* (xb-xd) */
  773. Xbminusd = xbIn - xdIn;
  774. /* (yb-yd) */
  775. Ybminusd = ybIn - ydIn;
  776. /* xa' = (xa+xb+xc+xd) * onebyfftLen */
  777. a0 = (Xaplusc + Xbplusd);
  778. /* ya' = (ya+yb+yc+yd) * onebyfftLen */
  779. a1 = (Yaplusc + Ybplusd);
  780. /* xc' = (xa-xb+xc-xd) * onebyfftLen */
  781. a2 = (Xaplusc - Xbplusd);
  782. /* yc' = (ya-yb+yc-yd) * onebyfftLen */
  783. a3 = (Yaplusc - Ybplusd);
  784. /* xb' = (xa-yb-xc+yd) * onebyfftLen */
  785. a4 = (Xaminusc - Ybminusd);
  786. /* yb' = (ya+xb-yc-xd) * onebyfftLen */
  787. a5 = (Yaminusc + Xbminusd);
  788. /* xd' = (xa-yb-xc+yd) * onebyfftLen */
  789. a6 = (Xaminusc + Ybminusd);
  790. /* yd' = (ya-xb-yc+xd) * onebyfftLen */
  791. a7 = (Yaminusc - Xbminusd);
  792. p0 = a0 * onebyfftLen;
  793. p1 = a1 * onebyfftLen;
  794. p2 = a2 * onebyfftLen;
  795. p3 = a3 * onebyfftLen;
  796. p4 = a4 * onebyfftLen;
  797. p5 = a5 * onebyfftLen;
  798. p6 = a6 * onebyfftLen;
  799. p7 = a7 * onebyfftLen;
  800. /* xa' = (xa+xb+xc+xd) * onebyfftLen */
  801. ptr1[0] = p0;
  802. /* ya' = (ya+yb+yc+yd) * onebyfftLen */
  803. ptr1[1] = p1;
  804. /* xc' = (xa-xb+xc-xd) * onebyfftLen */
  805. ptr1[2] = p2;
  806. /* yc' = (ya-yb+yc-yd) * onebyfftLen */
  807. ptr1[3] = p3;
  808. /* xb' = (xa-yb-xc+yd) * onebyfftLen */
  809. ptr1[4] = p4;
  810. /* yb' = (ya+xb-yc-xd) * onebyfftLen */
  811. ptr1[5] = p5;
  812. /* xd' = (xa-yb-xc+yd) * onebyfftLen */
  813. ptr1[6] = p6;
  814. /* yd' = (ya-xb-yc+xd) * onebyfftLen */
  815. ptr1[7] = p7;
  816. /* increment source pointer by 8 for next calculations */
  817. ptr1 = ptr1 + 8U;
  818. } while (--j);
  819. #else
  820. float32_t t1, t2, r1, r2, s1, s2;
  821. /* Initializations for the first stage */
  822. n2 = fftLen;
  823. n1 = n2;
  824. /* Calculation of first stage */
  825. for (k = fftLen; k > 4U; k >>= 2U)
  826. {
  827. /* Initializations for the first stage */
  828. n1 = n2;
  829. n2 >>= 2U;
  830. ia1 = 0U;
  831. /* Calculation of first stage */
  832. j = 0;
  833. do
  834. {
  835. /* index calculation for the coefficients */
  836. ia2 = ia1 + ia1;
  837. ia3 = ia2 + ia1;
  838. co1 = pCoef[ia1 * 2U];
  839. si1 = pCoef[(ia1 * 2U) + 1U];
  840. co2 = pCoef[ia2 * 2U];
  841. si2 = pCoef[(ia2 * 2U) + 1U];
  842. co3 = pCoef[ia3 * 2U];
  843. si3 = pCoef[(ia3 * 2U) + 1U];
  844. /* Twiddle coefficients index modifier */
  845. ia1 = ia1 + twidCoefModifier;
  846. i0 = j;
  847. do
  848. {
  849. /* index calculation for the input as, */
  850. /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
  851. i1 = i0 + n2;
  852. i2 = i1 + n2;
  853. i3 = i2 + n2;
  854. /* xa + xc */
  855. r1 = pSrc[(2U * i0)] + pSrc[(2U * i2)];
  856. /* xa - xc */
  857. r2 = pSrc[(2U * i0)] - pSrc[(2U * i2)];
  858. /* ya + yc */
  859. s1 = pSrc[(2U * i0) + 1U] + pSrc[(2U * i2) + 1U];
  860. /* ya - yc */
  861. s2 = pSrc[(2U * i0) + 1U] - pSrc[(2U * i2) + 1U];
  862. /* xb + xd */
  863. t1 = pSrc[2U * i1] + pSrc[2U * i3];
  864. /* xa' = xa + xb + xc + xd */
  865. pSrc[2U * i0] = r1 + t1;
  866. /* xa + xc -(xb + xd) */
  867. r1 = r1 - t1;
  868. /* yb + yd */
  869. t2 = pSrc[(2U * i1) + 1U] + pSrc[(2U * i3) + 1U];
  870. /* ya' = ya + yb + yc + yd */
  871. pSrc[(2U * i0) + 1U] = s1 + t2;
  872. /* (ya + yc) - (yb + yd) */
  873. s1 = s1 - t2;
  874. /* (yb - yd) */
  875. t1 = pSrc[(2U * i1) + 1U] - pSrc[(2U * i3) + 1U];
  876. /* (xb - xd) */
  877. t2 = pSrc[2U * i1] - pSrc[2U * i3];
  878. /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
  879. pSrc[2U * i1] = (r1 * co2) - (s1 * si2);
  880. /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
  881. pSrc[(2U * i1) + 1U] = (s1 * co2) + (r1 * si2);
  882. /* (xa - xc) - (yb - yd) */
  883. r1 = r2 - t1;
  884. /* (xa - xc) + (yb - yd) */
  885. r2 = r2 + t1;
  886. /* (ya - yc) + (xb - xd) */
  887. s1 = s2 + t2;
  888. /* (ya - yc) - (xb - xd) */
  889. s2 = s2 - t2;
  890. /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
  891. pSrc[2U * i2] = (r1 * co1) - (s1 * si1);
  892. /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
  893. pSrc[(2U * i2) + 1U] = (s1 * co1) + (r1 * si1);
  894. /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
  895. pSrc[2U * i3] = (r2 * co3) - (s2 * si3);
  896. /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
  897. pSrc[(2U * i3) + 1U] = (s2 * co3) + (r2 * si3);
  898. i0 += n1;
  899. } while ( i0 < fftLen);
  900. j++;
  901. } while (j <= (n2 - 1U));
  902. twidCoefModifier <<= 2U;
  903. }
  904. /* Initializations of last stage */
  905. n1 = n2;
  906. n2 >>= 2U;
  907. /* Calculations of last stage */
  908. for (i0 = 0U; i0 <= (fftLen - n1); i0 += n1)
  909. {
  910. /* index calculation for the input as, */
  911. /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
  912. i1 = i0 + n2;
  913. i2 = i1 + n2;
  914. i3 = i2 + n2;
  915. /* Butterfly implementation */
  916. /* xa + xc */
  917. r1 = pSrc[2U * i0] + pSrc[2U * i2];
  918. /* xa - xc */
  919. r2 = pSrc[2U * i0] - pSrc[2U * i2];
  920. /* ya + yc */
  921. s1 = pSrc[(2U * i0) + 1U] + pSrc[(2U * i2) + 1U];
  922. /* ya - yc */
  923. s2 = pSrc[(2U * i0) + 1U] - pSrc[(2U * i2) + 1U];
  924. /* xc + xd */
  925. t1 = pSrc[2U * i1] + pSrc[2U * i3];
  926. /* xa' = xa + xb + xc + xd */
  927. pSrc[2U * i0] = (r1 + t1) * onebyfftLen;
  928. /* (xa + xb) - (xc + xd) */
  929. r1 = r1 - t1;
  930. /* yb + yd */
  931. t2 = pSrc[(2U * i1) + 1U] + pSrc[(2U * i3) + 1U];
  932. /* ya' = ya + yb + yc + yd */
  933. pSrc[(2U * i0) + 1U] = (s1 + t2) * onebyfftLen;
  934. /* (ya + yc) - (yb + yd) */
  935. s1 = s1 - t2;
  936. /* (yb-yd) */
  937. t1 = pSrc[(2U * i1) + 1U] - pSrc[(2U * i3) + 1U];
  938. /* (xb-xd) */
  939. t2 = pSrc[2U * i1] - pSrc[2U * i3];
  940. /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
  941. pSrc[2U * i1] = r1 * onebyfftLen;
  942. /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
  943. pSrc[(2U * i1) + 1U] = s1 * onebyfftLen;
  944. /* (xa - xc) - (yb-yd) */
  945. r1 = r2 - t1;
  946. /* (xa - xc) + (yb-yd) */
  947. r2 = r2 + t1;
  948. /* (ya - yc) + (xb-xd) */
  949. s1 = s2 + t2;
  950. /* (ya - yc) - (xb-xd) */
  951. s2 = s2 - t2;
  952. /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
  953. pSrc[2U * i2] = r1 * onebyfftLen;
  954. /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
  955. pSrc[(2U * i2) + 1U] = s1 * onebyfftLen;
  956. /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
  957. pSrc[2U * i3] = r2 * onebyfftLen;
  958. /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
  959. pSrc[(2U * i3) + 1U] = s2 * onebyfftLen;
  960. }
  961. #endif /* #if defined (ARM_MATH_LOOPUNROLL) */
  962. }