@@ -871,115 +871,110 @@ END SUBROUTINE H2Coeffs
871871! =======================================================================
872872! > This routine takes the Fourier coefficients and converts them to velocity
873873! ! note that the resulting time series has zero mean.
874+ ! !
875+ ! ! OPENMP - The FFT_Data data structure is problematic for some Fortran compilers when used in
876+ ! ! the `!$OMP PRIVATE(FFT_Data)` context. Since this is contains work data with will be
877+ ! ! specific to a given ApplyFFT call, it must be kept private inside the `!$OMP PARALLEL DO`
878+ ! ! construct. So we must call `InitFFT` inside a given OMP thread. This limits us to only
879+ ! ! doing parallelization around the UVW directions, and not across the NPoints loop as well
880+ ! ! where we could potentially see big gains in performance. If we did move the InitFFT call
881+ ! ! inside the NPoint loop, we would likely see performance drop with all the extra allocations
882+ ! ! as well as a full doubling of memory needed (which is already a big bottleneck).
883+ ! ! It would be possible to limit the InitFFT calls in when OpenMP is not used with `#ifdef
884+ ! ! _OPENMP`, but that was an unreadable mess when I tried it.
874885SUBROUTINE Coeffs2TimeSeries ( V , NumSteps , NPoints , NUsrPoints , ErrStat , ErrMsg )
875-
876-
877- ! USE NWTC_FFTPACK
878-
879- IMPLICIT NONE
880-
881-
882- ! passed variables
883886 INTEGER (IntKi), INTENT (IN ) :: NumSteps ! < Size of dimension 1 of V (number of time steps)
884887 INTEGER (IntKi), INTENT (IN ) :: NPoints ! < Size of dimension 2 of V (number of grid points)
885888 INTEGER (IntKi), INTENT (IN ) :: NUsrPoints ! < number of user-defined time series
886-
887889 REAL (ReKi), INTENT (INOUT ) :: V (NumSteps,NPoints,3 ) ! < An array containing the summations of the rows of H (NumSteps,NPoints,3).
888-
889890 INTEGER (IntKi), intent ( out ) :: ErrStat ! < Error level
890891 CHARACTER (* ), intent ( out ) :: ErrMsg ! < Message describing error
891892
892-
893- ! local variables
894893 TYPE (FFT_DataType) :: FFT_Data ! data for applying FFT
895894 REAL (SiKi), ALLOCATABLE :: Work ( : ) ! working array to hold coefficients of fft !bjj: made it allocatable so it doesn't take stack space
896-
897-
898- INTEGER (IntKi) :: ITime ! loop counter for time step/frequency
899895 INTEGER (IntKi) :: IVec ! loop counter for velocity components
900896 INTEGER (IntKi) :: IPoint ! loop counter for grid points
901-
897+ logical :: ExitOMPlooping ! flag to indicate skipping other loops
902898 INTEGER (IntKi) :: ErrStat2 ! Error level (local)
903- ! CHARACTER(MaxMsgLen) :: ErrMsg2 ! Message describing error (local)
904899
905900
906- ! initialize variables
907-
908- ! ErrStat = ErrID_None
909- ! ErrMsg = ""
910-
911901 CALL AllocAry(Work, NumSteps, ' Work' ,ErrStat,ErrMsg)
912902 if (ErrStat >= AbortErrLev) return
913903
914- ! Allocate the FFT working storage and initialize its variables
915-
916- CALL InitFFT( NumSteps, FFT_Data, ErrStat= ErrStat2 )
917- CALL SetErrStat(ErrStat2, ' Error in InitFFT' , ErrStat, ErrMsg, ' Coeffs2TimeSeries' )
918- IF (ErrStat >= AbortErrLev) THEN
919- CALL Cleanup()
920- RETURN
921- END IF
922-
923-
924904 ! Get the stationary-point time series.
925905
926- CALL WrScr ( ' Generating time series for all points:' )
906+ CALL WrScr ( ' Generating time series for all points:' )
927907
928- DO IVec= 1 ,3
908+ ! Since we are potentially using OpenMP here, we cannot
909+ ExitOMPlooping = .false.
929910
930- CALL WrScr ( ' ' // Comp(IVec)// ' -component' )
911+ ! $OMP PARALLEL DO &
912+ ! $OMP PRIVATE(iVec, IPoint, Work, FFT_Data, ErrStat2)&
913+ ! $OMP SHARED(NPoints, NumSteps, NUsrPoints, V, errStat, errMsg, AbortErrLev, ExitOMPlooping) DEFAULT(NONE)
914+ DO IVec= 1 ,3
931915
932- DO IPoint= 1 ,NPoints ! NTotB
916+ ! make sure we didn't have a failure on prior OMP loop
917+ if (ExitOMPlooping) cycle
933918
934- ! Overwrite the first point with zero. This sets the real (and
935- ! imaginary) part of the steady-state value to zero so that we
936- ! can add in the mean value later.
919+ CALL WrScr ( ' ' // Comp(IVec)// ' -component' )
937920
938- Work(1 ) = 0.0_ReKi
921+ ! The FFT_Data is not thread safe with the allocation inside.
922+ CALL InitFFT( NumSteps, FFT_Data, ErrStat= ErrStat2 )
923+ ! $OMP CRITICAL ! Needed to avoid data race on ErrStat and ErrMsg
924+ CALL SetErrStat(ErrStat2, ' Error in InitFFT' , ErrStat, ErrMsg, ' Coeffs2TimeSeries' )
925+ ! $OMP END CRITICAL
939926
940- ! DO ITime = 2,NumSteps-1
941- DO ITime = 2 ,NumSteps
942- Work(ITime) = V(ITime-1 , IPoint, IVec)
943- ENDDO ! ITime
944-
945- IF (iPoint > NUsrPoints) THEN
946- ! BJJ: we can't override this for the user-input spectra or we don't get the correct time series out.
947- ! Per JMJ, I will keep this here for the other points, but I personally think it could be skipped, too.
927+ ! Proceed only if the InitFFT worked.
928+ ! NOTE: this is to allow for OpenMP - can't return from inside loop
929+ if (ErrStat2 < AbortErrLev) then ! check ErrStat2 for this OMPthread
930+ DO IPoint= 1 ,NPoints ! NTotB
931+ ! Overwrite the first point with zero. This sets the real (and
932+ ! imaginary) part of the steady-state value to zero so that we
933+ ! can add in the mean value later.
934+ Work(1 ) = 0.0_ReKi
935+ Work(2 :NumSteps) = V(1 :NumSteps-1 , IPoint, IVec)
948936
949- ! Now, let's add a complex zero to the end to set the power in the Nyquist
950- ! frequency to zero.
937+ IF (iPoint > NUsrPoints) THEN
938+ ! BJJ: we can't override this for the user-input spectra or we don't get the correct time series out.
939+ ! Per JMJ, I will keep this here for the other points, but I personally think it could be skipped, too.
940+
941+ ! Now, let's add a complex zero to the end to set the power in the Nyquist
942+ ! frequency to zero.
943+ Work(NumSteps) = 0.0
944+ END IF
951945
952- Work(NumSteps) = 0.0
953- END IF
954-
955-
956-
957- ! perform FFT
958-
959- CALL ApplyFFT( Work, FFT_Data, ErrStat2 )
960- IF (ErrStat2 /= ErrID_None ) THEN
961- CALL SetErrStat(ErrStat2, ' Error in ApplyFFT for point ' // TRIM (Num2LStr(IPoint))// ' .' , ErrStat, ErrMsg, ' Coeffs2TimeSeries' )
962- IF (ErrStat >= AbortErrLev) EXIT
963- END IF
964-
965- V(:,IPoint,IVec) = Work
966-
967- ENDDO ! IPoint
968-
969- ENDDO ! IVec
970-
971- CALL Cleanup()
946+ ! perform FFT
947+ CALL ApplyFFT( Work, FFT_Data, ErrStat2 )
948+ IF (ErrStat2 /= ErrID_None ) THEN
949+ ! $OMP CRITICAL ! Needed to avoid data race on ErrStat and ErrMsg
950+ CALL SetErrStat(ErrStat2, ' Error in ApplyFFT for point ' // TRIM (Num2LStr(IPoint))// ' .' , ErrStat, ErrMsg, ' Coeffs2TimeSeries' )
951+ ! $OMP END CRITICAL
952+ IF (ErrStat >= AbortErrLev) EXIT
953+ END IF
954+
955+ V(:,IPoint,IVec) = Work
956+ ENDDO ! IPoint
957+
958+ ! Clean up memory from FFT_Data.
959+ CALL ExitFFT( FFT_Data, ErrStat2 )
960+ ! $OMP CRITICAL ! Needed to avoid data race on ErrStat and ErrMsg
961+ CALL SetErrStat(ErrStat2, ' Error in ExitFFT' , ErrStat, ErrMsg, ' Coeffs2TimeSeries' )
962+ ! $OMP END CRITICAL
963+
964+ ! skip further OMP loops if any sequential (or if OMP not used).
965+ ! NOTE: OMP doesn't allow return inside OMP thread
966+ if (ErrStat2 >= AbortErrLev) ExitOMPlooping = .true.
967+ endif
968+ ENDDO ! IVec
969+ ! $OMP END PARALLEL DO
972970
973- RETURN
974- CONTAINS
975- ! ...........................................
976- SUBROUTINE Cleanup ()
977-
978- CALL ExitFFT( FFT_Data, ErrStat2 )
979- CALL SetErrStat(ErrStat2, ' Error in ExitFFT' , ErrStat, ErrMsg, ' Coeffs2TimeSeries' )
971+ CALL Cleanup()
980972
981- if (allocated (work)) deallocate (work)
982-
973+ RETURN
974+ CONTAINS
975+ ! ...........................................
976+ SUBROUTINE Cleanup ()
977+ if (allocated (work)) deallocate (work)
983978 END SUBROUTINE Cleanup
984979END SUBROUTINE Coeffs2TimeSeries
985980! =======================================================================
0 commit comments