Skip to content

Commit 1184401

Browse files
committed
TurbSim: add OMP directives on Coeffs2TimeSeries
1 parent 436a4ad commit 1184401

File tree

1 file changed

+74
-79
lines changed

1 file changed

+74
-79
lines changed

modules/turbsim/src/TSsubs.f90

Lines changed: 74 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -871,115 +871,110 @@ END SUBROUTINE H2Coeffs
871871
!=======================================================================
872872
!> This routine takes the Fourier coefficients and converts them to velocity
873873
!! note that the resulting time series has zero mean.
874+
!!
875+
!! OPENMP - The FFT_Data data structure is problematic for some Fortran compilers when used in
876+
!! the `!$OMP PRIVATE(FFT_Data)` context. Since this is contains work data with will be
877+
!! specific to a given ApplyFFT call, it must be kept private inside the `!$OMP PARALLEL DO`
878+
!! construct. So we must call `InitFFT` inside a given OMP thread. This limits us to only
879+
!! doing parallelization around the UVW directions, and not across the NPoints loop as well
880+
!! where we could potentially see big gains in performance. If we did move the InitFFT call
881+
!! inside the NPoint loop, we would likely see performance drop with all the extra allocations
882+
!! as well as a full doubling of memory needed (which is already a big bottleneck).
883+
!! It would be possible to limit the InitFFT calls in when OpenMP is not used with `#ifdef
884+
!! _OPENMP`, but that was an unreadable mess when I tried it.
874885
SUBROUTINE Coeffs2TimeSeries( V, NumSteps, NPoints, NUsrPoints, ErrStat, ErrMsg )
875-
876-
877-
!USE NWTC_FFTPACK
878-
879-
IMPLICIT NONE
880-
881-
882-
! passed variables
883886
INTEGER(IntKi), INTENT(IN) :: NumSteps !< Size of dimension 1 of V (number of time steps)
884887
INTEGER(IntKi), INTENT(IN) :: NPoints !< Size of dimension 2 of V (number of grid points)
885888
INTEGER(IntKi), INTENT(IN) :: NUsrPoints !< number of user-defined time series
886-
887889
REAL(ReKi), INTENT(INOUT) :: V (NumSteps,NPoints,3) !< An array containing the summations of the rows of H (NumSteps,NPoints,3).
888-
889890
INTEGER(IntKi), intent( out) :: ErrStat !< Error level
890891
CHARACTER(*), intent( out) :: ErrMsg !< Message describing error
891892

892-
893-
! local variables
894893
TYPE(FFT_DataType) :: FFT_Data ! data for applying FFT
895894
REAL(SiKi), ALLOCATABLE :: Work ( : ) ! working array to hold coefficients of fft !bjj: made it allocatable so it doesn't take stack space
896-
897-
898-
INTEGER(IntKi) :: ITime ! loop counter for time step/frequency
899895
INTEGER(IntKi) :: IVec ! loop counter for velocity components
900896
INTEGER(IntKi) :: IPoint ! loop counter for grid points
901-
897+
logical :: ExitOMPlooping ! flag to indicate skipping other loops
902898
INTEGER(IntKi) :: ErrStat2 ! Error level (local)
903-
!CHARACTER(MaxMsgLen) :: ErrMsg2 ! Message describing error (local)
904899

905900

906-
! initialize variables
907-
908-
!ErrStat = ErrID_None
909-
!ErrMsg = ""
910-
911901
CALL AllocAry(Work, NumSteps, 'Work',ErrStat,ErrMsg)
912902
if (ErrStat >= AbortErrLev) return
913903

914-
! Allocate the FFT working storage and initialize its variables
915-
916-
CALL InitFFT( NumSteps, FFT_Data, ErrStat=ErrStat2 )
917-
CALL SetErrStat(ErrStat2, 'Error in InitFFT', ErrStat, ErrMsg, 'Coeffs2TimeSeries' )
918-
IF (ErrStat >= AbortErrLev) THEN
919-
CALL Cleanup()
920-
RETURN
921-
END IF
922-
923-
924904
! Get the stationary-point time series.
925905

926-
CALL WrScr ( ' Generating time series for all points:' )
906+
CALL WrScr ( ' Generating time series for all points:' )
927907

928-
DO IVec=1,3
908+
! Since we are potentially using OpenMP here, we cannot
909+
ExitOMPlooping = .false.
929910

930-
CALL WrScr ( ' '//Comp(IVec)//'-component' )
911+
!$OMP PARALLEL DO &
912+
!$OMP PRIVATE(iVec, IPoint, Work, FFT_Data, ErrStat2)&
913+
!$OMP SHARED(NPoints, NumSteps, NUsrPoints, V, errStat, errMsg, AbortErrLev, ExitOMPlooping) DEFAULT(NONE)
914+
DO IVec=1,3
931915

932-
DO IPoint=1,NPoints !NTotB
916+
! make sure we didn't have a failure on prior OMP loop
917+
if (ExitOMPlooping) cycle
933918

934-
! Overwrite the first point with zero. This sets the real (and
935-
! imaginary) part of the steady-state value to zero so that we
936-
! can add in the mean value later.
919+
CALL WrScr ( ' '//Comp(IVec)//'-component' )
937920

938-
Work(1) = 0.0_ReKi
921+
! The FFT_Data is not thread safe with the allocation inside.
922+
CALL InitFFT( NumSteps, FFT_Data, ErrStat=ErrStat2 )
923+
!$OMP CRITICAL ! Needed to avoid data race on ErrStat and ErrMsg
924+
CALL SetErrStat(ErrStat2, 'Error in InitFFT', ErrStat, ErrMsg, 'Coeffs2TimeSeries' )
925+
!$OMP END CRITICAL
939926

940-
! DO ITime = 2,NumSteps-1
941-
DO ITime = 2,NumSteps
942-
Work(ITime) = V(ITime-1, IPoint, IVec)
943-
ENDDO ! ITime
944-
945-
IF (iPoint > NUsrPoints) THEN
946-
! BJJ: we can't override this for the user-input spectra or we don't get the correct time series out.
947-
! Per JMJ, I will keep this here for the other points, but I personally think it could be skipped, too.
927+
! Proceed only if the InitFFT worked.
928+
! NOTE: this is to allow for OpenMP - can't return from inside loop
929+
if (ErrStat2 < AbortErrLev) then ! check ErrStat2 for this OMPthread
930+
DO IPoint=1,NPoints !NTotB
931+
! Overwrite the first point with zero. This sets the real (and
932+
! imaginary) part of the steady-state value to zero so that we
933+
! can add in the mean value later.
934+
Work(1) = 0.0_ReKi
935+
Work(2:NumSteps) = V(1:NumSteps-1, IPoint, IVec)
948936

949-
! Now, let's add a complex zero to the end to set the power in the Nyquist
950-
! frequency to zero.
937+
IF (iPoint > NUsrPoints) THEN
938+
! BJJ: we can't override this for the user-input spectra or we don't get the correct time series out.
939+
! Per JMJ, I will keep this here for the other points, but I personally think it could be skipped, too.
940+
941+
! Now, let's add a complex zero to the end to set the power in the Nyquist
942+
! frequency to zero.
943+
Work(NumSteps) = 0.0
944+
END IF
951945

952-
Work(NumSteps) = 0.0
953-
END IF
954-
955-
956-
957-
! perform FFT
958-
959-
CALL ApplyFFT( Work, FFT_Data, ErrStat2 )
960-
IF (ErrStat2 /= ErrID_None ) THEN
961-
CALL SetErrStat(ErrStat2, 'Error in ApplyFFT for point '//TRIM(Num2LStr(IPoint))//'.', ErrStat, ErrMsg, 'Coeffs2TimeSeries' )
962-
IF (ErrStat >= AbortErrLev) EXIT
963-
END IF
964-
965-
V(:,IPoint,IVec) = Work
966-
967-
ENDDO ! IPoint
968-
969-
ENDDO ! IVec
970-
971-
CALL Cleanup()
946+
! perform FFT
947+
CALL ApplyFFT( Work, FFT_Data, ErrStat2 )
948+
IF (ErrStat2 /= ErrID_None ) THEN
949+
!$OMP CRITICAL ! Needed to avoid data race on ErrStat and ErrMsg
950+
CALL SetErrStat(ErrStat2, 'Error in ApplyFFT for point '//TRIM(Num2LStr(IPoint))//'.', ErrStat, ErrMsg, 'Coeffs2TimeSeries' )
951+
!$OMP END CRITICAL
952+
IF (ErrStat >= AbortErrLev) EXIT
953+
END IF
954+
955+
V(:,IPoint,IVec) = Work
956+
ENDDO ! IPoint
957+
958+
! Clean up memory from FFT_Data.
959+
CALL ExitFFT( FFT_Data, ErrStat2 )
960+
!$OMP CRITICAL ! Needed to avoid data race on ErrStat and ErrMsg
961+
CALL SetErrStat(ErrStat2, 'Error in ExitFFT', ErrStat, ErrMsg, 'Coeffs2TimeSeries' )
962+
!$OMP END CRITICAL
963+
964+
! skip further OMP loops if any sequential (or if OMP not used).
965+
! NOTE: OMP doesn't allow return inside OMP thread
966+
if (ErrStat2 >= AbortErrLev) ExitOMPlooping = .true.
967+
endif
968+
ENDDO ! IVec
969+
!$OMP END PARALLEL DO
972970

973-
RETURN
974-
CONTAINS
975-
!...........................................
976-
SUBROUTINE Cleanup()
977-
978-
CALL ExitFFT( FFT_Data, ErrStat2 )
979-
CALL SetErrStat(ErrStat2, 'Error in ExitFFT', ErrStat, ErrMsg, 'Coeffs2TimeSeries' )
971+
CALL Cleanup()
980972

981-
if (allocated(work)) deallocate(work)
982-
973+
RETURN
974+
CONTAINS
975+
!...........................................
976+
SUBROUTINE Cleanup()
977+
if (allocated(work)) deallocate(work)
983978
END SUBROUTINE Cleanup
984979
END SUBROUTINE Coeffs2TimeSeries
985980
!=======================================================================

0 commit comments

Comments
 (0)