-
Notifications
You must be signed in to change notification settings - Fork 1k
add floyd-rivest selection and parallelization to gmedian #7481
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
df05b9d
36742ea
14cd5fe
6d94f8c
9c0b64d
19f0b8d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -880,43 +880,68 @@ SEXP gmedian(SEXP x, SEXP narmArg) { | |
| const bool nosubset = irowslen==-1; | ||
| switch(TYPEOF(x)) { | ||
| case REALSXP: { | ||
| double *subd = REAL(PROTECT(allocVector(REALSXP, maxgrpn))); // allocate once upfront and reuse | ||
| int64_t *xi64 = (int64_t *)REAL(x); | ||
| double *xd = REAL(x); | ||
| for (int i=0; i<ngrp; ++i) { | ||
| int thisgrpsize = grpsize[i], nacount=0; | ||
| for (int j=0; j<thisgrpsize; ++j) { | ||
| int k = ff[i]+j-1; | ||
| if (isunsorted) k = oo[k]-1; | ||
| k = nosubset ? k : (irows[k]==NA_INTEGER ? NA_INTEGER : irows[k]-1); | ||
| if (k==NA_INTEGER || (isInt64 ? xi64[k]==NA_INTEGER64 : ISNAN(xd[k]))) nacount++; | ||
| else subd[j-nacount] = xd[k]; | ||
| #pragma omp parallel num_threads(getDTthreads(ngrp, true)) | ||
| { | ||
| double *thread_subd = malloc(maxgrpn * sizeof(double)); | ||
| if (!thread_subd) error(_("Unable to allocate thread-local buffer in gmedian")); // # nocov | ||
| #pragma omp for | ||
| for (int i=0; i<ngrp; ++i) { | ||
| int thisgrpsize = grpsize[i], nacount=0; | ||
| for (int j=0; j<thisgrpsize; ++j) { | ||
| int k = ff[i]+j-1; | ||
| if (isunsorted) k = oo[k]-1; | ||
| k = nosubset ? k : (irows[k]==NA_INTEGER ? NA_INTEGER : irows[k]-1); | ||
| if (k==NA_INTEGER || (isInt64 ? xi64[k]==NA_INTEGER64 : ISNAN(xd[k]))) nacount++; | ||
| else thread_subd[j-nacount] = xd[k]; | ||
| } | ||
| thisgrpsize -= nacount; // all-NA is returned as NA_REAL via n==0 case inside *quickselect | ||
| if (nacount && !narm) { | ||
| ansd[i] = NA_REAL; | ||
| } else { | ||
| // Use Floyd-Rivest for groups larger than 100, quickselect for smaller | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Does it make sense to put 100 as a parameter, so we can explore impact of this code path branches by options
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could do. Like all of these parameters, it should be tuned. |
||
| ansd[i] = isInt64 ? | ||
| (thisgrpsize > 100 ? i64floyd_rivest((int64_t *)thread_subd, thisgrpsize) : i64quickselect((int64_t *)thread_subd, thisgrpsize)) : | ||
| (thisgrpsize > 100 ? dfloyd_rivest(thread_subd, thisgrpsize) : dquickselect(thread_subd, thisgrpsize)); | ||
| } | ||
| } | ||
| thisgrpsize -= nacount; // all-NA is returned as NA_REAL via n==0 case inside *quickselect | ||
| ansd[i] = (nacount && !narm) ? NA_REAL : (isInt64 ? i64quickselect((void *)subd, thisgrpsize) : dquickselect(subd, thisgrpsize)); | ||
| free(thread_subd); | ||
| }} | ||
| break; | ||
| case LGLSXP: case INTSXP: { | ||
| int *subi = INTEGER(PROTECT(allocVector(INTSXP, maxgrpn))); | ||
| int *xi = INTEGER(x); | ||
| for (int i=0; i<ngrp; i++) { | ||
| const int thisgrpsize = grpsize[i]; | ||
| int nacount=0; | ||
| for (int j=0; j<thisgrpsize; ++j) { | ||
| int k = ff[i]+j-1; | ||
| if (isunsorted) k = oo[k]-1; | ||
| if (nosubset ? xi[k]==NA_INTEGER : (irows[k]==NA_INTEGER || (k=irows[k]-1,xi[k]==NA_INTEGER))) nacount++; | ||
| else subi[j-nacount] = xi[k]; | ||
| #pragma omp parallel num_threads(getDTthreads(ngrp, true)) | ||
| { | ||
| int *thread_subi = malloc(maxgrpn * sizeof(int)); | ||
| if (!thread_subi) error(_("Unable to allocate thread-local buffer in gmedian")); // # nocov | ||
| #pragma omp for | ||
| for (int i=0; i<ngrp; i++) { | ||
| const int thisgrpsize = grpsize[i]; | ||
| int nacount=0; | ||
| for (int j=0; j<thisgrpsize; ++j) { | ||
| int k = ff[i]+j-1; | ||
| if (isunsorted) k = oo[k]-1; | ||
| if (nosubset ? xi[k]==NA_INTEGER : (irows[k]==NA_INTEGER || (k=irows[k]-1,xi[k]==NA_INTEGER))) nacount++; | ||
| else thread_subi[j-nacount] = xi[k]; | ||
| } | ||
| if (nacount && !narm) { | ||
| ansd[i] = NA_REAL; | ||
| } else { | ||
| const int validsize = thisgrpsize-nacount; | ||
| // Use Floyd-Rivest for groups larger than 100, quickselect for smaller | ||
| ansd[i] = validsize > 100 ? ifloyd_rivest(thread_subi, validsize) : iquickselect(thread_subi, validsize); | ||
| } | ||
| } | ||
| ansd[i] = (nacount && !narm) ? NA_REAL : iquickselect(subi, thisgrpsize-nacount); | ||
| free(thread_subi); | ||
| }} | ||
| break; | ||
| default: | ||
| error(_("Type '%s' is not supported by GForce %s. Either add the prefix %s or turn off GForce optimization using options(datatable.optimize=1)"), type2char(TYPEOF(x)), "median (gmedian)", "stats::median(.)"); | ||
| } | ||
| if (!isInt64) copyMostAttrib(x, ans); | ||
| // else the integer64 class needs to be dropped since double is always returned by gmedian | ||
| UNPROTECT(2); // ans, subd|subi | ||
| UNPROTECT(1); // ans | ||
| return ans; | ||
| } | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Does it make sense to allocate outside of parallel and use R alloc, so the memory usage can be tracked by apps that track memory usage using R API.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good point, will do.