#include "kmp.h"
#include "kmp_error.h"
#include "kmp_i18n.h"
#include "kmp_collapse.h"

#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif
// Avoid inadvertently picking up a library-based abs:
template <typename T> T __kmp_abs(const T val) {
  return (val < 0) ? -val : val;
}
kmp_uint32 __kmp_abs(const kmp_uint32 val) { return val; }
kmp_uint64 __kmp_abs(const kmp_uint64 val) { return val; }
//----------------------------------------------------------------------------
// Common functions for working with rectangular and non-rectangular loops
//----------------------------------------------------------------------------

template <typename T> int __kmp_sign(T val) {
  return (T(0) < val) - (val < T(0));
}
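// Illustrative check (not part of the original source): the two comparisons
// yield the sign as -1, 0, or +1, e.g. __kmp_sign(-7) == -1,
// __kmp_sign(0) == 0, __kmp_sign(42) == 1.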
// Allocator for small per-thread arrays: uses the on-stack buffer when the
// data fits into allocaSize bytes and __kmp_allocate otherwise.
template <typename T> class CollapseAllocator {
  typedef T *pT;

private:
  static const size_t allocaSize = 32; // size limit for stack allocations
                                       // (8 bytes x 4 nested loops)
  char stackAlloc[allocaSize];
  static constexpr size_t maxElemCount = allocaSize / sizeof(T);
  pT pTAlloc;

public:
  CollapseAllocator(size_t n) : pTAlloc(reinterpret_cast<pT>(stackAlloc)) {
    if (n > maxElemCount) {
      pTAlloc = reinterpret_cast<pT>(__kmp_allocate(n * sizeof(T)));
    }
  }
  ~CollapseAllocator() {
    if (pTAlloc != reinterpret_cast<pT>(stackAlloc)) {
      __kmp_free(pTAlloc);
    }
  }
  T &operator[](int index) { return pTAlloc[index]; }
  operator const pT() { return pTAlloc; }
};
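// Illustrative usage (not part of the original source): the allocator is
// indexed like a plain array, so
//   CollapseAllocator<kmp_uint64> ivs(n);
//   ivs[0] = 0;
// stays on the 32-byte stack buffer for n <= 4 elements of kmp_uint64 and
// falls back to __kmp_allocate for larger n.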
//----------Loop canonicalization---------------------------------------------

// For a loop nest of any shape: convert != to < or >, then switch from using
// < or > to <= or >=. All other internal functions work only with
// canonicalized loops.
template <typename T>
void kmp_canonicalize_one_loop_XX(
    ident_t *loc,
    /*in/out*/ bounds_infoXX_template<T> *bounds) {

  if (__kmp_env_consistency_check) {
    if (bounds->step == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
  }

  if (bounds->comparison == comparison_t::comp_not_eq) {
    // Convert to < or >, depending on the sign of the step:
    if (bounds->step > 0) {
      bounds->comparison = comparison_t::comp_less;
    } else {
      bounds->comparison = comparison_t::comp_greater;
    }
  }

  if (bounds->comparison == comparison_t::comp_less) {
    bounds->ub0 -= 1;
    bounds->comparison = comparison_t::comp_less_or_eq;
  } else if (bounds->comparison == comparison_t::comp_greater) {
    bounds->ub0 += 1;
    bounds->comparison = comparison_t::comp_greater_or_eq;
  }
}
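// Worked example (illustrative, not in the original source): a loop
// "i = 0; i < 10; i += 2" arrives with comparison comp_less; canonicalization
// rewrites it as "i <= 9" (ub0 = 10 - 1, comparison comp_less_or_eq) without
// changing the set of iterations.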
// Canonicalize the loop nest. original_bounds_nest is an array of length n.
void kmp_canonicalize_loop_nest(ident_t *loc,
                                /*in/out*/ bounds_info_t *original_bounds_nest,
                                kmp_index_t n) {

  for (kmp_index_t ind = 0; ind < n; ++ind) {
    auto bounds = &(original_bounds_nest[ind]);

    switch (bounds->loop_type) {
    case loop_type_t::loop_type_int32:
      kmp_canonicalize_one_loop_XX<kmp_int32>(
          loc, /*in/out*/ (bounds_infoXX_template<kmp_int32> *)(bounds));
      break;
    case loop_type_t::loop_type_uint32:
      kmp_canonicalize_one_loop_XX<kmp_uint32>(
          loc, /*in/out*/ (bounds_infoXX_template<kmp_uint32> *)(bounds));
      break;
    case loop_type_t::loop_type_int64:
      kmp_canonicalize_one_loop_XX<kmp_int64>(
          loc, /*in/out*/ (bounds_infoXX_template<kmp_int64> *)(bounds));
      break;
    case loop_type_t::loop_type_uint64:
      kmp_canonicalize_one_loop_XX<kmp_uint64>(
          loc, /*in/out*/ (bounds_infoXX_template<kmp_uint64> *)(bounds));
      break;
    default:
      KMP_ASSERT(false);
    }
  }
}
// Calculate the trip count on this loop level:
template <typename T>
kmp_loop_nest_iv_t kmp_calculate_trip_count_XX(
    /*in/out*/ bounds_infoXX_template<T> *bounds) {

  if (bounds->comparison == comparison_t::comp_less_or_eq) {
    if (bounds->ub0 < bounds->lb0) {
      // Empty range on this level, so inner loops need not be calculated:
      bounds->trip_count = 0;
    } else {
      // (ub - lb) may exceed the range of the signed type, so cast to the
      // wider unsigned type before dividing:
      bounds->trip_count =
          static_cast<kmp_loop_nest_iv_t>(bounds->ub0 - bounds->lb0) /
              __kmp_abs(bounds->step) +
          1;
    }
  } else if (bounds->comparison == comparison_t::comp_greater_or_eq) {
    if (bounds->lb0 < bounds->ub0) {
      bounds->trip_count = 0;
    } else {
      bounds->trip_count =
          static_cast<kmp_loop_nest_iv_t>(bounds->lb0 - bounds->ub0) /
              __kmp_abs(bounds->step) +
          1;
    }
  } else {
    KMP_ASSERT(false);
  }
  return bounds->trip_count;
}
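// Worked example (illustrative, not in the original source): for a
// canonicalized loop 0 <= i <= 9 with step 2 the formula gives
// (9 - 0) / 2 + 1 = 5 iterations (i = 0, 2, 4, 6, 8).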
// Calculate the trip count on this loop level (type dispatcher):
kmp_loop_nest_iv_t kmp_calculate_trip_count(/*in/out*/ bounds_info_t *bounds) {

  kmp_loop_nest_iv_t trip_count = 0;

  switch (bounds->loop_type) {
  case loop_type_t::loop_type_int32:
    trip_count = kmp_calculate_trip_count_XX<kmp_int32>(
        /*in/out*/ (bounds_infoXX_template<kmp_int32> *)(bounds));
    break;
  case loop_type_t::loop_type_uint32:
    trip_count = kmp_calculate_trip_count_XX<kmp_uint32>(
        /*in/out*/ (bounds_infoXX_template<kmp_uint32> *)(bounds));
    break;
  case loop_type_t::loop_type_int64:
    trip_count = kmp_calculate_trip_count_XX<kmp_int64>(
        /*in/out*/ (bounds_infoXX_template<kmp_int64> *)(bounds));
    break;
  case loop_type_t::loop_type_uint64:
    trip_count = kmp_calculate_trip_count_XX<kmp_uint64>(
        /*in/out*/ (bounds_infoXX_template<kmp_uint64> *)(bounds));
    break;
  default:
    KMP_ASSERT(false);
  }

  return trip_count;
}
// The original induction variable may be a signed or unsigned type narrower
// than kmp_uint64, so trim the value (sign- or zero-extend) according to the
// loop's IV type:
kmp_uint64 kmp_fix_iv(loop_type_t loop_iv_type, kmp_uint64 original_iv) {
  kmp_uint64 res = 0;

  switch (loop_iv_type) {
  case loop_type_t::loop_type_int8:
    res = static_cast<kmp_uint64>(static_cast<kmp_int8>(original_iv));
    break;
  case loop_type_t::loop_type_uint8:
    res = static_cast<kmp_uint64>(static_cast<kmp_uint8>(original_iv));
    break;
  case loop_type_t::loop_type_int16:
    res = static_cast<kmp_uint64>(static_cast<kmp_int16>(original_iv));
    break;
  case loop_type_t::loop_type_uint16:
    res = static_cast<kmp_uint64>(static_cast<kmp_uint16>(original_iv));
    break;
  case loop_type_t::loop_type_int32:
    res = static_cast<kmp_uint64>(static_cast<kmp_int32>(original_iv));
    break;
  case loop_type_t::loop_type_uint32:
    res = static_cast<kmp_uint64>(static_cast<kmp_uint32>(original_iv));
    break;
  case loop_type_t::loop_type_int64:
    res = static_cast<kmp_uint64>(static_cast<kmp_int64>(original_iv));
    break;
  case loop_type_t::loop_type_uint64:
    res = static_cast<kmp_uint64>(original_iv);
    break;
  default:
    KMP_ASSERT(false);
  }

  return res;
}
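// Illustrative example (not in the original source): for loop_type_int8 the
// double cast sign-extends, so kmp_fix_iv(loop_type_int8, 0xFF) yields
// 0xFFFFFFFFFFFFFFFF (i.e. -1), while loop_type_uint8 zero-extends the same
// input to 0xFF.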
// Compare two IVs, respecting their loop IV type (e.g. int8):
bool kmp_ivs_eq(loop_type_t loop_iv_type, kmp_uint64 original_iv1,
                kmp_uint64 original_iv2) {
  bool res = false;

  switch (loop_iv_type) {
  case loop_type_t::loop_type_int8:
    res = static_cast<kmp_int8>(original_iv1) ==
          static_cast<kmp_int8>(original_iv2);
    break;
  case loop_type_t::loop_type_uint8:
    res = static_cast<kmp_uint8>(original_iv1) ==
          static_cast<kmp_uint8>(original_iv2);
    break;
  case loop_type_t::loop_type_int16:
    res = static_cast<kmp_int16>(original_iv1) ==
          static_cast<kmp_int16>(original_iv2);
    break;
  case loop_type_t::loop_type_uint16:
    res = static_cast<kmp_uint16>(original_iv1) ==
          static_cast<kmp_uint16>(original_iv2);
    break;
  case loop_type_t::loop_type_int32:
    res = static_cast<kmp_int32>(original_iv1) ==
          static_cast<kmp_int32>(original_iv2);
    break;
  case loop_type_t::loop_type_uint32:
    res = static_cast<kmp_uint32>(original_iv1) ==
          static_cast<kmp_uint32>(original_iv2);
    break;
  case loop_type_t::loop_type_int64:
    res = static_cast<kmp_int64>(original_iv1) ==
          static_cast<kmp_int64>(original_iv2);
    break;
  case loop_type_t::loop_type_uint64:
    res = static_cast<kmp_uint64>(original_iv1) ==
          static_cast<kmp_uint64>(original_iv2);
    break;
  default:
    KMP_ASSERT(false);
  }

  return res;
}
//----------Calculating original iterations-----------------------------------

// Return true if the point fits under the upper bound on this level, false
// otherwise:
template <typename T>
bool kmp_iv_is_in_upper_bound_XX(const bounds_infoXX_template<T> *bounds,
                                 const kmp_point_t original_ivs,
                                 kmp_index_t ind) {

  T iv = static_cast<T>(original_ivs[ind]);
  T outer_iv = static_cast<T>(original_ivs[bounds->outer_iv]);

  if (((bounds->comparison == comparison_t::comp_less_or_eq) &&
       (iv > (bounds->ub0 + bounds->ub1 * outer_iv))) ||
      ((bounds->comparison == comparison_t::comp_greater_or_eq) &&
       (iv < (bounds->ub0 + bounds->ub1 * outer_iv)))) {
    // The calculated point is outside of the loop's upper boundary:
    return false;
  }

  return true;
}
// Calculate one IV corresponding to the iteration on level "ind".
// Return true if it fits under the upper bound on this level
// (if not, the caller needs to re-calculate):
template <typename T>
bool kmp_calc_one_iv_XX(const bounds_infoXX_template<T> *bounds,
                        /*in/out*/ kmp_point_t original_ivs,
                        const kmp_iterations_t iterations, kmp_index_t ind,
                        bool start_with_lower_bound, bool checkBounds) {

  T temp = 0;
  T outer_iv = static_cast<T>(original_ivs[bounds->outer_iv]);

  if (start_with_lower_bound) {
    // We moved to the next iteration on one of the outer loops, so start
    // from the lower bound here:
    temp = bounds->lb0 + bounds->lb1 * outer_iv;
  } else {
    auto iteration = iterations[ind];
    temp = bounds->lb0 + bounds->lb1 * outer_iv + iteration * bounds->step;
  }

  // Now trim the original IV according to its type:
  original_ivs[ind] = kmp_fix_iv(bounds->loop_iv_type, temp);

  if (checkBounds) {
    return kmp_iv_is_in_upper_bound_XX(bounds, original_ivs, ind);
  } else {
    return true;
  }
}
bool kmp_calc_one_iv(const bounds_info_t *bounds,
                     /*in/out*/ kmp_point_t original_ivs,
                     const kmp_iterations_t iterations, kmp_index_t ind,
                     bool start_with_lower_bound, bool checkBounds) {

  switch (bounds->loop_type) {
  case loop_type_t::loop_type_int32:
    return kmp_calc_one_iv_XX<kmp_int32>(
        (bounds_infoXX_template<kmp_int32> *)(bounds),
        /*in/out*/ original_ivs, iterations, ind, start_with_lower_bound,
        checkBounds);
  case loop_type_t::loop_type_uint32:
    return kmp_calc_one_iv_XX<kmp_uint32>(
        (bounds_infoXX_template<kmp_uint32> *)(bounds),
        /*in/out*/ original_ivs, iterations, ind, start_with_lower_bound,
        checkBounds);
  case loop_type_t::loop_type_int64:
    return kmp_calc_one_iv_XX<kmp_int64>(
        (bounds_infoXX_template<kmp_int64> *)(bounds),
        /*in/out*/ original_ivs, iterations, ind, start_with_lower_bound,
        checkBounds);
  case loop_type_t::loop_type_uint64:
    return kmp_calc_one_iv_XX<kmp_uint64>(
        (bounds_infoXX_template<kmp_uint64> *)(bounds),
        /*in/out*/ original_ivs, iterations, ind, start_with_lower_bound,
        checkBounds);
  default:
    KMP_ASSERT(false);
    return false;
  }
}
//----------Calculating original iterations in the rectangular case-----------

// Calculate one IV for a rectangular loop nest:
template <typename T>
void kmp_calc_one_iv_rectang_XX(const bounds_infoXX_template<T> *bounds,
                                /*in/out*/ kmp_uint64 *original_ivs,
                                const kmp_iterations_t iterations,
                                kmp_index_t ind) {

  auto iteration = iterations[ind];

  T temp = bounds->lb0 +
           bounds->lb1 * static_cast<T>(original_ivs[bounds->outer_iv]) +
           iteration * bounds->step;

  // Now trim the original IV according to its type:
  original_ivs[ind] = kmp_fix_iv(bounds->loop_iv_type, temp);
}
void kmp_calc_one_iv_rectang(const bounds_info_t *bounds,
                             /*in/out*/ kmp_uint64 *original_ivs,
                             const kmp_iterations_t iterations,
                             kmp_index_t ind) {

  switch (bounds->loop_type) {
  case loop_type_t::loop_type_int32:
    kmp_calc_one_iv_rectang_XX<kmp_int32>(
        (bounds_infoXX_template<kmp_int32> *)(bounds),
        /*in/out*/ original_ivs, iterations, ind);
    break;
  case loop_type_t::loop_type_uint32:
    kmp_calc_one_iv_rectang_XX<kmp_uint32>(
        (bounds_infoXX_template<kmp_uint32> *)(bounds),
        /*in/out*/ original_ivs, iterations, ind);
    break;
  case loop_type_t::loop_type_int64:
    kmp_calc_one_iv_rectang_XX<kmp_int64>(
        (bounds_infoXX_template<kmp_int64> *)(bounds),
        /*in/out*/ original_ivs, iterations, ind);
    break;
  case loop_type_t::loop_type_uint64:
    kmp_calc_one_iv_rectang_XX<kmp_uint64>(
        (bounds_infoXX_template<kmp_uint64> *)(bounds),
        /*in/out*/ original_ivs, iterations, ind);
    break;
  default:
    KMP_ASSERT(false);
  }
}
459extern "C" kmp_loop_nest_iv_t
460__kmpc_process_loop_nest_rectang(
ident_t *loc, kmp_int32 gtid,
464 kmp_canonicalize_loop_nest(loc, original_bounds_nest, n);
466 kmp_loop_nest_iv_t total = 1;
468 for (kmp_index_t ind = 0; ind < n; ++ind) {
469 auto bounds = &(original_bounds_nest[ind]);
471 kmp_loop_nest_iv_t trip_count = kmp_calculate_trip_count( bounds);
// Calculate the original induction variables corresponding to the overall
// new_iv. Note: original IVs are returned as kmp_uint64; depending on the
// loop's signed or unsigned IV type they should be converted to the original
// type later (which may require sign extension).
extern "C" void
__kmpc_calc_original_ivs_rectang(ident_t *loc, kmp_loop_nest_iv_t new_iv,
                                 const bounds_info_t *original_bounds_nest,
                                 /*out*/ kmp_uint64 *original_ivs,
                                 kmp_index_t n) {

  CollapseAllocator<kmp_loop_nest_iv_t> iterations(n);

  // First, calculate the corresponding iteration in every original loop:
  for (kmp_index_t ind = n; ind > 0;) {
    --ind;
    auto bounds = &(original_bounds_nest[ind]);

    // Could be optimized to one division (or div+mul):
    auto temp = new_iv / bounds->trip_count;
    auto iteration = new_iv % bounds->trip_count;
    new_iv = temp;

    iterations[ind] = iteration;
  }
  KMP_ASSERT(new_iv == 0);

  for (kmp_index_t ind = 0; ind < n; ++ind) {
    auto bounds = &(original_bounds_nest[ind]);

    kmp_calc_one_iv_rectang(bounds, /*in/out*/ original_ivs, iterations, ind);
  }
}
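// Worked example (illustrative, not in the original source): with two loops
// of trip counts {4, 5}, new_iv = 13 decomposes innermost-first as
// 13 % 5 = 3 and 13 / 5 = 2, so iterations = {2, 3}; each original IV is then
// lb0 + iteration * step on its level.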
//----------------------------------------------------------------------------
// Non-rectangular loop nest
//----------------------------------------------------------------------------

// Calculate the smallest and biggest span of the loop (<= comparison):
template <typename T>
void kmp_calc_span_lessoreq_XX(
    /*in/out*/ bounds_info_internalXX_template<T> *bounds,
    /*in/out*/ bounds_info_internal_t *bounds_nest) {

  typedef typename traits_t<T>::unsigned_t UT;
  typedef T span_t;

  auto &bbounds = bounds->b;

  if ((bbounds.lb1 != 0) || (bbounds.ub1 != 0)) {
    // A non-rectangular loop depends on an outer loop, so it can't be the
    // outermost one:
    bounds_info_internalXX_template<T> *previous =
        reinterpret_cast<bounds_info_internalXX_template<T> *>(
            &(bounds_nest[bbounds.outer_iv]));

    {
      span_t bound_candidate1 =
          bbounds.lb0 + bbounds.lb1 * previous->span_smallest;
      span_t bound_candidate2 =
          bbounds.lb0 + bbounds.lb1 * previous->span_biggest;
      if (bound_candidate1 < bound_candidate2) {
        bounds->span_smallest = bound_candidate1;
      } else {
        bounds->span_smallest = bound_candidate2;
      }
    }

    {
      // The upper bound can't be adjusted with respect to the step here,
      // because the lower bound might be off after adjustments:
      span_t bound_candidate1 =
          bbounds.ub0 + bbounds.ub1 * previous->span_smallest;
      span_t bound_candidate2 =
          bbounds.ub0 + bbounds.ub1 * previous->span_biggest;
      if (bound_candidate1 < bound_candidate2) {
        bounds->span_biggest = bound_candidate2;
      } else {
        bounds->span_biggest = bound_candidate1;
      }
    }
  } else {
    // Rectangular loop:
    bounds->span_smallest = bbounds.lb0;
    bounds->span_biggest = bbounds.ub0;
  }

  if (!bounds->loop_bounds_adjusted) {
    // Here it's safe to reduce the span to a multiple of the step:
    bounds->span_biggest -=
        (static_cast<UT>(bbounds.ub0 - bbounds.lb0)) % bbounds.step;
  }
}
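// Worked example (illustrative, not in the original source): for a
// rectangular level 0 <= i <= 9 with step 2, span_biggest is reduced by
// (9 - 0) % 2 = 1 to 8, the last value the IV actually reaches.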
// Calculate the smallest and biggest span of the loop (>= comparison):
template <typename T>
void kmp_calc_span_greateroreq_XX(
    /*in/out*/ bounds_info_internalXX_template<T> *bounds,
    /*in/out*/ bounds_info_internal_t *bounds_nest) {

  typedef typename traits_t<T>::unsigned_t UT;
  typedef T span_t;

  auto &bbounds = bounds->b;

  if ((bbounds.lb1 != 0) || (bbounds.ub1 != 0)) {
    // A non-rectangular loop depends on an outer loop, so it can't be the
    // outermost one:
    bounds_info_internalXX_template<T> *previous =
        reinterpret_cast<bounds_info_internalXX_template<T> *>(
            &(bounds_nest[bbounds.outer_iv]));

    {
      span_t bound_candidate1 =
          bbounds.lb0 + bbounds.lb1 * previous->span_smallest;
      span_t bound_candidate2 =
          bbounds.lb0 + bbounds.lb1 * previous->span_biggest;
      if (bound_candidate1 >= bound_candidate2) {
        bounds->span_smallest = bound_candidate1;
      } else {
        bounds->span_smallest = bound_candidate2;
      }
    }

    {
      // The upper bound can't be adjusted with respect to the step here,
      // because the lower bound might be off after adjustments:
      span_t bound_candidate1 =
          bbounds.ub0 + bbounds.ub1 * previous->span_smallest;
      span_t bound_candidate2 =
          bbounds.ub0 + bbounds.ub1 * previous->span_biggest;
      if (bound_candidate1 >= bound_candidate2) {
        bounds->span_biggest = bound_candidate2;
      } else {
        bounds->span_biggest = bound_candidate1;
      }
    }
  } else {
    // Rectangular loop (note: for >= the loop starts at the lower bound lb0
    // and runs down to ub0):
    bounds->span_biggest = bbounds.lb0;
    bounds->span_smallest = bbounds.ub0;
  }

  if (!bounds->loop_bounds_adjusted) {
    // Here it's safe to reduce the span to a multiple of the step:
    bounds->span_biggest -=
        (static_cast<UT>(bbounds.ub0 - bbounds.lb0)) % bbounds.step;
  }
}
// Calculate the smallest and biggest spans for this level of the loop nest:
template <typename T>
void kmp_calc_span_XX(
    /*in/out*/ bounds_info_internalXX_template<T> *bounds,
    /*in/out*/ bounds_info_internal_t *bounds_nest) {

  if (bounds->b.comparison == comparison_t::comp_less_or_eq) {
    kmp_calc_span_lessoreq_XX(/*in/out*/ bounds, /*in/out*/ bounds_nest);
  } else {
    KMP_ASSERT(bounds->b.comparison == comparison_t::comp_greater_or_eq);
    kmp_calc_span_greateroreq_XX(/*in/out*/ bounds, /*in/out*/ bounds_nest);
  }
}
//----------All initial processing of the loop nest---------------------------

// Calculate new bounds for this loop level, converting it to a
// parallelepiped form (so the span on this level no longer depends on the
// value of the outer IV):
template <typename T>
void kmp_calc_new_bounds_XX(
    /*in/out*/ bounds_info_internalXX_template<T> *bounds,
    /*in/out*/ bounds_info_internal_t *bounds_nest) {

  auto &bbounds = bounds->b;

  if (bbounds.lb1 == bbounds.ub1) {
    // Already parallel, no need to adjust:
    bounds->loop_bounds_adjusted = false;
  } else {
    bounds->loop_bounds_adjusted = true;

    T old_lb1 = bbounds.lb1;
    T old_ub1 = bbounds.ub1;

    if (__kmp_sign(old_lb1) != __kmp_sign(old_ub1)) {
      // With this shape we can adjust to a rectangle:
      bbounds.lb1 = 0;
      bbounds.ub1 = 0;
    } else {
      // Make the lower and upper bounds parallel, keeping the values in the
      // old range:
      if (((old_lb1 < 0) && (old_lb1 < old_ub1)) ||
          ((old_lb1 > 0) && (old_lb1 > old_ub1))) {
        bbounds.lb1 = old_ub1;
      } else {
        bbounds.ub1 = old_lb1;
      }
    }

    // Now adjust lb0 and ub0, otherwise in some cases the space will shrink.
    // The idea: for this IV we now get the same span irrespective of the
    // outer IV's value.
    bounds_info_internalXX_template<T> *previous =
        reinterpret_cast<bounds_info_internalXX_template<T> *>(
            &bounds_nest[bbounds.outer_iv]);

    if (bbounds.comparison == comparison_t::comp_less_or_eq) {
      if (old_lb1 < bbounds.lb1) {
        KMP_ASSERT(old_lb1 < 0);
        // The length is right at the outer IV's biggest value; use it to
        // find where to move the lower bound:
        T sub = (bbounds.lb1 - old_lb1) * previous->span_biggest;
        bbounds.lb0 -= sub;
      } else if (old_lb1 > bbounds.lb1) {
        // Still need to move the lower bound:
        T add = (old_lb1 - bbounds.lb1) * previous->span_smallest;
        bbounds.lb0 += add;
      }

      if (old_ub1 > bbounds.ub1) {
        KMP_ASSERT(old_ub1 > 0);
        // The length is right at the outer IV's biggest value; use it to
        // find where to move the upper bound:
        T add = (old_ub1 - bbounds.ub1) * previous->span_biggest;
        bbounds.ub0 += add;
      } else if (old_ub1 < bbounds.ub1) {
        // Still need to move the upper bound:
        T sub = (bbounds.ub1 - old_ub1) * previous->span_smallest;
        bbounds.ub0 -= sub;
      }
    } else {
      KMP_ASSERT(bbounds.comparison == comparison_t::comp_greater_or_eq);
      if (old_lb1 < bbounds.lb1) {
        KMP_ASSERT(old_lb1 < 0);
        T sub = (bbounds.lb1 - old_lb1) * previous->span_smallest;
        bbounds.lb0 -= sub;
      } else if (old_lb1 > bbounds.lb1) {
        T add = (old_lb1 - bbounds.lb1) * previous->span_biggest;
        bbounds.lb0 += add;
      }

      if (old_ub1 > bbounds.ub1) {
        KMP_ASSERT(old_ub1 > 0);
        T add = (old_ub1 - bbounds.ub1) * previous->span_smallest;
        bbounds.ub0 += add;
      } else if (old_ub1 < bbounds.ub1) {
        T sub = (bbounds.ub1 - old_ub1) * previous->span_biggest;
        bbounds.ub0 -= sub;
      }
    }
  }
}
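// Worked example (illustrative, not in the original source): for the
// triangular nest "for (i = 0; i <= 9; ++i) for (j = 0; j <= i; ++j)" the
// inner level has lb1 = 0, ub1 = 1. The signs of lb1 and ub1 differ, so the
// level is made rectangular (lb1 = ub1 = 0) and ub0 grows by
// (old_ub1 - 0) * span_biggest of the outer loop = 9, giving 0 <= j <= 9:
// a 10 x 10 = 100 point space covering all 55 original points.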
// Do all processing for one canonicalized loop in the nest (assumes that the
// outer loops were already processed):
template <typename T>
kmp_loop_nest_iv_t kmp_process_one_loop_XX(
    /*in/out*/ bounds_info_internalXX_template<T> *bounds,
    /*in/out*/ bounds_info_internal_t *bounds_nest) {

  kmp_calc_new_bounds_XX(/*in/out*/ bounds, /*in/out*/ bounds_nest);
  kmp_calc_span_XX(/*in/out*/ bounds, /*in/out*/ bounds_nest);
  return kmp_calculate_trip_count_XX(/*in/out*/ &(bounds->b));
}
// For a non-rectangular loop nest canonicalized to use <= or >=: process the
// nest into a parallelepiped shape, calculate the biggest spans for the IVs
// on all levels, and calculate the overall trip count ("bounds_nest" has to
// be allocated per thread). Returns the trip count of the adjusted space.
kmp_loop_nest_iv_t kmp_process_loop_nest(
    /*in/out*/ bounds_info_internal_t *bounds_nest, kmp_index_t n) {

  kmp_loop_nest_iv_t total = 1;

  for (kmp_index_t ind = 0; ind < n; ++ind) {
    auto bounds = &(bounds_nest[ind]);
    kmp_loop_nest_iv_t trip_count = 0;

    switch (bounds->b.loop_type) {
    case loop_type_t::loop_type_int32:
      trip_count = kmp_process_one_loop_XX<kmp_int32>(
          /*in/out*/ (bounds_info_internalXX_template<kmp_int32> *)(bounds),
          /*in/out*/ bounds_nest);
      break;
    case loop_type_t::loop_type_uint32:
      trip_count = kmp_process_one_loop_XX<kmp_uint32>(
          /*in/out*/ (bounds_info_internalXX_template<kmp_uint32> *)(bounds),
          /*in/out*/ bounds_nest);
      break;
    case loop_type_t::loop_type_int64:
      trip_count = kmp_process_one_loop_XX<kmp_int64>(
          /*in/out*/ (bounds_info_internalXX_template<kmp_int64> *)(bounds),
          /*in/out*/ bounds_nest);
      break;
    case loop_type_t::loop_type_uint64:
      trip_count = kmp_process_one_loop_XX<kmp_uint64>(
          /*in/out*/ (bounds_info_internalXX_template<kmp_uint64> *)(bounds),
          /*in/out*/ bounds_nest);
      break;
    default:
      KMP_ASSERT(false);
    }
    total *= trip_count;
  }

  return total;
}
//----------Calculate iterations (in the non-rectangular case)----------------

// Calculate the number of iterations in the original or updated space that
// result in original_ivs[ind] (only on this level, non-negative, not
// counting the initial iteration):
template <typename T>
kmp_loop_nest_iv_t
kmp_calc_number_of_iterations_XX(const bounds_infoXX_template<T> *bounds,
                                 const kmp_point_t original_ivs,
                                 kmp_index_t ind) {

  kmp_loop_nest_iv_t iterations = 0;

  if (bounds->comparison == comparison_t::comp_less_or_eq) {
    iterations =
        (static_cast<T>(original_ivs[ind]) - bounds->lb0 -
         bounds->lb1 * static_cast<T>(original_ivs[bounds->outer_iv])) /
        __kmp_abs(bounds->step);
  } else {
    KMP_DEBUG_ASSERT(bounds->comparison == comparison_t::comp_greater_or_eq);
    iterations =
        (bounds->lb0 +
         bounds->lb1 * static_cast<T>(original_ivs[bounds->outer_iv]) -
         static_cast<T>(original_ivs[ind])) /
        __kmp_abs(bounds->step);
  }

  return iterations;
}
kmp_loop_nest_iv_t kmp_calc_number_of_iterations(const bounds_info_t *bounds,
                                                 const kmp_point_t original_ivs,
                                                 kmp_index_t ind) {

  switch (bounds->loop_type) {
  case loop_type_t::loop_type_int32:
    return kmp_calc_number_of_iterations_XX<kmp_int32>(
        (bounds_infoXX_template<kmp_int32> *)(bounds), original_ivs, ind);
  case loop_type_t::loop_type_uint32:
    return kmp_calc_number_of_iterations_XX<kmp_uint32>(
        (bounds_infoXX_template<kmp_uint32> *)(bounds), original_ivs, ind);
  case loop_type_t::loop_type_int64:
    return kmp_calc_number_of_iterations_XX<kmp_int64>(
        (bounds_infoXX_template<kmp_int64> *)(bounds), original_ivs, ind);
  case loop_type_t::loop_type_uint64:
    return kmp_calc_number_of_iterations_XX<kmp_uint64>(
        (bounds_infoXX_template<kmp_uint64> *)(bounds), original_ivs, ind);
  default:
    KMP_ASSERT(false);
    return 0;
  }
}
//----------Calculate new iv corresponding to original ivs--------------------

// We got a point in the original loop nest. Take the updated bounds and
// calculate which new_iv corresponds to this point:
kmp_loop_nest_iv_t
kmp_calc_new_iv_from_original_ivs(const bounds_info_internal_t *bounds_nest,
                                  const kmp_point_t original_ivs,
                                  kmp_index_t n) {

  kmp_loop_nest_iv_t new_iv = 0;

  for (kmp_index_t ind = 0; ind < n; ++ind) {
    auto bounds = &(bounds_nest[ind].b);

    new_iv = new_iv * bounds->trip_count +
             kmp_calc_number_of_iterations(bounds, original_ivs, ind);
  }

  return new_iv;
}
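// Worked example (illustrative, not in the original source): this is the
// inverse of the decomposition in __kmpc_calc_original_ivs_rectang above;
// with trip counts {4, 5} and per-level iterations {2, 3} the collapsed
// index is 2 * 5 + 3 = 13.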
//----------Calculate original ivs for provided iterations--------------------

// Calculate the original IVs for the provided iterations, assuming the
// iterations are calculated in the original space and the loop nest is in
// canonical form (using <= / >=):
bool kmp_calc_original_ivs_from_iterations(
    const bounds_info_t *original_bounds_nest, kmp_index_t n,
    /*in/out*/ kmp_point_t original_ivs,
    /*in/out*/ kmp_iterations_t iterations, kmp_index_t ind) {

  kmp_index_t lengthened_ind = n;

  for (; ind < n;) {
    auto bounds = &(original_bounds_nest[ind]);
    bool good = kmp_calc_one_iv(bounds, /*in/out*/ original_ivs, iterations,
                                ind, (lengthened_ind < ind), true);

    if (!good) {
      // The calculated IV value is too big (or too small for >=):
      if (ind == 0) {
        // The space is empty:
        return false;
      } else {
        // Go to the next iteration on the outer loop:
        --ind;
        ++iterations[ind];
        lengthened_ind = ind;
        for (kmp_index_t i = ind + 1; i < n; ++i) {
          iterations[i] = 0;
        }
        continue;
      }
    }
    ++ind;
  }

  return true;
}
// Calculate the original IVs for the beginning of the loop nest.
// Note: the lower bounds of all loops may not form a valid point (not all
// points of the initial surface are valid):
bool kmp_calc_original_ivs_for_start(const bounds_info_t *original_bounds_nest,
                                     kmp_index_t n,
                                     /*out*/ kmp_point_t original_ivs) {

  // Iterations in the original space, multiplied by step:
  CollapseAllocator<kmp_loop_nest_iv_t> iterations(n);
  for (kmp_index_t ind = n; ind > 0;) {
    --ind;
    iterations[ind] = 0;
  }

  // Now calculate the point:
  bool b = kmp_calc_original_ivs_from_iterations(original_bounds_nest, n,
                                                 /*in/out*/ original_ivs,
                                                 /*in/out*/ iterations, 0);
  return b;
}
// Calculate the next point in the original loop space:
bool kmp_calc_next_original_ivs(const bounds_info_t *original_bounds_nest,
                                kmp_index_t n, const kmp_point_t original_ivs,
                                /*out*/ kmp_point_t next_original_ivs) {

  // Iterations in the original space, multiplied by step (so they can be
  // negative):
  CollapseAllocator<kmp_loop_nest_iv_t> iterations(n);
  // First, calculate the corresponding iteration in every original loop:
  for (kmp_index_t ind = 0; ind < n; ++ind) {
    auto bounds = &(original_bounds_nest[ind]);
    iterations[ind] = kmp_calc_number_of_iterations(bounds, original_ivs, ind);
  }

  for (kmp_index_t ind = 0; ind < n; ++ind) {
    next_original_ivs[ind] = original_ivs[ind];
  }

  // Add one step on the innermost level, then see if we need to move up the
  // nest:
  kmp_index_t ind = n - 1;
  ++iterations[ind];

  bool b = kmp_calc_original_ivs_from_iterations(
      original_bounds_nest, n, /*in/out*/ next_original_ivs, iterations, ind);
  return b;
}
//----------Calculate original ivs for the chunk end--------------------------

// Calculate one IV for the chunk end: pick a point using the updated
// (parallelepiped) bounds, then clamp it against the original bounds:
template <typename T>
bool kmp_calc_one_iv_for_chunk_end_XX(
    const bounds_infoXX_template<T> *bounds,
    const bounds_infoXX_template<T> *updated_bounds,
    /*in/out*/ kmp_point_t original_ivs, const kmp_iterations_t iterations,
    kmp_index_t ind, bool start_with_lower_bound, bool compare_with_start,
    const kmp_point_t original_ivs_start) {

  T temp = 0;
  T outer_iv = static_cast<T>(original_ivs[bounds->outer_iv]);

  if (start_with_lower_bound) {
    // We moved to the next iteration on one of the outer loops, so may as
    // well use the lower bound here:
    temp = bounds->lb0 + bounds->lb1 * outer_iv;
  } else {
    auto iteration = iterations[ind];
    auto step = bounds->step;

    // In case of >= this is negative:
    auto accountForStep =
        ((bounds->lb0 + bounds->lb1 * outer_iv) -
         (updated_bounds->lb0 + updated_bounds->lb1 * outer_iv)) %
        step;

    temp = updated_bounds->lb0 + updated_bounds->lb1 * outer_iv +
           accountForStep + iteration * step;

    if (((bounds->comparison == comparison_t::comp_less_or_eq) &&
         (temp < (bounds->lb0 + bounds->lb1 * outer_iv))) ||
        ((bounds->comparison == comparison_t::comp_greater_or_eq) &&
         (temp > (bounds->lb0 + bounds->lb1 * outer_iv)))) {
      // Too small (or too big for >=): we didn't reach the original lower
      // bound. Heuristic: restart half-way from the lower bound:
      temp = bounds->lb0 + bounds->lb1 * outer_iv + iteration / 2 * step;
    }
  }

  if (compare_with_start) {
    T start = static_cast<T>(original_ivs_start[ind]);

    temp = kmp_fix_iv(bounds->loop_iv_type, temp);

    // On all previous levels the start of the chunk is the same as its end,
    // so we need to be really careful here:
    if (((bounds->comparison == comparison_t::comp_less_or_eq) &&
         (temp < start)) ||
        ((bounds->comparison == comparison_t::comp_greater_or_eq) &&
         (temp > start))) {
      // The end of the chunk can't be smaller (bigger for >=) than its
      // start. Heuristic: restart a quarter-way from the chunk start:
      temp = start + iteration / 4 * step;
    }
  }

  original_ivs[ind] = temp = kmp_fix_iv(bounds->loop_iv_type, temp);

  if (((bounds->comparison == comparison_t::comp_less_or_eq) &&
       (temp > (bounds->ub0 + bounds->ub1 * outer_iv))) ||
      ((bounds->comparison == comparison_t::comp_greater_or_eq) &&
       (temp < (bounds->ub0 + bounds->ub1 * outer_iv)))) {
    // Too big (or too small for >=):
    return false;
  }

  return true;
}
bool kmp_calc_one_iv_for_chunk_end(const bounds_info_t *bounds,
                                   const bounds_info_t *updated_bounds,
                                   /*in/out*/ kmp_point_t original_ivs,
                                   const kmp_iterations_t iterations,
                                   kmp_index_t ind,
                                   bool start_with_lower_bound,
                                   bool compare_with_start,
                                   const kmp_point_t original_ivs_start) {

  switch (bounds->loop_type) {
  case loop_type_t::loop_type_int32:
    return kmp_calc_one_iv_for_chunk_end_XX<kmp_int32>(
        (bounds_infoXX_template<kmp_int32> *)(bounds),
        (bounds_infoXX_template<kmp_int32> *)(updated_bounds),
        /*in/out*/ original_ivs, iterations, ind, start_with_lower_bound,
        compare_with_start, original_ivs_start);
  case loop_type_t::loop_type_uint32:
    return kmp_calc_one_iv_for_chunk_end_XX<kmp_uint32>(
        (bounds_infoXX_template<kmp_uint32> *)(bounds),
        (bounds_infoXX_template<kmp_uint32> *)(updated_bounds),
        /*in/out*/ original_ivs, iterations, ind, start_with_lower_bound,
        compare_with_start, original_ivs_start);
  case loop_type_t::loop_type_int64:
    return kmp_calc_one_iv_for_chunk_end_XX<kmp_int64>(
        (bounds_infoXX_template<kmp_int64> *)(bounds),
        (bounds_infoXX_template<kmp_int64> *)(updated_bounds),
        /*in/out*/ original_ivs, iterations, ind, start_with_lower_bound,
        compare_with_start, original_ivs_start);
  case loop_type_t::loop_type_uint64:
    return kmp_calc_one_iv_for_chunk_end_XX<kmp_uint64>(
        (bounds_infoXX_template<kmp_uint64> *)(bounds),
        (bounds_infoXX_template<kmp_uint64> *)(updated_bounds),
        /*in/out*/ original_ivs, iterations, ind, start_with_lower_bound,
        compare_with_start, original_ivs_start);
  default:
    KMP_ASSERT(false);
    return false;
  }
}
// Calculate the original IVs for the chunk end: a point corresponding to
// new_iv in the updated space, adjusted so that it stays past the chunk
// start and inside the original bounds:
bool kmp_calc_original_ivs_for_chunk_end(
    const bounds_info_t *original_bounds_nest, kmp_index_t n,
    const bounds_info_internal_t *updated_bounds_nest,
    const kmp_point_t original_ivs_start, kmp_loop_nest_iv_t new_iv,
    /*out*/ kmp_point_t original_ivs) {

  // Iterations in the expanded space:
  CollapseAllocator<kmp_loop_nest_iv_t> iterations(n);
  // First, calculate the corresponding iteration in every updated loop:
  for (kmp_index_t ind = n; ind > 0;) {
    --ind;
    auto &updated_bounds = updated_bounds_nest[ind];

    // Could be optimized to one division (or div+mul):
    auto new_ind = new_iv / updated_bounds.b.trip_count;
    auto iteration = new_iv % updated_bounds.b.trip_count;

    new_iv = new_ind;
    iterations[ind] = iteration;
  }
  KMP_DEBUG_ASSERT(new_iv == 0);

  kmp_index_t lengthened_ind = n;
  kmp_index_t equal_ind = -1;

  // Next, calculate the point, but in the original loop nest:
  for (kmp_index_t ind = 0; ind < n;) {
    auto bounds = &(original_bounds_nest[ind]);
    auto updated_bounds = &(updated_bounds_nest[ind].b);

    bool good = kmp_calc_one_iv_for_chunk_end(
        bounds, updated_bounds,
        /*in/out*/ original_ivs, iterations, ind, (lengthened_ind < ind),
        (equal_ind >= ind - 1), original_ivs_start);

    if (!good) {
      // Too big (or too small for >=):
      if (ind == 0) {
        // Need to reduce the chunk to the end:
        return false;
      } else {
        // Go to the next iteration on the outer loop:
        --ind;
        ++(iterations[ind]);
        lengthened_ind = ind;
        if (equal_ind >= lengthened_ind) {
          // We've changed the number of iterations here, so the IVs can't be
          // equal to the start anymore:
          equal_ind = lengthened_ind - 1;
        }
        for (kmp_index_t i = ind + 1; i < n; ++i) {
          iterations[i] = 0;
        }
        continue;
      }
    }

    if ((equal_ind == ind - 1) &&
        (kmp_ivs_eq(bounds->loop_iv_type, original_ivs[ind],
                    original_ivs_start[ind]))) {
      equal_ind = ind;
    } else if ((equal_ind > ind - 1) &&
               !(kmp_ivs_eq(bounds->loop_iv_type, original_ivs[ind],
                            original_ivs_start[ind]))) {
      equal_ind = ind - 1;
    }
    ++ind;
  }

  return true;
}
//----------Calculate upper bounds for the last chunk-------------------------

// Calculate one upper bound for the end:
template <typename T>
void kmp_calc_one_iv_end_XX(const bounds_infoXX_template<T> *bounds,
                            /*in/out*/ kmp_point_t original_ivs,
                            kmp_index_t ind) {

  T temp = bounds->ub0 +
           bounds->ub1 * static_cast<T>(original_ivs[bounds->outer_iv]);

  original_ivs[ind] = kmp_fix_iv(bounds->loop_iv_type, temp);
}
void kmp_calc_one_iv_end(const bounds_info_t *bounds,
                         /*in/out*/ kmp_point_t original_ivs,
                         kmp_index_t ind) {

  switch (bounds->loop_type) {
  default:
    KMP_ASSERT(false);
    break;
  case loop_type_t::loop_type_int32:
    kmp_calc_one_iv_end_XX<kmp_int32>(
        (bounds_infoXX_template<kmp_int32> *)(bounds),
        /*in/out*/ original_ivs, ind);
    break;
  case loop_type_t::loop_type_uint32:
    kmp_calc_one_iv_end_XX<kmp_uint32>(
        (bounds_infoXX_template<kmp_uint32> *)(bounds),
        /*in/out*/ original_ivs, ind);
    break;
  case loop_type_t::loop_type_int64:
    kmp_calc_one_iv_end_XX<kmp_int64>(
        (bounds_infoXX_template<kmp_int64> *)(bounds),
        /*in/out*/ original_ivs, ind);
    break;
  case loop_type_t::loop_type_uint64:
    kmp_calc_one_iv_end_XX<kmp_uint64>(
        (bounds_infoXX_template<kmp_uint64> *)(bounds),
        /*in/out*/ original_ivs, ind);
    break;
  }
}
// Calculate the upper bounds for the last loop iteration: just use the
// original upper bounds (adjusted when canonicalized to use <= / >=).
// There is no need to check that this point is in the original space (it
// most likely is not):
void kmp_calc_original_ivs_for_end(
    const bounds_info_t *const original_bounds_nest, kmp_index_t n,
    /*out*/ kmp_point_t original_ivs) {
  for (kmp_index_t ind = 0; ind < n; ++ind) {
    auto bounds = &(original_bounds_nest[ind]);
    kmp_calc_one_iv_end(bounds, /*in/out*/ original_ivs, ind);
  }
}
//----------Init API for non-rectangular loops--------------------------------

// Init API for collapsed loops (static scheduling, no chunks defined).
// "bounds_nest" has to be allocated per thread. The API modifies the original
// bounds_nest array to bring it to a canonical form (only <= and >=; no !=,
// <, >). Returns TRUE if the calling thread got a chunk of iterations.
extern "C" kmp_int32
__kmpc_for_collapsed_init(ident_t *loc, kmp_int32 gtid,
                          /*in/out*/ bounds_info_t *original_bounds_nest,
                          /*out*/ bounds_info_t *chunk_bounds_nest,
                          kmp_index_t n, /*out*/ kmp_int32 *plastiter) {

  KMP_DEBUG_ASSERT(plastiter && original_bounds_nest);
  KE_TRACE(10, ("__kmpc_for_collapsed_init called (%d)\n", gtid));

  if (__kmp_env_consistency_check) {
    __kmp_push_workshare(gtid, ct_pdo, loc);
  }

  kmp_canonicalize_loop_nest(loc, /*in/out*/ original_bounds_nest, n);

  CollapseAllocator<bounds_info_internal_t> updated_bounds_nest(n);

  for (kmp_index_t i = 0; i < n; ++i) {
    updated_bounds_nest[i].b = original_bounds_nest[i];
  }

  kmp_loop_nest_iv_t total =
      kmp_process_loop_nest(/*in/out*/ updated_bounds_nest, n);

  if (plastiter != NULL) {
    *plastiter = FALSE;
  }

  if (total == 0) {
    // The loop won't execute:
    return FALSE;
  }

  __kmp_assert_valid_gtid(gtid);
  kmp_uint32 tid = __kmp_tid_from_gtid(gtid);

  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_uint32 nth = team->t.t_nproc; // Number of threads

  KMP_DEBUG_ASSERT(tid < nth);

  CollapseAllocator<kmp_uint64> original_ivs_start(n);

  if (!kmp_calc_original_ivs_for_start(original_bounds_nest, n,
                                       /*out*/ original_ivs_start)) {
    // The loop won't execute:
    return FALSE;
  }

  kmp_loop_nest_iv_t new_iv = kmp_calc_new_iv_from_original_ivs(
      updated_bounds_nest, original_ivs_start, n);

  bool last_iter = false;

  for (; nth > 0;) {
    // The chunk size is re-calculated on every pass: the original space is
    // not a parallelepiped, and this compensates for threads that would
    // otherwise be left without work:
    KMP_DEBUG_ASSERT(total >= new_iv);

    kmp_loop_nest_iv_t total_left = total - new_iv;
    kmp_loop_nest_iv_t chunk_size = total_left / nth;
    kmp_loop_nest_iv_t remainder = total_left % nth;

    kmp_loop_nest_iv_t curr_chunk_size = chunk_size;

    if (remainder > 0) {
      ++curr_chunk_size;
      --remainder;
    }
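    // Worked example (illustrative, not in the original source): with
    // total_left = 10 and nth = 3, chunk_size = 3 and remainder = 1, so the
    // first pass hands out a chunk of 4; later passes recompute with nth = 2
    // (6 left, chunk 3) and nth = 1 (chunk 3), giving 4 + 3 + 3 = 10.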
#if defined(KMP_DEBUG)
    kmp_loop_nest_iv_t new_iv_for_start = new_iv;
#endif

    if (curr_chunk_size > 1) {
      new_iv += curr_chunk_size - 1;
    }

    CollapseAllocator<kmp_uint64> original_ivs_end(n);
    if ((nth == 1) || (new_iv >= total - 1)) {
      // Do this one to the very end, in case we miscalculated and either too
      // much is left (should be zero or one chunk) or the loop surfaces are
      // so complex that we won't find the endpoint:
      last_iter = true;

      kmp_calc_original_ivs_for_end(original_bounds_nest, n,
                                    /*out*/ original_ivs_end);
    } else {
      // Note: here we make sure the end is past (or equal to) the previous
      // point:
      if (!kmp_calc_original_ivs_for_chunk_end(original_bounds_nest, n,
                                               updated_bounds_nest,
                                               original_ivs_start, new_iv,
                                               /*out*/ original_ivs_end)) {
        // The ending point was not found; use the original upper bounds:
        last_iter = true;

        kmp_calc_original_ivs_for_end(original_bounds_nest, n,
                                      /*out*/ original_ivs_end);
      }
    }

#if defined(KMP_DEBUG)
    auto new_iv_for_end = kmp_calc_new_iv_from_original_ivs(
        updated_bounds_nest, original_ivs_end, n);
    KMP_DEBUG_ASSERT(new_iv_for_end >= new_iv_for_start);
#endif

    if (last_iter && (tid != 0)) {
      // We are done: this was the last chunk, and it was not for the current
      // thread:
      return FALSE;
    }

    if (tid == 0) {
      // We found the chunk for this thread; now check whether it is the
      // last one:
      CollapseAllocator<kmp_uint64> original_ivs_next_start(n);
      if (last_iter ||
          !kmp_calc_next_original_ivs(original_bounds_nest, n,
                                      original_ivs_end,
                                      /*out*/ original_ivs_next_start)) {
        // No more loop iterations left to process, so the chunk just found
        // is the last one:
        if (plastiter != NULL) {
          *plastiter = TRUE;
        }
      }

      // Fill in the chunk bounds:
      for (kmp_index_t i = 0; i < n; ++i) {
        chunk_bounds_nest[i] =
            original_bounds_nest[i]; // To fill in types, etc.
        chunk_bounds_nest[i].lb0_u64 = original_ivs_start[i];
        chunk_bounds_nest[i].lb1_u64 = 0;

        chunk_bounds_nest[i].ub0_u64 = original_ivs_end[i];
        chunk_bounds_nest[i].ub1_u64 = 0;
      }

      return TRUE;
    }

    --tid;
    --nth;

    bool next_chunk = kmp_calc_next_original_ivs(
        original_bounds_nest, n, original_ivs_end, /*out*/ original_ivs_start);
    if (!next_chunk) {
      // No more loop iterations to process: the previous chunk was the last
      // one:
      break;
    }

    // original_ivs_start now sits just past the previous chunk's
    // original_ivs_end, so consecutive chunks follow one another without any
    // gap or overlap:
    new_iv = kmp_calc_new_iv_from_original_ivs(updated_bounds_nest,
                                               original_ivs_start, n);
  }

  return FALSE;
}