LLVM OpenMP* Runtime Library
Loading...
Searching...
No Matches
kmp_csupport.cpp
1/*
2 * kmp_csupport.cpp -- kfront linkage support for OpenMP.
3 */
4
5//===----------------------------------------------------------------------===//
6//
7// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8// See https://llvm.org/LICENSE.txt for license information.
9// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10//
11//===----------------------------------------------------------------------===//
12
13#define __KMP_IMP
14#include "omp.h" /* extern "C" declarations of user-visible routines */
15#include "kmp.h"
16#include "kmp_error.h"
17#include "kmp_i18n.h"
18#include "kmp_itt.h"
19#include "kmp_lock.h"
20#include "kmp_stats.h"
21#include "ompt-specific.h"
22
23#define MAX_MESSAGE 512
24
25// flags will be used in future, e.g. to implement openmp_strict library
26// restrictions
27
36void __kmpc_begin(ident_t *loc, kmp_int32 flags) {
37 // By default __kmpc_begin() is no-op.
38 char *env;
39 if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
40 __kmp_str_match_true(env)) {
41 __kmp_middle_initialize();
42 __kmp_assign_root_init_mask();
43 KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
44 } else if (__kmp_ignore_mppbeg() == FALSE) {
45 // By default __kmp_ignore_mppbeg() returns TRUE.
46 __kmp_internal_begin();
47 KC_TRACE(10, ("__kmpc_begin: called\n"));
48 }
49}
50
59void __kmpc_end(ident_t *loc) {
60 // By default, __kmp_ignore_mppend() returns TRUE which makes __kmpc_end()
61 // call no-op. However, this can be overridden with KMP_IGNORE_MPPEND
62 // environment variable. If KMP_IGNORE_MPPEND is 0, __kmp_ignore_mppend()
63 // returns FALSE and __kmpc_end() will unregister this root (it can cause
64 // library shut down).
65 if (__kmp_ignore_mppend() == FALSE) {
66 KC_TRACE(10, ("__kmpc_end: called\n"));
67 KA_TRACE(30, ("__kmpc_end\n"));
68
69 __kmp_internal_end_thread(-1);
70 }
71#if KMP_OS_WINDOWS && OMPT_SUPPORT
72 // Normal exit process on Windows does not allow worker threads of the final
73 // parallel region to finish reporting their events, so shutting down the
74 // library here fixes the issue at least for the cases where __kmpc_end() is
75 // placed properly.
76 if (ompt_enabled.enabled)
77 __kmp_internal_end_library(__kmp_gtid_get_specific());
78#endif
79}
80
100 kmp_int32 gtid = __kmp_entry_gtid();
101
102 KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid));
103
104 return gtid;
105}
106
122 KC_TRACE(10,
123 ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth));
124
125 return TCR_4(__kmp_all_nth);
126}
127
135 KC_TRACE(10, ("__kmpc_bound_thread_num: called\n"));
136 return __kmp_tid_from_gtid(__kmp_entry_gtid());
137}
138
145 KC_TRACE(10, ("__kmpc_bound_num_threads: called\n"));
146
147 return __kmp_entry_thread()->th.th_team->t.t_nproc;
148}
149
156kmp_int32 __kmpc_ok_to_fork(ident_t *loc) {
157#ifndef KMP_DEBUG
158
159 return TRUE;
160
161#else
162
163 const char *semi2;
164 const char *semi3;
165 int line_no;
166
167 if (__kmp_par_range == 0) {
168 return TRUE;
169 }
170 semi2 = loc->psource;
171 if (semi2 == NULL) {
172 return TRUE;
173 }
174 semi2 = strchr(semi2, ';');
175 if (semi2 == NULL) {
176 return TRUE;
177 }
178 semi2 = strchr(semi2 + 1, ';');
179 if (semi2 == NULL) {
180 return TRUE;
181 }
182 if (__kmp_par_range_filename[0]) {
183 const char *name = semi2 - 1;
184 while ((name > loc->psource) && (*name != '/') && (*name != ';')) {
185 name--;
186 }
187 if ((*name == '/') || (*name == ';')) {
188 name++;
189 }
190 if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
191 return __kmp_par_range < 0;
192 }
193 }
194 semi3 = strchr(semi2 + 1, ';');
195 if (__kmp_par_range_routine[0]) {
196 if ((semi3 != NULL) && (semi3 > semi2) &&
197 (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
198 return __kmp_par_range < 0;
199 }
200 }
201 if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) {
202 if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
203 return __kmp_par_range > 0;
204 }
205 return __kmp_par_range < 0;
206 }
207 return TRUE;
208
209#endif /* KMP_DEBUG */
210}
211
219 return __kmp_entry_thread()->th.th_root->r.r_active;
220}
221
231void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
232 kmp_int32 num_threads) {
233 KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
234 global_tid, num_threads));
235 __kmp_assert_valid_gtid(global_tid);
236 __kmp_push_num_threads(loc, global_tid, num_threads);
237}
238
239void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) {
240 KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n"));
241 /* the num_threads are automatically popped */
242}
243
244void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
245 kmp_int32 proc_bind) {
246 KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid,
247 proc_bind));
248 __kmp_assert_valid_gtid(global_tid);
249 __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind);
250}
251
262void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) {
263 int gtid = __kmp_entry_gtid();
264
265#if (KMP_STATS_ENABLED)
266 // If we were in a serial region, then stop the serial timer, record
267 // the event, and start parallel region timer
268 stats_state_e previous_state = KMP_GET_THREAD_STATE();
269 if (previous_state == stats_state_e::SERIAL_REGION) {
270 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead);
271 } else {
272 KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead);
273 }
274 int inParallel = __kmpc_in_parallel(loc);
275 if (inParallel) {
276 KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL);
277 } else {
278 KMP_COUNT_BLOCK(OMP_PARALLEL);
279 }
280#endif
281
282 // maybe to save thr_state is enough here
283 {
284 va_list ap;
285 va_start(ap, microtask);
286
287#if OMPT_SUPPORT
288 ompt_frame_t *ompt_frame;
289 if (ompt_enabled.enabled) {
290 kmp_info_t *master_th = __kmp_threads[gtid];
291 ompt_frame = &master_th->th.th_current_task->ompt_task_info.frame;
292 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
293 }
294 OMPT_STORE_RETURN_ADDRESS(gtid);
295#endif
296
297#if INCLUDE_SSC_MARKS
298 SSC_MARK_FORKING();
299#endif
300 __kmp_fork_call(loc, gtid, fork_context_intel, argc,
301 VOLATILE_CAST(microtask_t) microtask, // "wrapped" task
302 VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
303 kmp_va_addr_of(ap));
304#if INCLUDE_SSC_MARKS
305 SSC_MARK_JOINING();
306#endif
307 __kmp_join_call(loc, gtid
308#if OMPT_SUPPORT
309 ,
310 fork_context_intel
311#endif
312 );
313
314 va_end(ap);
315
316#if OMPT_SUPPORT
317 if (ompt_enabled.enabled) {
318 ompt_frame->enter_frame = ompt_data_none;
319 }
320#endif
321 }
322
323#if KMP_STATS_ENABLED
324 if (previous_state == stats_state_e::SERIAL_REGION) {
325 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
326 KMP_SET_THREAD_STATE(previous_state);
327 } else {
328 KMP_POP_PARTITIONED_TIMER();
329 }
330#endif // KMP_STATS_ENABLED
331}
332
343void __kmpc_fork_call_if(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,
344 kmp_int32 cond, void *args) {
345 int gtid = __kmp_entry_gtid();
346 if (cond) {
347 if (args)
348 __kmpc_fork_call(loc, argc, microtask, args);
349 else
350 __kmpc_fork_call(loc, argc, microtask);
351 } else {
353
354#if OMPT_SUPPORT
355 void *exit_frame_ptr;
356#endif
357
358 if (args)
359 __kmp_invoke_microtask(VOLATILE_CAST(microtask_t) microtask, gtid,
360 /*npr=*/0,
361 /*argc=*/1, &args
362#if OMPT_SUPPORT
363 ,
364 &exit_frame_ptr
365#endif
366 );
367 else
368 __kmp_invoke_microtask(VOLATILE_CAST(microtask_t) microtask, gtid,
369 /*npr=*/0,
370 /*argc=*/0,
371 /*args=*/nullptr
372#if OMPT_SUPPORT
373 ,
374 &exit_frame_ptr
375#endif
376 );
377
379 }
380}
381
393void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
394 kmp_int32 num_teams, kmp_int32 num_threads) {
395 KA_TRACE(20,
396 ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
397 global_tid, num_teams, num_threads));
398 __kmp_assert_valid_gtid(global_tid);
399 __kmp_push_num_teams(loc, global_tid, num_teams, num_threads);
400}
401
412void __kmpc_set_thread_limit(ident_t *loc, kmp_int32 global_tid,
413 kmp_int32 thread_limit) {
414 __kmp_assert_valid_gtid(global_tid);
415 kmp_info_t *thread = __kmp_threads[global_tid];
416 if (thread_limit > 0)
417 thread->th.th_current_task->td_icvs.task_thread_limit = thread_limit;
418}
419
436void __kmpc_push_num_teams_51(ident_t *loc, kmp_int32 global_tid,
437 kmp_int32 num_teams_lb, kmp_int32 num_teams_ub,
438 kmp_int32 num_threads) {
439 KA_TRACE(20, ("__kmpc_push_num_teams_51: enter T#%d num_teams_lb=%d"
440 " num_teams_ub=%d num_threads=%d\n",
441 global_tid, num_teams_lb, num_teams_ub, num_threads));
442 __kmp_assert_valid_gtid(global_tid);
443 __kmp_push_num_teams_51(loc, global_tid, num_teams_lb, num_teams_ub,
444 num_threads);
445}
446
457void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,
458 ...) {
459 int gtid = __kmp_entry_gtid();
460 kmp_info_t *this_thr = __kmp_threads[gtid];
461 va_list ap;
462 va_start(ap, microtask);
463
464#if KMP_STATS_ENABLED
465 KMP_COUNT_BLOCK(OMP_TEAMS);
466 stats_state_e previous_state = KMP_GET_THREAD_STATE();
467 if (previous_state == stats_state_e::SERIAL_REGION) {
468 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_teams_overhead);
469 } else {
470 KMP_PUSH_PARTITIONED_TIMER(OMP_teams_overhead);
471 }
472#endif
473
474 // remember teams entry point and nesting level
475 this_thr->th.th_teams_microtask = microtask;
476 this_thr->th.th_teams_level =
477 this_thr->th.th_team->t.t_level; // AC: can be >0 on host
478
479#if OMPT_SUPPORT
480 kmp_team_t *parent_team = this_thr->th.th_team;
481 int tid = __kmp_tid_from_gtid(gtid);
482 if (ompt_enabled.enabled) {
483 parent_team->t.t_implicit_task_taskdata[tid]
484 .ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
485 }
486 OMPT_STORE_RETURN_ADDRESS(gtid);
487#endif
488
489 // check if __kmpc_push_num_teams called, set default number of teams
490 // otherwise
491 if (this_thr->th.th_teams_size.nteams == 0) {
492 __kmp_push_num_teams(loc, gtid, 0, 0);
493 }
494 KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
495 KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
496 KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);
497
498 __kmp_fork_call(
499 loc, gtid, fork_context_intel, argc,
500 VOLATILE_CAST(microtask_t) __kmp_teams_master, // "wrapped" task
501 VOLATILE_CAST(launch_t) __kmp_invoke_teams_master, kmp_va_addr_of(ap));
502 __kmp_join_call(loc, gtid
503#if OMPT_SUPPORT
504 ,
505 fork_context_intel
506#endif
507 );
508
509 // Pop current CG root off list
510 KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
511 kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
512 this_thr->th.th_cg_roots = tmp->up;
513 KA_TRACE(100, ("__kmpc_fork_teams: Thread %p popping node %p and moving up"
514 " to node %p. cg_nthreads was %d\n",
515 this_thr, tmp, this_thr->th.th_cg_roots, tmp->cg_nthreads));
516 KMP_DEBUG_ASSERT(tmp->cg_nthreads);
517 int i = tmp->cg_nthreads--;
518 if (i == 1) { // check is we are the last thread in CG (not always the case)
519 __kmp_free(tmp);
520 }
521 // Restore current task's thread_limit from CG root
522 KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
523 this_thr->th.th_current_task->td_icvs.thread_limit =
524 this_thr->th.th_cg_roots->cg_thread_limit;
525
526 this_thr->th.th_teams_microtask = NULL;
527 this_thr->th.th_teams_level = 0;
528 *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L;
529 va_end(ap);
530#if KMP_STATS_ENABLED
531 if (previous_state == stats_state_e::SERIAL_REGION) {
532 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
533 KMP_SET_THREAD_STATE(previous_state);
534 } else {
535 KMP_POP_PARTITIONED_TIMER();
536 }
537#endif // KMP_STATS_ENABLED
538}
539
540// I don't think this function should ever have been exported.
541// The __kmpc_ prefix was misapplied. I'm fairly certain that no generated
542// openmp code ever called it, but it's been exported from the RTL for so
543// long that I'm afraid to remove the definition.
544int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); }
545
558void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
559 // The implementation is now in kmp_runtime.cpp so that it can share static
560 // functions with kmp_fork_call since the tasks to be done are similar in
561 // each case.
562 __kmp_assert_valid_gtid(global_tid);
563#if OMPT_SUPPORT
564 OMPT_STORE_RETURN_ADDRESS(global_tid);
565#endif
566 __kmp_serialized_parallel(loc, global_tid);
567}
568
576void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
577 kmp_internal_control_t *top;
578 kmp_info_t *this_thr;
579 kmp_team_t *serial_team;
580
581 KC_TRACE(10,
582 ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid));
583
584 /* skip all this code for autopar serialized loops since it results in
585 unacceptable overhead */
586 if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR))
587 return;
588
589 // Not autopar code
590 __kmp_assert_valid_gtid(global_tid);
591 if (!TCR_4(__kmp_init_parallel))
592 __kmp_parallel_initialize();
593
594 __kmp_resume_if_soft_paused();
595
596 this_thr = __kmp_threads[global_tid];
597 serial_team = this_thr->th.th_serial_team;
598
599 kmp_task_team_t *task_team = this_thr->th.th_task_team;
600 // we need to wait for the proxy tasks before finishing the thread
601 if (task_team != NULL && (task_team->tt.tt_found_proxy_tasks ||
602 task_team->tt.tt_hidden_helper_task_encountered))
603 __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL));
604
605 KMP_MB();
606 KMP_DEBUG_ASSERT(serial_team);
607 KMP_ASSERT(serial_team->t.t_serialized);
608 KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
609 KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team);
610 KMP_DEBUG_ASSERT(serial_team->t.t_threads);
611 KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
612
613#if OMPT_SUPPORT
614 if (ompt_enabled.enabled &&
615 this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
616 OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = ompt_data_none;
617 if (ompt_enabled.ompt_callback_implicit_task) {
618 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
619 ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1,
620 OMPT_CUR_TASK_INFO(this_thr)->thread_num, ompt_task_implicit);
621 }
622
623 // reset clear the task id only after unlinking the task
624 ompt_data_t *parent_task_data;
625 __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL);
626
627 if (ompt_enabled.ompt_callback_parallel_end) {
628 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
629 &(serial_team->t.ompt_team_info.parallel_data), parent_task_data,
630 ompt_parallel_invoker_program | ompt_parallel_team,
631 OMPT_LOAD_RETURN_ADDRESS(global_tid));
632 }
633 __ompt_lw_taskteam_unlink(this_thr);
634 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
635 }
636#endif
637
638 /* If necessary, pop the internal control stack values and replace the team
639 * values */
640 top = serial_team->t.t_control_stack_top;
641 if (top && top->serial_nesting_level == serial_team->t.t_serialized) {
642 copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top);
643 serial_team->t.t_control_stack_top = top->next;
644 __kmp_free(top);
645 }
646
647 /* pop dispatch buffers stack */
648 KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
649 {
650 dispatch_private_info_t *disp_buffer =
651 serial_team->t.t_dispatch->th_disp_buffer;
652 serial_team->t.t_dispatch->th_disp_buffer =
653 serial_team->t.t_dispatch->th_disp_buffer->next;
654 __kmp_free(disp_buffer);
655 }
656 this_thr->th.th_def_allocator = serial_team->t.t_def_allocator; // restore
657
658 --serial_team->t.t_serialized;
659 if (serial_team->t.t_serialized == 0) {
660
661 /* return to the parallel section */
662
663#if KMP_ARCH_X86 || KMP_ARCH_X86_64
664 if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) {
665 __kmp_clear_x87_fpu_status_word();
666 __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word);
667 __kmp_load_mxcsr(&serial_team->t.t_mxcsr);
668 }
669#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
670
671 __kmp_pop_current_task_from_thread(this_thr);
672#if OMPD_SUPPORT
673 if (ompd_state & OMPD_ENABLE_BP)
674 ompd_bp_parallel_end();
675#endif
676
677 this_thr->th.th_team = serial_team->t.t_parent;
678 this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid;
679
680 /* restore values cached in the thread */
681 this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc; /* JPH */
682 this_thr->th.th_team_master =
683 serial_team->t.t_parent->t.t_threads[0]; /* JPH */
684 this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized;
685
686 /* TODO the below shouldn't need to be adjusted for serialized teams */
687 this_thr->th.th_dispatch =
688 &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid];
689
690 KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0);
691 this_thr->th.th_current_task->td_flags.executing = 1;
692
693 if (__kmp_tasking_mode != tskm_immediate_exec) {
694 // Copy the task team from the new child / old parent team to the thread.
695 this_thr->th.th_task_team =
696 this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
697 KA_TRACE(20,
698 ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / "
699 "team %p\n",
700 global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
701 }
702#if KMP_AFFINITY_SUPPORTED
703 if (this_thr->th.th_team->t.t_level == 0 && __kmp_affinity.flags.reset) {
704 __kmp_reset_root_init_mask(global_tid);
705 }
706#endif
707 } else {
708 if (__kmp_tasking_mode != tskm_immediate_exec) {
709 KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting "
710 "depth of serial team %p to %d\n",
711 global_tid, serial_team, serial_team->t.t_serialized));
712 }
713 }
714
715 serial_team->t.t_level--;
716 if (__kmp_env_consistency_check)
717 __kmp_pop_parallel(global_tid, NULL);
718#if OMPT_SUPPORT
719 if (ompt_enabled.enabled)
720 this_thr->th.ompt_thread_info.state =
721 ((this_thr->th.th_team_serialized) ? ompt_state_work_serial
722 : ompt_state_work_parallel);
723#endif
724}
725
735 KC_TRACE(10, ("__kmpc_flush: called\n"));
736
737 /* need explicit __mf() here since use volatile instead in library */
738 KMP_MFENCE(); /* Flush all pending memory write invalidates. */
739
740#if OMPT_SUPPORT && OMPT_OPTIONAL
741 if (ompt_enabled.ompt_callback_flush) {
742 ompt_callbacks.ompt_callback(ompt_callback_flush)(
743 __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0));
744 }
745#endif
746}
747
748/* -------------------------------------------------------------------------- */
756void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) {
757 KMP_COUNT_BLOCK(OMP_BARRIER);
758 KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid));
759 __kmp_assert_valid_gtid(global_tid);
760
761 if (!TCR_4(__kmp_init_parallel))
762 __kmp_parallel_initialize();
763
764 __kmp_resume_if_soft_paused();
765
766 if (__kmp_env_consistency_check) {
767 if (loc == 0) {
768 KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
769 }
770 __kmp_check_barrier(global_tid, ct_barrier, loc);
771 }
772
773#if OMPT_SUPPORT
774 ompt_frame_t *ompt_frame;
775 if (ompt_enabled.enabled) {
776 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
777 if (ompt_frame->enter_frame.ptr == NULL)
778 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
779 }
780 OMPT_STORE_RETURN_ADDRESS(global_tid);
781#endif
782 __kmp_threads[global_tid]->th.th_ident = loc;
783 // TODO: explicit barrier_wait_id:
784 // this function is called when 'barrier' directive is present or
785 // implicit barrier at the end of a worksharing construct.
786 // 1) better to add a per-thread barrier counter to a thread data structure
787 // 2) set to 0 when a new team is created
788 // 4) no sync is required
789
790 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
791#if OMPT_SUPPORT && OMPT_OPTIONAL
792 if (ompt_enabled.enabled) {
793 ompt_frame->enter_frame = ompt_data_none;
794 }
795#endif
796}
797
798/* The BARRIER for a MASTER section is always explicit */
805kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid) {
806 int status = 0;
807
808 KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid));
809 __kmp_assert_valid_gtid(global_tid);
810
811 if (!TCR_4(__kmp_init_parallel))
812 __kmp_parallel_initialize();
813
814 __kmp_resume_if_soft_paused();
815
816 if (KMP_MASTER_GTID(global_tid)) {
817 KMP_COUNT_BLOCK(OMP_MASTER);
818 KMP_PUSH_PARTITIONED_TIMER(OMP_master);
819 status = 1;
820 }
821
822#if OMPT_SUPPORT && OMPT_OPTIONAL
823 if (status) {
824 if (ompt_enabled.ompt_callback_masked) {
825 kmp_info_t *this_thr = __kmp_threads[global_tid];
826 kmp_team_t *team = this_thr->th.th_team;
827
828 int tid = __kmp_tid_from_gtid(global_tid);
829 ompt_callbacks.ompt_callback(ompt_callback_masked)(
830 ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
831 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
832 OMPT_GET_RETURN_ADDRESS(0));
833 }
834 }
835#endif
836
837 if (__kmp_env_consistency_check) {
838#if KMP_USE_DYNAMIC_LOCK
839 if (status)
840 __kmp_push_sync(global_tid, ct_master, loc, NULL, 0);
841 else
842 __kmp_check_sync(global_tid, ct_master, loc, NULL, 0);
843#else
844 if (status)
845 __kmp_push_sync(global_tid, ct_master, loc, NULL);
846 else
847 __kmp_check_sync(global_tid, ct_master, loc, NULL);
848#endif
849 }
850
851 return status;
852}
853
862void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid) {
863 KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid));
864 __kmp_assert_valid_gtid(global_tid);
865 KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid));
866 KMP_POP_PARTITIONED_TIMER();
867
868#if OMPT_SUPPORT && OMPT_OPTIONAL
869 kmp_info_t *this_thr = __kmp_threads[global_tid];
870 kmp_team_t *team = this_thr->th.th_team;
871 if (ompt_enabled.ompt_callback_masked) {
872 int tid = __kmp_tid_from_gtid(global_tid);
873 ompt_callbacks.ompt_callback(ompt_callback_masked)(
874 ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
875 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
876 OMPT_GET_RETURN_ADDRESS(0));
877 }
878#endif
879
880 if (__kmp_env_consistency_check) {
881 if (KMP_MASTER_GTID(global_tid))
882 __kmp_pop_sync(global_tid, ct_master, loc);
883 }
884}
885
894kmp_int32 __kmpc_masked(ident_t *loc, kmp_int32 global_tid, kmp_int32 filter) {
895 int status = 0;
896 int tid;
897 KC_TRACE(10, ("__kmpc_masked: called T#%d\n", global_tid));
898 __kmp_assert_valid_gtid(global_tid);
899
900 if (!TCR_4(__kmp_init_parallel))
901 __kmp_parallel_initialize();
902
903 __kmp_resume_if_soft_paused();
904
905 tid = __kmp_tid_from_gtid(global_tid);
906 if (tid == filter) {
907 KMP_COUNT_BLOCK(OMP_MASKED);
908 KMP_PUSH_PARTITIONED_TIMER(OMP_masked);
909 status = 1;
910 }
911
912#if OMPT_SUPPORT && OMPT_OPTIONAL
913 if (status) {
914 if (ompt_enabled.ompt_callback_masked) {
915 kmp_info_t *this_thr = __kmp_threads[global_tid];
916 kmp_team_t *team = this_thr->th.th_team;
917 ompt_callbacks.ompt_callback(ompt_callback_masked)(
918 ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
919 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
920 OMPT_GET_RETURN_ADDRESS(0));
921 }
922 }
923#endif
924
925 if (__kmp_env_consistency_check) {
926#if KMP_USE_DYNAMIC_LOCK
927 if (status)
928 __kmp_push_sync(global_tid, ct_masked, loc, NULL, 0);
929 else
930 __kmp_check_sync(global_tid, ct_masked, loc, NULL, 0);
931#else
932 if (status)
933 __kmp_push_sync(global_tid, ct_masked, loc, NULL);
934 else
935 __kmp_check_sync(global_tid, ct_masked, loc, NULL);
936#endif
937 }
938
939 return status;
940}
941
950void __kmpc_end_masked(ident_t *loc, kmp_int32 global_tid) {
951 KC_TRACE(10, ("__kmpc_end_masked: called T#%d\n", global_tid));
952 __kmp_assert_valid_gtid(global_tid);
953 KMP_POP_PARTITIONED_TIMER();
954
955#if OMPT_SUPPORT && OMPT_OPTIONAL
956 kmp_info_t *this_thr = __kmp_threads[global_tid];
957 kmp_team_t *team = this_thr->th.th_team;
958 if (ompt_enabled.ompt_callback_masked) {
959 int tid = __kmp_tid_from_gtid(global_tid);
960 ompt_callbacks.ompt_callback(ompt_callback_masked)(
961 ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
962 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
963 OMPT_GET_RETURN_ADDRESS(0));
964 }
965#endif
966
967 if (__kmp_env_consistency_check) {
968 __kmp_pop_sync(global_tid, ct_masked, loc);
969 }
970}
971
979void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) {
980 int cid = 0;
981 kmp_info_t *th;
982 KMP_DEBUG_ASSERT(__kmp_init_serial);
983
984 KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid));
985 __kmp_assert_valid_gtid(gtid);
986
987 if (!TCR_4(__kmp_init_parallel))
988 __kmp_parallel_initialize();
989
990 __kmp_resume_if_soft_paused();
991
992#if USE_ITT_BUILD
993 __kmp_itt_ordered_prep(gtid);
994// TODO: ordered_wait_id
995#endif /* USE_ITT_BUILD */
996
997 th = __kmp_threads[gtid];
998
999#if OMPT_SUPPORT && OMPT_OPTIONAL
1000 kmp_team_t *team;
1001 ompt_wait_id_t lck;
1002 void *codeptr_ra;
1003 OMPT_STORE_RETURN_ADDRESS(gtid);
1004 if (ompt_enabled.enabled) {
1005 team = __kmp_team_from_gtid(gtid);
1006 lck = (ompt_wait_id_t)(uintptr_t)&team->t.t_ordered.dt.t_value;
1007 /* OMPT state update */
1008 th->th.ompt_thread_info.wait_id = lck;
1009 th->th.ompt_thread_info.state = ompt_state_wait_ordered;
1010
1011 /* OMPT event callback */
1012 codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
1013 if (ompt_enabled.ompt_callback_mutex_acquire) {
1014 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1015 ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin, lck,
1016 codeptr_ra);
1017 }
1018 }
1019#endif
1020
1021 if (th->th.th_dispatch->th_deo_fcn != 0)
1022 (*th->th.th_dispatch->th_deo_fcn)(&gtid, &cid, loc);
1023 else
1024 __kmp_parallel_deo(&gtid, &cid, loc);
1025
1026#if OMPT_SUPPORT && OMPT_OPTIONAL
1027 if (ompt_enabled.enabled) {
1028 /* OMPT state update */
1029 th->th.ompt_thread_info.state = ompt_state_work_parallel;
1030 th->th.ompt_thread_info.wait_id = 0;
1031
1032 /* OMPT event callback */
1033 if (ompt_enabled.ompt_callback_mutex_acquired) {
1034 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1035 ompt_mutex_ordered, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
1036 }
1037 }
1038#endif
1039
1040#if USE_ITT_BUILD
1041 __kmp_itt_ordered_start(gtid);
1042#endif /* USE_ITT_BUILD */
1043}
1044
1052void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) {
1053 int cid = 0;
1054 kmp_info_t *th;
1055
1056 KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid));
1057 __kmp_assert_valid_gtid(gtid);
1058
1059#if USE_ITT_BUILD
1060 __kmp_itt_ordered_end(gtid);
1061// TODO: ordered_wait_id
1062#endif /* USE_ITT_BUILD */
1063
1064 th = __kmp_threads[gtid];
1065
1066 if (th->th.th_dispatch->th_dxo_fcn != 0)
1067 (*th->th.th_dispatch->th_dxo_fcn)(&gtid, &cid, loc);
1068 else
1069 __kmp_parallel_dxo(&gtid, &cid, loc);
1070
1071#if OMPT_SUPPORT && OMPT_OPTIONAL
1072 OMPT_STORE_RETURN_ADDRESS(gtid);
1073 if (ompt_enabled.ompt_callback_mutex_released) {
1074 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
1075 ompt_mutex_ordered,
1076 (ompt_wait_id_t)(uintptr_t)&__kmp_team_from_gtid(gtid)
1077 ->t.t_ordered.dt.t_value,
1078 OMPT_LOAD_RETURN_ADDRESS(gtid));
1079 }
1080#endif
1081}
1082
1083#if KMP_USE_DYNAMIC_LOCK
1084
1085static __forceinline void
1086__kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
1087 kmp_int32 gtid, kmp_indirect_locktag_t tag) {
1088 // Pointer to the allocated indirect lock is written to crit, while indexing
1089 // is ignored.
1090 void *idx;
1091 kmp_indirect_lock_t **lck;
1092 lck = (kmp_indirect_lock_t **)crit;
1093 kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
1094 KMP_I_LOCK_FUNC(ilk, init)(ilk->lock);
1095 KMP_SET_I_LOCK_LOCATION(ilk, loc);
1096 KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
1097 KA_TRACE(20,
1098 ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag));
1099#if USE_ITT_BUILD
1100 __kmp_itt_critical_creating(ilk->lock, loc);
1101#endif
1102 int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk);
1103 if (status == 0) {
1104#if USE_ITT_BUILD
1105 __kmp_itt_critical_destroyed(ilk->lock);
1106#endif
1107 // We don't really need to destroy the unclaimed lock here since it will be
1108 // cleaned up at program exit.
1109 // KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx);
1110 }
1111 KMP_DEBUG_ASSERT(*lck != NULL);
1112}
1113
1114// Fast-path acquire tas lock
1115#define KMP_ACQUIRE_TAS_LOCK(lock, gtid) \
1116 { \
1117 kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
1118 kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
1119 kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
1120 if (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
1121 !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) { \
1122 kmp_uint32 spins; \
1123 KMP_FSYNC_PREPARE(l); \
1124 KMP_INIT_YIELD(spins); \
1125 kmp_backoff_t backoff = __kmp_spin_backoff_params; \
1126 do { \
1127 if (TCR_4(__kmp_nth) > \
1128 (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
1129 KMP_YIELD(TRUE); \
1130 } else { \
1131 KMP_YIELD_SPIN(spins); \
1132 } \
1133 __kmp_spin_backoff(&backoff); \
1134 } while ( \
1135 KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
1136 !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)); \
1137 } \
1138 KMP_FSYNC_ACQUIRED(l); \
1139 }
1140
1141// Fast-path test tas lock
1142#define KMP_TEST_TAS_LOCK(lock, gtid, rc) \
1143 { \
1144 kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
1145 kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
1146 kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
1147 rc = KMP_ATOMIC_LD_RLX(&l->lk.poll) == tas_free && \
1148 __kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy); \
1149 }
1150
1151// Fast-path release tas lock
1152#define KMP_RELEASE_TAS_LOCK(lock, gtid) \
1153 { KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); }
1154
1155#if KMP_USE_FUTEX
1156
1157#include <sys/syscall.h>
1158#include <unistd.h>
1159#ifndef FUTEX_WAIT
1160#define FUTEX_WAIT 0
1161#endif
1162#ifndef FUTEX_WAKE
1163#define FUTEX_WAKE 1
1164#endif
1165
1166// Fast-path acquire futex lock
1167#define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid) \
1168 { \
1169 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1170 kmp_int32 gtid_code = (gtid + 1) << 1; \
1171 KMP_MB(); \
1172 KMP_FSYNC_PREPARE(ftx); \
1173 kmp_int32 poll_val; \
1174 while ((poll_val = KMP_COMPARE_AND_STORE_RET32( \
1175 &(ftx->lk.poll), KMP_LOCK_FREE(futex), \
1176 KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) { \
1177 kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1; \
1178 if (!cond) { \
1179 if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val, \
1180 poll_val | \
1181 KMP_LOCK_BUSY(1, futex))) { \
1182 continue; \
1183 } \
1184 poll_val |= KMP_LOCK_BUSY(1, futex); \
1185 } \
1186 kmp_int32 rc; \
1187 if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val, \
1188 NULL, NULL, 0)) != 0) { \
1189 continue; \
1190 } \
1191 gtid_code |= 1; \
1192 } \
1193 KMP_FSYNC_ACQUIRED(ftx); \
1194 }
1195
1196// Fast-path test futex lock
1197#define KMP_TEST_FUTEX_LOCK(lock, gtid, rc) \
1198 { \
1199 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1200 if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex), \
1201 KMP_LOCK_BUSY(gtid + 1 << 1, futex))) { \
1202 KMP_FSYNC_ACQUIRED(ftx); \
1203 rc = TRUE; \
1204 } else { \
1205 rc = FALSE; \
1206 } \
1207 }
1208
1209// Fast-path release futex lock
1210#define KMP_RELEASE_FUTEX_LOCK(lock, gtid) \
1211 { \
1212 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1213 KMP_MB(); \
1214 KMP_FSYNC_RELEASING(ftx); \
1215 kmp_int32 poll_val = \
1216 KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex)); \
1217 if (KMP_LOCK_STRIP(poll_val) & 1) { \
1218 syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE, \
1219 KMP_LOCK_BUSY(1, futex), NULL, NULL, 0); \
1220 } \
1221 KMP_MB(); \
1222 KMP_YIELD_OVERSUB(); \
1223 }
1224
1225#endif // KMP_USE_FUTEX
1226
1227#else // KMP_USE_DYNAMIC_LOCK
1228
1229static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit,
1230 ident_t const *loc,
1231 kmp_int32 gtid) {
1232 kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;
1233
1234 // Because of the double-check, the following load doesn't need to be volatile
1235 kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
1236
1237 if (lck == NULL) {
1238 void *idx;
1239
1240 // Allocate & initialize the lock.
1241 // Remember alloc'ed locks in table in order to free them in __kmp_cleanup()
1242 lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section);
1243 __kmp_init_user_lock_with_checks(lck);
1244 __kmp_set_user_lock_location(lck, loc);
1245#if USE_ITT_BUILD
1246 __kmp_itt_critical_creating(lck);
1247// __kmp_itt_critical_creating() should be called *before* the first usage
1248// of underlying lock. It is the only place where we can guarantee it. There
1249// are chances the lock will destroyed with no usage, but it is not a
1250// problem, because this is not real event seen by user but rather setting
1251// name for object (lock). See more details in kmp_itt.h.
1252#endif /* USE_ITT_BUILD */
1253
1254 // Use a cmpxchg instruction to slam the start of the critical section with
1255 // the lock pointer. If another thread beat us to it, deallocate the lock,
1256 // and use the lock that the other thread allocated.
1257 int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck);
1258
1259 if (status == 0) {
1260// Deallocate the lock and reload the value.
1261#if USE_ITT_BUILD
1262 __kmp_itt_critical_destroyed(lck);
1263// Let ITT know the lock is destroyed and the same memory location may be reused
1264// for another purpose.
1265#endif /* USE_ITT_BUILD */
1266 __kmp_destroy_user_lock_with_checks(lck);
1267 __kmp_user_lock_free(&idx, gtid, lck);
1268 lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
1269 KMP_DEBUG_ASSERT(lck != NULL);
1270 }
1271 }
1272 return lck;
1273}
1274
1275#endif // KMP_USE_DYNAMIC_LOCK
1276
1287void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
1288 kmp_critical_name *crit) {
1289#if KMP_USE_DYNAMIC_LOCK
1290#if OMPT_SUPPORT && OMPT_OPTIONAL
1291 OMPT_STORE_RETURN_ADDRESS(global_tid);
1292#endif // OMPT_SUPPORT
1293 __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);
1294#else
1295 KMP_COUNT_BLOCK(OMP_CRITICAL);
1296#if OMPT_SUPPORT && OMPT_OPTIONAL
1297 ompt_state_t prev_state = ompt_state_undefined;
1298 ompt_thread_info_t ti;
1299#endif
1300 kmp_user_lock_p lck;
1301
1302 KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
1303 __kmp_assert_valid_gtid(global_tid);
1304
1305 // TODO: add THR_OVHD_STATE
1306
1307 KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
1308 KMP_CHECK_USER_LOCK_INIT();
1309
1310 if ((__kmp_user_lock_kind == lk_tas) &&
1311 (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
1312 lck = (kmp_user_lock_p)crit;
1313 }
1314#if KMP_USE_FUTEX
1315 else if ((__kmp_user_lock_kind == lk_futex) &&
1316 (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
1317 lck = (kmp_user_lock_p)crit;
1318 }
1319#endif
1320 else { // ticket, queuing or drdpa
1321 lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
1322 }
1323
1324 if (__kmp_env_consistency_check)
1325 __kmp_push_sync(global_tid, ct_critical, loc, lck);
1326
1327 // since the critical directive binds to all threads, not just the current
1328 // team we have to check this even if we are in a serialized team.
1329 // also, even if we are the uber thread, we still have to conduct the lock,
1330 // as we have to contend with sibling threads.
1331
1332#if USE_ITT_BUILD
1333 __kmp_itt_critical_acquiring(lck);
1334#endif /* USE_ITT_BUILD */
1335#if OMPT_SUPPORT && OMPT_OPTIONAL
1336 OMPT_STORE_RETURN_ADDRESS(gtid);
1337 void *codeptr_ra = NULL;
1338 if (ompt_enabled.enabled) {
1339 ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1340 /* OMPT state update */
1341 prev_state = ti.state;
1342 ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1343 ti.state = ompt_state_wait_critical;
1344
1345 /* OMPT event callback */
1346 codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
1347 if (ompt_enabled.ompt_callback_mutex_acquire) {
1348 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1349 ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
1350 (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
1351 }
1352 }
1353#endif
1354 // Value of 'crit' should be good for using as a critical_id of the critical
1355 // section directive.
1356 __kmp_acquire_user_lock_with_checks(lck, global_tid);
1357
1358#if USE_ITT_BUILD
1359 __kmp_itt_critical_acquired(lck);
1360#endif /* USE_ITT_BUILD */
1361#if OMPT_SUPPORT && OMPT_OPTIONAL
1362 if (ompt_enabled.enabled) {
1363 /* OMPT state update */
1364 ti.state = prev_state;
1365 ti.wait_id = 0;
1366
1367 /* OMPT event callback */
1368 if (ompt_enabled.ompt_callback_mutex_acquired) {
1369 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1370 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
1371 }
1372 }
1373#endif
1374 KMP_POP_PARTITIONED_TIMER();
1375
1376 KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
1377 KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
1378#endif // KMP_USE_DYNAMIC_LOCK
1379}
1380
1381#if KMP_USE_DYNAMIC_LOCK
1382
1383// Converts the given hint to an internal lock implementation
1384static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) {
1385#if KMP_USE_TSX
1386#define KMP_TSX_LOCK(seq) lockseq_##seq
1387#else
1388#define KMP_TSX_LOCK(seq) __kmp_user_lock_seq
1389#endif
1390
1391#if KMP_ARCH_X86 || KMP_ARCH_X86_64
1392#define KMP_CPUINFO_RTM (__kmp_cpuinfo.flags.rtm)
1393#else
1394#define KMP_CPUINFO_RTM 0
1395#endif
1396
1397 // Hints that do not require further logic
1398 if (hint & kmp_lock_hint_hle)
1399 return KMP_TSX_LOCK(hle);
1400 if (hint & kmp_lock_hint_rtm)
1401 return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm_queuing) : __kmp_user_lock_seq;
1402 if (hint & kmp_lock_hint_adaptive)
1403 return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq;
1404
1405 // Rule out conflicting hints first by returning the default lock
1406 if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended))
1407 return __kmp_user_lock_seq;
1408 if ((hint & omp_lock_hint_speculative) &&
1409 (hint & omp_lock_hint_nonspeculative))
1410 return __kmp_user_lock_seq;
1411
1412 // Do not even consider speculation when it appears to be contended
1413 if (hint & omp_lock_hint_contended)
1414 return lockseq_queuing;
1415
1416 // Uncontended lock without speculation
1417 if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative))
1418 return lockseq_tas;
1419
1420 // Use RTM lock for speculation
1421 if (hint & omp_lock_hint_speculative)
1422 return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm_spin) : __kmp_user_lock_seq;
1423
1424 return __kmp_user_lock_seq;
1425}
1426
1427#if OMPT_SUPPORT && OMPT_OPTIONAL
1428#if KMP_USE_DYNAMIC_LOCK
1429static kmp_mutex_impl_t
1430__ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) {
1431 if (user_lock) {
1432 switch (KMP_EXTRACT_D_TAG(user_lock)) {
1433 case 0:
1434 break;
1435#if KMP_USE_FUTEX
1436 case locktag_futex:
1437 return kmp_mutex_impl_queuing;
1438#endif
1439 case locktag_tas:
1440 return kmp_mutex_impl_spin;
1441#if KMP_USE_TSX
1442 case locktag_hle:
1443 case locktag_rtm_spin:
1444 return kmp_mutex_impl_speculative;
1445#endif
1446 default:
1447 return kmp_mutex_impl_none;
1448 }
1449 ilock = KMP_LOOKUP_I_LOCK(user_lock);
1450 }
1451 KMP_ASSERT(ilock);
1452 switch (ilock->type) {
1453#if KMP_USE_TSX
1454 case locktag_adaptive:
1455 case locktag_rtm_queuing:
1456 return kmp_mutex_impl_speculative;
1457#endif
1458 case locktag_nested_tas:
1459 return kmp_mutex_impl_spin;
1460#if KMP_USE_FUTEX
1461 case locktag_nested_futex:
1462#endif
1463 case locktag_ticket:
1464 case locktag_queuing:
1465 case locktag_drdpa:
1466 case locktag_nested_ticket:
1467 case locktag_nested_queuing:
1468 case locktag_nested_drdpa:
1469 return kmp_mutex_impl_queuing;
1470 default:
1471 return kmp_mutex_impl_none;
1472 }
1473}
1474#else
1475// For locks without dynamic binding
1476static kmp_mutex_impl_t __ompt_get_mutex_impl_type() {
1477 switch (__kmp_user_lock_kind) {
1478 case lk_tas:
1479 return kmp_mutex_impl_spin;
1480#if KMP_USE_FUTEX
1481 case lk_futex:
1482#endif
1483 case lk_ticket:
1484 case lk_queuing:
1485 case lk_drdpa:
1486 return kmp_mutex_impl_queuing;
1487#if KMP_USE_TSX
1488 case lk_hle:
1489 case lk_rtm_queuing:
1490 case lk_rtm_spin:
1491 case lk_adaptive:
1492 return kmp_mutex_impl_speculative;
1493#endif
1494 default:
1495 return kmp_mutex_impl_none;
1496 }
1497}
1498#endif // KMP_USE_DYNAMIC_LOCK
1499#endif // OMPT_SUPPORT && OMPT_OPTIONAL
1500
1514void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
1515 kmp_critical_name *crit, uint32_t hint) {
1516 KMP_COUNT_BLOCK(OMP_CRITICAL);
1517 kmp_user_lock_p lck;
1518#if OMPT_SUPPORT && OMPT_OPTIONAL
1519 ompt_state_t prev_state = ompt_state_undefined;
1520 ompt_thread_info_t ti;
1521 // This is the case, if called from __kmpc_critical:
1522 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
1523 if (!codeptr)
1524 codeptr = OMPT_GET_RETURN_ADDRESS(0);
1525#endif
1526
1527 KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
1528 __kmp_assert_valid_gtid(global_tid);
1529
1530 kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
1531 // Check if it is initialized.
1532 KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
1533 kmp_dyna_lockseq_t lockseq = __kmp_map_hint_to_lock(hint);
1534 if (*lk == 0) {
1535 if (KMP_IS_D_LOCK(lockseq)) {
1536 KMP_COMPARE_AND_STORE_ACQ32(
1537 (volatile kmp_int32 *)&((kmp_base_tas_lock_t *)crit)->poll, 0,
1538 KMP_GET_D_TAG(lockseq));
1539 } else {
1540 __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lockseq));
1541 }
1542 }
1543 // Branch for accessing the actual lock object and set operation. This
1544 // branching is inevitable since this lock initialization does not follow the
1545 // normal dispatch path (lock table is not used).
1546 if (KMP_EXTRACT_D_TAG(lk) != 0) {
1547 lck = (kmp_user_lock_p)lk;
1548 if (__kmp_env_consistency_check) {
1549 __kmp_push_sync(global_tid, ct_critical, loc, lck,
1550 __kmp_map_hint_to_lock(hint));
1551 }
1552#if USE_ITT_BUILD
1553 __kmp_itt_critical_acquiring(lck);
1554#endif
1555#if OMPT_SUPPORT && OMPT_OPTIONAL
1556 if (ompt_enabled.enabled) {
1557 ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1558 /* OMPT state update */
1559 prev_state = ti.state;
1560 ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1561 ti.state = ompt_state_wait_critical;
1562
1563 /* OMPT event callback */
1564 if (ompt_enabled.ompt_callback_mutex_acquire) {
1565 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1566 ompt_mutex_critical, (unsigned int)hint,
1567 __ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)(uintptr_t)lck,
1568 codeptr);
1569 }
1570 }
1571#endif
1572#if KMP_USE_INLINED_TAS
1573 if (lockseq == lockseq_tas && !__kmp_env_consistency_check) {
1574 KMP_ACQUIRE_TAS_LOCK(lck, global_tid);
1575 } else
1576#elif KMP_USE_INLINED_FUTEX
1577 if (lockseq == lockseq_futex && !__kmp_env_consistency_check) {
1578 KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid);
1579 } else
1580#endif
1581 {
1582 KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
1583 }
1584 } else {
1585 kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
1586 lck = ilk->lock;
1587 if (__kmp_env_consistency_check) {
1588 __kmp_push_sync(global_tid, ct_critical, loc, lck,
1589 __kmp_map_hint_to_lock(hint));
1590 }
1591#if USE_ITT_BUILD
1592 __kmp_itt_critical_acquiring(lck);
1593#endif
1594#if OMPT_SUPPORT && OMPT_OPTIONAL
1595 if (ompt_enabled.enabled) {
1596 ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1597 /* OMPT state update */
1598 prev_state = ti.state;
1599 ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1600 ti.state = ompt_state_wait_critical;
1601
1602 /* OMPT event callback */
1603 if (ompt_enabled.ompt_callback_mutex_acquire) {
1604 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1605 ompt_mutex_critical, (unsigned int)hint,
1606 __ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)(uintptr_t)lck,
1607 codeptr);
1608 }
1609 }
1610#endif
1611 KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
1612 }
1613 KMP_POP_PARTITIONED_TIMER();
1614
1615#if USE_ITT_BUILD
1616 __kmp_itt_critical_acquired(lck);
1617#endif /* USE_ITT_BUILD */
1618#if OMPT_SUPPORT && OMPT_OPTIONAL
1619 if (ompt_enabled.enabled) {
1620 /* OMPT state update */
1621 ti.state = prev_state;
1622 ti.wait_id = 0;
1623
1624 /* OMPT event callback */
1625 if (ompt_enabled.ompt_callback_mutex_acquired) {
1626 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1627 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
1628 }
1629 }
1630#endif
1631
1632 KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
1633 KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
1634} // __kmpc_critical_with_hint
1635
1636#endif // KMP_USE_DYNAMIC_LOCK
1637
1647void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
1648 kmp_critical_name *crit) {
1649 kmp_user_lock_p lck;
1650
1651 KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid));
1652
1653#if KMP_USE_DYNAMIC_LOCK
1654 int locktag = KMP_EXTRACT_D_TAG(crit);
1655 if (locktag) {
1656 lck = (kmp_user_lock_p)crit;
1657 KMP_ASSERT(lck != NULL);
1658 if (__kmp_env_consistency_check) {
1659 __kmp_pop_sync(global_tid, ct_critical, loc);
1660 }
1661#if USE_ITT_BUILD
1662 __kmp_itt_critical_releasing(lck);
1663#endif
1664#if KMP_USE_INLINED_TAS
1665 if (locktag == locktag_tas && !__kmp_env_consistency_check) {
1666 KMP_RELEASE_TAS_LOCK(lck, global_tid);
1667 } else
1668#elif KMP_USE_INLINED_FUTEX
1669 if (locktag == locktag_futex && !__kmp_env_consistency_check) {
1670 KMP_RELEASE_FUTEX_LOCK(lck, global_tid);
1671 } else
1672#endif
1673 {
1674 KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
1675 }
1676 } else {
1677 kmp_indirect_lock_t *ilk =
1678 (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
1679 KMP_ASSERT(ilk != NULL);
1680 lck = ilk->lock;
1681 if (__kmp_env_consistency_check) {
1682 __kmp_pop_sync(global_tid, ct_critical, loc);
1683 }
1684#if USE_ITT_BUILD
1685 __kmp_itt_critical_releasing(lck);
1686#endif
1687 KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid);
1688 }
1689
1690#else // KMP_USE_DYNAMIC_LOCK
1691
1692 if ((__kmp_user_lock_kind == lk_tas) &&
1693 (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
1694 lck = (kmp_user_lock_p)crit;
1695 }
1696#if KMP_USE_FUTEX
1697 else if ((__kmp_user_lock_kind == lk_futex) &&
1698 (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
1699 lck = (kmp_user_lock_p)crit;
1700 }
1701#endif
1702 else { // ticket, queuing or drdpa
1703 lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit));
1704 }
1705
1706 KMP_ASSERT(lck != NULL);
1707
1708 if (__kmp_env_consistency_check)
1709 __kmp_pop_sync(global_tid, ct_critical, loc);
1710
1711#if USE_ITT_BUILD
1712 __kmp_itt_critical_releasing(lck);
1713#endif /* USE_ITT_BUILD */
1714 // Value of 'crit' should be good for using as a critical_id of the critical
1715 // section directive.
1716 __kmp_release_user_lock_with_checks(lck, global_tid);
1717
1718#endif // KMP_USE_DYNAMIC_LOCK
1719
1720#if OMPT_SUPPORT && OMPT_OPTIONAL
1721 /* OMPT release event triggers after lock is released; place here to trigger
1722 * for all #if branches */
1723 OMPT_STORE_RETURN_ADDRESS(global_tid);
1724 if (ompt_enabled.ompt_callback_mutex_released) {
1725 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
1726 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck,
1727 OMPT_LOAD_RETURN_ADDRESS(0));
1728 }
1729#endif
1730
1731 KMP_POP_PARTITIONED_TIMER();
1732 KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid));
1733}
1734
1744kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) {
1745 int status;
1746 KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid));
1747 __kmp_assert_valid_gtid(global_tid);
1748
1749 if (!TCR_4(__kmp_init_parallel))
1750 __kmp_parallel_initialize();
1751
1752 __kmp_resume_if_soft_paused();
1753
1754 if (__kmp_env_consistency_check)
1755 __kmp_check_barrier(global_tid, ct_barrier, loc);
1756
1757#if OMPT_SUPPORT
1758 ompt_frame_t *ompt_frame;
1759 if (ompt_enabled.enabled) {
1760 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
1761 if (ompt_frame->enter_frame.ptr == NULL)
1762 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1763 }
1764 OMPT_STORE_RETURN_ADDRESS(global_tid);
1765#endif
1766#if USE_ITT_NOTIFY
1767 __kmp_threads[global_tid]->th.th_ident = loc;
1768#endif
1769 status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL);
1770#if OMPT_SUPPORT && OMPT_OPTIONAL
1771 if (ompt_enabled.enabled) {
1772 ompt_frame->enter_frame = ompt_data_none;
1773 }
1774#endif
1775
1776 return (status != 0) ? 0 : 1;
1777}
1778
1788void __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid) {
1789 KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid));
1790 __kmp_assert_valid_gtid(global_tid);
1791 __kmp_end_split_barrier(bs_plain_barrier, global_tid);
1792}
1793
1804kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) {
1805 kmp_int32 ret;
1806 KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid));
1807 __kmp_assert_valid_gtid(global_tid);
1808
1809 if (!TCR_4(__kmp_init_parallel))
1810 __kmp_parallel_initialize();
1811
1812 __kmp_resume_if_soft_paused();
1813
1814 if (__kmp_env_consistency_check) {
1815 if (loc == 0) {
1816 KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
1817 }
1818 __kmp_check_barrier(global_tid, ct_barrier, loc);
1819 }
1820
1821#if OMPT_SUPPORT
1822 ompt_frame_t *ompt_frame;
1823 if (ompt_enabled.enabled) {
1824 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
1825 if (ompt_frame->enter_frame.ptr == NULL)
1826 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1827 }
1828 OMPT_STORE_RETURN_ADDRESS(global_tid);
1829#endif
1830#if USE_ITT_NOTIFY
1831 __kmp_threads[global_tid]->th.th_ident = loc;
1832#endif
1833 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
1834#if OMPT_SUPPORT && OMPT_OPTIONAL
1835 if (ompt_enabled.enabled) {
1836 ompt_frame->enter_frame = ompt_data_none;
1837 }
1838#endif
1839
1840 ret = __kmpc_master(loc, global_tid);
1841
1842 if (__kmp_env_consistency_check) {
1843 /* there's no __kmpc_end_master called; so the (stats) */
1844 /* actions of __kmpc_end_master are done here */
1845 if (ret) {
1846 /* only one thread should do the pop since only */
1847 /* one did the push (see __kmpc_master()) */
1848 __kmp_pop_sync(global_tid, ct_master, loc);
1849 }
1850 }
1851
1852 return (ret);
1853}
1854
1855/* The BARRIER for a SINGLE process section is always explicit */
1867kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid) {
1868 __kmp_assert_valid_gtid(global_tid);
1869 kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE);
1870
1871 if (rc) {
1872 // We are going to execute the single statement, so we should count it.
1873 KMP_COUNT_BLOCK(OMP_SINGLE);
1874 KMP_PUSH_PARTITIONED_TIMER(OMP_single);
1875 }
1876
1877#if OMPT_SUPPORT && OMPT_OPTIONAL
1878 kmp_info_t *this_thr = __kmp_threads[global_tid];
1879 kmp_team_t *team = this_thr->th.th_team;
1880 int tid = __kmp_tid_from_gtid(global_tid);
1881
1882 if (ompt_enabled.enabled) {
1883 if (rc) {
1884 if (ompt_enabled.ompt_callback_work) {
1885 ompt_callbacks.ompt_callback(ompt_callback_work)(
1886 ompt_work_single_executor, ompt_scope_begin,
1887 &(team->t.ompt_team_info.parallel_data),
1888 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1889 1, OMPT_GET_RETURN_ADDRESS(0));
1890 }
1891 } else {
1892 if (ompt_enabled.ompt_callback_work) {
1893 ompt_callbacks.ompt_callback(ompt_callback_work)(
1894 ompt_work_single_other, ompt_scope_begin,
1895 &(team->t.ompt_team_info.parallel_data),
1896 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1897 1, OMPT_GET_RETURN_ADDRESS(0));
1898 ompt_callbacks.ompt_callback(ompt_callback_work)(
1899 ompt_work_single_other, ompt_scope_end,
1900 &(team->t.ompt_team_info.parallel_data),
1901 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1902 1, OMPT_GET_RETURN_ADDRESS(0));
1903 }
1904 }
1905 }
1906#endif
1907
1908 return rc;
1909}
1910
1920void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid) {
1921 __kmp_assert_valid_gtid(global_tid);
1922 __kmp_exit_single(global_tid);
1923 KMP_POP_PARTITIONED_TIMER();
1924
1925#if OMPT_SUPPORT && OMPT_OPTIONAL
1926 kmp_info_t *this_thr = __kmp_threads[global_tid];
1927 kmp_team_t *team = this_thr->th.th_team;
1928 int tid = __kmp_tid_from_gtid(global_tid);
1929
1930 if (ompt_enabled.ompt_callback_work) {
1931 ompt_callbacks.ompt_callback(ompt_callback_work)(
1932 ompt_work_single_executor, ompt_scope_end,
1933 &(team->t.ompt_team_info.parallel_data),
1934 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
1935 OMPT_GET_RETURN_ADDRESS(0));
1936 }
1937#endif
1938}
1939
1947void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid) {
1948 KMP_POP_PARTITIONED_TIMER();
1949 KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid));
1950
1951#if OMPT_SUPPORT && OMPT_OPTIONAL
1952 if (ompt_enabled.ompt_callback_work) {
1953 ompt_work_t ompt_work_type = ompt_work_loop;
1954 ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
1955 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
1956 // Determine workshare type
1957 if (loc != NULL) {
1958 if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
1959 ompt_work_type = ompt_work_loop;
1960 } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
1961 ompt_work_type = ompt_work_sections;
1962 } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
1963 ompt_work_type = ompt_work_distribute;
1964 } else {
1965 // use default set above.
1966 // a warning about this case is provided in __kmpc_for_static_init
1967 }
1968 KMP_DEBUG_ASSERT(ompt_work_type);
1969 }
1970 ompt_callbacks.ompt_callback(ompt_callback_work)(
1971 ompt_work_type, ompt_scope_end, &(team_info->parallel_data),
1972 &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0));
1973 }
1974#endif
1975 if (__kmp_env_consistency_check)
1976 __kmp_pop_workshare(global_tid, ct_pdo, loc);
1977}
1978
1979// User routines which take C-style arguments (call by value)
1980// different from the Fortran equivalent routines
1981
1982void ompc_set_num_threads(int arg) {
1983 // !!!!! TODO: check the per-task binding
1984 __kmp_set_num_threads(arg, __kmp_entry_gtid());
1985}
1986
1987void ompc_set_dynamic(int flag) {
1988 kmp_info_t *thread;
1989
1990 /* For the thread-private implementation of the internal controls */
1991 thread = __kmp_entry_thread();
1992
1993 __kmp_save_internal_controls(thread);
1994
1995 set__dynamic(thread, flag ? true : false);
1996}
1997
1998void ompc_set_nested(int flag) {
1999 kmp_info_t *thread;
2000
2001 /* For the thread-private internal controls implementation */
2002 thread = __kmp_entry_thread();
2003
2004 __kmp_save_internal_controls(thread);
2005
2006 set__max_active_levels(thread, flag ? __kmp_dflt_max_active_levels : 1);
2007}
2008
2009void ompc_set_max_active_levels(int max_active_levels) {
2010 /* TO DO */
2011 /* we want per-task implementation of this internal control */
2012
2013 /* For the per-thread internal controls implementation */
2014 __kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels);
2015}
2016
2017void ompc_set_schedule(omp_sched_t kind, int modifier) {
2018 // !!!!! TODO: check the per-task binding
2019 __kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier);
2020}
2021
2022int ompc_get_ancestor_thread_num(int level) {
2023 return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level);
2024}
2025
2026int ompc_get_team_size(int level) {
2027 return __kmp_get_team_size(__kmp_entry_gtid(), level);
2028}
2029
2030/* OpenMP 5.0 Affinity Format API */
2031void KMP_EXPAND_NAME(ompc_set_affinity_format)(char const *format) {
2032 if (!__kmp_init_serial) {
2033 __kmp_serial_initialize();
2034 }
2035 __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE,
2036 format, KMP_STRLEN(format) + 1);
2037}
2038
2039size_t KMP_EXPAND_NAME(ompc_get_affinity_format)(char *buffer, size_t size) {
2040 size_t format_size;
2041 if (!__kmp_init_serial) {
2042 __kmp_serial_initialize();
2043 }
2044 format_size = KMP_STRLEN(__kmp_affinity_format);
2045 if (buffer && size) {
2046 __kmp_strncpy_truncate(buffer, size, __kmp_affinity_format,
2047 format_size + 1);
2048 }
2049 return format_size;
2050}
2051
2052void KMP_EXPAND_NAME(ompc_display_affinity)(char const *format) {
2053 int gtid;
2054 if (!TCR_4(__kmp_init_middle)) {
2055 __kmp_middle_initialize();
2056 }
2057 __kmp_assign_root_init_mask();
2058 gtid = __kmp_get_gtid();
2059#if KMP_AFFINITY_SUPPORTED
2060 if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 &&
2061 __kmp_affinity.flags.reset) {
2062 __kmp_reset_root_init_mask(gtid);
2063 }
2064#endif
2065 __kmp_aux_display_affinity(gtid, format);
2066}
2067
2068size_t KMP_EXPAND_NAME(ompc_capture_affinity)(char *buffer, size_t buf_size,
2069 char const *format) {
2070 int gtid;
2071 size_t num_required;
2072 kmp_str_buf_t capture_buf;
2073 if (!TCR_4(__kmp_init_middle)) {
2074 __kmp_middle_initialize();
2075 }
2076 __kmp_assign_root_init_mask();
2077 gtid = __kmp_get_gtid();
2078#if KMP_AFFINITY_SUPPORTED
2079 if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 &&
2080 __kmp_affinity.flags.reset) {
2081 __kmp_reset_root_init_mask(gtid);
2082 }
2083#endif
2084 __kmp_str_buf_init(&capture_buf);
2085 num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf);
2086 if (buffer && buf_size) {
2087 __kmp_strncpy_truncate(buffer, buf_size, capture_buf.str,
2088 capture_buf.used + 1);
2089 }
2090 __kmp_str_buf_free(&capture_buf);
2091 return num_required;
2092}
2093
2094void kmpc_set_stacksize(int arg) {
2095 // __kmp_aux_set_stacksize initializes the library if needed
2096 __kmp_aux_set_stacksize(arg);
2097}
2098
2099void kmpc_set_stacksize_s(size_t arg) {
2100 // __kmp_aux_set_stacksize initializes the library if needed
2101 __kmp_aux_set_stacksize(arg);
2102}
2103
2104void kmpc_set_blocktime(int arg) {
2105 int gtid, tid, bt = arg;
2106 kmp_info_t *thread;
2107
2108 gtid = __kmp_entry_gtid();
2109 tid = __kmp_tid_from_gtid(gtid);
2110 thread = __kmp_thread_from_gtid(gtid);
2111
2112 __kmp_aux_convert_blocktime(&bt);
2113 __kmp_aux_set_blocktime(bt, thread, tid);
2114}
2115
2116void kmpc_set_library(int arg) {
2117 // __kmp_user_set_library initializes the library if needed
2118 __kmp_user_set_library((enum library_type)arg);
2119}
2120
2121void kmpc_set_defaults(char const *str) {
2122 // __kmp_aux_set_defaults initializes the library if needed
2123 __kmp_aux_set_defaults(str, KMP_STRLEN(str));
2124}
2125
2126void kmpc_set_disp_num_buffers(int arg) {
2127 // ignore after initialization because some teams have already
2128 // allocated dispatch buffers
2129 if (__kmp_init_serial == FALSE && arg >= KMP_MIN_DISP_NUM_BUFF &&
2130 arg <= KMP_MAX_DISP_NUM_BUFF) {
2131 __kmp_dispatch_num_buffers = arg;
2132 }
2133}
2134
2135int kmpc_set_affinity_mask_proc(int proc, void **mask) {
2136#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
2137 return -1;
2138#else
2139 if (!TCR_4(__kmp_init_middle)) {
2140 __kmp_middle_initialize();
2141 }
2142 __kmp_assign_root_init_mask();
2143 return __kmp_aux_set_affinity_mask_proc(proc, mask);
2144#endif
2145}
2146
2147int kmpc_unset_affinity_mask_proc(int proc, void **mask) {
2148#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
2149 return -1;
2150#else
2151 if (!TCR_4(__kmp_init_middle)) {
2152 __kmp_middle_initialize();
2153 }
2154 __kmp_assign_root_init_mask();
2155 return __kmp_aux_unset_affinity_mask_proc(proc, mask);
2156#endif
2157}
2158
2159int kmpc_get_affinity_mask_proc(int proc, void **mask) {
2160#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
2161 return -1;
2162#else
2163 if (!TCR_4(__kmp_init_middle)) {
2164 __kmp_middle_initialize();
2165 }
2166 __kmp_assign_root_init_mask();
2167 return __kmp_aux_get_affinity_mask_proc(proc, mask);
2168#endif
2169}
2170
2171/* -------------------------------------------------------------------------- */
2216void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size,
2217 void *cpy_data, void (*cpy_func)(void *, void *),
2218 kmp_int32 didit) {
2219 void **data_ptr;
2220 KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid));
2221 __kmp_assert_valid_gtid(gtid);
2222
2223 KMP_MB();
2224
2225 data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;
2226
2227 if (__kmp_env_consistency_check) {
2228 if (loc == 0) {
2229 KMP_WARNING(ConstructIdentInvalid);
2230 }
2231 }
2232
2233 // ToDo: Optimize the following two barriers into some kind of split barrier
2234
2235 if (didit)
2236 *data_ptr = cpy_data;
2237
2238#if OMPT_SUPPORT
2239 ompt_frame_t *ompt_frame;
2240 if (ompt_enabled.enabled) {
2241 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
2242 if (ompt_frame->enter_frame.ptr == NULL)
2243 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
2244 }
2245 OMPT_STORE_RETURN_ADDRESS(gtid);
2246#endif
2247/* This barrier is not a barrier region boundary */
2248#if USE_ITT_NOTIFY
2249 __kmp_threads[gtid]->th.th_ident = loc;
2250#endif
2251 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2252
2253 if (!didit)
2254 (*cpy_func)(cpy_data, *data_ptr);
2255
2256 // Consider next barrier a user-visible barrier for barrier region boundaries
2257 // Nesting checks are already handled by the single construct checks
2258 {
2259#if OMPT_SUPPORT
2260 OMPT_STORE_RETURN_ADDRESS(gtid);
2261#endif
2262#if USE_ITT_NOTIFY
2263 __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g.
2264// tasks can overwrite the location)
2265#endif
2266 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2267#if OMPT_SUPPORT && OMPT_OPTIONAL
2268 if (ompt_enabled.enabled) {
2269 ompt_frame->enter_frame = ompt_data_none;
2270 }
2271#endif
2272 }
2273}
2274
2275/* --------------------------------------------------------------------------*/
2292void *__kmpc_copyprivate_light(ident_t *loc, kmp_int32 gtid, void *cpy_data) {
2293 void **data_ptr;
2294
2295 KC_TRACE(10, ("__kmpc_copyprivate_light: called T#%d\n", gtid));
2296
2297 KMP_MB();
2298
2299 data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;
2300
2301 if (__kmp_env_consistency_check) {
2302 if (loc == 0) {
2303 KMP_WARNING(ConstructIdentInvalid);
2304 }
2305 }
2306
2307 // ToDo: Optimize the following barrier
2308
2309 if (cpy_data)
2310 *data_ptr = cpy_data;
2311
2312#if OMPT_SUPPORT
2313 ompt_frame_t *ompt_frame;
2314 if (ompt_enabled.enabled) {
2315 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
2316 if (ompt_frame->enter_frame.ptr == NULL)
2317 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
2318 OMPT_STORE_RETURN_ADDRESS(gtid);
2319 }
2320#endif
2321/* This barrier is not a barrier region boundary */
2322#if USE_ITT_NOTIFY
2323 __kmp_threads[gtid]->th.th_ident = loc;
2324#endif
2325 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2326
2327 return *data_ptr;
2328}
2329
2330/* -------------------------------------------------------------------------- */
2331
2332#define INIT_LOCK __kmp_init_user_lock_with_checks
2333#define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks
2334#define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks
2335#define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed
2336#define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks
2337#define ACQUIRE_NESTED_LOCK_TIMED \
2338 __kmp_acquire_nested_user_lock_with_checks_timed
2339#define RELEASE_LOCK __kmp_release_user_lock_with_checks
2340#define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks
2341#define TEST_LOCK __kmp_test_user_lock_with_checks
2342#define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks
2343#define DESTROY_LOCK __kmp_destroy_user_lock_with_checks
2344#define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks
2345
2346// TODO: Make check abort messages use location info & pass it into
2347// with_checks routines
2348
2349#if KMP_USE_DYNAMIC_LOCK
2350
2351// internal lock initializer
2352static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock,
2353 kmp_dyna_lockseq_t seq) {
2354 if (KMP_IS_D_LOCK(seq)) {
2355 KMP_INIT_D_LOCK(lock, seq);
2356#if USE_ITT_BUILD
2357 __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL);
2358#endif
2359 } else {
2360 KMP_INIT_I_LOCK(lock, seq);
2361#if USE_ITT_BUILD
2362 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
2363 __kmp_itt_lock_creating(ilk->lock, loc);
2364#endif
2365 }
2366}
2367
2368// internal nest lock initializer
2369static __forceinline void
2370__kmp_init_nest_lock_with_hint(ident_t *loc, void **lock,
2371 kmp_dyna_lockseq_t seq) {
2372#if KMP_USE_TSX
2373 // Don't have nested lock implementation for speculative locks
2374 if (seq == lockseq_hle || seq == lockseq_rtm_queuing ||
2375 seq == lockseq_rtm_spin || seq == lockseq_adaptive)
2376 seq = __kmp_user_lock_seq;
2377#endif
2378 switch (seq) {
2379 case lockseq_tas:
2380 seq = lockseq_nested_tas;
2381 break;
2382#if KMP_USE_FUTEX
2383 case lockseq_futex:
2384 seq = lockseq_nested_futex;
2385 break;
2386#endif
2387 case lockseq_ticket:
2388 seq = lockseq_nested_ticket;
2389 break;
2390 case lockseq_queuing:
2391 seq = lockseq_nested_queuing;
2392 break;
2393 case lockseq_drdpa:
2394 seq = lockseq_nested_drdpa;
2395 break;
2396 default:
2397 seq = lockseq_nested_queuing;
2398 }
2399 KMP_INIT_I_LOCK(lock, seq);
2400#if USE_ITT_BUILD
2401 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
2402 __kmp_itt_lock_creating(ilk->lock, loc);
2403#endif
2404}
2405
2406/* initialize the lock with a hint */
2407void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock,
2408 uintptr_t hint) {
2409 KMP_DEBUG_ASSERT(__kmp_init_serial);
2410 if (__kmp_env_consistency_check && user_lock == NULL) {
2411 KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint");
2412 }
2413
2414 __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
2415
2416#if OMPT_SUPPORT && OMPT_OPTIONAL
2417 // This is the case, if called from omp_init_lock_with_hint:
2418 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2419 if (!codeptr)
2420 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2421 if (ompt_enabled.ompt_callback_lock_init) {
2422 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2423 ompt_mutex_lock, (omp_lock_hint_t)hint,
2424 __ompt_get_mutex_impl_type(user_lock),
2425 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2426 }
2427#endif
2428}
2429
2430/* initialize the lock with a hint */
2431void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid,
2432 void **user_lock, uintptr_t hint) {
2433 KMP_DEBUG_ASSERT(__kmp_init_serial);
2434 if (__kmp_env_consistency_check && user_lock == NULL) {
2435 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint");
2436 }
2437
2438 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
2439
2440#if OMPT_SUPPORT && OMPT_OPTIONAL
2441 // This is the case, if called from omp_init_lock_with_hint:
2442 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2443 if (!codeptr)
2444 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2445 if (ompt_enabled.ompt_callback_lock_init) {
2446 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2447 ompt_mutex_nest_lock, (omp_lock_hint_t)hint,
2448 __ompt_get_mutex_impl_type(user_lock),
2449 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2450 }
2451#endif
2452}
2453
2454#endif // KMP_USE_DYNAMIC_LOCK
2455
2456/* initialize the lock */
2457void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2458#if KMP_USE_DYNAMIC_LOCK
2459
2460 KMP_DEBUG_ASSERT(__kmp_init_serial);
2461 if (__kmp_env_consistency_check && user_lock == NULL) {
2462 KMP_FATAL(LockIsUninitialized, "omp_init_lock");
2463 }
2464 __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
2465
2466#if OMPT_SUPPORT && OMPT_OPTIONAL
2467 // This is the case, if called from omp_init_lock_with_hint:
2468 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2469 if (!codeptr)
2470 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2471 if (ompt_enabled.ompt_callback_lock_init) {
2472 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2473 ompt_mutex_lock, omp_lock_hint_none,
2474 __ompt_get_mutex_impl_type(user_lock),
2475 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2476 }
2477#endif
2478
2479#else // KMP_USE_DYNAMIC_LOCK
2480
2481 static char const *const func = "omp_init_lock";
2482 kmp_user_lock_p lck;
2483 KMP_DEBUG_ASSERT(__kmp_init_serial);
2484
2485 if (__kmp_env_consistency_check) {
2486 if (user_lock == NULL) {
2487 KMP_FATAL(LockIsUninitialized, func);
2488 }
2489 }
2490
2491 KMP_CHECK_USER_LOCK_INIT();
2492
2493 if ((__kmp_user_lock_kind == lk_tas) &&
2494 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2495 lck = (kmp_user_lock_p)user_lock;
2496 }
2497#if KMP_USE_FUTEX
2498 else if ((__kmp_user_lock_kind == lk_futex) &&
2499 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2500 lck = (kmp_user_lock_p)user_lock;
2501 }
2502#endif
2503 else {
2504 lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
2505 }
2506 INIT_LOCK(lck);
2507 __kmp_set_user_lock_location(lck, loc);
2508
2509#if OMPT_SUPPORT && OMPT_OPTIONAL
2510 // This is the case, if called from omp_init_lock_with_hint:
2511 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2512 if (!codeptr)
2513 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2514 if (ompt_enabled.ompt_callback_lock_init) {
2515 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2516 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2517 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2518 }
2519#endif
2520
2521#if USE_ITT_BUILD
2522 __kmp_itt_lock_creating(lck);
2523#endif /* USE_ITT_BUILD */
2524
2525#endif // KMP_USE_DYNAMIC_LOCK
2526} // __kmpc_init_lock
2527
2528/* initialize the lock */
2529void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2530#if KMP_USE_DYNAMIC_LOCK
2531
2532 KMP_DEBUG_ASSERT(__kmp_init_serial);
2533 if (__kmp_env_consistency_check && user_lock == NULL) {
2534 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock");
2535 }
2536 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
2537
2538#if OMPT_SUPPORT && OMPT_OPTIONAL
2539 // This is the case, if called from omp_init_lock_with_hint:
2540 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2541 if (!codeptr)
2542 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2543 if (ompt_enabled.ompt_callback_lock_init) {
2544 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2545 ompt_mutex_nest_lock, omp_lock_hint_none,
2546 __ompt_get_mutex_impl_type(user_lock),
2547 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2548 }
2549#endif
2550
2551#else // KMP_USE_DYNAMIC_LOCK
2552
2553 static char const *const func = "omp_init_nest_lock";
2554 kmp_user_lock_p lck;
2555 KMP_DEBUG_ASSERT(__kmp_init_serial);
2556
2557 if (__kmp_env_consistency_check) {
2558 if (user_lock == NULL) {
2559 KMP_FATAL(LockIsUninitialized, func);
2560 }
2561 }
2562
2563 KMP_CHECK_USER_LOCK_INIT();
2564
2565 if ((__kmp_user_lock_kind == lk_tas) &&
2566 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2567 OMP_NEST_LOCK_T_SIZE)) {
2568 lck = (kmp_user_lock_p)user_lock;
2569 }
2570#if KMP_USE_FUTEX
2571 else if ((__kmp_user_lock_kind == lk_futex) &&
2572 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2573 OMP_NEST_LOCK_T_SIZE)) {
2574 lck = (kmp_user_lock_p)user_lock;
2575 }
2576#endif
2577 else {
2578 lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
2579 }
2580
2581 INIT_NESTED_LOCK(lck);
2582 __kmp_set_user_lock_location(lck, loc);
2583
2584#if OMPT_SUPPORT && OMPT_OPTIONAL
2585 // This is the case, if called from omp_init_lock_with_hint:
2586 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2587 if (!codeptr)
2588 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2589 if (ompt_enabled.ompt_callback_lock_init) {
2590 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2591 ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2592 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2593 }
2594#endif
2595
2596#if USE_ITT_BUILD
2597 __kmp_itt_lock_creating(lck);
2598#endif /* USE_ITT_BUILD */
2599
2600#endif // KMP_USE_DYNAMIC_LOCK
2601} // __kmpc_init_nest_lock
2602
2603void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2604#if KMP_USE_DYNAMIC_LOCK
2605
2606#if USE_ITT_BUILD
2607 kmp_user_lock_p lck;
2608 if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
2609 lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
2610 } else {
2611 lck = (kmp_user_lock_p)user_lock;
2612 }
2613 __kmp_itt_lock_destroyed(lck);
2614#endif
2615#if OMPT_SUPPORT && OMPT_OPTIONAL
2616 // This is the case, if called from omp_init_lock_with_hint:
2617 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2618 if (!codeptr)
2619 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2620 if (ompt_enabled.ompt_callback_lock_destroy) {
2621 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2622 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2623 }
2624#endif
2625 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
2626#else
2627 kmp_user_lock_p lck;
2628
2629 if ((__kmp_user_lock_kind == lk_tas) &&
2630 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2631 lck = (kmp_user_lock_p)user_lock;
2632 }
2633#if KMP_USE_FUTEX
2634 else if ((__kmp_user_lock_kind == lk_futex) &&
2635 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2636 lck = (kmp_user_lock_p)user_lock;
2637 }
2638#endif
2639 else {
2640 lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock");
2641 }
2642
2643#if OMPT_SUPPORT && OMPT_OPTIONAL
2644 // This is the case, if called from omp_init_lock_with_hint:
2645 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2646 if (!codeptr)
2647 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2648 if (ompt_enabled.ompt_callback_lock_destroy) {
2649 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2650 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2651 }
2652#endif
2653
2654#if USE_ITT_BUILD
2655 __kmp_itt_lock_destroyed(lck);
2656#endif /* USE_ITT_BUILD */
2657 DESTROY_LOCK(lck);
2658
2659 if ((__kmp_user_lock_kind == lk_tas) &&
2660 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2661 ;
2662 }
2663#if KMP_USE_FUTEX
2664 else if ((__kmp_user_lock_kind == lk_futex) &&
2665 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2666 ;
2667 }
2668#endif
2669 else {
2670 __kmp_user_lock_free(user_lock, gtid, lck);
2671 }
2672#endif // KMP_USE_DYNAMIC_LOCK
2673} // __kmpc_destroy_lock
2674
2675/* destroy the lock */
2676void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2677#if KMP_USE_DYNAMIC_LOCK
2678
2679#if USE_ITT_BUILD
2680 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock);
2681 __kmp_itt_lock_destroyed(ilk->lock);
2682#endif
2683#if OMPT_SUPPORT && OMPT_OPTIONAL
2684 // This is the case, if called from omp_init_lock_with_hint:
2685 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2686 if (!codeptr)
2687 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2688 if (ompt_enabled.ompt_callback_lock_destroy) {
2689 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2690 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2691 }
2692#endif
2693 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
2694
2695#else // KMP_USE_DYNAMIC_LOCK
2696
2697 kmp_user_lock_p lck;
2698
2699 if ((__kmp_user_lock_kind == lk_tas) &&
2700 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2701 OMP_NEST_LOCK_T_SIZE)) {
2702 lck = (kmp_user_lock_p)user_lock;
2703 }
2704#if KMP_USE_FUTEX
2705 else if ((__kmp_user_lock_kind == lk_futex) &&
2706 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2707 OMP_NEST_LOCK_T_SIZE)) {
2708 lck = (kmp_user_lock_p)user_lock;
2709 }
2710#endif
2711 else {
2712 lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock");
2713 }
2714
2715#if OMPT_SUPPORT && OMPT_OPTIONAL
2716 // This is the case, if called from omp_init_lock_with_hint:
2717 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2718 if (!codeptr)
2719 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2720 if (ompt_enabled.ompt_callback_lock_destroy) {
2721 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2722 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2723 }
2724#endif
2725
2726#if USE_ITT_BUILD
2727 __kmp_itt_lock_destroyed(lck);
2728#endif /* USE_ITT_BUILD */
2729
2730 DESTROY_NESTED_LOCK(lck);
2731
2732 if ((__kmp_user_lock_kind == lk_tas) &&
2733 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2734 OMP_NEST_LOCK_T_SIZE)) {
2735 ;
2736 }
2737#if KMP_USE_FUTEX
2738 else if ((__kmp_user_lock_kind == lk_futex) &&
2739 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2740 OMP_NEST_LOCK_T_SIZE)) {
2741 ;
2742 }
2743#endif
2744 else {
2745 __kmp_user_lock_free(user_lock, gtid, lck);
2746 }
2747#endif // KMP_USE_DYNAMIC_LOCK
2748} // __kmpc_destroy_nest_lock
2749
2750void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2751 KMP_COUNT_BLOCK(OMP_set_lock);
2752#if KMP_USE_DYNAMIC_LOCK
2753 int tag = KMP_EXTRACT_D_TAG(user_lock);
2754#if USE_ITT_BUILD
2755 __kmp_itt_lock_acquiring(
2756 (kmp_user_lock_p)
2757 user_lock); // itt function will get to the right lock object.
2758#endif
2759#if OMPT_SUPPORT && OMPT_OPTIONAL
2760 // This is the case, if called from omp_init_lock_with_hint:
2761 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2762 if (!codeptr)
2763 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2764 if (ompt_enabled.ompt_callback_mutex_acquire) {
2765 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2766 ompt_mutex_lock, omp_lock_hint_none,
2767 __ompt_get_mutex_impl_type(user_lock),
2768 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2769 }
2770#endif
2771#if KMP_USE_INLINED_TAS
2772 if (tag == locktag_tas && !__kmp_env_consistency_check) {
2773 KMP_ACQUIRE_TAS_LOCK(user_lock, gtid);
2774 } else
2775#elif KMP_USE_INLINED_FUTEX
2776 if (tag == locktag_futex && !__kmp_env_consistency_check) {
2777 KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid);
2778 } else
2779#endif
2780 {
2781 __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2782 }
2783#if USE_ITT_BUILD
2784 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2785#endif
2786#if OMPT_SUPPORT && OMPT_OPTIONAL
2787 if (ompt_enabled.ompt_callback_mutex_acquired) {
2788 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2789 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2790 }
2791#endif
2792
2793#else // KMP_USE_DYNAMIC_LOCK
2794
2795 kmp_user_lock_p lck;
2796
2797 if ((__kmp_user_lock_kind == lk_tas) &&
2798 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2799 lck = (kmp_user_lock_p)user_lock;
2800 }
2801#if KMP_USE_FUTEX
2802 else if ((__kmp_user_lock_kind == lk_futex) &&
2803 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2804 lck = (kmp_user_lock_p)user_lock;
2805 }
2806#endif
2807 else {
2808 lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock");
2809 }
2810
2811#if USE_ITT_BUILD
2812 __kmp_itt_lock_acquiring(lck);
2813#endif /* USE_ITT_BUILD */
2814#if OMPT_SUPPORT && OMPT_OPTIONAL
2815 // This is the case, if called from omp_init_lock_with_hint:
2816 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2817 if (!codeptr)
2818 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2819 if (ompt_enabled.ompt_callback_mutex_acquire) {
2820 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2821 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2822 (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2823 }
2824#endif
2825
2826 ACQUIRE_LOCK(lck, gtid);
2827
2828#if USE_ITT_BUILD
2829 __kmp_itt_lock_acquired(lck);
2830#endif /* USE_ITT_BUILD */
2831
2832#if OMPT_SUPPORT && OMPT_OPTIONAL
2833 if (ompt_enabled.ompt_callback_mutex_acquired) {
2834 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2835 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2836 }
2837#endif
2838
2839#endif // KMP_USE_DYNAMIC_LOCK
2840}
2841
2842void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2843#if KMP_USE_DYNAMIC_LOCK
2844
2845#if USE_ITT_BUILD
2846 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
2847#endif
2848#if OMPT_SUPPORT && OMPT_OPTIONAL
2849 // This is the case, if called from omp_init_lock_with_hint:
2850 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2851 if (!codeptr)
2852 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2853 if (ompt_enabled.enabled) {
2854 if (ompt_enabled.ompt_callback_mutex_acquire) {
2855 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2856 ompt_mutex_nest_lock, omp_lock_hint_none,
2857 __ompt_get_mutex_impl_type(user_lock),
2858 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2859 }
2860 }
2861#endif
2862 int acquire_status =
2863 KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid);
2864 (void)acquire_status;
2865#if USE_ITT_BUILD
2866 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2867#endif
2868
2869#if OMPT_SUPPORT && OMPT_OPTIONAL
2870 if (ompt_enabled.enabled) {
2871 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
2872 if (ompt_enabled.ompt_callback_mutex_acquired) {
2873 // lock_first
2874 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2875 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
2876 codeptr);
2877 }
2878 } else {
2879 if (ompt_enabled.ompt_callback_nest_lock) {
2880 // lock_next
2881 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2882 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2883 }
2884 }
2885 }
2886#endif
2887
2888#else // KMP_USE_DYNAMIC_LOCK
2889 int acquire_status;
2890 kmp_user_lock_p lck;
2891
2892 if ((__kmp_user_lock_kind == lk_tas) &&
2893 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2894 OMP_NEST_LOCK_T_SIZE)) {
2895 lck = (kmp_user_lock_p)user_lock;
2896 }
2897#if KMP_USE_FUTEX
2898 else if ((__kmp_user_lock_kind == lk_futex) &&
2899 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2900 OMP_NEST_LOCK_T_SIZE)) {
2901 lck = (kmp_user_lock_p)user_lock;
2902 }
2903#endif
2904 else {
2905 lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock");
2906 }
2907
2908#if USE_ITT_BUILD
2909 __kmp_itt_lock_acquiring(lck);
2910#endif /* USE_ITT_BUILD */
2911#if OMPT_SUPPORT && OMPT_OPTIONAL
2912 // This is the case, if called from omp_init_lock_with_hint:
2913 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2914 if (!codeptr)
2915 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2916 if (ompt_enabled.enabled) {
2917 if (ompt_enabled.ompt_callback_mutex_acquire) {
2918 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2919 ompt_mutex_nest_lock, omp_lock_hint_none,
2920 __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
2921 codeptr);
2922 }
2923 }
2924#endif
2925
2926 ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status);
2927
2928#if USE_ITT_BUILD
2929 __kmp_itt_lock_acquired(lck);
2930#endif /* USE_ITT_BUILD */
2931
2932#if OMPT_SUPPORT && OMPT_OPTIONAL
2933 if (ompt_enabled.enabled) {
2934 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
2935 if (ompt_enabled.ompt_callback_mutex_acquired) {
2936 // lock_first
2937 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2938 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2939 }
2940 } else {
2941 if (ompt_enabled.ompt_callback_nest_lock) {
2942 // lock_next
2943 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2944 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2945 }
2946 }
2947 }
2948#endif
2949
2950#endif // KMP_USE_DYNAMIC_LOCK
2951}
2952
2953void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2954#if KMP_USE_DYNAMIC_LOCK
2955
2956 int tag = KMP_EXTRACT_D_TAG(user_lock);
2957#if USE_ITT_BUILD
2958 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2959#endif
2960#if KMP_USE_INLINED_TAS
2961 if (tag == locktag_tas && !__kmp_env_consistency_check) {
2962 KMP_RELEASE_TAS_LOCK(user_lock, gtid);
2963 } else
2964#elif KMP_USE_INLINED_FUTEX
2965 if (tag == locktag_futex && !__kmp_env_consistency_check) {
2966 KMP_RELEASE_FUTEX_LOCK(user_lock, gtid);
2967 } else
2968#endif
2969 {
2970 __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2971 }
2972
2973#if OMPT_SUPPORT && OMPT_OPTIONAL
2974 // This is the case, if called from omp_init_lock_with_hint:
2975 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2976 if (!codeptr)
2977 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2978 if (ompt_enabled.ompt_callback_mutex_released) {
2979 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2980 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2981 }
2982#endif
2983
2984#else // KMP_USE_DYNAMIC_LOCK
2985
2986 kmp_user_lock_p lck;
2987
2988 /* Can't use serial interval since not block structured */
2989 /* release the lock */
2990
2991 if ((__kmp_user_lock_kind == lk_tas) &&
2992 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2993#if KMP_OS_LINUX && \
2994 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
2995// "fast" path implemented to fix customer performance issue
2996#if USE_ITT_BUILD
2997 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2998#endif /* USE_ITT_BUILD */
2999 TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0);
3000 KMP_MB();
3001
3002#if OMPT_SUPPORT && OMPT_OPTIONAL
3003 // This is the case, if called from omp_init_lock_with_hint:
3004 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3005 if (!codeptr)
3006 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3007 if (ompt_enabled.ompt_callback_mutex_released) {
3008 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3009 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3010 }
3011#endif
3012
3013 return;
3014#else
3015 lck = (kmp_user_lock_p)user_lock;
3016#endif
3017 }
3018#if KMP_USE_FUTEX
3019 else if ((__kmp_user_lock_kind == lk_futex) &&
3020 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
3021 lck = (kmp_user_lock_p)user_lock;
3022 }
3023#endif
3024 else {
3025 lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock");
3026 }
3027
3028#if USE_ITT_BUILD
3029 __kmp_itt_lock_releasing(lck);
3030#endif /* USE_ITT_BUILD */
3031
3032 RELEASE_LOCK(lck, gtid);
3033
3034#if OMPT_SUPPORT && OMPT_OPTIONAL
3035 // This is the case, if called from omp_init_lock_with_hint:
3036 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3037 if (!codeptr)
3038 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3039 if (ompt_enabled.ompt_callback_mutex_released) {
3040 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3041 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3042 }
3043#endif
3044
3045#endif // KMP_USE_DYNAMIC_LOCK
3046}
3047
3048/* release the lock */
3049void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
3050#if KMP_USE_DYNAMIC_LOCK
3051
3052#if USE_ITT_BUILD
3053 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
3054#endif
3055 int release_status =
3056 KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid);
3057 (void)release_status;
3058
3059#if OMPT_SUPPORT && OMPT_OPTIONAL
3060 // This is the case, if called from omp_init_lock_with_hint:
3061 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3062 if (!codeptr)
3063 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3064 if (ompt_enabled.enabled) {
3065 if (release_status == KMP_LOCK_RELEASED) {
3066 if (ompt_enabled.ompt_callback_mutex_released) {
3067 // release_lock_last
3068 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3069 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
3070 codeptr);
3071 }
3072 } else if (ompt_enabled.ompt_callback_nest_lock) {
3073 // release_lock_prev
3074 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3075 ompt_scope_end, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3076 }
3077 }
3078#endif
3079
3080#else // KMP_USE_DYNAMIC_LOCK
3081
3082 kmp_user_lock_p lck;
3083
3084 /* Can't use serial interval since not block structured */
3085
3086 if ((__kmp_user_lock_kind == lk_tas) &&
3087 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
3088 OMP_NEST_LOCK_T_SIZE)) {
3089#if KMP_OS_LINUX && \
3090 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
3091 // "fast" path implemented to fix customer performance issue
3092 kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock;
3093#if USE_ITT_BUILD
3094 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
3095#endif /* USE_ITT_BUILD */
3096
3097#if OMPT_SUPPORT && OMPT_OPTIONAL
3098 int release_status = KMP_LOCK_STILL_HELD;
3099#endif
3100
3101 if (--(tl->lk.depth_locked) == 0) {
3102 TCW_4(tl->lk.poll, 0);
3103#if OMPT_SUPPORT && OMPT_OPTIONAL
3104 release_status = KMP_LOCK_RELEASED;
3105#endif
3106 }
3107 KMP_MB();
3108
3109#if OMPT_SUPPORT && OMPT_OPTIONAL
3110 // This is the case, if called from omp_init_lock_with_hint:
3111 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3112 if (!codeptr)
3113 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3114 if (ompt_enabled.enabled) {
3115 if (release_status == KMP_LOCK_RELEASED) {
3116 if (ompt_enabled.ompt_callback_mutex_released) {
3117 // release_lock_last
3118 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3119 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3120 }
3121 } else if (ompt_enabled.ompt_callback_nest_lock) {
3122 // release_lock_previous
3123 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3124 ompt_mutex_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3125 }
3126 }
3127#endif
3128
3129 return;
3130#else
3131 lck = (kmp_user_lock_p)user_lock;
3132#endif
3133 }
3134#if KMP_USE_FUTEX
3135 else if ((__kmp_user_lock_kind == lk_futex) &&
3136 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
3137 OMP_NEST_LOCK_T_SIZE)) {
3138 lck = (kmp_user_lock_p)user_lock;
3139 }
3140#endif
3141 else {
3142 lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock");
3143 }
3144
3145#if USE_ITT_BUILD
3146 __kmp_itt_lock_releasing(lck);
3147#endif /* USE_ITT_BUILD */
3148
3149 int release_status;
3150 release_status = RELEASE_NESTED_LOCK(lck, gtid);
3151#if OMPT_SUPPORT && OMPT_OPTIONAL
3152 // This is the case, if called from omp_init_lock_with_hint:
3153 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3154 if (!codeptr)
3155 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3156 if (ompt_enabled.enabled) {
3157 if (release_status == KMP_LOCK_RELEASED) {
3158 if (ompt_enabled.ompt_callback_mutex_released) {
3159 // release_lock_last
3160 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3161 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3162 }
3163 } else if (ompt_enabled.ompt_callback_nest_lock) {
3164 // release_lock_previous
3165 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3166 ompt_mutex_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3167 }
3168 }
3169#endif
3170
3171#endif // KMP_USE_DYNAMIC_LOCK
3172}
3173
3174/* try to acquire the lock */
3175int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
3176 KMP_COUNT_BLOCK(OMP_test_lock);
3177
3178#if KMP_USE_DYNAMIC_LOCK
3179 int rc;
3180 int tag = KMP_EXTRACT_D_TAG(user_lock);
3181#if USE_ITT_BUILD
3182 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
3183#endif
3184#if OMPT_SUPPORT && OMPT_OPTIONAL
3185 // This is the case, if called from omp_init_lock_with_hint:
3186 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3187 if (!codeptr)
3188 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3189 if (ompt_enabled.ompt_callback_mutex_acquire) {
3190 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3191 ompt_mutex_test_lock, omp_lock_hint_none,
3192 __ompt_get_mutex_impl_type(user_lock),
3193 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3194 }
3195#endif
3196#if KMP_USE_INLINED_TAS
3197 if (tag == locktag_tas && !__kmp_env_consistency_check) {
3198 KMP_TEST_TAS_LOCK(user_lock, gtid, rc);
3199 } else
3200#elif KMP_USE_INLINED_FUTEX
3201 if (tag == locktag_futex && !__kmp_env_consistency_check) {
3202 KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc);
3203 } else
3204#endif
3205 {
3206 rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid);
3207 }
3208 if (rc) {
3209#if USE_ITT_BUILD
3210 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
3211#endif
3212#if OMPT_SUPPORT && OMPT_OPTIONAL
3213 if (ompt_enabled.ompt_callback_mutex_acquired) {
3214 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3215 ompt_mutex_test_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3216 }
3217#endif
3218 return FTN_TRUE;
3219 } else {
3220#if USE_ITT_BUILD
3221 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
3222#endif
3223 return FTN_FALSE;
3224 }
3225
3226#else // KMP_USE_DYNAMIC_LOCK
3227
3228 kmp_user_lock_p lck;
3229 int rc;
3230
3231 if ((__kmp_user_lock_kind == lk_tas) &&
3232 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
3233 lck = (kmp_user_lock_p)user_lock;
3234 }
3235#if KMP_USE_FUTEX
3236 else if ((__kmp_user_lock_kind == lk_futex) &&
3237 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
3238 lck = (kmp_user_lock_p)user_lock;
3239 }
3240#endif
3241 else {
3242 lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock");
3243 }
3244
3245#if USE_ITT_BUILD
3246 __kmp_itt_lock_acquiring(lck);
3247#endif /* USE_ITT_BUILD */
3248#if OMPT_SUPPORT && OMPT_OPTIONAL
3249 // This is the case, if called from omp_init_lock_with_hint:
3250 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3251 if (!codeptr)
3252 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3253 if (ompt_enabled.ompt_callback_mutex_acquire) {
3254 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3255 ompt_mutex_test_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
3256 (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3257 }
3258#endif
3259
3260 rc = TEST_LOCK(lck, gtid);
3261#if USE_ITT_BUILD
3262 if (rc) {
3263 __kmp_itt_lock_acquired(lck);
3264 } else {
3265 __kmp_itt_lock_cancelled(lck);
3266 }
3267#endif /* USE_ITT_BUILD */
3268#if OMPT_SUPPORT && OMPT_OPTIONAL
3269 if (rc && ompt_enabled.ompt_callback_mutex_acquired) {
3270 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3271 ompt_mutex_test_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3272 }
3273#endif
3274
3275 return (rc ? FTN_TRUE : FTN_FALSE);
3276
3277 /* Can't use serial interval since not block structured */
3278
3279#endif // KMP_USE_DYNAMIC_LOCK
3280}
3281
3282/* try to acquire the lock */
3283int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
3284#if KMP_USE_DYNAMIC_LOCK
3285 int rc;
3286#if USE_ITT_BUILD
3287 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
3288#endif
3289#if OMPT_SUPPORT && OMPT_OPTIONAL
3290 // This is the case, if called from omp_init_lock_with_hint:
3291 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3292 if (!codeptr)
3293 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3294 if (ompt_enabled.ompt_callback_mutex_acquire) {
3295 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3296 ompt_mutex_test_nest_lock, omp_lock_hint_none,
3297 __ompt_get_mutex_impl_type(user_lock),
3298 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3299 }
3300#endif
3301 rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
3302#if USE_ITT_BUILD
3303 if (rc) {
3304 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
3305 } else {
3306 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
3307 }
3308#endif
3309#if OMPT_SUPPORT && OMPT_OPTIONAL
3310 if (ompt_enabled.enabled && rc) {
3311 if (rc == 1) {
3312 if (ompt_enabled.ompt_callback_mutex_acquired) {
3313 // lock_first
3314 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3315 ompt_mutex_test_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
3316 codeptr);
3317 }
3318 } else {
3319 if (ompt_enabled.ompt_callback_nest_lock) {
3320 // lock_next
3321 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3322 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3323 }
3324 }
3325 }
3326#endif
3327 return rc;
3328
3329#else // KMP_USE_DYNAMIC_LOCK
3330
3331 kmp_user_lock_p lck;
3332 int rc;
3333
3334 if ((__kmp_user_lock_kind == lk_tas) &&
3335 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
3336 OMP_NEST_LOCK_T_SIZE)) {
3337 lck = (kmp_user_lock_p)user_lock;
3338 }
3339#if KMP_USE_FUTEX
3340 else if ((__kmp_user_lock_kind == lk_futex) &&
3341 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
3342 OMP_NEST_LOCK_T_SIZE)) {
3343 lck = (kmp_user_lock_p)user_lock;
3344 }
3345#endif
3346 else {
3347 lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock");
3348 }
3349
3350#if USE_ITT_BUILD
3351 __kmp_itt_lock_acquiring(lck);
3352#endif /* USE_ITT_BUILD */
3353
3354#if OMPT_SUPPORT && OMPT_OPTIONAL
3355 // This is the case, if called from omp_init_lock_with_hint:
3356 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3357 if (!codeptr)
3358 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3359 if (ompt_enabled.enabled) &&
3360 ompt_enabled.ompt_callback_mutex_acquire) {
3361 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3362 ompt_mutex_test_nest_lock, omp_lock_hint_none,
3363 __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
3364 codeptr);
3365 }
3366#endif
3367
3368 rc = TEST_NESTED_LOCK(lck, gtid);
3369#if USE_ITT_BUILD
3370 if (rc) {
3371 __kmp_itt_lock_acquired(lck);
3372 } else {
3373 __kmp_itt_lock_cancelled(lck);
3374 }
3375#endif /* USE_ITT_BUILD */
3376#if OMPT_SUPPORT && OMPT_OPTIONAL
3377 if (ompt_enabled.enabled && rc) {
3378 if (rc == 1) {
3379 if (ompt_enabled.ompt_callback_mutex_acquired) {
3380 // lock_first
3381 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3382 ompt_mutex_test_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3383 }
3384 } else {
3385 if (ompt_enabled.ompt_callback_nest_lock) {
3386 // lock_next
3387 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3388 ompt_mutex_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3389 }
3390 }
3391 }
3392#endif
3393 return rc;
3394
3395 /* Can't use serial interval since not block structured */
3396
3397#endif // KMP_USE_DYNAMIC_LOCK
3398}
3399
3400// Interface to fast scalable reduce methods routines
3401
3402// keep the selected method in a thread local structure for cross-function
3403// usage: will be used in __kmpc_end_reduce* functions;
3404// another solution: to re-determine the method one more time in
3405// __kmpc_end_reduce* functions (new prototype required then)
3406// AT: which solution is better?
3407#define __KMP_SET_REDUCTION_METHOD(gtid, rmethod) \
3408 ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod))
3409
3410#define __KMP_GET_REDUCTION_METHOD(gtid) \
3411 (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method)
3412
3413// description of the packed_reduction_method variable: look at the macros in
3414// kmp.h
3415
3416// used in a critical section reduce block
3417static __forceinline void
3418__kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
3419 kmp_critical_name *crit) {
3420
3421 // this lock was visible to a customer and to the threading profile tool as a
3422 // serial overhead span (although it's used for an internal purpose only)
3423 // why was it visible in previous implementation?
3424 // should we keep it visible in new reduce block?
3425 kmp_user_lock_p lck;
3426
3427#if KMP_USE_DYNAMIC_LOCK
3428
3429 kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
3430 // Check if it is initialized.
3431 if (*lk == 0) {
3432 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
3433 KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
3434 KMP_GET_D_TAG(__kmp_user_lock_seq));
3435 } else {
3436 __kmp_init_indirect_csptr(crit, loc, global_tid,
3437 KMP_GET_I_TAG(__kmp_user_lock_seq));
3438 }
3439 }
3440 // Branch for accessing the actual lock object and set operation. This
3441 // branching is inevitable since this lock initialization does not follow the
3442 // normal dispatch path (lock table is not used).
3443 if (KMP_EXTRACT_D_TAG(lk) != 0) {
3444 lck = (kmp_user_lock_p)lk;
3445 KMP_DEBUG_ASSERT(lck != NULL);
3446 if (__kmp_env_consistency_check) {
3447 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
3448 }
3449 KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
3450 } else {
3451 kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
3452 lck = ilk->lock;
3453 KMP_DEBUG_ASSERT(lck != NULL);
3454 if (__kmp_env_consistency_check) {
3455 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
3456 }
3457 KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
3458 }
3459
3460#else // KMP_USE_DYNAMIC_LOCK
3461
3462 // We know that the fast reduction code is only emitted by Intel compilers
3463 // with 32 byte critical sections. If there isn't enough space, then we
3464 // have to use a pointer.
3465 if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) {
3466 lck = (kmp_user_lock_p)crit;
3467 } else {
3468 lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
3469 }
3470 KMP_DEBUG_ASSERT(lck != NULL);
3471
3472 if (__kmp_env_consistency_check)
3473 __kmp_push_sync(global_tid, ct_critical, loc, lck);
3474
3475 __kmp_acquire_user_lock_with_checks(lck, global_tid);
3476
3477#endif // KMP_USE_DYNAMIC_LOCK
3478}
3479
3480// used in a critical section reduce block
3481static __forceinline void
3482__kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
3483 kmp_critical_name *crit) {
3484
3485 kmp_user_lock_p lck;
3486
3487#if KMP_USE_DYNAMIC_LOCK
3488
3489 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
3490 lck = (kmp_user_lock_p)crit;
3491 if (__kmp_env_consistency_check)
3492 __kmp_pop_sync(global_tid, ct_critical, loc);
3493 KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
3494 } else {
3495 kmp_indirect_lock_t *ilk =
3496 (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
3497 if (__kmp_env_consistency_check)
3498 __kmp_pop_sync(global_tid, ct_critical, loc);
3499 KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid);
3500 }
3501
3502#else // KMP_USE_DYNAMIC_LOCK
3503
3504 // We know that the fast reduction code is only emitted by Intel compilers
3505 // with 32 byte critical sections. If there isn't enough space, then we have
3506 // to use a pointer.
3507 if (__kmp_base_user_lock_size > 32) {
3508 lck = *((kmp_user_lock_p *)crit);
3509 KMP_ASSERT(lck != NULL);
3510 } else {
3511 lck = (kmp_user_lock_p)crit;
3512 }
3513
3514 if (__kmp_env_consistency_check)
3515 __kmp_pop_sync(global_tid, ct_critical, loc);
3516
3517 __kmp_release_user_lock_with_checks(lck, global_tid);
3518
3519#endif // KMP_USE_DYNAMIC_LOCK
3520} // __kmp_end_critical_section_reduce_block
3521
3522static __forceinline int
3523__kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p,
3524 int *task_state) {
3525 kmp_team_t *team;
3526
3527 // Check if we are inside the teams construct?
3528 if (th->th.th_teams_microtask) {
3529 *team_p = team = th->th.th_team;
3530 if (team->t.t_level == th->th.th_teams_level) {
3531 // This is reduction at teams construct.
3532 KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0
3533 // Let's swap teams temporarily for the reduction.
3534 th->th.th_info.ds.ds_tid = team->t.t_master_tid;
3535 th->th.th_team = team->t.t_parent;
3536 th->th.th_team_nproc = th->th.th_team->t.t_nproc;
3537 th->th.th_task_team = th->th.th_team->t.t_task_team[0];
3538 *task_state = th->th.th_task_state;
3539 th->th.th_task_state = 0;
3540
3541 return 1;
3542 }
3543 }
3544 return 0;
3545}
3546
3547static __forceinline void
3548__kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) {
3549 // Restore thread structure swapped in __kmp_swap_teams_for_teams_reduction.
3550 th->th.th_info.ds.ds_tid = 0;
3551 th->th.th_team = team;
3552 th->th.th_team_nproc = team->t.t_nproc;
3553 th->th.th_task_team = team->t.t_task_team[task_state];
3554 __kmp_type_convert(task_state, &(th->th.th_task_state));
3555}
3556
3557/* 2.a.i. Reduce Block without a terminating barrier */
3573kmp_int32
3574__kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
3575 size_t reduce_size, void *reduce_data,
3576 void (*reduce_func)(void *lhs_data, void *rhs_data),
3577 kmp_critical_name *lck) {
3578
3579 KMP_COUNT_BLOCK(REDUCE_nowait);
3580 int retval = 0;
3581 PACKED_REDUCTION_METHOD_T packed_reduction_method;
3582 kmp_info_t *th;
3583 kmp_team_t *team;
3584 int teams_swapped = 0, task_state;
3585 KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid));
3586 __kmp_assert_valid_gtid(global_tid);
3587
3588 // why do we need this initialization here at all?
3589 // Reduction clause can not be used as a stand-alone directive.
3590
3591 // do not call __kmp_serial_initialize(), it will be called by
3592 // __kmp_parallel_initialize() if needed
3593 // possible detection of false-positive race by the threadchecker ???
3594 if (!TCR_4(__kmp_init_parallel))
3595 __kmp_parallel_initialize();
3596
3597 __kmp_resume_if_soft_paused();
3598
3599// check correctness of reduce block nesting
3600#if KMP_USE_DYNAMIC_LOCK
3601 if (__kmp_env_consistency_check)
3602 __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
3603#else
3604 if (__kmp_env_consistency_check)
3605 __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
3606#endif
3607
3608 th = __kmp_thread_from_gtid(global_tid);
3609 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3610
3611 // packed_reduction_method value will be reused by __kmp_end_reduce* function,
3612 // the value should be kept in a variable
3613 // the variable should be either a construct-specific or thread-specific
3614 // property, not a team specific property
3615 // (a thread can reach the next reduce block on the next construct, reduce
3616 // method may differ on the next construct)
3617 // an ident_t "loc" parameter could be used as a construct-specific property
3618 // (what if loc == 0?)
3619 // (if both construct-specific and team-specific variables were shared,
3620 // then unness extra syncs should be needed)
3621 // a thread-specific variable is better regarding two issues above (next
3622 // construct and extra syncs)
3623 // a thread-specific "th_local.reduction_method" variable is used currently
3624 // each thread executes 'determine' and 'set' lines (no need to execute by one
3625 // thread, to avoid unness extra syncs)
3626
3627 packed_reduction_method = __kmp_determine_reduction_method(
3628 loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
3629 __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
3630
3631 OMPT_REDUCTION_DECL(th, global_tid);
3632 if (packed_reduction_method == critical_reduce_block) {
3633
3634 OMPT_REDUCTION_BEGIN;
3635
3636 __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
3637 retval = 1;
3638
3639 } else if (packed_reduction_method == empty_reduce_block) {
3640
3641 OMPT_REDUCTION_BEGIN;
3642
3643 // usage: if team size == 1, no synchronization is required ( Intel
3644 // platforms only )
3645 retval = 1;
3646
3647 } else if (packed_reduction_method == atomic_reduce_block) {
3648
3649 retval = 2;
3650
3651 // all threads should do this pop here (because __kmpc_end_reduce_nowait()
3652 // won't be called by the code gen)
3653 // (it's not quite good, because the checking block has been closed by
3654 // this 'pop',
3655 // but atomic operation has not been executed yet, will be executed
3656 // slightly later, literally on next instruction)
3657 if (__kmp_env_consistency_check)
3658 __kmp_pop_sync(global_tid, ct_reduce, loc);
3659
3660 } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3661 tree_reduce_block)) {
3662
3663// AT: performance issue: a real barrier here
3664// AT: (if primary thread is slow, other threads are blocked here waiting for
3665// the primary thread to come and release them)
3666// AT: (it's not what a customer might expect specifying NOWAIT clause)
3667// AT: (specifying NOWAIT won't result in improvement of performance, it'll
3668// be confusing to a customer)
3669// AT: another implementation of *barrier_gather*nowait() (or some other design)
3670// might go faster and be more in line with sense of NOWAIT
3671// AT: TO DO: do epcc test and compare times
3672
3673// this barrier should be invisible to a customer and to the threading profile
3674// tool (it's neither a terminating barrier nor customer's code, it's
3675// used for an internal purpose)
3676#if OMPT_SUPPORT
3677 // JP: can this barrier potentially leed to task scheduling?
3678 // JP: as long as there is a barrier in the implementation, OMPT should and
3679 // will provide the barrier events
3680 // so we set-up the necessary frame/return addresses.
3681 ompt_frame_t *ompt_frame;
3682 if (ompt_enabled.enabled) {
3683 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3684 if (ompt_frame->enter_frame.ptr == NULL)
3685 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3686 }
3687 OMPT_STORE_RETURN_ADDRESS(global_tid);
3688#endif
3689#if USE_ITT_NOTIFY
3690 __kmp_threads[global_tid]->th.th_ident = loc;
3691#endif
3692 retval =
3693 __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3694 global_tid, FALSE, reduce_size, reduce_data, reduce_func);
3695 retval = (retval != 0) ? (0) : (1);
3696#if OMPT_SUPPORT && OMPT_OPTIONAL
3697 if (ompt_enabled.enabled) {
3698 ompt_frame->enter_frame = ompt_data_none;
3699 }
3700#endif
3701
3702 // all other workers except primary thread should do this pop here
3703 // ( none of other workers will get to __kmpc_end_reduce_nowait() )
3704 if (__kmp_env_consistency_check) {
3705 if (retval == 0) {
3706 __kmp_pop_sync(global_tid, ct_reduce, loc);
3707 }
3708 }
3709
3710 } else {
3711
3712 // should never reach this block
3713 KMP_ASSERT(0); // "unexpected method"
3714 }
3715 if (teams_swapped) {
3716 __kmp_restore_swapped_teams(th, team, task_state);
3717 }
3718 KA_TRACE(
3719 10,
3720 ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n",
3721 global_tid, packed_reduction_method, retval));
3722
3723 return retval;
3724}
3725
3734void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
3735 kmp_critical_name *lck) {
3736
3737 PACKED_REDUCTION_METHOD_T packed_reduction_method;
3738
3739 KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid));
3740 __kmp_assert_valid_gtid(global_tid);
3741
3742 packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
3743
3744 OMPT_REDUCTION_DECL(__kmp_thread_from_gtid(global_tid), global_tid);
3745
3746 if (packed_reduction_method == critical_reduce_block) {
3747
3748 __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
3749 OMPT_REDUCTION_END;
3750
3751 } else if (packed_reduction_method == empty_reduce_block) {
3752
3753 // usage: if team size == 1, no synchronization is required ( on Intel
3754 // platforms only )
3755
3756 OMPT_REDUCTION_END;
3757
3758 } else if (packed_reduction_method == atomic_reduce_block) {
3759
3760 // neither primary thread nor other workers should get here
3761 // (code gen does not generate this call in case 2: atomic reduce block)
3762 // actually it's better to remove this elseif at all;
3763 // after removal this value will checked by the 'else' and will assert
3764
3765 } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3766 tree_reduce_block)) {
3767
3768 // only primary thread gets here
3769 // OMPT: tree reduction is annotated in the barrier code
3770
3771 } else {
3772
3773 // should never reach this block
3774 KMP_ASSERT(0); // "unexpected method"
3775 }
3776
3777 if (__kmp_env_consistency_check)
3778 __kmp_pop_sync(global_tid, ct_reduce, loc);
3779
3780 KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n",
3781 global_tid, packed_reduction_method));
3782
3783 return;
3784}
3785
3786/* 2.a.ii. Reduce Block with a terminating barrier */
3787
3803kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
3804 size_t reduce_size, void *reduce_data,
3805 void (*reduce_func)(void *lhs_data, void *rhs_data),
3806 kmp_critical_name *lck) {
3807 KMP_COUNT_BLOCK(REDUCE_wait);
3808 int retval = 0;
3809 PACKED_REDUCTION_METHOD_T packed_reduction_method;
3810 kmp_info_t *th;
3811 kmp_team_t *team;
3812 int teams_swapped = 0, task_state;
3813
3814 KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid));
3815 __kmp_assert_valid_gtid(global_tid);
3816
3817 // why do we need this initialization here at all?
3818 // Reduction clause can not be a stand-alone directive.
3819
3820 // do not call __kmp_serial_initialize(), it will be called by
3821 // __kmp_parallel_initialize() if needed
3822 // possible detection of false-positive race by the threadchecker ???
3823 if (!TCR_4(__kmp_init_parallel))
3824 __kmp_parallel_initialize();
3825
3826 __kmp_resume_if_soft_paused();
3827
3828// check correctness of reduce block nesting
3829#if KMP_USE_DYNAMIC_LOCK
3830 if (__kmp_env_consistency_check)
3831 __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
3832#else
3833 if (__kmp_env_consistency_check)
3834 __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
3835#endif
3836
3837 th = __kmp_thread_from_gtid(global_tid);
3838 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3839
3840 packed_reduction_method = __kmp_determine_reduction_method(
3841 loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
3842 __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
3843
3844 OMPT_REDUCTION_DECL(th, global_tid);
3845
3846 if (packed_reduction_method == critical_reduce_block) {
3847
3848 OMPT_REDUCTION_BEGIN;
3849 __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
3850 retval = 1;
3851
3852 } else if (packed_reduction_method == empty_reduce_block) {
3853
3854 OMPT_REDUCTION_BEGIN;
3855 // usage: if team size == 1, no synchronization is required ( Intel
3856 // platforms only )
3857 retval = 1;
3858
3859 } else if (packed_reduction_method == atomic_reduce_block) {
3860
3861 retval = 2;
3862
3863 } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3864 tree_reduce_block)) {
3865
3866// case tree_reduce_block:
3867// this barrier should be visible to a customer and to the threading profile
3868// tool (it's a terminating barrier on constructs if NOWAIT not specified)
3869#if OMPT_SUPPORT
3870 ompt_frame_t *ompt_frame;
3871 if (ompt_enabled.enabled) {
3872 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3873 if (ompt_frame->enter_frame.ptr == NULL)
3874 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3875 }
3876 OMPT_STORE_RETURN_ADDRESS(global_tid);
3877#endif
3878#if USE_ITT_NOTIFY
3879 __kmp_threads[global_tid]->th.th_ident =
3880 loc; // needed for correct notification of frames
3881#endif
3882 retval =
3883 __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3884 global_tid, TRUE, reduce_size, reduce_data, reduce_func);
3885 retval = (retval != 0) ? (0) : (1);
3886#if OMPT_SUPPORT && OMPT_OPTIONAL
3887 if (ompt_enabled.enabled) {
3888 ompt_frame->enter_frame = ompt_data_none;
3889 }
3890#endif
3891
3892 // all other workers except primary thread should do this pop here
3893 // (none of other workers except primary will enter __kmpc_end_reduce())
3894 if (__kmp_env_consistency_check) {
3895 if (retval == 0) { // 0: all other workers; 1: primary thread
3896 __kmp_pop_sync(global_tid, ct_reduce, loc);
3897 }
3898 }
3899
3900 } else {
3901
3902 // should never reach this block
3903 KMP_ASSERT(0); // "unexpected method"
3904 }
3905 if (teams_swapped) {
3906 __kmp_restore_swapped_teams(th, team, task_state);
3907 }
3908
3909 KA_TRACE(10,
3910 ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n",
3911 global_tid, packed_reduction_method, retval));
3912 return retval;
3913}
3914
3925void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
3926 kmp_critical_name *lck) {
3927
3928 PACKED_REDUCTION_METHOD_T packed_reduction_method;
3929 kmp_info_t *th;
3930 kmp_team_t *team;
3931 int teams_swapped = 0, task_state;
3932
3933 KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid));
3934 __kmp_assert_valid_gtid(global_tid);
3935
3936 th = __kmp_thread_from_gtid(global_tid);
3937 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3938
3939 packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
3940
3941 // this barrier should be visible to a customer and to the threading profile
3942 // tool (it's a terminating barrier on constructs if NOWAIT not specified)
3943 OMPT_REDUCTION_DECL(th, global_tid);
3944
3945 if (packed_reduction_method == critical_reduce_block) {
3946 __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
3947
3948 OMPT_REDUCTION_END;
3949
3950// TODO: implicit barrier: should be exposed
3951#if OMPT_SUPPORT
3952 ompt_frame_t *ompt_frame;
3953 if (ompt_enabled.enabled) {
3954 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3955 if (ompt_frame->enter_frame.ptr == NULL)
3956 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3957 }
3958 OMPT_STORE_RETURN_ADDRESS(global_tid);
3959#endif
3960#if USE_ITT_NOTIFY
3961 __kmp_threads[global_tid]->th.th_ident = loc;
3962#endif
3963 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3964#if OMPT_SUPPORT && OMPT_OPTIONAL
3965 if (ompt_enabled.enabled) {
3966 ompt_frame->enter_frame = ompt_data_none;
3967 }
3968#endif
3969
3970 } else if (packed_reduction_method == empty_reduce_block) {
3971
3972 OMPT_REDUCTION_END;
3973
3974// usage: if team size==1, no synchronization is required (Intel platforms only)
3975
3976// TODO: implicit barrier: should be exposed
3977#if OMPT_SUPPORT
3978 ompt_frame_t *ompt_frame;
3979 if (ompt_enabled.enabled) {
3980 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3981 if (ompt_frame->enter_frame.ptr == NULL)
3982 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3983 }
3984 OMPT_STORE_RETURN_ADDRESS(global_tid);
3985#endif
3986#if USE_ITT_NOTIFY
3987 __kmp_threads[global_tid]->th.th_ident = loc;
3988#endif
3989 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3990#if OMPT_SUPPORT && OMPT_OPTIONAL
3991 if (ompt_enabled.enabled) {
3992 ompt_frame->enter_frame = ompt_data_none;
3993 }
3994#endif
3995
3996 } else if (packed_reduction_method == atomic_reduce_block) {
3997
3998#if OMPT_SUPPORT
3999 ompt_frame_t *ompt_frame;
4000 if (ompt_enabled.enabled) {
4001 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
4002 if (ompt_frame->enter_frame.ptr == NULL)
4003 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
4004 }
4005 OMPT_STORE_RETURN_ADDRESS(global_tid);
4006#endif
4007// TODO: implicit barrier: should be exposed
4008#if USE_ITT_NOTIFY
4009 __kmp_threads[global_tid]->th.th_ident = loc;
4010#endif
4011 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
4012#if OMPT_SUPPORT && OMPT_OPTIONAL
4013 if (ompt_enabled.enabled) {
4014 ompt_frame->enter_frame = ompt_data_none;
4015 }
4016#endif
4017
4018 } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
4019 tree_reduce_block)) {
4020
4021 // only primary thread executes here (primary releases all other workers)
4022 __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
4023 global_tid);
4024
4025 } else {
4026
4027 // should never reach this block
4028 KMP_ASSERT(0); // "unexpected method"
4029 }
4030 if (teams_swapped) {
4031 __kmp_restore_swapped_teams(th, team, task_state);
4032 }
4033
4034 if (__kmp_env_consistency_check)
4035 __kmp_pop_sync(global_tid, ct_reduce, loc);
4036
4037 KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n",
4038 global_tid, packed_reduction_method));
4039
4040 return;
4041}
4042
4043#undef __KMP_GET_REDUCTION_METHOD
4044#undef __KMP_SET_REDUCTION_METHOD
4045
4046/* end of interface to fast scalable reduce routines */
4047
4048kmp_uint64 __kmpc_get_taskid() {
4049
4050 kmp_int32 gtid;
4051 kmp_info_t *thread;
4052
4053 gtid = __kmp_get_gtid();
4054 if (gtid < 0) {
4055 return 0;
4056 }
4057 thread = __kmp_thread_from_gtid(gtid);
4058 return thread->th.th_current_task->td_task_id;
4059
4060} // __kmpc_get_taskid
4061
4062kmp_uint64 __kmpc_get_parent_taskid() {
4063
4064 kmp_int32 gtid;
4065 kmp_info_t *thread;
4066 kmp_taskdata_t *parent_task;
4067
4068 gtid = __kmp_get_gtid();
4069 if (gtid < 0) {
4070 return 0;
4071 }
4072 thread = __kmp_thread_from_gtid(gtid);
4073 parent_task = thread->th.th_current_task->td_parent;
4074 return (parent_task == NULL ? 0 : parent_task->td_task_id);
4075
4076} // __kmpc_get_parent_taskid
4077
4089void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims,
4090 const struct kmp_dim *dims) {
4091 __kmp_assert_valid_gtid(gtid);
4092 int j, idx;
4093 kmp_int64 last, trace_count;
4094 kmp_info_t *th = __kmp_threads[gtid];
4095 kmp_team_t *team = th->th.th_team;
4096 kmp_uint32 *flags;
4097 kmp_disp_t *pr_buf = th->th.th_dispatch;
4098 dispatch_shared_info_t *sh_buf;
4099
4100 KA_TRACE(
4101 20,
4102 ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n",
4103 gtid, num_dims, !team->t.t_serialized));
4104 KMP_DEBUG_ASSERT(dims != NULL);
4105 KMP_DEBUG_ASSERT(num_dims > 0);
4106
4107 if (team->t.t_serialized) {
4108 KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n"));
4109 return; // no dependencies if team is serialized
4110 }
4111 KMP_DEBUG_ASSERT(team->t.t_nproc > 1);
4112 idx = pr_buf->th_doacross_buf_idx++; // Increment index of shared buffer for
4113 // the next loop
4114 sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
4115
4116 // Save bounds info into allocated private buffer
4117 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL);
4118 pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc(
4119 th, sizeof(kmp_int64) * (4 * num_dims + 1));
4120 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4121 pr_buf->th_doacross_info[0] =
4122 (kmp_int64)num_dims; // first element is number of dimensions
4123 // Save also address of num_done in order to access it later without knowing
4124 // the buffer index
4125 pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done;
4126 pr_buf->th_doacross_info[2] = dims[0].lo;
4127 pr_buf->th_doacross_info[3] = dims[0].up;
4128 pr_buf->th_doacross_info[4] = dims[0].st;
4129 last = 5;
4130 for (j = 1; j < num_dims; ++j) {
4131 kmp_int64
4132 range_length; // To keep ranges of all dimensions but the first dims[0]
4133 if (dims[j].st == 1) { // most common case
4134 // AC: should we care of ranges bigger than LLONG_MAX? (not for now)
4135 range_length = dims[j].up - dims[j].lo + 1;
4136 } else {
4137 if (dims[j].st > 0) {
4138 KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo);
4139 range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1;
4140 } else { // negative increment
4141 KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up);
4142 range_length =
4143 (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1;
4144 }
4145 }
4146 pr_buf->th_doacross_info[last++] = range_length;
4147 pr_buf->th_doacross_info[last++] = dims[j].lo;
4148 pr_buf->th_doacross_info[last++] = dims[j].up;
4149 pr_buf->th_doacross_info[last++] = dims[j].st;
4150 }
4151
4152 // Compute total trip count.
4153 // Start with range of dims[0] which we don't need to keep in the buffer.
4154 if (dims[0].st == 1) { // most common case
4155 trace_count = dims[0].up - dims[0].lo + 1;
4156 } else if (dims[0].st > 0) {
4157 KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo);
4158 trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1;
4159 } else { // negative increment
4160 KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up);
4161 trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1;
4162 }
4163 for (j = 1; j < num_dims; ++j) {
4164 trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges
4165 }
4166 KMP_DEBUG_ASSERT(trace_count > 0);
4167
4168 // Check if shared buffer is not occupied by other loop (idx -
4169 // __kmp_dispatch_num_buffers)
4170 if (idx != sh_buf->doacross_buf_idx) {
4171 // Shared buffer is occupied, wait for it to be free
4172 __kmp_wait_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx,
4173 __kmp_eq_4, NULL);
4174 }
4175#if KMP_32_BIT_ARCH
4176 // Check if we are the first thread. After the CAS the first thread gets 0,
4177 // others get 1 if initialization is in progress, allocated pointer otherwise.
4178 // Treat pointer as volatile integer (value 0 or 1) until memory is allocated.
4179 flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32(
4180 (volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1);
4181#else
4182 flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64(
4183 (volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL);
4184#endif
4185 if (flags == NULL) {
4186 // we are the first thread, allocate the array of flags
4187 size_t size =
4188 (size_t)trace_count / 8 + 8; // in bytes, use single bit per iteration
4189 flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1);
4190 KMP_MB();
4191 sh_buf->doacross_flags = flags;
4192 } else if (flags == (kmp_uint32 *)1) {
4193#if KMP_32_BIT_ARCH
4194 // initialization is still in progress, need to wait
4195 while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1)
4196#else
4197 while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL)
4198#endif
4199 KMP_YIELD(TRUE);
4200 KMP_MB();
4201 } else {
4202 KMP_MB();
4203 }
4204 KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1); // check ptr value
4205 pr_buf->th_doacross_flags =
4206 sh_buf->doacross_flags; // save private copy in order to not
4207 // touch shared buffer on each iteration
4208 KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid));
4209}
4210
4211void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) {
4212 __kmp_assert_valid_gtid(gtid);
4213 kmp_int64 shft;
4214 size_t num_dims, i;
4215 kmp_uint32 flag;
4216 kmp_int64 iter_number; // iteration number of "collapsed" loop nest
4217 kmp_info_t *th = __kmp_threads[gtid];
4218 kmp_team_t *team = th->th.th_team;
4219 kmp_disp_t *pr_buf;
4220 kmp_int64 lo, up, st;
4221
4222 KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid));
4223 if (team->t.t_serialized) {
4224 KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n"));
4225 return; // no dependencies if team is serialized
4226 }
4227
4228 // calculate sequential iteration number and check out-of-bounds condition
4229 pr_buf = th->th.th_dispatch;
4230 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4231 num_dims = (size_t)pr_buf->th_doacross_info[0];
4232 lo = pr_buf->th_doacross_info[2];
4233 up = pr_buf->th_doacross_info[3];
4234 st = pr_buf->th_doacross_info[4];
4235#if OMPT_SUPPORT && OMPT_OPTIONAL
4236 ompt_dependence_t deps[num_dims];
4237#endif
4238 if (st == 1) { // most common case
4239 if (vec[0] < lo || vec[0] > up) {
4240 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4241 "bounds [%lld,%lld]\n",
4242 gtid, vec[0], lo, up));
4243 return;
4244 }
4245 iter_number = vec[0] - lo;
4246 } else if (st > 0) {
4247 if (vec[0] < lo || vec[0] > up) {
4248 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4249 "bounds [%lld,%lld]\n",
4250 gtid, vec[0], lo, up));
4251 return;
4252 }
4253 iter_number = (kmp_uint64)(vec[0] - lo) / st;
4254 } else { // negative increment
4255 if (vec[0] > lo || vec[0] < up) {
4256 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4257 "bounds [%lld,%lld]\n",
4258 gtid, vec[0], lo, up));
4259 return;
4260 }
4261 iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
4262 }
4263#if OMPT_SUPPORT && OMPT_OPTIONAL
4264 deps[0].variable.value = iter_number;
4265 deps[0].dependence_type = ompt_dependence_type_sink;
4266#endif
4267 for (i = 1; i < num_dims; ++i) {
4268 kmp_int64 iter, ln;
4269 size_t j = i * 4;
4270 ln = pr_buf->th_doacross_info[j + 1];
4271 lo = pr_buf->th_doacross_info[j + 2];
4272 up = pr_buf->th_doacross_info[j + 3];
4273 st = pr_buf->th_doacross_info[j + 4];
4274 if (st == 1) {
4275 if (vec[i] < lo || vec[i] > up) {
4276 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4277 "bounds [%lld,%lld]\n",
4278 gtid, vec[i], lo, up));
4279 return;
4280 }
4281 iter = vec[i] - lo;
4282 } else if (st > 0) {
4283 if (vec[i] < lo || vec[i] > up) {
4284 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4285 "bounds [%lld,%lld]\n",
4286 gtid, vec[i], lo, up));
4287 return;
4288 }
4289 iter = (kmp_uint64)(vec[i] - lo) / st;
4290 } else { // st < 0
4291 if (vec[i] > lo || vec[i] < up) {
4292 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4293 "bounds [%lld,%lld]\n",
4294 gtid, vec[i], lo, up));
4295 return;
4296 }
4297 iter = (kmp_uint64)(lo - vec[i]) / (-st);
4298 }
4299 iter_number = iter + ln * iter_number;
4300#if OMPT_SUPPORT && OMPT_OPTIONAL
4301 deps[i].variable.value = iter;
4302 deps[i].dependence_type = ompt_dependence_type_sink;
4303#endif
4304 }
4305 shft = iter_number % 32; // use 32-bit granularity
4306 iter_number >>= 5; // divided by 32
4307 flag = 1 << shft;
4308 while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) {
4309 KMP_YIELD(TRUE);
4310 }
4311 KMP_MB();
4312#if OMPT_SUPPORT && OMPT_OPTIONAL
4313 if (ompt_enabled.ompt_callback_dependences) {
4314 ompt_callbacks.ompt_callback(ompt_callback_dependences)(
4315 &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims);
4316 }
4317#endif
4318 KA_TRACE(20,
4319 ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n",
4320 gtid, (iter_number << 5) + shft));
4321}
4322
4323void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) {
4324 __kmp_assert_valid_gtid(gtid);
4325 kmp_int64 shft;
4326 size_t num_dims, i;
4327 kmp_uint32 flag;
4328 kmp_int64 iter_number; // iteration number of "collapsed" loop nest
4329 kmp_info_t *th = __kmp_threads[gtid];
4330 kmp_team_t *team = th->th.th_team;
4331 kmp_disp_t *pr_buf;
4332 kmp_int64 lo, st;
4333
4334 KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid));
4335 if (team->t.t_serialized) {
4336 KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n"));
4337 return; // no dependencies if team is serialized
4338 }
4339
4340 // calculate sequential iteration number (same as in "wait" but no
4341 // out-of-bounds checks)
4342 pr_buf = th->th.th_dispatch;
4343 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4344 num_dims = (size_t)pr_buf->th_doacross_info[0];
4345 lo = pr_buf->th_doacross_info[2];
4346 st = pr_buf->th_doacross_info[4];
4347#if OMPT_SUPPORT && OMPT_OPTIONAL
4348 ompt_dependence_t deps[num_dims];
4349#endif
4350 if (st == 1) { // most common case
4351 iter_number = vec[0] - lo;
4352 } else if (st > 0) {
4353 iter_number = (kmp_uint64)(vec[0] - lo) / st;
4354 } else { // negative increment
4355 iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
4356 }
4357#if OMPT_SUPPORT && OMPT_OPTIONAL
4358 deps[0].variable.value = iter_number;
4359 deps[0].dependence_type = ompt_dependence_type_source;
4360#endif
4361 for (i = 1; i < num_dims; ++i) {
4362 kmp_int64 iter, ln;
4363 size_t j = i * 4;
4364 ln = pr_buf->th_doacross_info[j + 1];
4365 lo = pr_buf->th_doacross_info[j + 2];
4366 st = pr_buf->th_doacross_info[j + 4];
4367 if (st == 1) {
4368 iter = vec[i] - lo;
4369 } else if (st > 0) {
4370 iter = (kmp_uint64)(vec[i] - lo) / st;
4371 } else { // st < 0
4372 iter = (kmp_uint64)(lo - vec[i]) / (-st);
4373 }
4374 iter_number = iter + ln * iter_number;
4375#if OMPT_SUPPORT && OMPT_OPTIONAL
4376 deps[i].variable.value = iter;
4377 deps[i].dependence_type = ompt_dependence_type_source;
4378#endif
4379 }
4380#if OMPT_SUPPORT && OMPT_OPTIONAL
4381 if (ompt_enabled.ompt_callback_dependences) {
4382 ompt_callbacks.ompt_callback(ompt_callback_dependences)(
4383 &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims);
4384 }
4385#endif
4386 shft = iter_number % 32; // use 32-bit granularity
4387 iter_number >>= 5; // divided by 32
4388 flag = 1 << shft;
4389 KMP_MB();
4390 if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0)
4391 KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag);
4392 KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid,
4393 (iter_number << 5) + shft));
4394}
4395
4396void __kmpc_doacross_fini(ident_t *loc, int gtid) {
4397 __kmp_assert_valid_gtid(gtid);
4398 kmp_int32 num_done;
4399 kmp_info_t *th = __kmp_threads[gtid];
4400 kmp_team_t *team = th->th.th_team;
4401 kmp_disp_t *pr_buf = th->th.th_dispatch;
4402
4403 KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid));
4404 if (team->t.t_serialized) {
4405 KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team));
4406 return; // nothing to do
4407 }
4408 num_done =
4409 KMP_TEST_THEN_INC32((kmp_uintptr_t)(pr_buf->th_doacross_info[1])) + 1;
4410 if (num_done == th->th.th_team_nproc) {
4411 // we are the last thread, need to free shared resources
4412 int idx = pr_buf->th_doacross_buf_idx - 1;
4413 dispatch_shared_info_t *sh_buf =
4414 &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
4415 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] ==
4416 (kmp_int64)&sh_buf->doacross_num_done);
4417 KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done);
4418 KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx);
4419 __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags));
4420 sh_buf->doacross_flags = NULL;
4421 sh_buf->doacross_num_done = 0;
4422 sh_buf->doacross_buf_idx +=
4423 __kmp_dispatch_num_buffers; // free buffer for future re-use
4424 }
4425 // free private resources (need to keep buffer index forever)
4426 pr_buf->th_doacross_flags = NULL;
4427 __kmp_thread_free(th, (void *)pr_buf->th_doacross_info);
4428 pr_buf->th_doacross_info = NULL;
4429 KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid));
4430}
4431
4432/* OpenMP 5.1 Memory Management routines */
4433void *omp_alloc(size_t size, omp_allocator_handle_t allocator) {
4434 return __kmp_alloc(__kmp_entry_gtid(), 0, size, allocator);
4435}
4436
4437void *omp_aligned_alloc(size_t align, size_t size,
4438 omp_allocator_handle_t allocator) {
4439 return __kmp_alloc(__kmp_entry_gtid(), align, size, allocator);
4440}
4441
4442void *omp_calloc(size_t nmemb, size_t size, omp_allocator_handle_t allocator) {
4443 return __kmp_calloc(__kmp_entry_gtid(), 0, nmemb, size, allocator);
4444}
4445
4446void *omp_aligned_calloc(size_t align, size_t nmemb, size_t size,
4447 omp_allocator_handle_t allocator) {
4448 return __kmp_calloc(__kmp_entry_gtid(), align, nmemb, size, allocator);
4449}
4450
4451void *omp_realloc(void *ptr, size_t size, omp_allocator_handle_t allocator,
4452 omp_allocator_handle_t free_allocator) {
4453 return __kmp_realloc(__kmp_entry_gtid(), ptr, size, allocator,
4454 free_allocator);
4455}
4456
4457void omp_free(void *ptr, omp_allocator_handle_t allocator) {
4458 ___kmpc_free(__kmp_entry_gtid(), ptr, allocator);
4459}
4460/* end of OpenMP 5.1 Memory Management routines */
4461
4462int __kmpc_get_target_offload(void) {
4463 if (!__kmp_init_serial) {
4464 __kmp_serial_initialize();
4465 }
4466 return __kmp_target_offload;
4467}
4468
4469int __kmpc_pause_resource(kmp_pause_status_t level) {
4470 if (!__kmp_init_serial) {
4471 return 1; // Can't pause if runtime is not initialized
4472 }
4473 return __kmp_pause_resource(level);
4474}
4475
4476void __kmpc_error(ident_t *loc, int severity, const char *message) {
4477 if (!__kmp_init_serial)
4478 __kmp_serial_initialize();
4479
4480 KMP_ASSERT(severity == severity_warning || severity == severity_fatal);
4481
4482#if OMPT_SUPPORT
4483 if (ompt_enabled.enabled && ompt_enabled.ompt_callback_error) {
4484 ompt_callbacks.ompt_callback(ompt_callback_error)(
4485 (ompt_severity_t)severity, message, KMP_STRLEN(message),
4486 OMPT_GET_RETURN_ADDRESS(0));
4487 }
4488#endif // OMPT_SUPPORT
4489
4490 char *src_loc;
4491 if (loc && loc->psource) {
4492 kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, false);
4493 src_loc =
4494 __kmp_str_format("%s:%d:%d", str_loc.file, str_loc.line, str_loc.col);
4495 __kmp_str_loc_free(&str_loc);
4496 } else {
4497 src_loc = __kmp_str_format("unknown");
4498 }
4499
4500 if (severity == severity_warning)
4501 KMP_WARNING(UserDirectedWarning, src_loc, message);
4502 else
4503 KMP_FATAL(UserDirectedError, src_loc, message);
4504
4505 __kmp_str_free(&src_loc);
4506}
4507
4508// Mark begin of scope directive.
4509void __kmpc_scope(ident_t *loc, kmp_int32 gtid, void *reserved) {
4510// reserved is for extension of scope directive and not used.
4511#if OMPT_SUPPORT && OMPT_OPTIONAL
4512 if (ompt_enabled.enabled && ompt_enabled.ompt_callback_work) {
4513 kmp_team_t *team = __kmp_threads[gtid]->th.th_team;
4514 int tid = __kmp_tid_from_gtid(gtid);
4515 ompt_callbacks.ompt_callback(ompt_callback_work)(
4516 ompt_work_scope, ompt_scope_begin,
4517 &(team->t.ompt_team_info.parallel_data),
4518 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
4519 OMPT_GET_RETURN_ADDRESS(0));
4520 }
4521#endif // OMPT_SUPPORT && OMPT_OPTIONAL
4522}
4523
4524// Mark end of scope directive
4525void __kmpc_end_scope(ident_t *loc, kmp_int32 gtid, void *reserved) {
4526// reserved is for extension of scope directive and not used.
4527#if OMPT_SUPPORT && OMPT_OPTIONAL
4528 if (ompt_enabled.enabled && ompt_enabled.ompt_callback_work) {
4529 kmp_team_t *team = __kmp_threads[gtid]->th.th_team;
4530 int tid = __kmp_tid_from_gtid(gtid);
4531 ompt_callbacks.ompt_callback(ompt_callback_work)(
4532 ompt_work_scope, ompt_scope_end,
4533 &(team->t.ompt_team_info.parallel_data),
4534 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
4535 OMPT_GET_RETURN_ADDRESS(0));
4536 }
4537#endif // OMPT_SUPPORT && OMPT_OPTIONAL
4538}
4539
4540#ifdef KMP_USE_VERSION_SYMBOLS
4541// For GOMP compatibility there are two versions of each omp_* API.
4542// One is the plain C symbol and one is the Fortran symbol with an appended
4543// underscore. When we implement a specific ompc_* version of an omp_*
4544// function, we want the plain GOMP versioned symbol to alias the ompc_* version
4545// instead of the Fortran versions in kmp_ftn_entry.h
4546extern "C" {
4547// Have to undef these from omp.h so they aren't translated into
4548// their ompc counterparts in the KMP_VERSION_OMPC_SYMBOL macros below
4549#ifdef omp_set_affinity_format
4550#undef omp_set_affinity_format
4551#endif
4552#ifdef omp_get_affinity_format
4553#undef omp_get_affinity_format
4554#endif
4555#ifdef omp_display_affinity
4556#undef omp_display_affinity
4557#endif
4558#ifdef omp_capture_affinity
4559#undef omp_capture_affinity
4560#endif
4561KMP_VERSION_OMPC_SYMBOL(ompc_set_affinity_format, omp_set_affinity_format, 50,
4562 "OMP_5.0");
4563KMP_VERSION_OMPC_SYMBOL(ompc_get_affinity_format, omp_get_affinity_format, 50,
4564 "OMP_5.0");
4565KMP_VERSION_OMPC_SYMBOL(ompc_display_affinity, omp_display_affinity, 50,
4566 "OMP_5.0");
4567KMP_VERSION_OMPC_SYMBOL(ompc_capture_affinity, omp_capture_affinity, 50,
4568 "OMP_5.0");
4569} // extern "C"
4570#endif
@ KMP_IDENT_WORK_LOOP
Definition kmp.h:226
@ KMP_IDENT_WORK_SECTIONS
Definition kmp.h:228
@ KMP_IDENT_AUTOPAR
Definition kmp.h:211
@ KMP_IDENT_WORK_DISTRIBUTE
Definition kmp.h:230
kmp_int32 __kmpc_ok_to_fork(ident_t *loc)
void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,...)
void __kmpc_fork_call_if(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, kmp_int32 cond, void *args)
void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_threads)
void __kmpc_set_thread_limit(ident_t *loc, kmp_int32 global_tid, kmp_int32 thread_limit)
void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid)
void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_teams, kmp_int32 num_threads)
void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,...)
void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid)
void(* kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
Definition kmp.h:1744
void __kmpc_push_num_teams_51(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_teams_lb, kmp_int32 num_teams_ub, kmp_int32 num_threads)
void __kmpc_begin(ident_t *loc, kmp_int32 flags)
void __kmpc_end(ident_t *loc)
#define KMP_COUNT_BLOCK(name)
Increments specified counter (name).
Definition kmp_stats.h:911
stats_state_e
the states which a thread can be in
Definition kmp_stats.h:63
void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, kmp_critical_name *lck)
void __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid)
kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid)
void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_critical_name *lck)
kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void(*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck)
void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid)
void __kmpc_flush(ident_t *loc)
kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid)
kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void(*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck)
void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *), kmp_int32 didit)
void * __kmpc_copyprivate_light(ident_t *loc, kmp_int32 gtid, void *cpy_data)
kmp_int32 __kmpc_global_num_threads(ident_t *loc)
kmp_int32 __kmpc_global_thread_num(ident_t *loc)
kmp_int32 __kmpc_in_parallel(ident_t *loc)
kmp_int32 __kmpc_bound_thread_num(ident_t *loc)
kmp_int32 __kmpc_bound_num_threads(ident_t *loc)
void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid)
void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, kmp_critical_name *crit)
void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid)
void __kmpc_end_masked(ident_t *loc, kmp_int32 global_tid)
kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid)
kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid)
void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims, const struct kmp_dim *dims)
void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid)
void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid)
void __kmpc_ordered(ident_t *loc, kmp_int32 gtid)
kmp_int32 __kmpc_masked(ident_t *loc, kmp_int32 global_tid, kmp_int32 filter)
void __kmpc_critical(ident_t *loc, kmp_int32 global_tid, kmp_critical_name *crit)
Definition kmp.h:246
char const * psource
Definition kmp.h:256
kmp_int32 flags
Definition kmp.h:248