@@ -123,6 +123,20 @@ const char *__kmp_hw_get_keyword(kmp_hw_t type, bool plural) {
   return ((plural) ? "unknowns" : "unknown");
 }
 
+const char *__kmp_hw_get_core_type_string(kmp_hw_core_type_t type) {
+  switch (type) {
+  case KMP_HW_CORE_TYPE_UNKNOWN:
+    return "unknown";
+#if KMP_ARCH_X86 || KMP_ARCH_X86_64
+  case KMP_HW_CORE_TYPE_ATOM:
+    return "Intel Atom(R) processor";
+  case KMP_HW_CORE_TYPE_CORE:
+    return "Intel(R) Core(TM) processor";
+#endif
+  }
+  return "unknown";
+}
+
 ////////////////////////////////////////////////////////////////////////////////
 // kmp_hw_thread_t methods
 int kmp_hw_thread_t::compare_ids(const void *a, const void *b) {
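For reference, the kmp_hw_core_type_t enumeration consumed by this helper is declared elsewhere in the runtime and is not part of this diff. A plausible sketch follows; on x86 the enumerator values are assumed to mirror the core-type IDs that CPUID leaf 0x1A reports (0x20 for Atom, 0x40 for Core), which is what lets the detection code later in this patch cast the CPUID field straight to the enum.

// Sketch only: the actual declaration lives in the runtime headers and
// may differ. Values are assumed to mirror the core-type IDs reported
// in CPUID.(EAX=1AH):EAX[31:24].
enum kmp_hw_core_type_t {
  KMP_HW_CORE_TYPE_UNKNOWN = 0x0,
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
  KMP_HW_CORE_TYPE_ATOM = 0x20, // efficient (Atom) cores
  KMP_HW_CORE_TYPE_CORE = 0x40, // performance (Core) cores
#endif
};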
@@ -174,6 +188,9 @@ void kmp_hw_thread_t::print() const {
   for (int i = 0; i < depth; ++i) {
     printf("%4d ", ids[i]);
   }
+  if (core_type != KMP_HW_CORE_TYPE_UNKNOWN) {
+    printf(" (%s)", __kmp_hw_get_core_type_string(core_type));
+  }
   printf("\n");
 }
 
@@ -298,13 +315,20 @@ void kmp_topology_t::_set_last_level_cache() {
 void kmp_topology_t::_gather_enumeration_information() {
   int previous_id[KMP_HW_LAST];
   int max[KMP_HW_LAST];
+  int previous_core_id = kmp_hw_thread_t::UNKNOWN_ID;
 
   for (int i = 0; i < depth; ++i) {
     previous_id[i] = kmp_hw_thread_t::UNKNOWN_ID;
     max[i] = 0;
     count[i] = 0;
     ratio[i] = 0;
   }
+  if (__kmp_is_hybrid_cpu()) {
+    for (int i = 0; i < KMP_HW_MAX_NUM_CORE_TYPES; ++i) {
+      core_types_count[i] = 0;
+      core_types[i] = KMP_HW_CORE_TYPE_UNKNOWN;
+    }
+  }
   for (int i = 0; i < num_hw_threads; ++i) {
     kmp_hw_thread_t &hw_thread = hw_threads[i];
     for (int layer = 0; layer < depth; ++layer) {
@@ -326,6 +350,15 @@ void kmp_topology_t::_gather_enumeration_information() {
     for (int layer = 0; layer < depth; ++layer) {
       previous_id[layer] = hw_thread.ids[layer];
     }
+    // Figure out the number of each core type for hybrid CPUs
+    if (__kmp_is_hybrid_cpu()) {
+      int core_level = get_level(KMP_HW_CORE);
+      if (core_level != -1) {
+        if (hw_thread.ids[core_level] != previous_core_id)
+          _increment_core_type(hw_thread.core_type);
+        previous_core_id = hw_thread.ids[core_level];
+      }
+    }
   }
   for (int layer = 0; layer < depth; ++layer) {
     if (max[layer] > ratio[layer])
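The counting above relies on hw_threads being sorted so that hardware threads sharing a core are adjacent; _increment_core_type then fires once per physical core rather than once per hardware thread. A self-contained sketch of the same idea, with illustrative names that are not the runtime's:

// Standalone sketch of the per-core counting idea (names are
// illustrative, not the runtime's). Entries are assumed sorted so that
// threads sharing a core id are adjacent; the type counter is bumped
// once per core, not once per hardware thread.
#include <cstdio>

enum core_type_t { TYPE_UNKNOWN, TYPE_EFFICIENT, TYPE_PERFORMANCE };

struct hw_thread_t {
  int core_id;
  core_type_t core_type;
};

int main() {
  // Two "performance" cores with two threads each, plus two
  // single-threaded "efficient" cores.
  hw_thread_t threads[] = {{0, TYPE_PERFORMANCE}, {0, TYPE_PERFORMANCE},
                           {1, TYPE_PERFORMANCE}, {1, TYPE_PERFORMANCE},
                           {2, TYPE_EFFICIENT},   {3, TYPE_EFFICIENT}};
  int counts[3] = {0, 0, 0};
  int previous_core_id = -1;
  for (const hw_thread_t &t : threads) {
    if (t.core_id != previous_core_id) // new core: count it once
      counts[t.core_type]++;
    previous_core_id = t.core_id;
  }
  std::printf("%d performance cores, %d efficient cores\n",
              counts[TYPE_PERFORMANCE], counts[TYPE_EFFICIENT]);
  // Prints: 2 performance cores, 2 efficient cores
}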
@@ -478,6 +511,19 @@ void kmp_topology_t::dump() const {
   }
   printf("\n");
 
+  printf("* core_types:\n");
+  for (int i = 0; i < KMP_HW_MAX_NUM_CORE_TYPES; ++i) {
+    if (core_types[i] != KMP_HW_CORE_TYPE_UNKNOWN) {
+      printf("    %d %s core%c\n", core_types_count[i],
+             __kmp_hw_get_core_type_string(core_types[i]),
+             ((core_types_count[i] > 1) ? 's' : ' '));
+    } else {
+      if (i == 0)
+        printf("    No hybrid information available\n");
+      break;
+    }
+  }
+
   printf("* equivalent map:\n");
   KMP_FOREACH_HW_TYPE(i) {
     const char *key = __kmp_hw_get_keyword(i);
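Given the printf format above, on a hypothetical hybrid part with eight cores of each type the new dump section would read something like:

* core_types:
    8 Intel(R) Core(TM) processor cores
    8 Intel Atom(R) processor cores

On a non-hybrid machine core_types[0] remains KMP_HW_CORE_TYPE_UNKNOWN, so only the "No hybrid information available" line is printed.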
@@ -571,6 +617,15 @@ void kmp_topology_t::print(const char *env_var) const {
   }
   KMP_INFORM(TopologyGeneric, env_var, buf.str, ncores);
 
+  if (__kmp_is_hybrid_cpu()) {
+    for (int i = 0; i < KMP_HW_MAX_NUM_CORE_TYPES; ++i) {
+      if (core_types[i] == KMP_HW_CORE_TYPE_UNKNOWN)
+        break;
+      KMP_INFORM(TopologyHybrid, env_var, core_types_count[i],
+                 __kmp_hw_get_core_type_string(core_types[i]));
+    }
+  }
+
   if (num_hw_threads <= 0) {
     __kmp_str_buf_free(&buf);
     return;
@@ -585,6 +640,9 @@ void kmp_topology_t::print(const char *env_var) const {
       __kmp_str_buf_print(&buf, "%s ", __kmp_hw_get_catalog_string(type));
       __kmp_str_buf_print(&buf, "%d ", hw_threads[i].ids[level]);
     }
+    if (__kmp_is_hybrid_cpu())
+      __kmp_str_buf_print(
+          &buf, "(%s)", __kmp_hw_get_core_type_string(hw_threads[i].core_type));
     KMP_INFORM(OSProcMapToPack, env_var, hw_threads[i].os_id, buf.str);
   }
 
@@ -1782,6 +1840,16 @@ static bool __kmp_affinity_create_apicid_map(kmp_i18n_id_t *const msg_id) {
   return true;
 }
 
+// Hybrid cpu detection using CPUID.1A
+// Thread should be pinned to processor already
+static void __kmp_get_hybrid_info(kmp_hw_core_type_t *type,
+                                  unsigned *native_model_id) {
+  kmp_cpuid buf;
+  __kmp_x86_cpuid(0x1a, 0, &buf);
+  *type = (kmp_hw_core_type_t)__kmp_extract_bits<24, 31>(buf.eax);
+  *native_model_id = __kmp_extract_bits<0, 23>(buf.eax);
+}
+
 // Intel(R) microarchitecture code name Nehalem, Dunnington and later
 // architectures support a newer interface for specifying the x2APIC Ids,
 // based on CPUID.B or CPUID.1F
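__kmp_extract_bits is defined elsewhere in the runtime and is not shown in this diff; assuming it returns bits [LOW, HIGH] of its argument, inclusive, a minimal sketch would be the following. Per the Intel SDM, CPUID.(EAX=1AH):EAX reports the core type in bits 31:24 (0x20 = Atom, 0x40 = Core) and the native model ID in bits 23:0, which is exactly how __kmp_get_hybrid_info splits the register above.

// Minimal sketch of a bit-field extractor with the assumed semantics of
// __kmp_extract_bits: return bits [LOW, HIGH] of v, inclusive, shifted
// down to bit 0.
template <unsigned LOW, unsigned HIGH>
static inline unsigned extract_bits(unsigned v) {
  static_assert(LOW <= HIGH && HIGH < 32, "invalid bit range");
  const unsigned width = HIGH - LOW + 1;
  const unsigned mask = (width == 32) ? ~0u : ((1u << width) - 1u);
  return (v >> LOW) & mask;
}

// e.g. extract_bits<24, 31>(0x40000012u) == 0x40, a "Core" core type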
@@ -2051,6 +2119,13 @@ static bool __kmp_affinity_create_x2apicid_map(kmp_i18n_id_t *const msg_id) {
         hw_thread.ids[idx] >>= my_levels[j - 1].mask_width;
       }
     }
+    // Hybrid information
+    if (__kmp_is_hybrid_cpu() && highest_leaf >= 0x1a) {
+      kmp_hw_core_type_t type;
+      unsigned native_model_id;
+      __kmp_get_hybrid_info(&type, &native_model_id);
+      hw_thread.core_type = type;
+    }
     hw_thread_index++;
   }
   KMP_ASSERT(hw_thread_index > 0);
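Both guards in the last hunk, __kmp_is_hybrid_cpu() and highest_leaf, are defined elsewhere in the runtime; a sketch of the CPUID queries they presumably correspond to, for GCC/Clang on x86, is below. Leaf 0 returns the highest supported standard leaf in EAX, and the hybrid flag is CPUID.(EAX=07H,ECX=0):EDX bit 15 per the Intel SDM; the highest_leaf >= 0x1a check ensures leaf 0x1A is actually implemented before it is queried.

// Sketch only: the runtime's own helpers may be implemented differently.
#include <cpuid.h>

static unsigned highest_standard_leaf() {
  unsigned eax, ebx, ecx, edx;
  __cpuid(0, eax, ebx, ecx, edx);
  return eax; // maximum input value for basic CPUID information
}

static bool is_hybrid_cpu() {
  unsigned eax, ebx, ecx, edx;
  if (highest_standard_leaf() < 0x7)
    return false;
  __cpuid_count(0x7, 0, eax, ebx, ecx, edx);
  return (edx >> 15) & 1; // CPUID.(EAX=07H,ECX=0):EDX[15] hybrid flag
}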