[OpenMP][host runtime] Add initial hybrid CPU support

jpeyton52 · jpeyton52 · commit acb3b187c4c8 · 2021-10-14T16:49:42.000-05:00
Detect the different core types through CPUID.1A and show them to the user through the KMP_AFFINITY=verbose mechanism. Provide __kmp_is_hybrid_cpu() so that future runtime optimizations can know whether they are running on a hybrid system or not. Differential Revision: https://reviews.llvm.org/D110435
diff --git a/openmp/runtime/src/i18n/en_US.txt b/openmp/runtime/src/i18n/en_US.txt
@@ -360,6 +360,7 @@ OmptOutdatedWorkshare        "OMPT: Cannot determine workshare type; using the d
 OmpNoAllocator               "Allocator %1$s is not available, will use default allocator."
 TopologyGeneric              "%1$s: %2$s (%3$d total cores)"
 AffGranularityBad            "%1$s: granularity setting: %2$s does not exist in topology.  Using granularity=%3$s instead."
+TopologyHybrid               "%1$s: hybrid core type detected: %2$d %3$s cores."
 
 # --- OpenMP errors detected at runtime ---
 #
diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h
@@ -1222,7 +1222,8 @@ typedef struct kmp_cpuid {
 typedef struct kmp_cpuinfo_flags_t {
   unsigned sse2 : 1; // 0 if SSE2 instructions are not supported, 1 otherwise.
   unsigned rtm : 1; // 0 if RTM instructions are not supported, 1 otherwise.
-  unsigned reserved : 30; // Ensure size of 32 bits
+  unsigned hybrid : 1; // 0 if the CPU is not a hybrid CPU, 1 otherwise.
+  unsigned reserved : 29; // Ensure size of 32 bits
 } kmp_cpuinfo_flags_t;
 
 typedef struct kmp_cpuinfo {
@@ -2984,6 +2985,9 @@ extern int __kmp_storage_map_verbose_specified;
 
 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
 extern kmp_cpuinfo_t __kmp_cpuinfo;
+static inline bool __kmp_is_hybrid_cpu() { return __kmp_cpuinfo.flags.hybrid; }
+#else // non-x86 builds never report a hybrid CPU
+static inline bool __kmp_is_hybrid_cpu() { return false; }
 #endif
 
 extern volatile int __kmp_init_serial;
diff --git a/openmp/runtime/src/kmp_affinity.cpp b/openmp/runtime/src/kmp_affinity.cpp
@@ -123,6 +123,20 @@ const char *__kmp_hw_get_keyword(kmp_hw_t type, bool plural) {
   return ((plural) ? "unknowns" : "unknown");
 }
 
+const char *__kmp_hw_get_core_type_string(kmp_hw_core_type_t type) {
+  switch (type) {
+  case KMP_HW_CORE_TYPE_UNKNOWN:
+    return "unknown";
+#if KMP_ARCH_X86 || KMP_ARCH_X86_64
+  case KMP_HW_CORE_TYPE_ATOM:
+    return "Intel Atom(R) processor";
+  case KMP_HW_CORE_TYPE_CORE:
+    return "Intel(R) Core(TM) processor";
+#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
+  }
+  return "unknown"; // fallback for values not handled by the switch
+}
+
 ////////////////////////////////////////////////////////////////////////////////
 // kmp_hw_thread_t methods
 int kmp_hw_thread_t::compare_ids(const void *a, const void *b) {
@@ -174,6 +188,9 @@ void kmp_hw_thread_t::print() const {
   for (int i = 0; i < depth; ++i) {
     printf("%4d ", ids[i]);
   }
+  if (core_type != KMP_HW_CORE_TYPE_UNKNOWN) {
+    printf(" (%s)", __kmp_hw_get_core_type_string(core_type));
+  }
   printf("\n");
 }
 
@@ -298,13 +315,20 @@ void kmp_topology_t::_set_last_level_cache() {
 void kmp_topology_t::_gather_enumeration_information() {
   int previous_id[KMP_HW_LAST];
   int max[KMP_HW_LAST];
+  int previous_core_id = kmp_hw_thread_t::UNKNOWN_ID;
 
   for (int i = 0; i < depth; ++i) {
     previous_id[i] = kmp_hw_thread_t::UNKNOWN_ID;
     max[i] = 0;
     count[i] = 0;
     ratio[i] = 0;
   }
+  if (__kmp_is_hybrid_cpu()) {
+    for (int i = 0; i < KMP_HW_MAX_NUM_CORE_TYPES; ++i) {
+      core_types_count[i] = 0;
+      core_types[i] = KMP_HW_CORE_TYPE_UNKNOWN;
+    }
+  }
   for (int i = 0; i < num_hw_threads; ++i) {
     kmp_hw_thread_t &hw_thread = hw_threads[i];
     for (int layer = 0; layer < depth; ++layer) {
@@ -326,6 +350,15 @@ void kmp_topology_t::_gather_enumeration_information() {
     for (int layer = 0; layer < depth; ++layer) {
       previous_id[layer] = hw_thread.ids[layer];
     }
+    // Figure out the number of each core type for hybrid CPUs
+    if (__kmp_is_hybrid_cpu()) {
+      int core_level = get_level(KMP_HW_CORE);
+      if (core_level != -1) {
+        if (hw_thread.ids[core_level] != previous_core_id)
+          _increment_core_type(hw_thread.core_type);
+        previous_core_id = hw_thread.ids[core_level];
+      }
+    }
   }
   for (int layer = 0; layer < depth; ++layer) {
     if (max[layer] > ratio[layer])
@@ -478,6 +511,19 @@ void kmp_topology_t::dump() const {
   }
   printf("\n");
 
+  printf("* core_types:\n");
+  for (int i = 0; i < KMP_HW_MAX_NUM_CORE_TYPES; ++i) {
+    if (core_types[i] != KMP_HW_CORE_TYPE_UNKNOWN) {
+      printf("    %d %s core%c\n", core_types_count[i],
+             __kmp_hw_get_core_type_string(core_types[i]),
+             ((core_types_count[i] > 1) ? 's' : ' '));
+    } else {
+      if (i == 0)
+        printf("No hybrid information available\n");
+      break;
+    }
+  }
+
   printf("* equivalent map:\n");
   KMP_FOREACH_HW_TYPE(i) {
     const char *key = __kmp_hw_get_keyword(i);
@@ -571,6 +617,15 @@ void kmp_topology_t::print(const char *env_var) const {
   }
   KMP_INFORM(TopologyGeneric, env_var, buf.str, ncores);
 
+  if (__kmp_is_hybrid_cpu()) {
+    for (int i = 0; i < KMP_HW_MAX_NUM_CORE_TYPES; ++i) {
+      if (core_types[i] == KMP_HW_CORE_TYPE_UNKNOWN)
+        break;
+      KMP_INFORM(TopologyHybrid, env_var, core_types_count[i],
+                 __kmp_hw_get_core_type_string(core_types[i]));
+    }
+  }
+
   if (num_hw_threads <= 0) {
     __kmp_str_buf_free(&buf);
     return;
@@ -585,6 +640,9 @@ void kmp_topology_t::print(const char *env_var) const {
       __kmp_str_buf_print(&buf, "%s ", __kmp_hw_get_catalog_string(type));
       __kmp_str_buf_print(&buf, "%d ", hw_threads[i].ids[level]);
     }
+    if (__kmp_is_hybrid_cpu())
+      __kmp_str_buf_print(
+          &buf, "(%s)", __kmp_hw_get_core_type_string(hw_threads[i].core_type));
     KMP_INFORM(OSProcMapToPack, env_var, hw_threads[i].os_id, buf.str);
   }
 
@@ -1782,6 +1840,16 @@ static bool __kmp_affinity_create_apicid_map(kmp_i18n_id_t *const msg_id) {
   return true;
 }
 
+// Hybrid CPU detection using CPUID.1A (core type and native model id in EAX).
+// The calling thread should already be pinned to the processor being queried.
+static void __kmp_get_hybrid_info(kmp_hw_core_type_t *type,
+                                  unsigned *native_model_id) {
+  kmp_cpuid buf;
+  __kmp_x86_cpuid(0x1a, 0, &buf);
+  *type = (kmp_hw_core_type_t)__kmp_extract_bits<24, 31>(buf.eax);
+  *native_model_id = __kmp_extract_bits<0, 23>(buf.eax);
+}
+
 // Intel(R) microarchitecture code name Nehalem, Dunnington and later
 // architectures support a newer interface for specifying the x2APIC Ids,
 // based on CPUID.B or CPUID.1F
@@ -2051,6 +2119,13 @@ static bool __kmp_affinity_create_x2apicid_map(kmp_i18n_id_t *const msg_id) {
         hw_thread.ids[idx] >>= my_levels[j - 1].mask_width;
       }
     }
+    // Hybrid information
+    if (__kmp_is_hybrid_cpu() && highest_leaf >= 0x1a) {
+      kmp_hw_core_type_t type;
+      unsigned native_model_id;
+      __kmp_get_hybrid_info(&type, &native_model_id);
+      hw_thread.core_type = type;
+    }
     hw_thread_index++;
   }
   KMP_ASSERT(hw_thread_index > 0);
diff --git a/openmp/runtime/src/kmp_affinity.h b/openmp/runtime/src/kmp_affinity.h
@@ -598,6 +598,17 @@ class KMPNativeAffinity : public KMPAffinity {
 #endif /* KMP_OS_WINDOWS */
 #endif /* KMP_AFFINITY_SUPPORTED */
 
+typedef enum kmp_hw_core_type_t {
+  KMP_HW_CORE_TYPE_UNKNOWN = 0x0,
+#if KMP_ARCH_X86 || KMP_ARCH_X86_64
+  KMP_HW_CORE_TYPE_ATOM = 0x20, // ATOM/CORE are CPUID.1A EAX[31:24] values
+  KMP_HW_CORE_TYPE_CORE = 0x40,
+  KMP_HW_MAX_NUM_CORE_TYPES = 3, // array bound, not a core type
+#else
+  KMP_HW_MAX_NUM_CORE_TYPES = 1, // array bound, not a core type
+#endif
+} kmp_hw_core_type_t;
+
 class kmp_hw_thread_t {
 public:
   static const int UNKNOWN_ID = -1;
@@ -607,11 +618,14 @@ class kmp_hw_thread_t {
   int sub_ids[KMP_HW_LAST];
   bool leader;
   int os_id;
+  kmp_hw_core_type_t core_type;
+
   void print() const;
   void clear() {
     for (int i = 0; i < (int)KMP_HW_LAST; ++i)
       ids[i] = UNKNOWN_ID;
     leader = false;
+    core_type = KMP_HW_CORE_TYPE_UNKNOWN;
   }
 };
 
@@ -637,6 +651,11 @@ class kmp_topology_t {
   // Storage containing the absolute number of each topology layer
   int *count;
 
+  // Storage containing the core types and the number of
+  // each core type for hybrid processors
+  kmp_hw_core_type_t core_types[KMP_HW_MAX_NUM_CORE_TYPES];
+  int core_types_count[KMP_HW_MAX_NUM_CORE_TYPES];
+
   // The hardware threads array
   // hw_threads is num_hw_threads long
   // Each hw_thread's ids and sub_ids are depth deep
@@ -675,6 +694,20 @@ class kmp_topology_t {
   // Set the last level cache equivalent type
   void _set_last_level_cache();
 
+  // Increments the count for 'type'; a new type takes the first unused slot
+  void _increment_core_type(kmp_hw_core_type_t type) {
+    for (int i = 0; i < KMP_HW_MAX_NUM_CORE_TYPES; ++i) {
+      if (core_types[i] == KMP_HW_CORE_TYPE_UNKNOWN) {
+        core_types[i] = type;
+        core_types_count[i] = 1;
+        break;
+      } else if (core_types[i] == type) {
+        core_types_count[i]++;
+        break;
+      }
+    }
+  }
+
 public:
   // Force use of allocate()/deallocate()
   kmp_topology_t() = delete;
diff --git a/openmp/runtime/src/kmp_utility.cpp b/openmp/runtime/src/kmp_utility.cpp
@@ -248,13 +248,19 @@ void __kmp_query_cpuid(kmp_cpuinfo_t *p) {
     }
 #endif
     p->flags.rtm = 0;
+    p->flags.hybrid = 0;
     if (max_arg > 7) {
       /* RTM bit CPUID.07:EBX, bit 11 */
+      /* HYBRID bit CPUID.07:EDX, bit 15 */
       __kmp_x86_cpuid(7, 0, &buf);
       p->flags.rtm = (buf.ebx >> 11) & 1;
+      p->flags.hybrid = (buf.edx >> 15) & 1;
       if (p->flags.rtm) {
         KA_TRACE(trace_level, (" RTM"));
       }
+      if (p->flags.hybrid) {
+        KA_TRACE(trace_level, (" HYBRID"));
+      }
     }
   }
 

Original file line number	Diff line number	Diff line change
`@@ -360,6 +360,7 @@ OmptOutdatedWorkshare "OMPT: Cannot determine workshare type; using the d`
`360`	`360`	`OmpNoAllocator "Allocator %1$s is not available, will use default allocator."`
`361`	`361`	`TopologyGeneric "%1$s: %2$s (%3$d total cores)"`
`362`	`362`	`AffGranularityBad "%1$s: granularity setting: %2$s does not exist in topology. Using granularity=%3$s instead."`
	`363`	`+TopologyHybrid "%1$s: hybrid core type detected: %2$d %3$s cores."`
`363`	`364`
`364`	`365`	`# --- OpenMP errors detected at runtime ---`
`365`	`366`	`#`
Original file line number	Diff line number	Diff line change
`@@ -248,13 +248,19 @@ void __kmp_query_cpuid(kmp_cpuinfo_t *p) {`
`248`	`248`	`}`
`249`	`249`	`#endif`
`250`	`250`	`p->flags.rtm = 0;`
	`251`	`+ p->flags.hybrid = 0;`
`251`	`252`	`if (max_arg > 7) {`
`252`	`253`	`/* RTM bit CPUID.07:EBX, bit 11 */`
	`254`	`+ /* HYBRID bit CPUID.07:EDX, bit 15 */`
`253`	`255`	`__kmp_x86_cpuid(7, 0, &buf);`
`254`	`256`	`p->flags.rtm = (buf.ebx >> 11) & 1;`
	`257`	`+ p->flags.hybrid = (buf.edx >> 15) & 1;`
`255`	`258`	`if (p->flags.rtm) {`
`256`	`259`	`KA_TRACE(trace_level, (" RTM"));`
`257`	`260`	`}`
	`261`	`+ if (p->flags.hybrid) {`
	`262`	`+ KA_TRACE(trace_level, (" HYBRID"));`
	`263`	`+ }`
`258`	`264`	`}`
`259`	`265`	`}`
`260`	`266`