diff mbox

[gomp4] backport nvptx_exec cleanups

Message ID a6208784-172f-b48c-4d5e-450c57b9e630@codesourcery.com
State New
Headers show

Commit Message

Cesar Philippidis Nov. 7, 2016, 8:36 p.m. UTC
gomp-4_0-branch already contains the default nvptx runtime enhancements
that was recently applied to trunk
<https://gcc.gnu.org/ml/gcc-patches/2016-11/msg00252.html>. However, I
made some tweaks in trunk involving both pthread locking and formatting
changes, which this patch backports.

I've applied this patch to gomp-4_0-branch.

Cesar
diff mbox

Patch

2016-11-07  Cesar Philippidis  <cesar@codesourcery.com>

	libgomp/
	Backport from trunk
	2016-11-02  Cesar Philippidis  <cesar@codesourcery.com>
		    Nathan Sidwell  <nathan@acm.org>

	* plugin/plugin-nvptx.c (nvptx_exec): Interrogate board attributes
	to determine default geometry.

diff --git a/libgomp/plugin/plugin-nvptx.c b/libgomp/plugin/plugin-nvptx.c
index bd18418..2e7b020 100644
--- a/libgomp/plugin/plugin-nvptx.c
+++ b/libgomp/plugin/plugin-nvptx.c
@@ -910,10 +910,11 @@  nvptx_exec (void (*fn), size_t mapnum, void **hostaddrs, void **devaddrs,
 	 variable to specify runtime defaults. */
       static int default_dims[GOMP_DIM_MAX];
 
+      pthread_mutex_lock (&ptx_dev_lock);
       if (!default_dims[0])
 	{
 	  /* We only read the environment variable once.  You can't
-	     change it in the middle of execution.  The sytntax  is
+	     change it in the middle of execution.  The syntax  is
 	     the same as for the -fopenacc-dim compilation option.  */
 	  const char *env_var = getenv ("GOMP_OPENACC_DIM");
 	  if (env_var)
@@ -942,15 +943,17 @@  nvptx_exec (void (*fn), size_t mapnum, void **hostaddrs, void **devaddrs,
 	  CUdevice dev = nvptx_thread()->ptx_dev->dev;
 	  /* 32 is the default for known hardware.  */
 	  int gang = 0, worker = 32, vector = 32;
+	  CUdevice_attribute cu_tpb, cu_ws, cu_mpc, cu_tpm;
 
-	  if (CUDA_SUCCESS == cuDeviceGetAttribute
-	      (&block_size, CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK, dev)
-	      && CUDA_SUCCESS == cuDeviceGetAttribute
-	      (&warp_size, CU_DEVICE_ATTRIBUTE_WARP_SIZE, dev)
-	      && CUDA_SUCCESS == cuDeviceGetAttribute
-	      (&dev_size, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, dev)
-	      && CUDA_SUCCESS == cuDeviceGetAttribute
-	      (&cpu_size, CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR, dev))
+	  cu_tpb = CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK;
+	  cu_ws = CU_DEVICE_ATTRIBUTE_WARP_SIZE;
+	  cu_mpc = CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT;
+	  cu_tpm  = CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR;
+
+	  if (cuDeviceGetAttribute (&block_size, cu_tpb, dev) == CUDA_SUCCESS
+	      && cuDeviceGetAttribute (&warp_size, cu_ws, dev) == CUDA_SUCCESS
+	      && cuDeviceGetAttribute (&dev_size, cu_mpc, dev) == CUDA_SUCCESS
+	      && cuDeviceGetAttribute (&cpu_size, cu_tpm, dev)  == CUDA_SUCCESS)
 	    {
 	      GOMP_PLUGIN_debug (0, " warp_size=%d, block_size=%d,"
 				 " dev_size=%d, cpu_size=%d\n",
@@ -980,6 +983,7 @@  nvptx_exec (void (*fn), size_t mapnum, void **hostaddrs, void **devaddrs,
 			     default_dims[GOMP_DIM_WORKER],
 			     default_dims[GOMP_DIM_VECTOR]);
 	}
+      pthread_mutex_unlock (&ptx_dev_lock);
 
       for (i = 0; i != GOMP_DIM_MAX; i++)
 	if (!dims[i])