diff --git a/src/Profile/TauGenericAPI.cpp b/src/Profile/TauGenericAPI.cpp
index f8680b2a8..1f3e0a59e 100644
--- a/src/Profile/TauGenericAPI.cpp
+++ b/src/Profile/TauGenericAPI.cpp
@@ -71,11 +71,9 @@ void ps_tool_register_thread(void) {
 void Tau_profile_exit_all_threads();
 
 void ps_tool_finalize(void) {
-#ifndef TAU_MPI
-    //Tau_destructor_trigger();
-    //Tau_profile_exit_all_threads();
-    Tau_exit("stub exiting");
-#endif
+    // Do nothing! It's tempting to call Tau_exit() here, but with GPU
+    // support that does not get handled correctly. Instead, just let the
+    // TAU main wrapper handle the exit.
 }
 
 void ps_tool_pause_measurement(void) {
diff --git a/src/Profile/TauGpuAdapterOpenCL.cpp b/src/Profile/TauGpuAdapterOpenCL.cpp
index 7b5aa08cc..ff36fd722 100644
--- a/src/Profile/TauGpuAdapterOpenCL.cpp
+++ b/src/Profile/TauGpuAdapterOpenCL.cpp
@@ -308,12 +308,20 @@ OpenCLGpuEvent * Tau_opencl_retrieve_gpu(cl_command_queue q)
   if (err != CL_SUCCESS)
   {	printf("error in clGetCommandQueueInfo CONTEXT.\n"); }
 
-  //err = clGetDeviceInfo(id, CL_DEVICE_VENDOR_ID, sizeof(cl_uint), &vendor, NULL);
+  char deviceName[256];
+  char deviceVendor[256];
 
+  err = clGetDeviceInfo(id, CL_DEVICE_NAME, 256, deviceName, NULL);
+  if (err != CL_SUCCESS)
+  {	printf("error in clGetDeviceInfo CL_DEVICE_NAME.\n"); }
+
+  err = clGetDeviceInfo(id, CL_DEVICE_VENDOR, 256, deviceVendor, NULL);
+  if (err != CL_SUCCESS)
+  {	printf("error in clGetDeviceInfo CL_DEVICE_VENDOR.\n"); }
 
-  printf("device id: %d.\n", id);
-  printf("command id: %lld.\n", q);
-  printf("vendor id: %d.\n", vendor);
+  printf("device name: %s\n", deviceName);
+  printf("vendor name: %s\n", deviceVendor);
+  printf("command id: %lld\n", q);
   double sync_offset = Tau_opencl_sync_clocks(q, context);
 #if defined(PTHREADS) || defined(TAU_OPENMP)
   // Create a virtual thread for this command queue