@@ -2026,23 +2026,89 @@ pi_result cuda_piProgramGetInfo(pi_program program, pi_program_info param_name,
2026
2026
return {};
2027
2027
}
2028
2028
2029
- pi_result cuda_piProgramLink ( // TODO: change interface to return error code
2030
- pi_context context, pi_uint32 num_devices, const pi_device *device_list,
2031
- const char *options, pi_uint32 num_input_programs,
2032
- const pi_program *input_programs,
2033
- void (*pfn_notify)(pi_program program, void *user_data), void *user_data,
2034
- pi_program *ret_program) {
2035
- cl::sycl::detail::pi::die (" cuda_piProgramLink not implemented" );
2036
- return {};
2029
+ pi_result cuda_piProgramLink (pi_context context, pi_uint32 num_devices,
2030
+ const pi_device *device_list, const char *options,
2031
+ pi_uint32 num_input_programs,
2032
+ const pi_program *input_programs,
2033
+ void (*pfn_notify)(pi_program program,
2034
+ void *user_data),
2035
+ void *user_data, pi_program *ret_program) {
2036
+
2037
+ assert (ret_program != nullptr );
2038
+ assert (num_devices == 1 || num_devices == 0 );
2039
+ assert (device_list != nullptr || num_devices == 0 );
2040
+ assert (pfn_notify == nullptr );
2041
+ assert (user_data == nullptr );
2042
+ pi_result retError = PI_SUCCESS;
2043
+
2044
+ try {
2045
+ ScopedContext active (context);
2046
+
2047
+ CUlinkState state;
2048
+ std::unique_ptr<_pi_program> retProgram{new _pi_program{context}};
2049
+
2050
+ // TODO: Linker options
2051
+ retError = PI_CHECK_ERROR (cuLinkCreate (0 , nullptr , nullptr , &state));
2052
+ try {
2053
+ for (size_t i = 0 ; i < num_input_programs; ++i) {
2054
+ pi_program program = input_programs[i];
2055
+ retError = PI_CHECK_ERROR (cuLinkAddData (
2056
+ state, CU_JIT_INPUT_PTX, const_cast <char *>(program->source_ ),
2057
+ program->sourceLength_ , nullptr , 0 , nullptr , nullptr ));
2058
+ }
2059
+ void *cubin = nullptr ;
2060
+ size_t cubinSize = 0 ;
2061
+ retError = PI_CHECK_ERROR (cuLinkComplete (state, &cubin, &cubinSize));
2062
+
2063
+ retError = retProgram->create_from_source (
2064
+ static_cast <const char *>(cubin), cubinSize);
2065
+
2066
+ if (retError != PI_SUCCESS) {
2067
+ return retError;
2068
+ }
2069
+
2070
+ retError = retProgram->build_program (options);
2071
+
2072
+ if (retError != PI_SUCCESS) {
2073
+ return retError;
2074
+ }
2075
+ } catch (...) {
2076
+ // Upon error attempt cleanup
2077
+ PI_CHECK_ERROR (cuLinkDestroy (state));
2078
+ throw ;
2079
+ }
2080
+
2081
+ retError = PI_CHECK_ERROR (cuLinkDestroy (state));
2082
+ *ret_program = retProgram.release ();
2083
+
2084
+ } catch (pi_result err) {
2085
+ retError = err;
2086
+ }
2087
+ return retError;
2037
2088
}
2038
2089
2039
2090
pi_result cuda_piProgramCompile (
2040
2091
pi_program program, pi_uint32 num_devices, const pi_device *device_list,
2041
2092
const char *options, pi_uint32 num_input_headers,
2042
2093
const pi_program *input_headers, const char **header_include_names,
2043
2094
void (*pfn_notify)(pi_program program, void *user_data), void *user_data) {
2044
- cl::sycl::detail::pi::die (" cuda_piProgramCompile not implemented" );
2045
- return {};
2095
+ assert (program != nullptr );
2096
+ assert (num_devices == 1 || num_devices == 0 );
2097
+ assert (device_list != nullptr || num_devices == 0 );
2098
+ assert (pfn_notify == nullptr );
2099
+ assert (user_data == nullptr );
2100
+ assert (num_input_headers == 0 );
2101
+ pi_result retError = PI_SUCCESS;
2102
+
2103
+ try {
2104
+ ScopedContext active (program->get_context ());
2105
+
2106
+ program->build_program (options);
2107
+
2108
+ } catch (pi_result err) {
2109
+ retError = err;
2110
+ }
2111
+ return retError;
2046
2112
}
2047
2113
2048
2114
pi_result cuda_piProgramGetBuildInfo (pi_program program, pi_device device,
0 commit comments