So this morning I thought: how hard can it be to write my own specialized preprocessor that understands everything needed to correctly emit multiple tailored kernels? And I had it working in less than two hours. Rah rah, sis boom bah! If I want to, I can straightforwardly make and maintain an arbitrary number of variants, for example, to support all the radii one could ever want.
See below how simple it is. The prefix file is the stuff at the top of the output .cu file that is not repeated. The input file specifies the kernel template that will be repeated, once per variant, with substitutions. For example, |NAME| gets replaced with NLM, NLM2, ... Some of the replacement values are currently the same for all variants; I still keep them in per-variant tables so that the variants can diverge in the future. Don't get picky on my code; I just dashed it off quick and dirty.
Code: Select all
#include <stdio.h>
#include <stdlib.h>
#include <fstream>
#include <string>
// Substitutes every occurrence of `from` inside `str` with `to`, in place.
// The scan runs left to right and resumes just past the text that was
// inserted, so a replacement that itself contains `from` (e.g. 'x' -> 'yx')
// is never re-examined. An empty `from` leaves `str` untouched.
void replaceAll(std::string& str, const std::string& from, const std::string& to)
{
    if (!from.empty())
    {
        for (size_t hit = str.find(from, 0);
             hit != std::string::npos;
             hit = str.find(from, hit + to.length()))
        {
            str.replace(hit, from.length(), to);
        }
    }
}
// Kernel-variant generator.
//
// Usage: <program> <prefix file> <input file> <output file>
//
//   argv[1]  prefix file: copied once, verbatim, to the top of the output.
//   argv[2]  input file:  the kernel template. It is emitted once per variant
//                         with the |NAME|, |WINDOW_RADIUS|, |BLOCK_RADIUS|,
//                         |WEIGHT_THRESHOLD|, |THRESHOLD|, |INV_WINDOW_AREA|,
//                         |TYPE|, |FACTOR1| and |FACTOR2| placeholders replaced
//                         by that variant's values.
//   argv[3]  output file: created/truncated and filled with the prefix followed
//                         by every expanded variant; a blank line follows the
//                         prefix and each variant.
//
// Returns 0 on success and 1 on a usage error, a file that cannot be opened,
// or a failed write. Diagnostics are printed to stdout.
int main(int argc, char *argv[])
{
    if (argc < 4)
    {
        printf("Usage: %s <prefix file> <input file> <output file>\n",
               (argc > 0 && argv[0] != NULL) ? argv[0] : "preprocessor");
        return 1;
    }
    const char* prefix_path = argv[1];
    const char* input_path = argv[2];
    const char* output_path = argv[3];

    // Per-variant substitution tables; index i in every table describes the
    // same variant. Values that are currently identical across variants are
    // still stored per variant so they can diverge later.
#define NUM_VARIANTS 6
    const char* name[NUM_VARIANTS] =
    {
        "NLM", "NLM2", "NLM3", "NLM_hdr", "NLM2_hdr", "NLM3_hdr"
    };
    const char* window_radius[NUM_VARIANTS] =
    {
        "4", "6", "8", "4", "6", "8"
    };
    const char* block_radius[NUM_VARIANTS] =
    {
        "2", "2", "2", "2", "2", "2"
    };
    const char* weight_threshold[NUM_VARIANTS] =
    {
        "0.10f", "0.10f", "0.10f", "0.10f", "0.10f", "0.10f"
    };
    const char* threshold[NUM_VARIANTS] =
    {
        "0.10f", "0.10f", "0.10f", "0.10f", "0.10f", "0.10f"
    };
    const char* type[NUM_VARIANTS] =
    {
        "unsigned char", "unsigned char", "unsigned char", "unsigned int", "unsigned int", "unsigned int"
    };
    const char* factor1[NUM_VARIANTS] =
    {
        "256.0f", "256.0f", "256.0f", "65536.0f", "65536.0f", "65536.0f"
    };
    const char* factor2[NUM_VARIANTS] =
    {
        "255.0f", "255.0f", "255.0f", "65535.0f", "65535.0f", "65535.0f"
    };

    // |INV_WINDOW_AREA| is derived rather than tabulated: 1 / (2r + 1)^2,
    // formatted with eight decimals.
    char inv_window_area[NUM_VARIANTS][128];
    for (int i = 0; i < NUM_VARIANTS; i++)
    {
        const double side = 2 * atof(window_radius[i]) + 1;
        snprintf(inv_window_area[i], sizeof inv_window_area[i], "%.8f", 1.0 / (side * side));
    }

    // Streams close themselves on every return path, so no handle leaks when
    // a later file fails to open.
    std::ofstream out(output_path);
    if (!out)
    {
        printf("Couldn't open output file %s.\n", output_path);
        return 1;
    }

    std::string line;

    // Copy the prefix verbatim.
    {
        std::ifstream prefix(prefix_path);
        if (!prefix)
        {
            printf("Couldn't open prefix file %s.\n", prefix_path);
            return 1;
        }
        while (std::getline(prefix, line))
        {
            out << line;
            // getline strips the newline; restore it unless the final line
            // of the file had none.
            if (!prefix.eof())
                out << '\n';
        }
    }
    out << '\n';

    // Emit the template once per variant with its placeholders expanded.
    for (int i = 0; i < NUM_VARIANTS; i++)
    {
        std::ifstream input(input_path);
        if (!input)
        {
            printf("Couldn't open input file %s.\n", input_path);
            return 1;
        }
        // Whole lines are read into a std::string, so neither a long template
        // line nor a line that grows during substitution can be split or
        // overflow a fixed-size buffer.
        while (std::getline(input, line))
        {
            replaceAll(line, "|NAME|", name[i]);
            replaceAll(line, "|WINDOW_RADIUS|", window_radius[i]);
            replaceAll(line, "|BLOCK_RADIUS|", block_radius[i]);
            replaceAll(line, "|WEIGHT_THRESHOLD|", weight_threshold[i]);
            replaceAll(line, "|THRESHOLD|", threshold[i]);
            replaceAll(line, "|INV_WINDOW_AREA|", inv_window_area[i]);
            replaceAll(line, "|TYPE|", type[i]);
            replaceAll(line, "|FACTOR1|", factor1[i]);
            replaceAll(line, "|FACTOR2|", factor2[i]);
            out << line;
            if (!input.eof())
                out << '\n';
        }
        out << '\n';
    }

    // Flush explicitly so a failed write (e.g. disk full) is reported instead
    // of silently producing a truncated .cu file.
    out.close();
    if (!out)
    {
        printf("Error writing output file %s.\n", output_path);
        return 1;
    }
    return 0;
}