Fix input data dimensions
@@ -21,25 +21,27 @@ namespace pywrap {
         }
 
         // Ensure tensor is contiguous and in the expected format
-        X = X.contiguous();
+        auto X_copy = X.contiguous();
 
-        if (X.dtype() != torch::kFloat32) {
+        if (X_copy.dtype() != torch::kFloat32) {
             throw std::runtime_error("tensor2numpy: Expected float32 tensor");
         }
 
-        int64_t m = X.size(0);
-        int64_t n = X.size(1);
+        // Transpose from [features, samples] to [samples, features] for Python classifiers
+        X_copy = X_copy.transpose(0, 1);
+
+        int64_t m = X_copy.size(0);
+        int64_t n = X_copy.size(1);
 
         // Calculate correct strides in bytes
-        int64_t element_size = X.element_size();
-        int64_t stride0 = X.stride(0) * element_size;
-        int64_t stride1 = X.stride(1) * element_size;
+        int64_t element_size = X_copy.element_size();
+        int64_t stride0 = X_copy.stride(0) * element_size;
+        int64_t stride1 = X_copy.stride(1) * element_size;
 
-        auto Xn = np::from_data(X.data_ptr(), np::dtype::get_builtin<float>(),
+        auto Xn = np::from_data(X_copy.data_ptr(), np::dtype::get_builtin<float>(),
             bp::make_tuple(m, n),
             bp::make_tuple(stride0, stride1),
             bp::object());
+        // Don't transpose - tensor is already in correct [samples, features] format
         return Xn;
     }
     np::ndarray tensorInt2numpy(torch::Tensor& X)
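Note on the stride arithmetic above: transpose(0, 1) returns a view, not a copy, so after the transpose X_copy is no longer contiguous and its strides differ from the dense row-major ones. Handing the transposed tensor's own byte strides to np::from_data is what lets Python see a correct [samples, features] array with no copy, and is why the trailing transpose on the NumPy side was dropped. A minimal standalone sketch (libtorch only, not part of this commit) that checks the arithmetic:

    #include <torch/torch.h>
    #include <iostream>

    int main() {
        auto X = torch::arange(12, torch::kFloat32).reshape({ 3, 4 }); // [features, samples]
        auto X_copy = X.contiguous().transpose(0, 1);                  // [samples, features] view
        std::cout << X_copy.is_contiguous() << "\n";                   // 0: no data was moved
        int64_t element_size = X_copy.element_size();                  // 4 bytes for float32
        // Byte strides NumPy needs: the next sample is 4 bytes away, the next
        // feature is 16 bytes away (one full row of the original layout).
        std::cout << X_copy.stride(0) * element_size << " "
                  << X_copy.stride(1) * element_size << "\n";          // prints "4 16"
    }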
@@ -50,25 +52,27 @@ namespace pywrap {
         }
 
         // Ensure tensor is contiguous and in the expected format
-        X = X.contiguous();
+        auto X_copy = X.contiguous();
 
-        if (X.dtype() != torch::kInt32) {
+        if (X_copy.dtype() != torch::kInt32) {
             throw std::runtime_error("tensorInt2numpy: Expected int32 tensor");
         }
 
-        int64_t m = X.size(0);
-        int64_t n = X.size(1);
+        // Transpose from [features, samples] to [samples, features] for Python classifiers
+        X_copy = X_copy.transpose(0, 1);
+
+        int64_t m = X_copy.size(0);
+        int64_t n = X_copy.size(1);
 
         // Calculate correct strides in bytes
-        int64_t element_size = X.element_size();
-        int64_t stride0 = X.stride(0) * element_size;
-        int64_t stride1 = X.stride(1) * element_size;
+        int64_t element_size = X_copy.element_size();
+        int64_t stride0 = X_copy.stride(0) * element_size;
+        int64_t stride1 = X_copy.stride(1) * element_size;
 
-        auto Xn = np::from_data(X.data_ptr(), np::dtype::get_builtin<int>(),
+        auto Xn = np::from_data(X_copy.data_ptr(), np::dtype::get_builtin<int>(),
             bp::make_tuple(m, n),
             bp::make_tuple(stride0, stride1),
             bp::object());
+        // Don't transpose - tensor is already in correct [samples, features] format
         return Xn;
     }
     std::pair<np::ndarray, np::ndarray> tensors2numpy(torch::Tensor& X, torch::Tensor& y)
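The int32 path gets the same treatment. One assumption worth making explicit: np::dtype::get_builtin<int>() matches torch::kInt32 only because int is 4 bytes on the usual LP64/LLP64 platforms. A two-line sanity check (sketch, not part of the commit):

    #include <torch/torch.h>
    #include <iostream>

    int main() {
        auto X = torch::zeros({ 2, 3 }, torch::kInt32);
        // Both should print 4; if sizeof(int) differed, get_builtin<int>() would mismatch.
        std::cout << X.element_size() << " " << sizeof(int) << "\n";
    }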
@@ -78,10 +82,11 @@ namespace pywrap {
             throw std::runtime_error("tensors2numpy: Expected 1D y tensor, got " + std::to_string(y.dim()) + "D");
         }
 
-        // Validate dimensions match
-        if (X.size(0) != y.size(0)) {
+        // Validate dimensions match (X is [features, samples], y is [samples])
+        // X.size(1) is samples, y.size(0) is samples
+        if (X.size(1) != y.size(0)) {
             throw std::runtime_error("tensors2numpy: X and y dimension mismatch: X[" +
-                std::to_string(X.size(0)) + "], y[" + std::to_string(y.size(0)) + "]");
+                std::to_string(X.size(1)) + "], y[" + std::to_string(y.size(0)) + "]");
         }
 
         // Ensure y tensor is contiguous
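The corrected check encodes the layout convention directly. A tiny sketch with invented shapes shows why the old comparison was wrong:

    #include <torch/torch.h>
    #include <cassert>

    int main() {
        auto X = torch::rand({ 3, 5 });       // [features, samples]: 3 features, 5 samples
        auto y = torch::randint(0, 2, { 5 }); // one label per sample
        // The old check compared X.size(0) (features) with y.size(0) (samples),
        // rejecting any dataset whose feature count differed from its sample count.
        assert(X.size(1) == y.size(0));
    }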
@@ -61,23 +61,26 @@ tuple<torch::Tensor, torch::Tensor, std::vector<std::string>, std::string, map<s
     auto states = map<std::string, std::vector<int>>();
     if (discretize_dataset) {
         auto Xr = discretizeDataset(X, y);
-        // Create tensor as [samples, features] not [features, samples]
-        Xd = torch::zeros({ static_cast<int>(Xr[0].size()), static_cast<int>(Xr.size()) }, torch::kInt32);
+        // Create tensor as [features, samples] (bayesnet format)
+        // Xr has same structure as X: Xr[i] is i-th feature, Xr[i].size() is number of samples
+        Xd = torch::zeros({ static_cast<int>(Xr.size()), static_cast<int>(Xr[0].size()) }, torch::kInt32);
         for (int i = 0; i < features.size(); ++i) {
             states[features[i]] = std::vector<int>(*max_element(Xr[i].begin(), Xr[i].end()) + 1);
             auto item = states.at(features[i]);
             iota(begin(item), end(item), 0);
-            // Put data as column i (feature i)
-            Xd.index_put_({ "...", i }, torch::tensor(Xr[i], torch::kInt32));
+            // Put data as row i (feature i)
+            Xd.index_put_({ i, "..." }, torch::tensor(Xr[i], torch::kInt32));
         }
         states[className] = std::vector<int>(*max_element(y.begin(), y.end()) + 1);
         iota(begin(states.at(className)), end(states.at(className)), 0);
     } else {
-        // Create tensor as [samples, features] not [features, samples]
-        Xd = torch::zeros({ static_cast<int>(X[0].size()), static_cast<int>(X.size()) }, torch::kFloat32);
+        // Create tensor as [features, samples] (bayesnet format)
+        // X[i] is i-th feature, X[i].size() is number of samples
+        // We want tensor[features, samples], so [X.size(), X[0].size()]
+        Xd = torch::zeros({ static_cast<int>(X.size()), static_cast<int>(X[0].size()) }, torch::kFloat32);
         for (int i = 0; i < features.size(); ++i) {
-            // Put data as column i (feature i)
-            Xd.index_put_({ "...", i }, torch::tensor(X[i]));
+            // Put data as row i (feature i)
+            Xd.index_put_({ i, "..." }, torch::tensor(X[i]));
         }
     }
     return { Xd, torch::tensor(y, torch::kInt32), features, className, states };
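The index_put_ change in isolation, as a self-contained sketch with hypothetical two-feature data: each feature vector now fills row i of a [features, samples] tensor instead of column i:

    #include <torch/torch.h>
    #include <iostream>
    #include <vector>

    int main() {
        // X[i] holds feature i across all samples, as loadDataset receives it
        std::vector<std::vector<int>> X = { { 0, 1, 0, 1 }, { 2, 2, 3, 3 } };
        auto Xd = torch::zeros({ static_cast<int>(X.size()), static_cast<int>(X[0].size()) }, torch::kInt32);
        for (int i = 0; i < static_cast<int>(X.size()); ++i) {
            // { i, "..." } selects row i, all columns: row i <- feature i
            Xd.index_put_({ i, "..." }, torch::tensor(X[i], torch::kInt32));
        }
        std::cout << Xd.sizes() << "\n"; // [2, 4] = [features, samples]
    }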
@@ -22,9 +22,10 @@ public:
         tie(Xt, yt, featurest, classNamet, statest) = loadDataset(file_name, true, discretize);
         // Xv is always discretized
         tie(Xv, yv, featuresv, classNamev, statesv) = loadFile(file_name);
-        auto yresized = yt.view({ yt.size(0), 1 });
-        dataset = torch::cat({ Xt, yresized }, 1);
-        nSamples = dataset.size(0);
+        // Xt is [features, samples], yt is [samples], need to reshape y to [1, samples] for concatenation
+        auto yresized = yt.view({ 1, yt.size(0) });
+        dataset = torch::cat({ Xt, yresized }, 0);
+        nSamples = dataset.size(1); // samples is the second dimension now
         weights = torch::full({ nSamples }, 1.0 / nSamples, torch::kDouble);
         weightsv = std::vector<double>(nSamples, 1.0 / nSamples);
         classNumStates = discretize ? statest.at(classNamet).size() : 0;
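And the concatenation fix in isolation (standalone sketch, invented sizes): y has to become a [1, samples] row so that cat along dim 0 stacks it beneath the feature rows, after which the sample count lives in dim 1:

    #include <torch/torch.h>
    #include <iostream>

    int main() {
        auto Xt = torch::rand({ 3, 10 });                           // [features, samples]
        auto yt = torch::randint(0, 2, { 10 }).to(torch::kFloat32); // [samples]
        auto yresized = yt.view({ 1, yt.size(0) });                 // [1, samples]
        auto dataset = torch::cat({ Xt, yresized }, 0);             // [4, 10]
        std::cout << dataset.sizes() << " nSamples=" << dataset.size(1) << "\n";
    }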