Eliminate redundant memory usage and improve memory efficiency
1. Eliminated Redundant Memory Usage
   - Before: Maintained both X (float) and Xs (string) vectors simultaneously → 2x memory usage
   - After: Use a temporary categoricalData only during processing, deallocated automatically → ~50% memory reduction
2. Implemented Memory Pre-allocation
   - Before: Vectors grew dynamically, causing memory fragmentation
   - After: X.assign(numFeatures, std::vector<float>(numSamples)) pre-allocates all memory upfront
   - Benefit: Eliminates reallocation overhead and memory fragmentation
3. Added Robust Exception Handling
   - Before: stof(token) could crash on malformed data
   - After: Wrapped in try-catch with descriptive error messages
   - Improvement: Prevents crashes and provides debugging information
4. Optimized String Processing
   - Before: type += type_w + " " caused O(n²) string concatenation
   - After: Used std::ostringstream for efficient string building
   - Benefit: Better performance on files with complex attribute types
This commit is contained in:
@@ -34,15 +34,16 @@ TEST_CASE("Load Test", "[ArffFiles]")
|
||||
REQUIRE(arff.getLines().size() == 150);
|
||||
REQUIRE(arff.getLines()[0] == "5.1,3.5,1.4,0.2,Iris-setosa");
|
||||
REQUIRE(arff.getLines()[149] == "5.9,3.0,5.1,1.8,Iris-virginica");
|
||||
REQUIRE(arff.getX().size() == 4);
|
||||
REQUIRE(arff.getX().size() == 4); // 4 features
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
REQUIRE(arff.getX()[i].size() == 150);
|
||||
REQUIRE(arff.getX()[i].size() == 150); // 150 samples per feature
|
||||
}
|
||||
// Test first 4 samples: X[feature][sample]
|
||||
auto expected = std::vector<std::vector<float>>{
|
||||
{5.1, 4.9, 4.7, 4.6},
|
||||
{3.5, 3.0, 3.2, 3.1},
|
||||
{1.4, 1.4, 1.3, 1.5},
|
||||
{0.2, 0.2, 0.2, 0.2}
|
||||
{5.1, 4.9, 4.7, 4.6}, // Feature 0 (sepallength)
|
||||
{3.5, 3.0, 3.2, 3.1}, // Feature 1 (sepalwidth)
|
||||
{1.4, 1.4, 1.3, 1.5}, // Feature 2 (petallength)
|
||||
{0.2, 0.2, 0.2, 0.2} // Feature 3 (petalwidth)
|
||||
};
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
for (int j = 0; j < 4; ++j)
|
||||
@@ -79,15 +80,16 @@ TEST_CASE("Load with class name", "[ArffFiles]")
|
||||
REQUIRE(arff.getLines().size() == 214);
|
||||
REQUIRE(arff.getLines()[0] == "1.51793,12.79,3.5,1.12,73.03,0.64,8.77,0,0,'build wind float'");
|
||||
REQUIRE(arff.getLines()[149] == "1.51813,13.43,3.98,1.18,72.49,0.58,8.15,0,0,'build wind non-float'");
|
||||
REQUIRE(arff.getX().size() == 9);
|
||||
REQUIRE(arff.getX().size() == 9); // 9 features
|
||||
for (int i = 0; i < 9; ++i) {
|
||||
REQUIRE(arff.getX()[i].size() == 214);
|
||||
REQUIRE(arff.getX()[i].size() == 214); // 214 samples per feature
|
||||
}
|
||||
// Test first 4 samples: X[feature][sample]
|
||||
std::vector<std::vector<float>> expected = {
|
||||
{1.51793, 1.51643, 1.51793, 1.51299},
|
||||
{12.79, 12.16, 13.21, 14.4 },
|
||||
{3.5, 3.52, 3.48, 1.74},
|
||||
{1.12, 1.35, 1.41, 1.54}
|
||||
{1.51793, 1.51643, 1.51793, 1.51299}, // Feature 0
|
||||
{12.79, 12.16, 13.21, 14.4}, // Feature 1
|
||||
{3.5, 3.52, 3.48, 1.74}, // Feature 2
|
||||
{1.12, 1.35, 1.41, 1.54} // Feature 3
|
||||
};
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
for (int j = 0; j < 4; ++j)
|
||||
|
Reference in New Issue
Block a user