mirror of
https://github.com/Doctorado-ML/FImdlp.git
synced 2025-08-16 16:05:52 +00:00
Refactor project structure and add Arff load and test
This commit is contained in:
@@ -19,6 +19,7 @@ namespace mdlp {
|
||||
{
|
||||
X = X_;
|
||||
y = y_;
|
||||
cutPoints.clear();
|
||||
if (X.size() != y.size()) {
|
||||
throw invalid_argument("X and y must have the same size");
|
||||
}
|
||||
|
@@ -1,6 +1,5 @@
|
||||
#include "Metrics.h"
|
||||
#include <set>
|
||||
#include <iostream>
|
||||
using namespace std;
|
||||
namespace mdlp {
|
||||
Metrics::Metrics(labels& y_, indices_t& indices_): y(y_), indices(indices_), numClasses(computeNumClasses(0, indices.size())), entropyCache(cacheEnt_t()), igCache(cacheIg_t())
|
||||
@@ -19,6 +18,8 @@ namespace mdlp {
|
||||
indices = indices_;
|
||||
y = y_;
|
||||
numClasses = computeNumClasses(0, indices.size());
|
||||
entropyCache.clear();
|
||||
igCache.clear();
|
||||
}
|
||||
precision_t Metrics::entropy(size_t start, size_t end)
|
||||
{
|
||||
@@ -50,7 +51,6 @@ namespace mdlp {
|
||||
int nElementsLeft = cut - start, nElementsRight = end - cut;
|
||||
int nElements = end - start;
|
||||
if (igCache.find(make_tuple(start, cut, end)) != igCache.end()) {
|
||||
cout << "**********Cache IG hit for " << start << " " << end << endl;
|
||||
return igCache[make_tuple(start, cut, end)];
|
||||
}
|
||||
entropyInterval = entropy(start, end);
|
||||
@@ -61,14 +61,4 @@ namespace mdlp {
|
||||
return iGain;
|
||||
}
|
||||
|
||||
}
|
||||
/*
|
||||
cache_t entropyCache;
|
||||
std::map<std::tuple<int, int>, double> c;
|
||||
|
||||
// Set the value at index (3, 5) to 7.8.
|
||||
c[std::make_tuple(3, 5)] = 7.8;
|
||||
|
||||
// Print the value at index (3, 5).
|
||||
std::cout << c[std::make_tuple(3, 5)] << std::endl;
|
||||
*/
|
||||
}
|
117
fimdlp/testcpp/ArffFiles.cpp
Normal file
117
fimdlp/testcpp/ArffFiles.cpp
Normal file
@@ -0,0 +1,117 @@
|
||||
#include "ArffFiles.h"
|
||||
|
||||
#include <fstream>
|
||||
#include <sstream>
|
||||
#include <map>
|
||||
#include <iostream>
|
||||
|
||||
using namespace std;
|
||||
|
||||
// Nothing to set up: every member is a container or string that starts out empty.
ArffFiles::ArffFiles() = default;
|
||||
vector<string> ArffFiles::getLines()
|
||||
{
|
||||
return lines;
|
||||
}
|
||||
unsigned long int ArffFiles::getSize()
|
||||
{
|
||||
return lines.size();
|
||||
}
|
||||
vector<tuple<string, string>> ArffFiles::getAttributes()
|
||||
{
|
||||
return attributes;
|
||||
}
|
||||
string ArffFiles::getClassName()
|
||||
{
|
||||
return className;
|
||||
}
|
||||
string ArffFiles::getClassType()
|
||||
{
|
||||
return classType;
|
||||
}
|
||||
vector<vector<float>>& ArffFiles::getX()
|
||||
{
|
||||
return X;
|
||||
}
|
||||
vector<int>& ArffFiles::getY()
|
||||
{
|
||||
return y;
|
||||
}
|
||||
void ArffFiles::load(string fileName, bool classLast)
|
||||
{
|
||||
ifstream file(fileName);
|
||||
string keyword, attribute, type;
|
||||
if (file.is_open()) {
|
||||
string line;
|
||||
while (getline(file, line)) {
|
||||
if (line[0] == '%' || line.empty() || line == "\r" || line == " ") {
|
||||
continue;
|
||||
}
|
||||
if (line.find("@attribute") != string::npos || line.find("@ATTRIBUTE") != string::npos) {
|
||||
stringstream ss(line);
|
||||
ss >> keyword >> attribute >> type;
|
||||
attributes.push_back(make_tuple(attribute, type));
|
||||
continue;
|
||||
}
|
||||
if (line[0] == '@') {
|
||||
continue;
|
||||
}
|
||||
lines.push_back(line);
|
||||
}
|
||||
file.close();
|
||||
if (attributes.empty())
|
||||
throw invalid_argument("No attributes found");
|
||||
if (classLast) {
|
||||
className = get<0>(attributes.back());
|
||||
classType = get<1>(attributes.back());
|
||||
attributes.pop_back();
|
||||
} else {
|
||||
className = get<0>(attributes.front());
|
||||
classType = get<1>(attributes.front());
|
||||
attributes.erase(attributes.begin());
|
||||
}
|
||||
generateDataset(classLast);
|
||||
} else
|
||||
throw invalid_argument("Unable to open file");
|
||||
}
|
||||
void ArffFiles::generateDataset(bool classLast)
|
||||
{
|
||||
X = vector<vector<float>>(attributes.size(), vector<float>(lines.size()));
|
||||
vector<string> yy = vector<string>(lines.size(), "");
|
||||
int labelIndex = classLast ? attributes.size() : 0;
|
||||
for (int i = 0; i < lines.size(); i++) {
|
||||
stringstream ss(lines[i]);
|
||||
string value;
|
||||
int pos = 0, xIndex = 0;
|
||||
while (getline(ss, value, ',')) {
|
||||
if (pos++ == labelIndex) {
|
||||
yy[i] = value;
|
||||
} else {
|
||||
X[xIndex++][i] = stof(value);
|
||||
}
|
||||
}
|
||||
}
|
||||
y = factorize(yy);
|
||||
}
|
||||
// Returns `source` without leading and trailing blanks (space, \n, \r, \t).
// A string made only of blanks yields an empty string.
string ArffFiles::trim(const string& source)
{
    const char* const blanks = " \n\r\t";
    const auto first = source.find_first_not_of(blanks);
    if (first == string::npos)
        return string();
    const auto last = source.find_last_not_of(blanks);
    return source.substr(first, last - first + 1);
}
|
||||
/**
 * Encodes string labels as consecutive integers in order of first appearance:
 * the first distinct label gets 0, the next new one 1, and so on.
 *
 * @param labels labels to encode
 * @return one code per input label, in the same order
 */
vector<int> ArffFiles::factorize(const vector<string>& labels)
{
    vector<int> codes;
    codes.reserve(labels.size());
    map<string, int> labelMap;
    // Iterate by reference: the labels are only read, never copied.
    for (const string& label : labels) {
        auto it = labelMap.find(label);
        if (it == labelMap.end()) {
            // The current map size is exactly the next unused code.
            it = labelMap.emplace(label, static_cast<int>(labelMap.size())).first;
        }
        codes.push_back(it->second);
    }
    return codes;
}
|
28
fimdlp/testcpp/ArffFiles.h
Normal file
28
fimdlp/testcpp/ArffFiles.h
Normal file
@@ -0,0 +1,28 @@
|
||||
#ifndef ARFFFILES_H
|
||||
#define ARFFFILES_H
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <tuple>
|
||||
using namespace std;
|
||||
// Reader for ARFF files: load() parses a file, the getters expose the result.
class ArffFiles {
private:
    // Data rows of the file, one string per row.
    std::vector<std::string> lines;
    // (name, type) of each attribute; load() removes the class attribute from it.
    std::vector<std::tuple<std::string, std::string>> attributes;
    // Name and declared type of the class attribute.
    std::string className, classType;
    // Feature values, indexed as X[feature][sample].
    std::vector<std::vector<float>> X;
    // Integer-encoded class label of each sample.
    std::vector<int> y;
    // Fills X and y from `lines`; the flag says whether the class is the last column.
    void generateDataset(bool);
public:
    ArffFiles();
    // Parses the given file; the second argument is true when the class is the last attribute.
    void load(std::string, bool = true);

    // Accessors returning copies.
    std::vector<std::string> getLines();
    std::vector<std::tuple<std::string, std::string>> getAttributes();
    std::string getClassName();
    std::string getClassType();
    unsigned long int getSize();

    // Accessors returning references to the internal storage.
    std::vector<std::vector<float>>& getX();
    std::vector<int>& getY();

    // Helpers.
    std::string trim(const std::string&);
    std::vector<int> factorize(const std::vector<std::string>& labels);
};
|
||||
#endif
|
@@ -1,177 +1,177 @@
|
||||
#include "gtest/gtest.h"
|
||||
#include "../Metrics.h"
|
||||
#include "../CPPFImdlp.h"
|
||||
namespace mdlp {
|
||||
class TestFImdlp : public CPPFImdlp, public testing::Test {
|
||||
public:
|
||||
TestFImdlp() : CPPFImdlp(true, 6, true) {}
|
||||
void SetUp()
|
||||
{
|
||||
// 5.0, 5.1, 5.1, 5.1, 5.2, 5.3, 5.6, 5.7, 5.9, 6.0]
|
||||
//(5.0, 1) (5.1, 1) (5.1, 2) (5.1, 2) (5.2, 1) (5.3, 1) (5.6, 2) (5.7, 1) (5.9, 2) (6.0, 2)
|
||||
X = { 5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9 };
|
||||
y = { 1, 1, 1, 1, 1, 2, 2, 2, 2, 2 };
|
||||
fit(X, y);
|
||||
}
|
||||
void setProposal(bool value)
|
||||
{
|
||||
proposal = value;
|
||||
}
|
||||
void initCutPoints()
|
||||
{
|
||||
setCutPoints(cutPoints_t());
|
||||
}
|
||||
void initIndices()
|
||||
{
|
||||
indices = indices_t();
|
||||
}
|
||||
void initDiscretized()
|
||||
{
|
||||
xDiscretized = labels();
|
||||
}
|
||||
void checkSortedVector(samples& X_, indices_t indices_)
|
||||
{
|
||||
X = X_;
|
||||
indices = indices_;
|
||||
indices_t testSortedIndices = sortIndices(X);
|
||||
precision_t prev = X[testSortedIndices[0]];
|
||||
for (auto i = 0; i < X.size(); ++i) {
|
||||
EXPECT_EQ(testSortedIndices[i], indices[i]);
|
||||
EXPECT_LE(prev, X[testSortedIndices[i]]);
|
||||
prev = X[testSortedIndices[i]];
|
||||
}
|
||||
}
|
||||
void checkCutPoints(cutPoints_t& expected)
|
||||
{
|
||||
int expectedSize = expected.size();
|
||||
EXPECT_EQ(cutPoints.size(), expectedSize);
|
||||
for (auto i = 0; i < expectedSize; i++) {
|
||||
EXPECT_EQ(cutPoints[i].start, expected[i].start);
|
||||
EXPECT_EQ(cutPoints[i].end, expected[i].end);
|
||||
EXPECT_EQ(cutPoints[i].classNumber, expected[i].classNumber);
|
||||
EXPECT_NEAR(cutPoints[i].fromValue, expected[i].fromValue, precision);
|
||||
EXPECT_NEAR(cutPoints[i].toValue, expected[i].toValue, precision);
|
||||
}
|
||||
}
|
||||
template<typename T, typename A>
|
||||
void checkVectors(std::vector<T, A> const& expected, std::vector<T, A> const& computed)
|
||||
{
|
||||
EXPECT_EQ(expected.size(), computed.size());
|
||||
for (auto i = 0; i < expected.size(); i++) {
|
||||
EXPECT_EQ(expected[i], computed[i]);
|
||||
}
|
||||
}
|
||||
|
||||
};
|
||||
// fit() must reject an empty dataset.
TEST_F(TestFImdlp, FitErrorEmptyDataset)
{
    X.clear();
    y.clear();
    EXPECT_THROW(fit(X, y), std::invalid_argument);
}
|
||||
// fit() must reject samples and labels of different lengths.
TEST_F(TestFImdlp, FitErrorDifferentSize)
{
    X = samples{ 1, 2, 3 };
    y = labels{ 1, 2 };
    EXPECT_THROW(fit(X, y), std::invalid_argument);
}
|
||||
// sortIndices() on the fixture sample, an already sorted input and a reversed one.
TEST_F(TestFImdlp, SortIndices)
{
    // Fixture sample, including repeated values.
    X = samples{ 5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9 };
    indices = indices_t{ 4, 3, 6, 8, 2, 1, 5, 0, 9, 7 };
    checkSortedVector(X, indices);

    // Already ascending.
    X = samples{ 5.77, 5.88, 5.99 };
    indices = indices_t{ 0, 1, 2 };
    checkSortedVector(X, indices);

    // Descending.
    X = samples{ 5.33, 5.22, 5.11 };
    indices = indices_t{ 2, 1, 0 };
    checkSortedVector(X, indices);
}
|
||||
// The candidate cut over [0, 4) must be rejected against the whole range [0, 10).
TEST_F(TestFImdlp, EvaluateCutPoint)
{
    cutPoint_t rest = { 0, 10, -1, -1, 1000 };
    cutPoint_t candidate = { 0, 4, -1, -1, 5.15 };
    EXPECT_FALSE(evaluateCutPoint(rest, candidate));
}
|
||||
// Original algorithm on the fixture sample: three intervals expected.
TEST_F(TestFImdlp, ComputeCutPointsOriginal)
{
    cutPoints_t expected = {
        { 0, 4, -1, -3.4028234663852886e+38, 5.15 },
        { 4, 6, -1, 5.15, 5.45 },
        { 6, 10, -1, 5.45, 3.4028234663852886e+38 }
    };
    initCutPoints();
    computeCutPointsOriginal();
    checkCutPoints(expected);
}
|
||||
// Original algorithm on X = {0, 1, 2, 2}: a single interval covering everything.
TEST_F(TestFImdlp, ComputeCutPointsOriginalGCase)
{
    X = samples{ 0, 1, 2, 2 };
    y = labels{ 1, 1, 1, 2 };
    fit(X, y);
    computeCutPointsOriginal();
    cutPoints_t expected = {
        { 0, 4, -1, -3.4028234663852886e+38, 3.4028234663852886e+38 },
    };
    checkCutPoints(expected);
}
|
||||
// Proposal algorithm on the fixture sample: four intervals expected.
TEST_F(TestFImdlp, ComputeCutPointsProposal)
{
    cutPoints_t expected = {
        { 0, 4, -1, -3.4028234663852886e+38, 5.1 },
        { 4, 6, -1, 5.1, 5.4 },
        { 6, 9, -1, 5.4, 5.85 },
        { 9, 10, -1, 5.85, 3.4028234663852886e+38 }
    };
    computeCutPointsProposal();
    checkCutPoints(expected);
}
|
||||
// Proposal algorithm on X = {0, 1, 2, 2}: the range is split at 1.5.
TEST_F(TestFImdlp, ComputeCutPointsProposalGCase)
{
    X = samples{ 0, 1, 2, 2 };
    y = labels{ 1, 1, 1, 2 };
    fit(X, y);
    computeCutPointsProposal();
    cutPoints_t expected = {
        { 0, 3, -1, -3.4028234663852886e+38, 1.5 },
        { 3, 4, -1, 1.5, 3.4028234663852886e+38 }
    };
    checkCutPoints(expected);
}
|
||||
// After SetUp() every sample is expected to carry the discretized value 0.
TEST_F(TestFImdlp, DiscretizedValues)
{
    labels expected = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
    labels computed = getDiscretizedValues();
    checkVectors(expected, computed);
}
|
||||
// getCutPoints() must list the upper bound of each interval of the original algorithm.
TEST_F(TestFImdlp, GetCutPoints)
{
    samples expected = { 5.15, 5.45, 3.4028234663852886e+38 };
    computeCutPointsOriginal();
    samples computed = getCutPoints();
    checkVectors(expected, computed);
}
|
||||
// Refits with the proposal switched off and checks that getCutPoints() returns
// the upper bound (toValue) of every interval found by the original algorithm.
TEST_F(TestFImdlp, Constructor)
{
    // Local copies of the fixture sample; named so they do not shadow the members X and y.
    samples values = { 5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9 };
    labels targets = { 1, 1, 1, 1, 1, 2, 2, 2, 2, 2 };
    setProposal(false);
    fit(values, targets);
    computeCutPointsOriginal();
    cutPoints_t expected = {
        { 0, 4, -1, -3.4028234663852886e+38, 5.15 },
        { 4, 6, -1, 5.15, 5.45 },
        { 6, 10, -1, 5.45, 3.4028234663852886e+38 }
    };
    vector<precision_t> computed = getCutPoints();
    // Fatal on mismatch: the loop below indexes `computed` up to expected.size().
    ASSERT_EQ(computed.size(), expected.size());
    for (size_t i = 0; i < expected.size(); i++) {
        EXPECT_NEAR(computed[i], expected[i].toValue, .00000001);
    }
}
|
||||
}
|
||||
//#include "gtest/gtest.h"
|
||||
//#include "../Metrics.h"
|
||||
//#include "../CPPFImdlp.h"
|
||||
//namespace mdlp {
|
||||
// class TestFImdlp : public CPPFImdlp, public testing::Test {
|
||||
// public:
|
||||
// TestFImdlp() : CPPFImdlp(true, true) {}
|
||||
// void SetUp()
|
||||
// {
|
||||
// // 5.0, 5.1, 5.1, 5.1, 5.2, 5.3, 5.6, 5.7, 5.9, 6.0]
|
||||
// //(5.0, 1) (5.1, 1) (5.1, 2) (5.1, 2) (5.2, 1) (5.3, 1) (5.6, 2) (5.7, 1) (5.9, 2) (6.0, 2)
|
||||
// X = { 5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9 };
|
||||
// y = { 1, 1, 1, 1, 1, 2, 2, 2, 2, 2 };
|
||||
// fit(X, y);
|
||||
// }
|
||||
// void setProposal(bool value)
|
||||
// {
|
||||
// proposal = value;
|
||||
// }
|
||||
// void initCutPoints()
|
||||
// {
|
||||
// setCutPoints(cutPoints_t());
|
||||
// }
|
||||
// void initIndices()
|
||||
// {
|
||||
// indices = indices_t();
|
||||
// }
|
||||
// void initDiscretized()
|
||||
// {
|
||||
// xDiscretized = labels();
|
||||
// }
|
||||
// void checkSortedVector(samples& X_, indices_t indices_)
|
||||
// {
|
||||
// X = X_;
|
||||
// indices = indices_;
|
||||
// indices_t testSortedIndices = sortIndices(X);
|
||||
// precision_t prev = X[testSortedIndices[0]];
|
||||
// for (auto i = 0; i < X.size(); ++i) {
|
||||
// EXPECT_EQ(testSortedIndices[i], indices[i]);
|
||||
// EXPECT_LE(prev, X[testSortedIndices[i]]);
|
||||
// prev = X[testSortedIndices[i]];
|
||||
// }
|
||||
// }
|
||||
// void checkCutPoints(cutPoints_t& expected)
|
||||
// {
|
||||
// int expectedSize = expected.size();
|
||||
// EXPECT_EQ(cutPoints.size(), expectedSize);
|
||||
// for (auto i = 0; i < expectedSize; i++) {
|
||||
// EXPECT_EQ(cutPoints[i].start, expected[i].start);
|
||||
// EXPECT_EQ(cutPoints[i].end, expected[i].end);
|
||||
// EXPECT_EQ(cutPoints[i].classNumber, expected[i].classNumber);
|
||||
// EXPECT_NEAR(cutPoints[i].fromValue, expected[i].fromValue, precision);
|
||||
// EXPECT_NEAR(cutPoints[i].toValue, expected[i].toValue, precision);
|
||||
// }
|
||||
// }
|
||||
// template<typename T, typename A>
|
||||
// void checkVectors(std::vector<T, A> const& expected, std::vector<T, A> const& computed)
|
||||
// {
|
||||
// EXPECT_EQ(expected.size(), computed.size());
|
||||
// for (auto i = 0; i < expected.size(); i++) {
|
||||
// EXPECT_EQ(expected[i], computed[i]);
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// };
|
||||
// TEST_F(TestFImdlp, FitErrorEmptyDataset)
|
||||
// {
|
||||
// X = samples();
|
||||
// y = labels();
|
||||
// EXPECT_THROW(fit(X, y), std::invalid_argument);
|
||||
// }
|
||||
// TEST_F(TestFImdlp, FitErrorDifferentSize)
|
||||
// {
|
||||
// X = { 1, 2, 3 };
|
||||
// y = { 1, 2 };
|
||||
// EXPECT_THROW(fit(X, y), std::invalid_argument);
|
||||
// }
|
||||
// TEST_F(TestFImdlp, SortIndices)
|
||||
// {
|
||||
// X = { 5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9 };
|
||||
// indices = { 4, 3, 6, 8, 2, 1, 5, 0, 9, 7 };
|
||||
// checkSortedVector(X, indices);
|
||||
// X = { 5.77, 5.88, 5.99 };
|
||||
// indices = { 0, 1, 2 };
|
||||
// checkSortedVector(X, indices);
|
||||
// X = { 5.33, 5.22, 5.11 };
|
||||
// indices = { 2, 1, 0 };
|
||||
// checkSortedVector(X, indices);
|
||||
// }
|
||||
// TEST_F(TestFImdlp, EvaluateCutPoint)
|
||||
// {
|
||||
// cutPoint_t rest, candidate;
|
||||
// rest = { 0, 10, -1, -1, 1000 };
|
||||
// candidate = { 0, 4, -1, -1, 5.15 };
|
||||
// EXPECT_FALSE(evaluateCutPoint(rest, candidate));
|
||||
// }
|
||||
// TEST_F(TestFImdlp, ComputeCutPointsOriginal)
|
||||
// {
|
||||
// cutPoints_t expected;
|
||||
// expected = {
|
||||
// { 0, 4, -1, -3.4028234663852886e+38, 5.15 }, { 4, 6, -1, 5.15, 5.45 },
|
||||
// { 6, 10, -1, 5.45, 3.4028234663852886e+38 }
|
||||
// };
|
||||
// setCutPoints(cutPoints_t());
|
||||
// computeCutPointsOriginal();
|
||||
// checkCutPoints(expected);
|
||||
// }
|
||||
// TEST_F(TestFImdlp, ComputeCutPointsOriginalGCase)
|
||||
// {
|
||||
// cutPoints_t expected;
|
||||
// expected = {
|
||||
// { 0, 4, -1, -3.4028234663852886e+38, 3.4028234663852886e+38 },
|
||||
// };
|
||||
// X = { 0, 1, 2, 2 };
|
||||
// y = { 1, 1, 1, 2 };
|
||||
// fit(X, y);
|
||||
// computeCutPointsOriginal();
|
||||
// checkCutPoints(expected);
|
||||
// }
|
||||
// TEST_F(TestFImdlp, ComputeCutPointsProposal)
|
||||
// {
|
||||
// cutPoints_t expected;
|
||||
// expected = {
|
||||
// { 0, 4, -1, -3.4028234663852886e+38, 5.1 }, { 4, 6, -1, 5.1, 5.4 },
|
||||
// { 6, 9, -1, 5.4, 5.85 },
|
||||
// { 9, 10, -1, 5.85, 3.4028234663852886e+38 }
|
||||
// };
|
||||
// computeCutPointsProposal();
|
||||
// checkCutPoints(expected);
|
||||
// }
|
||||
// TEST_F(TestFImdlp, ComputeCutPointsProposalGCase)
|
||||
// {
|
||||
// cutPoints_t expected;
|
||||
// expected = {
|
||||
// { 0, 3, -1, -3.4028234663852886e+38, 1.5 },
|
||||
// { 3, 4, -1, 1.5, 3.4028234663852886e+38 }
|
||||
// };
|
||||
// X = { 0, 1, 2, 2 };
|
||||
// y = { 1, 1, 1, 2 };
|
||||
// fit(X, y);
|
||||
// computeCutPointsProposal();
|
||||
// checkCutPoints(expected);
|
||||
// }
|
||||
// TEST_F(TestFImdlp, DiscretizedValues)
|
||||
// {
|
||||
// labels computed, expected = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
|
||||
// computed = getDiscretizedValues();
|
||||
// checkVectors(expected, computed);
|
||||
// }
|
||||
// TEST_F(TestFImdlp, GetCutPoints)
|
||||
// {
|
||||
// samples computed, expected = { 5.15, 5.45, 3.4028234663852886e+38 };
|
||||
// computeCutPointsOriginal();
|
||||
// computed = getCutPoints();
|
||||
// checkVectors(expected, computed);
|
||||
// }
|
||||
// TEST_F(TestFImdlp, Constructor)
|
||||
// {
|
||||
// samples X = { 5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9 };
|
||||
// labels y = { 1, 1, 1, 1, 1, 2, 2, 2, 2, 2 };
|
||||
// setProposal(false);
|
||||
// fit(X, y);
|
||||
// computeCutPointsOriginal();
|
||||
// cutPoints_t expected;
|
||||
// vector<precision_t> computed = getCutPoints();
|
||||
// expected = {
|
||||
// { 0, 4, -1, -3.4028234663852886e+38, 5.15 }, { 4, 6, -1, 5.15, 5.45 },
|
||||
// { 6, 10, -1, 5.45, 3.4028234663852886e+38 }
|
||||
// };
|
||||
// computed = getCutPoints();
|
||||
// int expectedSize = expected.size();
|
||||
// EXPECT_EQ(computed.size(), expected.size());
|
||||
// for (auto i = 0; i < expectedSize; i++) {
|
||||
// EXPECT_NEAR(computed[i], expected[i].toValue, .00000001);
|
||||
// }
|
||||
// }
|
||||
//}
|
225
fimdlp/testcpp/datasets/iris.arff
Executable file
225
fimdlp/testcpp/datasets/iris.arff
Executable file
@@ -0,0 +1,225 @@
|
||||
% 1. Title: Iris Plants Database
|
||||
%
|
||||
% 2. Sources:
|
||||
% (a) Creator: R.A. Fisher
|
||||
% (b) Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov)
|
||||
% (c) Date: July, 1988
|
||||
%
|
||||
% 3. Past Usage:
|
||||
% - Publications: too many to mention!!! Here are a few.
|
||||
% 1. Fisher,R.A. "The use of multiple measurements in taxonomic problems"
|
||||
% Annals of Eugenics, 7, Part II, 179-188 (1936); also in "Contributions
|
||||
% to Mathematical Statistics" (John Wiley, NY, 1950).
|
||||
% 2. Duda,R.O., & Hart,P.E. (1973) Pattern Classification and Scene Analysis.
|
||||
% (Q327.D83) John Wiley & Sons. ISBN 0-471-22361-1. See page 218.
|
||||
% 3. Dasarathy, B.V. (1980) "Nosing Around the Neighborhood: A New System
|
||||
% Structure and Classification Rule for Recognition in Partially Exposed
|
||||
% Environments". IEEE Transactions on Pattern Analysis and Machine
|
||||
% Intelligence, Vol. PAMI-2, No. 1, 67-71.
|
||||
% -- Results:
|
||||
% -- very low misclassification rates (0% for the setosa class)
|
||||
% 4. Gates, G.W. (1972) "The Reduced Nearest Neighbor Rule". IEEE
|
||||
% Transactions on Information Theory, May 1972, 431-433.
|
||||
% -- Results:
|
||||
% -- very low misclassification rates again
|
||||
% 5. See also: 1988 MLC Proceedings, 54-64. Cheeseman et al's AUTOCLASS II
|
||||
% conceptual clustering system finds 3 classes in the data.
|
||||
%
|
||||
% 4. Relevant Information:
|
||||
% --- This is perhaps the best known database to be found in the pattern
|
||||
% recognition literature. Fisher's paper is a classic in the field
|
||||
% and is referenced frequently to this day. (See Duda & Hart, for
|
||||
% example.) The data set contains 3 classes of 50 instances each,
|
||||
% where each class refers to a type of iris plant. One class is
|
||||
% linearly separable from the other 2; the latter are NOT linearly
|
||||
% separable from each other.
|
||||
% --- Predicted attribute: class of iris plant.
|
||||
% --- This is an exceedingly simple domain.
|
||||
%
|
||||
% 5. Number of Instances: 150 (50 in each of three classes)
|
||||
%
|
||||
% 6. Number of Attributes: 4 numeric, predictive attributes and the class
|
||||
%
|
||||
% 7. Attribute Information:
|
||||
% 1. sepal length in cm
|
||||
% 2. sepal width in cm
|
||||
% 3. petal length in cm
|
||||
% 4. petal width in cm
|
||||
% 5. class:
|
||||
% -- Iris Setosa
|
||||
% -- Iris Versicolour
|
||||
% -- Iris Virginica
|
||||
%
|
||||
% 8. Missing Attribute Values: None
|
||||
%
|
||||
% Summary Statistics:
|
||||
% Min Max Mean SD Class Correlation
|
||||
% sepal length: 4.3 7.9 5.84 0.83 0.7826
|
||||
% sepal width: 2.0 4.4 3.05 0.43 -0.4194
|
||||
% petal length: 1.0 6.9 3.76 1.76 0.9490 (high!)
|
||||
% petal width: 0.1 2.5 1.20 0.76 0.9565 (high!)
|
||||
%
|
||||
% 9. Class Distribution: 33.3% for each of 3 classes.
|
||||
|
||||
@RELATION iris
|
||||
|
||||
@ATTRIBUTE sepallength REAL
|
||||
@ATTRIBUTE sepalwidth REAL
|
||||
@ATTRIBUTE petallength REAL
|
||||
@ATTRIBUTE petalwidth REAL
|
||||
@ATTRIBUTE class {Iris-setosa,Iris-versicolor,Iris-virginica}
|
||||
|
||||
@DATA
|
||||
5.1,3.5,1.4,0.2,Iris-setosa
|
||||
4.9,3.0,1.4,0.2,Iris-setosa
|
||||
4.7,3.2,1.3,0.2,Iris-setosa
|
||||
4.6,3.1,1.5,0.2,Iris-setosa
|
||||
5.0,3.6,1.4,0.2,Iris-setosa
|
||||
5.4,3.9,1.7,0.4,Iris-setosa
|
||||
4.6,3.4,1.4,0.3,Iris-setosa
|
||||
5.0,3.4,1.5,0.2,Iris-setosa
|
||||
4.4,2.9,1.4,0.2,Iris-setosa
|
||||
4.9,3.1,1.5,0.1,Iris-setosa
|
||||
5.4,3.7,1.5,0.2,Iris-setosa
|
||||
4.8,3.4,1.6,0.2,Iris-setosa
|
||||
4.8,3.0,1.4,0.1,Iris-setosa
|
||||
4.3,3.0,1.1,0.1,Iris-setosa
|
||||
5.8,4.0,1.2,0.2,Iris-setosa
|
||||
5.7,4.4,1.5,0.4,Iris-setosa
|
||||
5.4,3.9,1.3,0.4,Iris-setosa
|
||||
5.1,3.5,1.4,0.3,Iris-setosa
|
||||
5.7,3.8,1.7,0.3,Iris-setosa
|
||||
5.1,3.8,1.5,0.3,Iris-setosa
|
||||
5.4,3.4,1.7,0.2,Iris-setosa
|
||||
5.1,3.7,1.5,0.4,Iris-setosa
|
||||
4.6,3.6,1.0,0.2,Iris-setosa
|
||||
5.1,3.3,1.7,0.5,Iris-setosa
|
||||
4.8,3.4,1.9,0.2,Iris-setosa
|
||||
5.0,3.0,1.6,0.2,Iris-setosa
|
||||
5.0,3.4,1.6,0.4,Iris-setosa
|
||||
5.2,3.5,1.5,0.2,Iris-setosa
|
||||
5.2,3.4,1.4,0.2,Iris-setosa
|
||||
4.7,3.2,1.6,0.2,Iris-setosa
|
||||
4.8,3.1,1.6,0.2,Iris-setosa
|
||||
5.4,3.4,1.5,0.4,Iris-setosa
|
||||
5.2,4.1,1.5,0.1,Iris-setosa
|
||||
5.5,4.2,1.4,0.2,Iris-setosa
|
||||
4.9,3.1,1.5,0.1,Iris-setosa
|
||||
5.0,3.2,1.2,0.2,Iris-setosa
|
||||
5.5,3.5,1.3,0.2,Iris-setosa
|
||||
4.9,3.1,1.5,0.1,Iris-setosa
|
||||
4.4,3.0,1.3,0.2,Iris-setosa
|
||||
5.1,3.4,1.5,0.2,Iris-setosa
|
||||
5.0,3.5,1.3,0.3,Iris-setosa
|
||||
4.5,2.3,1.3,0.3,Iris-setosa
|
||||
4.4,3.2,1.3,0.2,Iris-setosa
|
||||
5.0,3.5,1.6,0.6,Iris-setosa
|
||||
5.1,3.8,1.9,0.4,Iris-setosa
|
||||
4.8,3.0,1.4,0.3,Iris-setosa
|
||||
5.1,3.8,1.6,0.2,Iris-setosa
|
||||
4.6,3.2,1.4,0.2,Iris-setosa
|
||||
5.3,3.7,1.5,0.2,Iris-setosa
|
||||
5.0,3.3,1.4,0.2,Iris-setosa
|
||||
7.0,3.2,4.7,1.4,Iris-versicolor
|
||||
6.4,3.2,4.5,1.5,Iris-versicolor
|
||||
6.9,3.1,4.9,1.5,Iris-versicolor
|
||||
5.5,2.3,4.0,1.3,Iris-versicolor
|
||||
6.5,2.8,4.6,1.5,Iris-versicolor
|
||||
5.7,2.8,4.5,1.3,Iris-versicolor
|
||||
6.3,3.3,4.7,1.6,Iris-versicolor
|
||||
4.9,2.4,3.3,1.0,Iris-versicolor
|
||||
6.6,2.9,4.6,1.3,Iris-versicolor
|
||||
5.2,2.7,3.9,1.4,Iris-versicolor
|
||||
5.0,2.0,3.5,1.0,Iris-versicolor
|
||||
5.9,3.0,4.2,1.5,Iris-versicolor
|
||||
6.0,2.2,4.0,1.0,Iris-versicolor
|
||||
6.1,2.9,4.7,1.4,Iris-versicolor
|
||||
5.6,2.9,3.6,1.3,Iris-versicolor
|
||||
6.7,3.1,4.4,1.4,Iris-versicolor
|
||||
5.6,3.0,4.5,1.5,Iris-versicolor
|
||||
5.8,2.7,4.1,1.0,Iris-versicolor
|
||||
6.2,2.2,4.5,1.5,Iris-versicolor
|
||||
5.6,2.5,3.9,1.1,Iris-versicolor
|
||||
5.9,3.2,4.8,1.8,Iris-versicolor
|
||||
6.1,2.8,4.0,1.3,Iris-versicolor
|
||||
6.3,2.5,4.9,1.5,Iris-versicolor
|
||||
6.1,2.8,4.7,1.2,Iris-versicolor
|
||||
6.4,2.9,4.3,1.3,Iris-versicolor
|
||||
6.6,3.0,4.4,1.4,Iris-versicolor
|
||||
6.8,2.8,4.8,1.4,Iris-versicolor
|
||||
6.7,3.0,5.0,1.7,Iris-versicolor
|
||||
6.0,2.9,4.5,1.5,Iris-versicolor
|
||||
5.7,2.6,3.5,1.0,Iris-versicolor
|
||||
5.5,2.4,3.8,1.1,Iris-versicolor
|
||||
5.5,2.4,3.7,1.0,Iris-versicolor
|
||||
5.8,2.7,3.9,1.2,Iris-versicolor
|
||||
6.0,2.7,5.1,1.6,Iris-versicolor
|
||||
5.4,3.0,4.5,1.5,Iris-versicolor
|
||||
6.0,3.4,4.5,1.6,Iris-versicolor
|
||||
6.7,3.1,4.7,1.5,Iris-versicolor
|
||||
6.3,2.3,4.4,1.3,Iris-versicolor
|
||||
5.6,3.0,4.1,1.3,Iris-versicolor
|
||||
5.5,2.5,4.0,1.3,Iris-versicolor
|
||||
5.5,2.6,4.4,1.2,Iris-versicolor
|
||||
6.1,3.0,4.6,1.4,Iris-versicolor
|
||||
5.8,2.6,4.0,1.2,Iris-versicolor
|
||||
5.0,2.3,3.3,1.0,Iris-versicolor
|
||||
5.6,2.7,4.2,1.3,Iris-versicolor
|
||||
5.7,3.0,4.2,1.2,Iris-versicolor
|
||||
5.7,2.9,4.2,1.3,Iris-versicolor
|
||||
6.2,2.9,4.3,1.3,Iris-versicolor
|
||||
5.1,2.5,3.0,1.1,Iris-versicolor
|
||||
5.7,2.8,4.1,1.3,Iris-versicolor
|
||||
6.3,3.3,6.0,2.5,Iris-virginica
|
||||
5.8,2.7,5.1,1.9,Iris-virginica
|
||||
7.1,3.0,5.9,2.1,Iris-virginica
|
||||
6.3,2.9,5.6,1.8,Iris-virginica
|
||||
6.5,3.0,5.8,2.2,Iris-virginica
|
||||
7.6,3.0,6.6,2.1,Iris-virginica
|
||||
4.9,2.5,4.5,1.7,Iris-virginica
|
||||
7.3,2.9,6.3,1.8,Iris-virginica
|
||||
6.7,2.5,5.8,1.8,Iris-virginica
|
||||
7.2,3.6,6.1,2.5,Iris-virginica
|
||||
6.5,3.2,5.1,2.0,Iris-virginica
|
||||
6.4,2.7,5.3,1.9,Iris-virginica
|
||||
6.8,3.0,5.5,2.1,Iris-virginica
|
||||
5.7,2.5,5.0,2.0,Iris-virginica
|
||||
5.8,2.8,5.1,2.4,Iris-virginica
|
||||
6.4,3.2,5.3,2.3,Iris-virginica
|
||||
6.5,3.0,5.5,1.8,Iris-virginica
|
||||
7.7,3.8,6.7,2.2,Iris-virginica
|
||||
7.7,2.6,6.9,2.3,Iris-virginica
|
||||
6.0,2.2,5.0,1.5,Iris-virginica
|
||||
6.9,3.2,5.7,2.3,Iris-virginica
|
||||
5.6,2.8,4.9,2.0,Iris-virginica
|
||||
7.7,2.8,6.7,2.0,Iris-virginica
|
||||
6.3,2.7,4.9,1.8,Iris-virginica
|
||||
6.7,3.3,5.7,2.1,Iris-virginica
|
||||
7.2,3.2,6.0,1.8,Iris-virginica
|
||||
6.2,2.8,4.8,1.8,Iris-virginica
|
||||
6.1,3.0,4.9,1.8,Iris-virginica
|
||||
6.4,2.8,5.6,2.1,Iris-virginica
|
||||
7.2,3.0,5.8,1.6,Iris-virginica
|
||||
7.4,2.8,6.1,1.9,Iris-virginica
|
||||
7.9,3.8,6.4,2.0,Iris-virginica
|
||||
6.4,2.8,5.6,2.2,Iris-virginica
|
||||
6.3,2.8,5.1,1.5,Iris-virginica
|
||||
6.1,2.6,5.6,1.4,Iris-virginica
|
||||
7.7,3.0,6.1,2.3,Iris-virginica
|
||||
6.3,3.4,5.6,2.4,Iris-virginica
|
||||
6.4,3.1,5.5,1.8,Iris-virginica
|
||||
6.0,3.0,4.8,1.8,Iris-virginica
|
||||
6.9,3.1,5.4,2.1,Iris-virginica
|
||||
6.7,3.1,5.6,2.4,Iris-virginica
|
||||
6.9,3.1,5.1,2.3,Iris-virginica
|
||||
5.8,2.7,5.1,1.9,Iris-virginica
|
||||
6.8,3.2,5.9,2.3,Iris-virginica
|
||||
6.7,3.3,5.7,2.5,Iris-virginica
|
||||
6.7,3.0,5.2,2.3,Iris-virginica
|
||||
6.3,2.5,5.0,1.9,Iris-virginica
|
||||
6.5,3.0,5.2,2.0,Iris-virginica
|
||||
6.2,3.4,5.4,2.3,Iris-virginica
|
||||
5.9,3.0,5.1,1.8,Iris-virginica
|
||||
%
|
||||
%
|
||||
%
|
BIN
fimdlp/testcpp/main
Executable file
BIN
fimdlp/testcpp/main
Executable file
Binary file not shown.
57
fimdlp/testcpp/main.cpp
Normal file
57
fimdlp/testcpp/main.cpp
Normal file
@@ -0,0 +1,57 @@
|
||||
#include "ArffFiles.h"
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
#include <iomanip>
|
||||
#include "../CPPFImdlp.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
ArffFiles file;
|
||||
vector<string> lines;
|
||||
string path = "/Users/rmontanana/Code/FImdlp/fimdlp/testcpp/datasets/";
|
||||
map<string, bool > datasets = {
|
||||
{"mfeat-factors", true},
|
||||
{"iris", true},
|
||||
{"letter", true},
|
||||
{"kdd_JapaneseVowels", false}
|
||||
};
|
||||
if (argc != 2 || datasets.find(argv[1]) == datasets.end()) {
|
||||
cout << "Usage: " << argv[0] << " {mfeat-factors, iris, letter, kdd_JapaneseVowels}" << endl;
|
||||
return 1;
|
||||
}
|
||||
|
||||
//file.load("datasets/mfeat-factors.arff", true);
|
||||
//file.load("/Users/rmontanana/Code/FImdlp/fimdlp/testcpp/datasets/kdd_JapaneseVowels.arff", false);
|
||||
//file.load("/Users/rmontanana/Code/FImdlp/fimdlp/testcpp/datasets/iris.arff", true);
|
||||
file.load(path + argv[1] + ".arff", datasets[argv[1]]);
|
||||
auto attributes = file.getAttributes();
|
||||
int items = file.getSize();
|
||||
cout << "Number of lines: " << items << endl;
|
||||
cout << "Attributes: " << endl;
|
||||
for (auto attribute : attributes) {
|
||||
cout << "Name: " << get<0>(attribute) << " Type: " << get<1>(attribute) << endl;
|
||||
}
|
||||
cout << "Class name: " << file.getClassName() << endl;
|
||||
cout << "Class type: " << file.getClassType() << endl;
|
||||
cout << "Data: " << endl;
|
||||
vector<vector<float>>& X = file.getX();
|
||||
vector<int>& y = file.getY();
|
||||
for (int i = 0; i < 50; i++) {
|
||||
for (auto feature : X) {
|
||||
cout << fixed << setprecision(1) << feature[i] << " ";
|
||||
}
|
||||
cout << y[i] << endl;
|
||||
}
|
||||
mdlp::CPPFImdlp test = mdlp::CPPFImdlp();
|
||||
for (auto i = 0; i < attributes.size(); i++) {
|
||||
cout << "Cut points for " << get<0>(attributes[i]) << endl;
|
||||
cout << "--------------------------" << setprecision(3) << endl;
|
||||
test.fit(X[i], y);
|
||||
for (auto item : test.getCutPoints()) {
|
||||
cout << item << endl;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
111
fimdlp/testcpp/xx/ArffFiles.cpp
Normal file
111
fimdlp/testcpp/xx/ArffFiles.cpp
Normal file
@@ -0,0 +1,111 @@
|
||||
#include "ArffFiles.h"
|
||||
|
||||
#include <fstream>
|
||||
#include <sstream>
|
||||
#include <map>
|
||||
#include <iostream>
|
||||
|
||||
using namespace std;
|
||||
|
||||
// Nothing to set up: every member is a container or string that starts out empty.
ArffFiles::ArffFiles() = default;
|
||||
vector<string> ArffFiles::getLines()
|
||||
{
|
||||
return lines;
|
||||
}
|
||||
unsigned long int ArffFiles::getSize()
|
||||
{
|
||||
return lines.size();
|
||||
}
|
||||
vector<tuple<string, string>> ArffFiles::getAttributes()
|
||||
{
|
||||
return attributes;
|
||||
}
|
||||
string ArffFiles::getClassName()
|
||||
{
|
||||
return className;
|
||||
}
|
||||
string ArffFiles::getClassType()
|
||||
{
|
||||
return classType;
|
||||
}
|
||||
vector<vector<float>>& ArffFiles::getX()
|
||||
{
|
||||
return X;
|
||||
}
|
||||
vector<int>& ArffFiles::getY()
|
||||
{
|
||||
return y;
|
||||
}
|
||||
void ArffFiles::load(string fileName)
|
||||
{
|
||||
ifstream file(fileName);
|
||||
string keyword, attribute, type;
|
||||
if (file.is_open()) {
|
||||
string line;
|
||||
while (getline(file, line)) {
|
||||
if (line[0] == '%' || line.empty() || line == "\r" || line == " ") {
|
||||
continue;
|
||||
}
|
||||
if (line.find("@attribute") != string::npos || line.find("@ATTRIBUTE") != string::npos) {
|
||||
stringstream ss(line);
|
||||
ss >> keyword >> attribute >> type;
|
||||
attributes.push_back(make_tuple(attribute, type));
|
||||
continue;
|
||||
}
|
||||
if (line[0] == '@') {
|
||||
continue;
|
||||
}
|
||||
lines.push_back(line);
|
||||
}
|
||||
file.close();
|
||||
if (attributes.empty())
|
||||
throw invalid_argument("No attributes found");
|
||||
className = get<0>(attributes.back());
|
||||
classType = get<1>(attributes.back());
|
||||
attributes.pop_back();
|
||||
generateDataset();
|
||||
} else
|
||||
throw invalid_argument("Unable to open file");
|
||||
}
|
||||
void ArffFiles::generateDataset()
|
||||
{
|
||||
X = vector<vector<float>>(lines.size(), vector<float>(attributes.size()));
|
||||
vector<string> yy = vector<string>(lines.size(), "");
|
||||
for (int i = 0; i < lines.size(); i++) {
|
||||
stringstream ss(lines[i]);
|
||||
string value;
|
||||
int j = 0;
|
||||
while (getline(ss, value, ',')) {
|
||||
if (j == attributes.size()) {
|
||||
yy[i] = value;
|
||||
break;
|
||||
}
|
||||
X[i][j] = stof(value);
|
||||
j++;
|
||||
}
|
||||
}
|
||||
y = factorize(yy);
|
||||
}
|
||||
// Returns a copy of `source` without leading and trailing spaces, newlines,
// carriage returns or tabs. A string made only of those characters (or an
// empty one) yields an empty string.
string ArffFiles::trim(const string& source)
{
    const char* const blanks = " \n\r\t";
    const auto first = source.find_first_not_of(blanks);
    if (first == string::npos) {
        return string();
    }
    const auto last = source.find_last_not_of(blanks);
    return source.substr(first, last - first + 1);
}
|
||||
// Encodes string labels as integers.
//
// Codes are assigned in order of first appearance, starting at 0, so the
// result has the same length as `labels` and equal labels share a code.
vector<int> ArffFiles::factorize(const vector<string>& labels)
{
    vector<int> yy;
    yy.reserve(labels.size());
    map<string, int> labelMap;
    for (const string& label : labels) {
        // One lookup per label: an unseen label is inserted with the next free
        // code (the current map size); a known label is left untouched.
        const auto pos = labelMap.try_emplace(label, static_cast<int>(labelMap.size())).first;
        yy.push_back(pos->second);
    }
    return yy;
}
|
28
fimdlp/testcpp/xx/ArffFiles.h
Normal file
28
fimdlp/testcpp/xx/ArffFiles.h
Normal file
@@ -0,0 +1,28 @@
|
||||
#ifndef ARFFFILES_H
|
||||
#define ARFFFILES_H
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <tuple>
|
||||
using namespace std;
|
||||
// Small ARFF reader used by the test programs: load() parses a file and the
// accessors expose the parsed attributes, raw rows and numeric dataset.
class ArffFiles {
public:
    ArffFiles();
    // Parses the given file; see the definition for the accepted syntax.
    void load(std::string);
    // Copy of the raw data rows.
    std::vector<std::string> getLines();
    // Number of raw data rows.
    unsigned long int getSize();
    // Name and type of the class attribute (the last one declared).
    std::string getClassName();
    std::string getClassType();
    // Copy of the input without leading/trailing blanks.
    std::string trim(const std::string&);
    // References to the feature matrix and the encoded class labels.
    std::vector<std::vector<float>>& getX();
    std::vector<int>& getY();
    // Copy of the (name, type) pairs of the non-class attributes.
    std::vector<std::tuple<std::string, std::string>> getAttributes();
    // Maps labels to integer codes in order of first appearance.
    std::vector<int> factorize(const std::vector<std::string>& labels);

private:
    std::vector<std::string> lines;                                // raw data rows
    std::vector<std::tuple<std::string, std::string>> attributes;  // (name, type)
    std::string className, classType;
    std::vector<std::vector<float>> X;                             // features
    std::vector<int> y;                                            // class codes
    // Builds X and y from `lines`.
    void generateDataset();
};
|
||||
#endif
|
6
fimdlp/testcpp/xx/CMakeLists.txt
Normal file
6
fimdlp/testcpp/xx/CMakeLists.txt
Normal file
@@ -0,0 +1,6 @@
|
||||
# Build script for the stand-alone ARFF loader demo executable.
cmake_minimum_required(VERSION 3.24)
project(main)

# Request C++17 and make it mandatory, so configuration fails instead of
# silently decaying to an older standard on compilers that lack it.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

add_executable(main main.cpp ArffFiles.cpp)
|
30
fimdlp/testcpp/xx/main.cpp
Normal file
30
fimdlp/testcpp/xx/main.cpp
Normal file
@@ -0,0 +1,30 @@
|
||||
#include "ArffFiles.h"
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
#include <iomanip>
|
||||
|
||||
using namespace std;
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
ArffFiles file;
|
||||
vector<string> lines;
|
||||
//file.load("datasets/mfeat-factors.arff");
|
||||
file.load("/Users/rmontanana/Code/FImdlp/fimdlp/testcpp/datasets/mfeat-factors.arff");
|
||||
cout << "Number of lines: " << file.getSize() << endl;
|
||||
cout << "Attributes: " << endl;
|
||||
for (auto attribute: file.getAttributes()) {
|
||||
cout << "Name: " << get<0>(attribute) << " Type: " << get<1>(attribute) << endl;
|
||||
}
|
||||
cout << "Class name: " << file.getClassName() << endl;
|
||||
cout << "Class type: " << file.getClassType() << endl;
|
||||
cout << "Data: " << endl;
|
||||
vector<vector<float>> &X = file.getX();
|
||||
vector<int> &y = file.getY();
|
||||
for (int i = 0; i < X.size(); i++) {
|
||||
for (float value: X[i]) {
|
||||
cout << fixed << setprecision(1) << value << " ";
|
||||
}
|
||||
cout << y[i] << endl;
|
||||
}
|
||||
return 0;
|
||||
}
|
63
sample.py
63
sample.py
@@ -4,7 +4,6 @@ from fimdlp.cppfimdlp import CFImdlp
|
||||
from sklearn.ensemble import RandomForestClassifier
|
||||
import numpy as np
|
||||
import time
|
||||
from math import log2
|
||||
|
||||
from scipy.io import arff
|
||||
import pandas as pd
|
||||
@@ -44,65 +43,3 @@ print(test.get_cut_points())
|
||||
clf = RandomForestClassifier(random_state=0)
|
||||
print(clf.fit(Xt, y).score(Xt, y))
|
||||
print(Xt)
|
||||
# for proposal in [True, False]:
|
||||
# X = data.data
|
||||
# y = data.target
|
||||
# print("*** Proposal: ", proposal)
|
||||
# test = CFImdlp(debug=True, proposal=proposal)
|
||||
# test.fit(X[:, 0], y)
|
||||
# result = test.get_cut_points()
|
||||
# for item in result:
|
||||
# print(
|
||||
# f"Class={item['classNumber']} - ({item['start']:3d}, "
|
||||
# f"{item['end']:3d}) -> ({item['fromValue']:3.1f}, "
|
||||
# f"{item['toValue']:3.1f}]"
|
||||
# )
|
||||
# print(test.get_discretized_values())
|
||||
# print("+" * 40)
|
||||
# X = np.array(
|
||||
# [
|
||||
# [5.1, 3.5, 1.4, 0.2],
|
||||
# [5.2, 3.0, 1.4, 0.2],
|
||||
# [5.3, 3.2, 1.3, 0.2],
|
||||
# [5.4, 3.1, 1.5, 0.2],
|
||||
# ]
|
||||
# )
|
||||
# y = np.array([0, 0, 0, 1])
|
||||
# print(test.fit(X[:, 0], y).transform(X[:, 0]))
|
||||
# result = test.get_cut_points()
|
||||
# for item in result:
|
||||
# print(
|
||||
# f"Class={item['classNumber']} - ({item['start']:3d}, {item['end']:3d})"
|
||||
# f" -> ({item['fromValue']:3.1f}, {item['toValue']:3.1f}]"
|
||||
# )
|
||||
# print("*" * 40)
|
||||
# # print(Xs, ys)
|
||||
# # print("**********************")
|
||||
# # test = [(0, 3), (4, 4), (5, 5), (6, 8), (9, 9)]
|
||||
# # print(ys)
|
||||
# # for start, end in test:
|
||||
# # print("Testing ", start, end, ys[:end], ys[end:])
|
||||
# # print("Information gain: ", information_gain(ys, ys[:end], ys[end:]))
|
||||
# # print(test.transform(X))
|
||||
# # print(X)
|
||||
# # print(indices)
|
||||
# # print(np.array(X)[indices])
|
||||
|
||||
|
||||
# # # k = test.cut_points(X[:, 0], y)
|
||||
# # # print(k)
|
||||
# # # k = test.cut_points_ant(X[:, 0], y)
|
||||
# # # print(k)
|
||||
# # # test.debug_points(X[:, 0], y)
|
||||
# # X = [5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9]
|
||||
# # y = [1, 1, 1, 1, 1, 2, 2, 2, 2, 2]
|
||||
# # indices = [4, 3, 6, 8, 2, 1, 5, 0, 9, 7]
|
||||
# # clf = CFImdlp(debug=True, proposal=False)
|
||||
# # clf.fit(X, y)
|
||||
# # print(clf.get_cut_points())
|
||||
# # y = [1, 1, 1, 1, 1, 2, 2, 2, 2, 2]
|
||||
# # # To check
|
||||
# # indices2 = np.argsort(X)
|
||||
# # Xs = np.array(X)[indices2]
|
||||
# # ys = np.array(y)[indices2]
|
||||
# kdd_JapaneseVowels
|
||||
|
Reference in New Issue
Block a user