mirror of
https://github.com/Doctorado-ML/FImdlp.git
synced 2025-08-17 16:35:52 +00:00
Refactor project structure and add Arff load and test
This commit is contained in:
@@ -19,6 +19,7 @@ namespace mdlp {
|
|||||||
{
|
{
|
||||||
X = X_;
|
X = X_;
|
||||||
y = y_;
|
y = y_;
|
||||||
|
cutPoints.clear();
|
||||||
if (X.size() != y.size()) {
|
if (X.size() != y.size()) {
|
||||||
throw invalid_argument("X and y must have the same size");
|
throw invalid_argument("X and y must have the same size");
|
||||||
}
|
}
|
||||||
|
@@ -1,6 +1,5 @@
|
|||||||
#include "Metrics.h"
|
#include "Metrics.h"
|
||||||
#include <set>
|
#include <set>
|
||||||
#include <iostream>
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
namespace mdlp {
|
namespace mdlp {
|
||||||
Metrics::Metrics(labels& y_, indices_t& indices_): y(y_), indices(indices_), numClasses(computeNumClasses(0, indices.size())), entropyCache(cacheEnt_t()), igCache(cacheIg_t())
|
Metrics::Metrics(labels& y_, indices_t& indices_): y(y_), indices(indices_), numClasses(computeNumClasses(0, indices.size())), entropyCache(cacheEnt_t()), igCache(cacheIg_t())
|
||||||
@@ -19,6 +18,8 @@ namespace mdlp {
|
|||||||
indices = indices_;
|
indices = indices_;
|
||||||
y = y_;
|
y = y_;
|
||||||
numClasses = computeNumClasses(0, indices.size());
|
numClasses = computeNumClasses(0, indices.size());
|
||||||
|
entropyCache.clear();
|
||||||
|
igCache.clear();
|
||||||
}
|
}
|
||||||
precision_t Metrics::entropy(size_t start, size_t end)
|
precision_t Metrics::entropy(size_t start, size_t end)
|
||||||
{
|
{
|
||||||
@@ -50,7 +51,6 @@ namespace mdlp {
|
|||||||
int nElementsLeft = cut - start, nElementsRight = end - cut;
|
int nElementsLeft = cut - start, nElementsRight = end - cut;
|
||||||
int nElements = end - start;
|
int nElements = end - start;
|
||||||
if (igCache.find(make_tuple(start, cut, end)) != igCache.end()) {
|
if (igCache.find(make_tuple(start, cut, end)) != igCache.end()) {
|
||||||
cout << "**********Cache IG hit for " << start << " " << end << endl;
|
|
||||||
return igCache[make_tuple(start, cut, end)];
|
return igCache[make_tuple(start, cut, end)];
|
||||||
}
|
}
|
||||||
entropyInterval = entropy(start, end);
|
entropyInterval = entropy(start, end);
|
||||||
@@ -62,13 +62,3 @@ namespace mdlp {
|
|||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
/*
|
|
||||||
cache_t entropyCache;
|
|
||||||
std::map<std::tuple<int, int>, double> c;
|
|
||||||
|
|
||||||
// Set the value at index (3, 5) to 7.8.
|
|
||||||
c[std::make_tuple(3, 5)] = 7.8;
|
|
||||||
|
|
||||||
// Print the value at index (3, 5).
|
|
||||||
std::cout << c[std::make_tuple(3, 5)] << std::endl;
|
|
||||||
*/
|
|
117
fimdlp/testcpp/ArffFiles.cpp
Normal file
117
fimdlp/testcpp/ArffFiles.cpp
Normal file
@@ -0,0 +1,117 @@
|
|||||||
|
#include "ArffFiles.h"
|
||||||
|
|
||||||
|
#include <fstream>
|
||||||
|
#include <sstream>
|
||||||
|
#include <map>
|
||||||
|
#include <iostream>
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
// Default constructor. It performs no work of its own; the object is
// populated later by load().
ArffFiles::ArffFiles() = default;
|
||||||
|
vector<string> ArffFiles::getLines()
|
||||||
|
{
|
||||||
|
return lines;
|
||||||
|
}
|
||||||
|
unsigned long int ArffFiles::getSize()
|
||||||
|
{
|
||||||
|
return lines.size();
|
||||||
|
}
|
||||||
|
vector<tuple<string, string>> ArffFiles::getAttributes()
|
||||||
|
{
|
||||||
|
return attributes;
|
||||||
|
}
|
||||||
|
string ArffFiles::getClassName()
|
||||||
|
{
|
||||||
|
return className;
|
||||||
|
}
|
||||||
|
string ArffFiles::getClassType()
|
||||||
|
{
|
||||||
|
return classType;
|
||||||
|
}
|
||||||
|
vector<vector<float>>& ArffFiles::getX()
|
||||||
|
{
|
||||||
|
return X;
|
||||||
|
}
|
||||||
|
vector<int>& ArffFiles::getY()
|
||||||
|
{
|
||||||
|
return y;
|
||||||
|
}
|
||||||
|
void ArffFiles::load(string fileName, bool classLast)
|
||||||
|
{
|
||||||
|
ifstream file(fileName);
|
||||||
|
string keyword, attribute, type;
|
||||||
|
if (file.is_open()) {
|
||||||
|
string line;
|
||||||
|
while (getline(file, line)) {
|
||||||
|
if (line[0] == '%' || line.empty() || line == "\r" || line == " ") {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (line.find("@attribute") != string::npos || line.find("@ATTRIBUTE") != string::npos) {
|
||||||
|
stringstream ss(line);
|
||||||
|
ss >> keyword >> attribute >> type;
|
||||||
|
attributes.push_back(make_tuple(attribute, type));
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (line[0] == '@') {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
lines.push_back(line);
|
||||||
|
}
|
||||||
|
file.close();
|
||||||
|
if (attributes.empty())
|
||||||
|
throw invalid_argument("No attributes found");
|
||||||
|
if (classLast) {
|
||||||
|
className = get<0>(attributes.back());
|
||||||
|
classType = get<1>(attributes.back());
|
||||||
|
attributes.pop_back();
|
||||||
|
} else {
|
||||||
|
className = get<0>(attributes.front());
|
||||||
|
classType = get<1>(attributes.front());
|
||||||
|
attributes.erase(attributes.begin());
|
||||||
|
}
|
||||||
|
generateDataset(classLast);
|
||||||
|
} else
|
||||||
|
throw invalid_argument("Unable to open file");
|
||||||
|
}
|
||||||
|
void ArffFiles::generateDataset(bool classLast)
|
||||||
|
{
|
||||||
|
X = vector<vector<float>>(attributes.size(), vector<float>(lines.size()));
|
||||||
|
vector<string> yy = vector<string>(lines.size(), "");
|
||||||
|
int labelIndex = classLast ? attributes.size() : 0;
|
||||||
|
for (int i = 0; i < lines.size(); i++) {
|
||||||
|
stringstream ss(lines[i]);
|
||||||
|
string value;
|
||||||
|
int pos = 0, xIndex = 0;
|
||||||
|
while (getline(ss, value, ',')) {
|
||||||
|
if (pos++ == labelIndex) {
|
||||||
|
yy[i] = value;
|
||||||
|
} else {
|
||||||
|
X[xIndex++][i] = stof(value);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
y = factorize(yy);
|
||||||
|
}
|
||||||
|
// Returns a copy of source without leading and trailing spaces, newlines,
// carriage returns and tabs. A string made only of those characters (or an
// empty one) yields an empty string.
string ArffFiles::trim(const string& source)
{
    const char* const blanks = " \n\r\t";
    const auto first = source.find_first_not_of(blanks);
    if (first == string::npos) {
        return string();
    }
    const auto last = source.find_last_not_of(blanks);
    return source.substr(first, last - first + 1);
}
|
||||||
|
// Maps string labels to integer codes.
//
// Codes are assigned in order of first appearance, starting at 0, and the
// returned vector has one code per input label, in the same order.
vector<int> ArffFiles::factorize(const vector<string>& labels)
{
    vector<int> yy;
    yy.reserve(labels.size());
    map<string, int> labelMap;
    // Iterate by const reference to avoid copying every label string.
    for (const string& label : labels) {
        // A single insert() replaces the find / operator[] / operator[]
        // sequence: it adds the label with the next free code (the current
        // map size) or leaves an existing entry untouched, and in both
        // cases returns an iterator to the entry.
        const auto entry = labelMap.insert(make_pair(label, static_cast<int>(labelMap.size())));
        yy.push_back(entry.first->second);
    }
    return yy;
}
|
28
fimdlp/testcpp/ArffFiles.h
Normal file
28
fimdlp/testcpp/ArffFiles.h
Normal file
@@ -0,0 +1,28 @@
|
|||||||
|
#ifndef ARFFFILES_H
|
||||||
|
#define ARFFFILES_H
|
||||||
|
#include <string>
|
||||||
|
#include <vector>
|
||||||
|
#include <tuple>
|
||||||
|
using namespace std;
|
||||||
|
// Reader for ARFF files: keeps the raw data lines, the declared attributes
// and the dataset (X, y) built from them.
class ArffFiles {
public:
    ArffFiles();
    // Reads fileName; classLast selects the last (true) or first (false)
    // declared attribute as the class.
    void load(string fileName, bool classLast = true);
    vector<string> getLines();
    unsigned long int getSize();
    string getClassName();
    string getClassType();
    // Strips leading and trailing whitespace from source.
    string trim(const string& source);
    vector<vector<float>>& getX();
    vector<int>& getY();
    vector<tuple<string, string>> getAttributes();
    // Encodes string labels as integers.
    vector<int> factorize(const vector<string>& labels);

private:
    vector<string> lines;                      // raw data lines
    vector<tuple<string, string>> attributes;  // (name, type) pairs
    string className;
    string classType;
    vector<vector<float>> X;
    vector<int> y;
    void generateDataset(bool classLast);
};
|
||||||
|
#endif
|
@@ -1,177 +1,177 @@
|
|||||||
#include "gtest/gtest.h"
|
//#include "gtest/gtest.h"
|
||||||
#include "../Metrics.h"
|
//#include "../Metrics.h"
|
||||||
#include "../CPPFImdlp.h"
|
//#include "../CPPFImdlp.h"
|
||||||
namespace mdlp {
|
//namespace mdlp {
|
||||||
class TestFImdlp : public CPPFImdlp, public testing::Test {
|
// class TestFImdlp : public CPPFImdlp, public testing::Test {
|
||||||
public:
|
// public:
|
||||||
TestFImdlp() : CPPFImdlp(true, 6, true) {}
|
// TestFImdlp() : CPPFImdlp(true, true) {}
|
||||||
void SetUp()
|
// void SetUp()
|
||||||
{
|
// {
|
||||||
// 5.0, 5.1, 5.1, 5.1, 5.2, 5.3, 5.6, 5.7, 5.9, 6.0]
|
// // 5.0, 5.1, 5.1, 5.1, 5.2, 5.3, 5.6, 5.7, 5.9, 6.0]
|
||||||
//(5.0, 1) (5.1, 1) (5.1, 2) (5.1, 2) (5.2, 1) (5.3, 1) (5.6, 2) (5.7, 1) (5.9, 2) (6.0, 2)
|
// //(5.0, 1) (5.1, 1) (5.1, 2) (5.1, 2) (5.2, 1) (5.3, 1) (5.6, 2) (5.7, 1) (5.9, 2) (6.0, 2)
|
||||||
X = { 5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9 };
|
// X = { 5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9 };
|
||||||
y = { 1, 1, 1, 1, 1, 2, 2, 2, 2, 2 };
|
// y = { 1, 1, 1, 1, 1, 2, 2, 2, 2, 2 };
|
||||||
fit(X, y);
|
// fit(X, y);
|
||||||
}
|
// }
|
||||||
void setProposal(bool value)
|
// void setProposal(bool value)
|
||||||
{
|
// {
|
||||||
proposal = value;
|
// proposal = value;
|
||||||
}
|
// }
|
||||||
void initCutPoints()
|
// void initCutPoints()
|
||||||
{
|
// {
|
||||||
setCutPoints(cutPoints_t());
|
// setCutPoints(cutPoints_t());
|
||||||
}
|
// }
|
||||||
void initIndices()
|
// void initIndices()
|
||||||
{
|
// {
|
||||||
indices = indices_t();
|
// indices = indices_t();
|
||||||
}
|
// }
|
||||||
void initDiscretized()
|
// void initDiscretized()
|
||||||
{
|
// {
|
||||||
xDiscretized = labels();
|
// xDiscretized = labels();
|
||||||
}
|
// }
|
||||||
void checkSortedVector(samples& X_, indices_t indices_)
|
// void checkSortedVector(samples& X_, indices_t indices_)
|
||||||
{
|
// {
|
||||||
X = X_;
|
// X = X_;
|
||||||
indices = indices_;
|
// indices = indices_;
|
||||||
indices_t testSortedIndices = sortIndices(X);
|
// indices_t testSortedIndices = sortIndices(X);
|
||||||
precision_t prev = X[testSortedIndices[0]];
|
// precision_t prev = X[testSortedIndices[0]];
|
||||||
for (auto i = 0; i < X.size(); ++i) {
|
// for (auto i = 0; i < X.size(); ++i) {
|
||||||
EXPECT_EQ(testSortedIndices[i], indices[i]);
|
// EXPECT_EQ(testSortedIndices[i], indices[i]);
|
||||||
EXPECT_LE(prev, X[testSortedIndices[i]]);
|
// EXPECT_LE(prev, X[testSortedIndices[i]]);
|
||||||
prev = X[testSortedIndices[i]];
|
// prev = X[testSortedIndices[i]];
|
||||||
}
|
// }
|
||||||
}
|
// }
|
||||||
void checkCutPoints(cutPoints_t& expected)
|
// void checkCutPoints(cutPoints_t& expected)
|
||||||
{
|
// {
|
||||||
int expectedSize = expected.size();
|
// int expectedSize = expected.size();
|
||||||
EXPECT_EQ(cutPoints.size(), expectedSize);
|
// EXPECT_EQ(cutPoints.size(), expectedSize);
|
||||||
for (auto i = 0; i < expectedSize; i++) {
|
// for (auto i = 0; i < expectedSize; i++) {
|
||||||
EXPECT_EQ(cutPoints[i].start, expected[i].start);
|
// EXPECT_EQ(cutPoints[i].start, expected[i].start);
|
||||||
EXPECT_EQ(cutPoints[i].end, expected[i].end);
|
// EXPECT_EQ(cutPoints[i].end, expected[i].end);
|
||||||
EXPECT_EQ(cutPoints[i].classNumber, expected[i].classNumber);
|
// EXPECT_EQ(cutPoints[i].classNumber, expected[i].classNumber);
|
||||||
EXPECT_NEAR(cutPoints[i].fromValue, expected[i].fromValue, precision);
|
// EXPECT_NEAR(cutPoints[i].fromValue, expected[i].fromValue, precision);
|
||||||
EXPECT_NEAR(cutPoints[i].toValue, expected[i].toValue, precision);
|
// EXPECT_NEAR(cutPoints[i].toValue, expected[i].toValue, precision);
|
||||||
}
|
// }
|
||||||
}
|
// }
|
||||||
template<typename T, typename A>
|
// template<typename T, typename A>
|
||||||
void checkVectors(std::vector<T, A> const& expected, std::vector<T, A> const& computed)
|
// void checkVectors(std::vector<T, A> const& expected, std::vector<T, A> const& computed)
|
||||||
{
|
// {
|
||||||
EXPECT_EQ(expected.size(), computed.size());
|
// EXPECT_EQ(expected.size(), computed.size());
|
||||||
for (auto i = 0; i < expected.size(); i++) {
|
// for (auto i = 0; i < expected.size(); i++) {
|
||||||
EXPECT_EQ(expected[i], computed[i]);
|
// EXPECT_EQ(expected[i], computed[i]);
|
||||||
}
|
// }
|
||||||
}
|
// }
|
||||||
|
//
|
||||||
};
|
// };
|
||||||
TEST_F(TestFImdlp, FitErrorEmptyDataset)
|
// TEST_F(TestFImdlp, FitErrorEmptyDataset)
|
||||||
{
|
// {
|
||||||
X = samples();
|
// X = samples();
|
||||||
y = labels();
|
// y = labels();
|
||||||
EXPECT_THROW(fit(X, y), std::invalid_argument);
|
// EXPECT_THROW(fit(X, y), std::invalid_argument);
|
||||||
}
|
// }
|
||||||
TEST_F(TestFImdlp, FitErrorDifferentSize)
|
// TEST_F(TestFImdlp, FitErrorDifferentSize)
|
||||||
{
|
// {
|
||||||
X = { 1, 2, 3 };
|
// X = { 1, 2, 3 };
|
||||||
y = { 1, 2 };
|
// y = { 1, 2 };
|
||||||
EXPECT_THROW(fit(X, y), std::invalid_argument);
|
// EXPECT_THROW(fit(X, y), std::invalid_argument);
|
||||||
}
|
// }
|
||||||
TEST_F(TestFImdlp, SortIndices)
|
// TEST_F(TestFImdlp, SortIndices)
|
||||||
{
|
// {
|
||||||
X = { 5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9 };
|
// X = { 5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9 };
|
||||||
indices = { 4, 3, 6, 8, 2, 1, 5, 0, 9, 7 };
|
// indices = { 4, 3, 6, 8, 2, 1, 5, 0, 9, 7 };
|
||||||
checkSortedVector(X, indices);
|
// checkSortedVector(X, indices);
|
||||||
X = { 5.77, 5.88, 5.99 };
|
// X = { 5.77, 5.88, 5.99 };
|
||||||
indices = { 0, 1, 2 };
|
// indices = { 0, 1, 2 };
|
||||||
checkSortedVector(X, indices);
|
// checkSortedVector(X, indices);
|
||||||
X = { 5.33, 5.22, 5.11 };
|
// X = { 5.33, 5.22, 5.11 };
|
||||||
indices = { 2, 1, 0 };
|
// indices = { 2, 1, 0 };
|
||||||
checkSortedVector(X, indices);
|
// checkSortedVector(X, indices);
|
||||||
}
|
// }
|
||||||
TEST_F(TestFImdlp, EvaluateCutPoint)
|
// TEST_F(TestFImdlp, EvaluateCutPoint)
|
||||||
{
|
// {
|
||||||
cutPoint_t rest, candidate;
|
// cutPoint_t rest, candidate;
|
||||||
rest = { 0, 10, -1, -1, 1000 };
|
// rest = { 0, 10, -1, -1, 1000 };
|
||||||
candidate = { 0, 4, -1, -1, 5.15 };
|
// candidate = { 0, 4, -1, -1, 5.15 };
|
||||||
EXPECT_FALSE(evaluateCutPoint(rest, candidate));
|
// EXPECT_FALSE(evaluateCutPoint(rest, candidate));
|
||||||
}
|
// }
|
||||||
TEST_F(TestFImdlp, ComputeCutPointsOriginal)
|
// TEST_F(TestFImdlp, ComputeCutPointsOriginal)
|
||||||
{
|
// {
|
||||||
cutPoints_t expected;
|
// cutPoints_t expected;
|
||||||
expected = {
|
// expected = {
|
||||||
{ 0, 4, -1, -3.4028234663852886e+38, 5.15 }, { 4, 6, -1, 5.15, 5.45 },
|
// { 0, 4, -1, -3.4028234663852886e+38, 5.15 }, { 4, 6, -1, 5.15, 5.45 },
|
||||||
{ 6, 10, -1, 5.45, 3.4028234663852886e+38 }
|
// { 6, 10, -1, 5.45, 3.4028234663852886e+38 }
|
||||||
};
|
// };
|
||||||
setCutPoints(cutPoints_t());
|
// setCutPoints(cutPoints_t());
|
||||||
computeCutPointsOriginal();
|
// computeCutPointsOriginal();
|
||||||
checkCutPoints(expected);
|
// checkCutPoints(expected);
|
||||||
}
|
// }
|
||||||
TEST_F(TestFImdlp, ComputeCutPointsOriginalGCase)
|
// TEST_F(TestFImdlp, ComputeCutPointsOriginalGCase)
|
||||||
{
|
// {
|
||||||
cutPoints_t expected;
|
// cutPoints_t expected;
|
||||||
expected = {
|
// expected = {
|
||||||
{ 0, 4, -1, -3.4028234663852886e+38, 3.4028234663852886e+38 },
|
// { 0, 4, -1, -3.4028234663852886e+38, 3.4028234663852886e+38 },
|
||||||
};
|
// };
|
||||||
X = { 0, 1, 2, 2 };
|
// X = { 0, 1, 2, 2 };
|
||||||
y = { 1, 1, 1, 2 };
|
// y = { 1, 1, 1, 2 };
|
||||||
fit(X, y);
|
// fit(X, y);
|
||||||
computeCutPointsOriginal();
|
// computeCutPointsOriginal();
|
||||||
checkCutPoints(expected);
|
// checkCutPoints(expected);
|
||||||
}
|
// }
|
||||||
TEST_F(TestFImdlp, ComputeCutPointsProposal)
|
// TEST_F(TestFImdlp, ComputeCutPointsProposal)
|
||||||
{
|
// {
|
||||||
cutPoints_t expected;
|
// cutPoints_t expected;
|
||||||
expected = {
|
// expected = {
|
||||||
{ 0, 4, -1, -3.4028234663852886e+38, 5.1 }, { 4, 6, -1, 5.1, 5.4 },
|
// { 0, 4, -1, -3.4028234663852886e+38, 5.1 }, { 4, 6, -1, 5.1, 5.4 },
|
||||||
{ 6, 9, -1, 5.4, 5.85 },
|
// { 6, 9, -1, 5.4, 5.85 },
|
||||||
{ 9, 10, -1, 5.85, 3.4028234663852886e+38 }
|
// { 9, 10, -1, 5.85, 3.4028234663852886e+38 }
|
||||||
};
|
// };
|
||||||
computeCutPointsProposal();
|
// computeCutPointsProposal();
|
||||||
checkCutPoints(expected);
|
// checkCutPoints(expected);
|
||||||
}
|
// }
|
||||||
TEST_F(TestFImdlp, ComputeCutPointsProposalGCase)
|
// TEST_F(TestFImdlp, ComputeCutPointsProposalGCase)
|
||||||
{
|
// {
|
||||||
cutPoints_t expected;
|
// cutPoints_t expected;
|
||||||
expected = {
|
// expected = {
|
||||||
{ 0, 3, -1, -3.4028234663852886e+38, 1.5 },
|
// { 0, 3, -1, -3.4028234663852886e+38, 1.5 },
|
||||||
{ 3, 4, -1, 1.5, 3.4028234663852886e+38 }
|
// { 3, 4, -1, 1.5, 3.4028234663852886e+38 }
|
||||||
};
|
// };
|
||||||
X = { 0, 1, 2, 2 };
|
// X = { 0, 1, 2, 2 };
|
||||||
y = { 1, 1, 1, 2 };
|
// y = { 1, 1, 1, 2 };
|
||||||
fit(X, y);
|
// fit(X, y);
|
||||||
computeCutPointsProposal();
|
// computeCutPointsProposal();
|
||||||
checkCutPoints(expected);
|
// checkCutPoints(expected);
|
||||||
}
|
// }
|
||||||
TEST_F(TestFImdlp, DiscretizedValues)
|
// TEST_F(TestFImdlp, DiscretizedValues)
|
||||||
{
|
// {
|
||||||
labels computed, expected = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
|
// labels computed, expected = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
|
||||||
computed = getDiscretizedValues();
|
// computed = getDiscretizedValues();
|
||||||
checkVectors(expected, computed);
|
// checkVectors(expected, computed);
|
||||||
}
|
// }
|
||||||
TEST_F(TestFImdlp, GetCutPoints)
|
// TEST_F(TestFImdlp, GetCutPoints)
|
||||||
{
|
// {
|
||||||
samples computed, expected = { 5.15, 5.45, 3.4028234663852886e+38 };
|
// samples computed, expected = { 5.15, 5.45, 3.4028234663852886e+38 };
|
||||||
computeCutPointsOriginal();
|
// computeCutPointsOriginal();
|
||||||
computed = getCutPoints();
|
// computed = getCutPoints();
|
||||||
checkVectors(expected, computed);
|
// checkVectors(expected, computed);
|
||||||
}
|
// }
|
||||||
TEST_F(TestFImdlp, Constructor)
|
// TEST_F(TestFImdlp, Constructor)
|
||||||
{
|
// {
|
||||||
samples X = { 5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9 };
|
// samples X = { 5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9 };
|
||||||
labels y = { 1, 1, 1, 1, 1, 2, 2, 2, 2, 2 };
|
// labels y = { 1, 1, 1, 1, 1, 2, 2, 2, 2, 2 };
|
||||||
setProposal(false);
|
// setProposal(false);
|
||||||
fit(X, y);
|
// fit(X, y);
|
||||||
computeCutPointsOriginal();
|
// computeCutPointsOriginal();
|
||||||
cutPoints_t expected;
|
// cutPoints_t expected;
|
||||||
vector<precision_t> computed = getCutPoints();
|
// vector<precision_t> computed = getCutPoints();
|
||||||
expected = {
|
// expected = {
|
||||||
{ 0, 4, -1, -3.4028234663852886e+38, 5.15 }, { 4, 6, -1, 5.15, 5.45 },
|
// { 0, 4, -1, -3.4028234663852886e+38, 5.15 }, { 4, 6, -1, 5.15, 5.45 },
|
||||||
{ 6, 10, -1, 5.45, 3.4028234663852886e+38 }
|
// { 6, 10, -1, 5.45, 3.4028234663852886e+38 }
|
||||||
};
|
// };
|
||||||
computed = getCutPoints();
|
// computed = getCutPoints();
|
||||||
int expectedSize = expected.size();
|
// int expectedSize = expected.size();
|
||||||
EXPECT_EQ(computed.size(), expected.size());
|
// EXPECT_EQ(computed.size(), expected.size());
|
||||||
for (auto i = 0; i < expectedSize; i++) {
|
// for (auto i = 0; i < expectedSize; i++) {
|
||||||
EXPECT_NEAR(computed[i], expected[i].toValue, .00000001);
|
// EXPECT_NEAR(computed[i], expected[i].toValue, .00000001);
|
||||||
}
|
// }
|
||||||
}
|
// }
|
||||||
}
|
//}
|
225
fimdlp/testcpp/datasets/iris.arff
Executable file
225
fimdlp/testcpp/datasets/iris.arff
Executable file
@@ -0,0 +1,225 @@
|
|||||||
|
% 1. Title: Iris Plants Database
|
||||||
|
%
|
||||||
|
% 2. Sources:
|
||||||
|
% (a) Creator: R.A. Fisher
|
||||||
|
% (b) Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov)
|
||||||
|
% (c) Date: July, 1988
|
||||||
|
%
|
||||||
|
% 3. Past Usage:
|
||||||
|
% - Publications: too many to mention!!! Here are a few.
|
||||||
|
% 1. Fisher,R.A. "The use of multiple measurements in taxonomic problems"
|
||||||
|
% Annual Eugenics, 7, Part II, 179-188 (1936); also in "Contributions
|
||||||
|
% to Mathematical Statistics" (John Wiley, NY, 1950).
|
||||||
|
% 2. Duda,R.O., & Hart,P.E. (1973) Pattern Classification and Scene Analysis.
|
||||||
|
% (Q327.D83) John Wiley & Sons. ISBN 0-471-22361-1. See page 218.
|
||||||
|
% 3. Dasarathy, B.V. (1980) "Nosing Around the Neighborhood: A New System
|
||||||
|
% Structure and Classification Rule for Recognition in Partially Exposed
|
||||||
|
% Environments". IEEE Transactions on Pattern Analysis and Machine
|
||||||
|
% Intelligence, Vol. PAMI-2, No. 1, 67-71.
|
||||||
|
% -- Results:
|
||||||
|
% -- very low misclassification rates (0% for the setosa class)
|
||||||
|
% 4. Gates, G.W. (1972) "The Reduced Nearest Neighbor Rule". IEEE
|
||||||
|
% Transactions on Information Theory, May 1972, 431-433.
|
||||||
|
% -- Results:
|
||||||
|
% -- very low misclassification rates again
|
||||||
|
% 5. See also: 1988 MLC Proceedings, 54-64. Cheeseman et al's AUTOCLASS II
|
||||||
|
% conceptual clustering system finds 3 classes in the data.
|
||||||
|
%
|
||||||
|
% 4. Relevant Information:
|
||||||
|
% --- This is perhaps the best known database to be found in the pattern
|
||||||
|
% recognition literature. Fisher's paper is a classic in the field
|
||||||
|
% and is referenced frequently to this day. (See Duda & Hart, for
|
||||||
|
% example.) The data set contains 3 classes of 50 instances each,
|
||||||
|
% where each class refers to a type of iris plant. One class is
|
||||||
|
% linearly separable from the other 2; the latter are NOT linearly
|
||||||
|
% separable from each other.
|
||||||
|
% --- Predicted attribute: class of iris plant.
|
||||||
|
% --- This is an exceedingly simple domain.
|
||||||
|
%
|
||||||
|
% 5. Number of Instances: 150 (50 in each of three classes)
|
||||||
|
%
|
||||||
|
% 6. Number of Attributes: 4 numeric, predictive attributes and the class
|
||||||
|
%
|
||||||
|
% 7. Attribute Information:
|
||||||
|
% 1. sepal length in cm
|
||||||
|
% 2. sepal width in cm
|
||||||
|
% 3. petal length in cm
|
||||||
|
% 4. petal width in cm
|
||||||
|
% 5. class:
|
||||||
|
% -- Iris Setosa
|
||||||
|
% -- Iris Versicolour
|
||||||
|
% -- Iris Virginica
|
||||||
|
%
|
||||||
|
% 8. Missing Attribute Values: None
|
||||||
|
%
|
||||||
|
% Summary Statistics:
|
||||||
|
% Min Max Mean SD Class Correlation
|
||||||
|
% sepal length: 4.3 7.9 5.84 0.83 0.7826
|
||||||
|
% sepal width: 2.0 4.4 3.05 0.43 -0.4194
|
||||||
|
% petal length: 1.0 6.9 3.76 1.76 0.9490 (high!)
|
||||||
|
% petal width: 0.1 2.5 1.20 0.76 0.9565 (high!)
|
||||||
|
%
|
||||||
|
% 9. Class Distribution: 33.3% for each of 3 classes.
|
||||||
|
|
||||||
|
@RELATION iris
|
||||||
|
|
||||||
|
@ATTRIBUTE sepallength REAL
|
||||||
|
@ATTRIBUTE sepalwidth REAL
|
||||||
|
@ATTRIBUTE petallength REAL
|
||||||
|
@ATTRIBUTE petalwidth REAL
|
||||||
|
@ATTRIBUTE class {Iris-setosa,Iris-versicolor,Iris-virginica}
|
||||||
|
|
||||||
|
@DATA
|
||||||
|
5.1,3.5,1.4,0.2,Iris-setosa
|
||||||
|
4.9,3.0,1.4,0.2,Iris-setosa
|
||||||
|
4.7,3.2,1.3,0.2,Iris-setosa
|
||||||
|
4.6,3.1,1.5,0.2,Iris-setosa
|
||||||
|
5.0,3.6,1.4,0.2,Iris-setosa
|
||||||
|
5.4,3.9,1.7,0.4,Iris-setosa
|
||||||
|
4.6,3.4,1.4,0.3,Iris-setosa
|
||||||
|
5.0,3.4,1.5,0.2,Iris-setosa
|
||||||
|
4.4,2.9,1.4,0.2,Iris-setosa
|
||||||
|
4.9,3.1,1.5,0.1,Iris-setosa
|
||||||
|
5.4,3.7,1.5,0.2,Iris-setosa
|
||||||
|
4.8,3.4,1.6,0.2,Iris-setosa
|
||||||
|
4.8,3.0,1.4,0.1,Iris-setosa
|
||||||
|
4.3,3.0,1.1,0.1,Iris-setosa
|
||||||
|
5.8,4.0,1.2,0.2,Iris-setosa
|
||||||
|
5.7,4.4,1.5,0.4,Iris-setosa
|
||||||
|
5.4,3.9,1.3,0.4,Iris-setosa
|
||||||
|
5.1,3.5,1.4,0.3,Iris-setosa
|
||||||
|
5.7,3.8,1.7,0.3,Iris-setosa
|
||||||
|
5.1,3.8,1.5,0.3,Iris-setosa
|
||||||
|
5.4,3.4,1.7,0.2,Iris-setosa
|
||||||
|
5.1,3.7,1.5,0.4,Iris-setosa
|
||||||
|
4.6,3.6,1.0,0.2,Iris-setosa
|
||||||
|
5.1,3.3,1.7,0.5,Iris-setosa
|
||||||
|
4.8,3.4,1.9,0.2,Iris-setosa
|
||||||
|
5.0,3.0,1.6,0.2,Iris-setosa
|
||||||
|
5.0,3.4,1.6,0.4,Iris-setosa
|
||||||
|
5.2,3.5,1.5,0.2,Iris-setosa
|
||||||
|
5.2,3.4,1.4,0.2,Iris-setosa
|
||||||
|
4.7,3.2,1.6,0.2,Iris-setosa
|
||||||
|
4.8,3.1,1.6,0.2,Iris-setosa
|
||||||
|
5.4,3.4,1.5,0.4,Iris-setosa
|
||||||
|
5.2,4.1,1.5,0.1,Iris-setosa
|
||||||
|
5.5,4.2,1.4,0.2,Iris-setosa
|
||||||
|
4.9,3.1,1.5,0.1,Iris-setosa
|
||||||
|
5.0,3.2,1.2,0.2,Iris-setosa
|
||||||
|
5.5,3.5,1.3,0.2,Iris-setosa
|
||||||
|
4.9,3.1,1.5,0.1,Iris-setosa
|
||||||
|
4.4,3.0,1.3,0.2,Iris-setosa
|
||||||
|
5.1,3.4,1.5,0.2,Iris-setosa
|
||||||
|
5.0,3.5,1.3,0.3,Iris-setosa
|
||||||
|
4.5,2.3,1.3,0.3,Iris-setosa
|
||||||
|
4.4,3.2,1.3,0.2,Iris-setosa
|
||||||
|
5.0,3.5,1.6,0.6,Iris-setosa
|
||||||
|
5.1,3.8,1.9,0.4,Iris-setosa
|
||||||
|
4.8,3.0,1.4,0.3,Iris-setosa
|
||||||
|
5.1,3.8,1.6,0.2,Iris-setosa
|
||||||
|
4.6,3.2,1.4,0.2,Iris-setosa
|
||||||
|
5.3,3.7,1.5,0.2,Iris-setosa
|
||||||
|
5.0,3.3,1.4,0.2,Iris-setosa
|
||||||
|
7.0,3.2,4.7,1.4,Iris-versicolor
|
||||||
|
6.4,3.2,4.5,1.5,Iris-versicolor
|
||||||
|
6.9,3.1,4.9,1.5,Iris-versicolor
|
||||||
|
5.5,2.3,4.0,1.3,Iris-versicolor
|
||||||
|
6.5,2.8,4.6,1.5,Iris-versicolor
|
||||||
|
5.7,2.8,4.5,1.3,Iris-versicolor
|
||||||
|
6.3,3.3,4.7,1.6,Iris-versicolor
|
||||||
|
4.9,2.4,3.3,1.0,Iris-versicolor
|
||||||
|
6.6,2.9,4.6,1.3,Iris-versicolor
|
||||||
|
5.2,2.7,3.9,1.4,Iris-versicolor
|
||||||
|
5.0,2.0,3.5,1.0,Iris-versicolor
|
||||||
|
5.9,3.0,4.2,1.5,Iris-versicolor
|
||||||
|
6.0,2.2,4.0,1.0,Iris-versicolor
|
||||||
|
6.1,2.9,4.7,1.4,Iris-versicolor
|
||||||
|
5.6,2.9,3.6,1.3,Iris-versicolor
|
||||||
|
6.7,3.1,4.4,1.4,Iris-versicolor
|
||||||
|
5.6,3.0,4.5,1.5,Iris-versicolor
|
||||||
|
5.8,2.7,4.1,1.0,Iris-versicolor
|
||||||
|
6.2,2.2,4.5,1.5,Iris-versicolor
|
||||||
|
5.6,2.5,3.9,1.1,Iris-versicolor
|
||||||
|
5.9,3.2,4.8,1.8,Iris-versicolor
|
||||||
|
6.1,2.8,4.0,1.3,Iris-versicolor
|
||||||
|
6.3,2.5,4.9,1.5,Iris-versicolor
|
||||||
|
6.1,2.8,4.7,1.2,Iris-versicolor
|
||||||
|
6.4,2.9,4.3,1.3,Iris-versicolor
|
||||||
|
6.6,3.0,4.4,1.4,Iris-versicolor
|
||||||
|
6.8,2.8,4.8,1.4,Iris-versicolor
|
||||||
|
6.7,3.0,5.0,1.7,Iris-versicolor
|
||||||
|
6.0,2.9,4.5,1.5,Iris-versicolor
|
||||||
|
5.7,2.6,3.5,1.0,Iris-versicolor
|
||||||
|
5.5,2.4,3.8,1.1,Iris-versicolor
|
||||||
|
5.5,2.4,3.7,1.0,Iris-versicolor
|
||||||
|
5.8,2.7,3.9,1.2,Iris-versicolor
|
||||||
|
6.0,2.7,5.1,1.6,Iris-versicolor
|
||||||
|
5.4,3.0,4.5,1.5,Iris-versicolor
|
||||||
|
6.0,3.4,4.5,1.6,Iris-versicolor
|
||||||
|
6.7,3.1,4.7,1.5,Iris-versicolor
|
||||||
|
6.3,2.3,4.4,1.3,Iris-versicolor
|
||||||
|
5.6,3.0,4.1,1.3,Iris-versicolor
|
||||||
|
5.5,2.5,4.0,1.3,Iris-versicolor
|
||||||
|
5.5,2.6,4.4,1.2,Iris-versicolor
|
||||||
|
6.1,3.0,4.6,1.4,Iris-versicolor
|
||||||
|
5.8,2.6,4.0,1.2,Iris-versicolor
|
||||||
|
5.0,2.3,3.3,1.0,Iris-versicolor
|
||||||
|
5.6,2.7,4.2,1.3,Iris-versicolor
|
||||||
|
5.7,3.0,4.2,1.2,Iris-versicolor
|
||||||
|
5.7,2.9,4.2,1.3,Iris-versicolor
|
||||||
|
6.2,2.9,4.3,1.3,Iris-versicolor
|
||||||
|
5.1,2.5,3.0,1.1,Iris-versicolor
|
||||||
|
5.7,2.8,4.1,1.3,Iris-versicolor
|
||||||
|
6.3,3.3,6.0,2.5,Iris-virginica
|
||||||
|
5.8,2.7,5.1,1.9,Iris-virginica
|
||||||
|
7.1,3.0,5.9,2.1,Iris-virginica
|
||||||
|
6.3,2.9,5.6,1.8,Iris-virginica
|
||||||
|
6.5,3.0,5.8,2.2,Iris-virginica
|
||||||
|
7.6,3.0,6.6,2.1,Iris-virginica
|
||||||
|
4.9,2.5,4.5,1.7,Iris-virginica
|
||||||
|
7.3,2.9,6.3,1.8,Iris-virginica
|
||||||
|
6.7,2.5,5.8,1.8,Iris-virginica
|
||||||
|
7.2,3.6,6.1,2.5,Iris-virginica
|
||||||
|
6.5,3.2,5.1,2.0,Iris-virginica
|
||||||
|
6.4,2.7,5.3,1.9,Iris-virginica
|
||||||
|
6.8,3.0,5.5,2.1,Iris-virginica
|
||||||
|
5.7,2.5,5.0,2.0,Iris-virginica
|
||||||
|
5.8,2.8,5.1,2.4,Iris-virginica
|
||||||
|
6.4,3.2,5.3,2.3,Iris-virginica
|
||||||
|
6.5,3.0,5.5,1.8,Iris-virginica
|
||||||
|
7.7,3.8,6.7,2.2,Iris-virginica
|
||||||
|
7.7,2.6,6.9,2.3,Iris-virginica
|
||||||
|
6.0,2.2,5.0,1.5,Iris-virginica
|
||||||
|
6.9,3.2,5.7,2.3,Iris-virginica
|
||||||
|
5.6,2.8,4.9,2.0,Iris-virginica
|
||||||
|
7.7,2.8,6.7,2.0,Iris-virginica
|
||||||
|
6.3,2.7,4.9,1.8,Iris-virginica
|
||||||
|
6.7,3.3,5.7,2.1,Iris-virginica
|
||||||
|
7.2,3.2,6.0,1.8,Iris-virginica
|
||||||
|
6.2,2.8,4.8,1.8,Iris-virginica
|
||||||
|
6.1,3.0,4.9,1.8,Iris-virginica
|
||||||
|
6.4,2.8,5.6,2.1,Iris-virginica
|
||||||
|
7.2,3.0,5.8,1.6,Iris-virginica
|
||||||
|
7.4,2.8,6.1,1.9,Iris-virginica
|
||||||
|
7.9,3.8,6.4,2.0,Iris-virginica
|
||||||
|
6.4,2.8,5.6,2.2,Iris-virginica
|
||||||
|
6.3,2.8,5.1,1.5,Iris-virginica
|
||||||
|
6.1,2.6,5.6,1.4,Iris-virginica
|
||||||
|
7.7,3.0,6.1,2.3,Iris-virginica
|
||||||
|
6.3,3.4,5.6,2.4,Iris-virginica
|
||||||
|
6.4,3.1,5.5,1.8,Iris-virginica
|
||||||
|
6.0,3.0,4.8,1.8,Iris-virginica
|
||||||
|
6.9,3.1,5.4,2.1,Iris-virginica
|
||||||
|
6.7,3.1,5.6,2.4,Iris-virginica
|
||||||
|
6.9,3.1,5.1,2.3,Iris-virginica
|
||||||
|
5.8,2.7,5.1,1.9,Iris-virginica
|
||||||
|
6.8,3.2,5.9,2.3,Iris-virginica
|
||||||
|
6.7,3.3,5.7,2.5,Iris-virginica
|
||||||
|
6.7,3.0,5.2,2.3,Iris-virginica
|
||||||
|
6.3,2.5,5.0,1.9,Iris-virginica
|
||||||
|
6.5,3.0,5.2,2.0,Iris-virginica
|
||||||
|
6.2,3.4,5.4,2.3,Iris-virginica
|
||||||
|
5.9,3.0,5.1,1.8,Iris-virginica
|
||||||
|
%
|
||||||
|
%
|
||||||
|
%
|
BIN
fimdlp/testcpp/main
Executable file
BIN
fimdlp/testcpp/main
Executable file
Binary file not shown.
57
fimdlp/testcpp/main.cpp
Normal file
57
fimdlp/testcpp/main.cpp
Normal file
@@ -0,0 +1,57 @@
|
|||||||
|
#include "ArffFiles.h"
|
||||||
|
#include <iostream>
|
||||||
|
#include <vector>
|
||||||
|
#include <iomanip>
|
||||||
|
#include "../CPPFImdlp.h"
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
int main(int argc, char** argv)
|
||||||
|
{
|
||||||
|
ArffFiles file;
|
||||||
|
vector<string> lines;
|
||||||
|
string path = "/Users/rmontanana/Code/FImdlp/fimdlp/testcpp/datasets/";
|
||||||
|
map<string, bool > datasets = {
|
||||||
|
{"mfeat-factors", true},
|
||||||
|
{"iris", true},
|
||||||
|
{"letter", true},
|
||||||
|
{"kdd_JapaneseVowels", false}
|
||||||
|
};
|
||||||
|
if (argc != 2 || datasets.find(argv[1]) == datasets.end()) {
|
||||||
|
cout << "Usage: " << argv[0] << " {mfeat-factors, iris, letter, kdd_JapaneseVowels}" << endl;
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
//file.load("datasets/mfeat-factors.arff", true);
|
||||||
|
//file.load("/Users/rmontanana/Code/FImdlp/fimdlp/testcpp/datasets/kdd_JapaneseVowels.arff", false);
|
||||||
|
//file.load("/Users/rmontanana/Code/FImdlp/fimdlp/testcpp/datasets/iris.arff", true);
|
||||||
|
file.load(path + argv[1] + ".arff", datasets[argv[1]]);
|
||||||
|
auto attributes = file.getAttributes();
|
||||||
|
int items = file.getSize();
|
||||||
|
cout << "Number of lines: " << items << endl;
|
||||||
|
cout << "Attributes: " << endl;
|
||||||
|
for (auto attribute : attributes) {
|
||||||
|
cout << "Name: " << get<0>(attribute) << " Type: " << get<1>(attribute) << endl;
|
||||||
|
}
|
||||||
|
cout << "Class name: " << file.getClassName() << endl;
|
||||||
|
cout << "Class type: " << file.getClassType() << endl;
|
||||||
|
cout << "Data: " << endl;
|
||||||
|
vector<vector<float>>& X = file.getX();
|
||||||
|
vector<int>& y = file.getY();
|
||||||
|
for (int i = 0; i < 50; i++) {
|
||||||
|
for (auto feature : X) {
|
||||||
|
cout << fixed << setprecision(1) << feature[i] << " ";
|
||||||
|
}
|
||||||
|
cout << y[i] << endl;
|
||||||
|
}
|
||||||
|
mdlp::CPPFImdlp test = mdlp::CPPFImdlp();
|
||||||
|
for (auto i = 0; i < attributes.size(); i++) {
|
||||||
|
cout << "Cut points for " << get<0>(attributes[i]) << endl;
|
||||||
|
cout << "--------------------------" << setprecision(3) << endl;
|
||||||
|
test.fit(X[i], y);
|
||||||
|
for (auto item : test.getCutPoints()) {
|
||||||
|
cout << item << endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
111
fimdlp/testcpp/xx/ArffFiles.cpp
Normal file
111
fimdlp/testcpp/xx/ArffFiles.cpp
Normal file
@@ -0,0 +1,111 @@
|
|||||||
|
#include "ArffFiles.h"
|
||||||
|
|
||||||
|
#include <fstream>
|
||||||
|
#include <sstream>
|
||||||
|
#include <map>
|
||||||
|
#include <iostream>
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
// Default constructor: every member starts out empty until load() is called.
ArffFiles::ArffFiles() = default;
|
||||||
|
// Returns a copy of the raw data lines kept by load() (comment, blank and
// '@' header lines are not stored).
vector<string> ArffFiles::getLines()
{
    return this->lines;
}
|
||||||
|
unsigned long int ArffFiles::getSize()
|
||||||
|
{
|
||||||
|
return lines.size();
|
||||||
|
}
|
||||||
|
// Returns a copy of the (name, type) pairs of the feature attributes.
// After load() the class attribute has already been removed from this list.
vector<tuple<string, string>> ArffFiles::getAttributes()
{
    return this->attributes;
}
|
||||||
|
// Returns the name of the class attribute recorded by load().
string ArffFiles::getClassName()
{
    return this->className;
}
|
||||||
|
// Returns the type token of the class attribute recorded by load().
string ArffFiles::getClassType()
{
    return this->classType;
}
|
||||||
|
// Returns a mutable reference to the feature matrix owned by this object;
// the reference is only valid while the ArffFiles instance is alive.
vector<vector<float>>& ArffFiles::getX()
{
    return this->X;
}
|
||||||
|
// Returns a mutable reference to the integer-encoded class labels owned by
// this object; the reference is only valid while the instance is alive.
vector<int>& ArffFiles::getY()
{
    return this->y;
}
|
||||||
|
/**
 * Reads an ARFF file and builds the in-memory dataset.
 *
 * Every "@attribute"/"@ATTRIBUTE" line contributes a (name, type) pair; the
 * last one declared is taken as the class attribute and removed from the
 * feature list. Comment lines ('%'), blank lines and any other '@' header
 * line are ignored; every remaining line is stored as a data row and then
 * parsed by generateDataset().
 *
 * @param fileName path of the ARFF file to read
 * @throws invalid_argument if the file cannot be opened or declares no
 *         attributes
 */
void ArffFiles::load(string fileName)
{
    ifstream file(fileName);
    if (!file.is_open()) {
        throw invalid_argument("Unable to open file");
    }
    // Start from a clean slate so that calling load() twice on the same
    // object does not append the new file to the previous one.
    lines.clear();
    attributes.clear();
    string line;
    while (getline(file, line)) {
        // Skip blank / whitespace-only lines (including a lone "\r" left by
        // CRLF files) and '%' comments.
        if (line.find_first_not_of(" \t\r\n") == string::npos || line[0] == '%') {
            continue;
        }
        if (line.find("@attribute") != string::npos || line.find("@ATTRIBUTE") != string::npos) {
            // Declared per line so an incomplete declaration cannot inherit
            // tokens from the previous attribute.
            string keyword, attribute, type;
            stringstream ss(line);
            ss >> keyword >> attribute >> type;
            attributes.push_back(make_tuple(attribute, type));
            continue;
        }
        if (line[0] == '@') {
            // Other header keywords (@relation, @data, ...) carry no data.
            continue;
        }
        lines.push_back(line);
    }
    file.close();
    if (attributes.empty()) {
        throw invalid_argument("No attributes found");
    }
    // The class is the last attribute declared in the header.
    className = get<0>(attributes.back());
    classType = get<1>(attributes.back());
    attributes.pop_back();
    generateDataset();
}
|
||||||
|
void ArffFiles::generateDataset()
|
||||||
|
{
|
||||||
|
X = vector<vector<float>>(lines.size(), vector<float>(attributes.size()));
|
||||||
|
vector<string> yy = vector<string>(lines.size(), "");
|
||||||
|
for (int i = 0; i < lines.size(); i++) {
|
||||||
|
stringstream ss(lines[i]);
|
||||||
|
string value;
|
||||||
|
int j = 0;
|
||||||
|
while (getline(ss, value, ',')) {
|
||||||
|
if (j == attributes.size()) {
|
||||||
|
yy[i] = value;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
X[i][j] = stof(value);
|
||||||
|
j++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
y = factorize(yy);
|
||||||
|
}
|
||||||
|
// Returns a copy of `source` without leading and trailing spaces, newlines,
// carriage returns and tabs; an all-whitespace input yields an empty string.
string ArffFiles::trim(const string& source)
{
    const char* const blanks = " \n\r\t";
    const auto first = source.find_first_not_of(blanks);
    if (first == string::npos) {
        return string();
    }
    const auto last = source.find_last_not_of(blanks);
    return source.substr(first, last - first + 1);
}
|
||||||
|
/**
 * Encodes textual labels as consecutive integers.
 *
 * Codes are assigned in order of first appearance, starting at 0, so equal
 * labels always map to the same code.
 *
 * @param labels textual class label of every sample
 * @return vector of the same length holding the integer code of each label
 */
vector<int> ArffFiles::factorize(const vector<string>& labels)
{
    vector<int> codes;
    codes.reserve(labels.size());
    map<string, int> labelMap;
    // Iterate by const reference (no per-label string copy) and resolve each
    // label with a single map operation: emplace() only inserts when the key
    // is new, in which case the next free code equals the current map size.
    for (const string& label : labels) {
        const auto entry = labelMap.emplace(label, static_cast<int>(labelMap.size()));
        codes.push_back(entry.first->second);
    }
    return codes;
}
|
28
fimdlp/testcpp/xx/ArffFiles.h
Normal file
28
fimdlp/testcpp/xx/ArffFiles.h
Normal file
@@ -0,0 +1,28 @@
|
|||||||
|
#ifndef ARFFFILES_H
|
||||||
|
#define ARFFFILES_H
|
||||||
|
#include <string>
|
||||||
|
#include <vector>
|
||||||
|
#include <tuple>
|
||||||
|
using namespace std;
|
||||||
|
// Loader for ARFF files: keeps the raw data lines, the attribute
// declarations and the dataset parsed from them.
class ArffFiles {
public:
    ArffFiles();

    // Reads the given file and fills every member below.
    void load(std::string);

    // Accessors returning copies.
    std::vector<std::string> getLines();
    unsigned long int getSize();
    std::string getClassName();
    std::string getClassType();
    std::vector<std::tuple<std::string, std::string>> getAttributes();

    // Accessors returning references to the data owned by this object.
    std::vector<std::vector<float>>& getX();
    std::vector<int>& getY();

    // Helpers.
    std::string trim(const std::string&);
    std::vector<int> factorize(const std::vector<std::string>& labels);

private:
    // Data lines of the file, as text.
    std::vector<std::string> lines;
    // (name, type) of each feature attribute.
    std::vector<std::tuple<std::string, std::string>> attributes;
    // Name and type of the class attribute (the last one declared).
    std::string className;
    std::string classType;
    // Parsed features: one row per data line, one column per attribute.
    std::vector<std::vector<float>> X;
    // Integer-encoded class label of each data line.
    std::vector<int> y;

    // Builds X and y from `lines`.
    void generateDataset();
};
|
||||||
|
#endif
|
6
fimdlp/testcpp/xx/CMakeLists.txt
Normal file
6
fimdlp/testcpp/xx/CMakeLists.txt
Normal file
@@ -0,0 +1,6 @@
|
|||||||
|
cmake_minimum_required(VERSION 3.24)
project(main)

# Build as C++17 and fail at configure time if the compiler cannot provide it,
# instead of silently falling back to an older standard.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

add_executable(main main.cpp ArffFiles.cpp)
|
30
fimdlp/testcpp/xx/main.cpp
Normal file
30
fimdlp/testcpp/xx/main.cpp
Normal file
@@ -0,0 +1,30 @@
|
|||||||
|
#include "ArffFiles.h"
|
||||||
|
#include <iostream>
|
||||||
|
#include <vector>
|
||||||
|
#include <iomanip>
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
int main(int argc, char **argv) {
|
||||||
|
ArffFiles file;
|
||||||
|
vector<string> lines;
|
||||||
|
//file.load("datasets/mfeat-factors.arff");
|
||||||
|
file.load("/Users/rmontanana/Code/FImdlp/fimdlp/testcpp/datasets/mfeat-factors.arff");
|
||||||
|
cout << "Number of lines: " << file.getSize() << endl;
|
||||||
|
cout << "Attributes: " << endl;
|
||||||
|
for (auto attribute: file.getAttributes()) {
|
||||||
|
cout << "Name: " << get<0>(attribute) << " Type: " << get<1>(attribute) << endl;
|
||||||
|
}
|
||||||
|
cout << "Class name: " << file.getClassName() << endl;
|
||||||
|
cout << "Class type: " << file.getClassType() << endl;
|
||||||
|
cout << "Data: " << endl;
|
||||||
|
vector<vector<float>> &X = file.getX();
|
||||||
|
vector<int> &y = file.getY();
|
||||||
|
for (int i = 0; i < X.size(); i++) {
|
||||||
|
for (float value: X[i]) {
|
||||||
|
cout << fixed << setprecision(1) << value << " ";
|
||||||
|
}
|
||||||
|
cout << y[i] << endl;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
63
sample.py
63
sample.py
@@ -4,7 +4,6 @@ from fimdlp.cppfimdlp import CFImdlp
|
|||||||
from sklearn.ensemble import RandomForestClassifier
|
from sklearn.ensemble import RandomForestClassifier
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import time
|
import time
|
||||||
from math import log2
|
|
||||||
|
|
||||||
from scipy.io import arff
|
from scipy.io import arff
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
@@ -44,65 +43,3 @@ print(test.get_cut_points())
|
|||||||
clf = RandomForestClassifier(random_state=0)
|
clf = RandomForestClassifier(random_state=0)
|
||||||
print(clf.fit(Xt, y).score(Xt, y))
|
print(clf.fit(Xt, y).score(Xt, y))
|
||||||
print(Xt)
|
print(Xt)
|
||||||
# for proposal in [True, False]:
|
|
||||||
# X = data.data
|
|
||||||
# y = data.target
|
|
||||||
# print("*** Proposal: ", proposal)
|
|
||||||
# test = CFImdlp(debug=True, proposal=proposal)
|
|
||||||
# test.fit(X[:, 0], y)
|
|
||||||
# result = test.get_cut_points()
|
|
||||||
# for item in result:
|
|
||||||
# print(
|
|
||||||
# f"Class={item['classNumber']} - ({item['start']:3d}, "
|
|
||||||
# f"{item['end']:3d}) -> ({item['fromValue']:3.1f}, "
|
|
||||||
# f"{item['toValue']:3.1f}]"
|
|
||||||
# )
|
|
||||||
# print(test.get_discretized_values())
|
|
||||||
# print("+" * 40)
|
|
||||||
# X = np.array(
|
|
||||||
# [
|
|
||||||
# [5.1, 3.5, 1.4, 0.2],
|
|
||||||
# [5.2, 3.0, 1.4, 0.2],
|
|
||||||
# [5.3, 3.2, 1.3, 0.2],
|
|
||||||
# [5.4, 3.1, 1.5, 0.2],
|
|
||||||
# ]
|
|
||||||
# )
|
|
||||||
# y = np.array([0, 0, 0, 1])
|
|
||||||
# print(test.fit(X[:, 0], y).transform(X[:, 0]))
|
|
||||||
# result = test.get_cut_points()
|
|
||||||
# for item in result:
|
|
||||||
# print(
|
|
||||||
# f"Class={item['classNumber']} - ({item['start']:3d}, {item['end']:3d})"
|
|
||||||
# f" -> ({item['fromValue']:3.1f}, {item['toValue']:3.1f}]"
|
|
||||||
# )
|
|
||||||
# print("*" * 40)
|
|
||||||
# # print(Xs, ys)
|
|
||||||
# # print("**********************")
|
|
||||||
# # test = [(0, 3), (4, 4), (5, 5), (6, 8), (9, 9)]
|
|
||||||
# # print(ys)
|
|
||||||
# # for start, end in test:
|
|
||||||
# # print("Testing ", start, end, ys[:end], ys[end:])
|
|
||||||
# # print("Information gain: ", information_gain(ys, ys[:end], ys[end:]))
|
|
||||||
# # print(test.transform(X))
|
|
||||||
# # print(X)
|
|
||||||
# # print(indices)
|
|
||||||
# # print(np.array(X)[indices])
|
|
||||||
|
|
||||||
|
|
||||||
# # # k = test.cut_points(X[:, 0], y)
|
|
||||||
# # # print(k)
|
|
||||||
# # # k = test.cut_points_ant(X[:, 0], y)
|
|
||||||
# # # print(k)
|
|
||||||
# # # test.debug_points(X[:, 0], y)
|
|
||||||
# # X = [5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9]
|
|
||||||
# # y = [1, 1, 1, 1, 1, 2, 2, 2, 2, 2]
|
|
||||||
# # indices = [4, 3, 6, 8, 2, 1, 5, 0, 9, 7]
|
|
||||||
# # clf = CFImdlp(debug=True, proposal=False)
|
|
||||||
# # clf.fit(X, y)
|
|
||||||
# # print(clf.get_cut_points())
|
|
||||||
# # y = [1, 1, 1, 1, 1, 2, 2, 2, 2, 2]
|
|
||||||
# # # To check
|
|
||||||
# # indices2 = np.argsort(X)
|
|
||||||
# # Xs = np.array(X)[indices2]
|
|
||||||
# # ys = np.array(y)[indices2]
|
|
||||||
# kdd_JapaneseVowels
|
|
||||||
|
Reference in New Issue
Block a user