Compare commits
6 Commits
Author | SHA1 | Date | |
---|---|---|---|
8a7d4e0238 | |||
e2ac5fde12
|
|||
332324a6c2
|
|||
8b17695163
|
|||
81f2e706d0
|
|||
4d6cad8f08
|
12
.claude/settings.local.json
Normal file
12
.claude/settings.local.json
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
{
|
||||||
|
"permissions": {
|
||||||
|
"allow": [
|
||||||
|
"Bash(find:*)",
|
||||||
|
"Bash(mkdir:*)",
|
||||||
|
"Bash(cmake:*)",
|
||||||
|
"Bash(make:*)",
|
||||||
|
"Bash(cat:*)"
|
||||||
|
],
|
||||||
|
"deny": []
|
||||||
|
}
|
||||||
|
}
|
3
.gitignore
vendored
3
.gitignore
vendored
@@ -37,4 +37,5 @@ build_*/**
|
|||||||
cmake-build*/**
|
cmake-build*/**
|
||||||
.idea
|
.idea
|
||||||
puml/**
|
puml/**
|
||||||
.vscode/settings.json
|
.vscode/settings.json
|
||||||
|
CMakeUserPresets.json
|
112
ArffFiles.hpp
112
ArffFiles.hpp
@@ -10,6 +10,8 @@
|
|||||||
#include <cctype> // std::isdigit
|
#include <cctype> // std::isdigit
|
||||||
#include <algorithm> // std::all_of std::transform
|
#include <algorithm> // std::all_of std::transform
|
||||||
#include <filesystem> // For file size checking
|
#include <filesystem> // For file size checking
|
||||||
|
#include "arffFiles_config.h"
|
||||||
|
|
||||||
|
|
||||||
// Summary information structure for ARFF files
|
// Summary information structure for ARFF files
|
||||||
struct ArffSummary {
|
struct ArffSummary {
|
||||||
@@ -24,28 +26,28 @@ struct ArffSummary {
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Header-only C++17 library for parsing ARFF (Attribute-Relation File Format) files
|
* @brief Header-only C++17 library for parsing ARFF (Attribute-Relation File Format) files
|
||||||
*
|
*
|
||||||
* This class provides functionality to load and parse ARFF files, automatically detecting
|
* This class provides functionality to load and parse ARFF files, automatically detecting
|
||||||
* numeric vs categorical features and performing factorization of categorical attributes.
|
* numeric vs categorical features and performing factorization of categorical attributes.
|
||||||
*
|
*
|
||||||
* @warning THREAD SAFETY: This class is NOT thread-safe!
|
* @warning THREAD SAFETY: This class is NOT thread-safe!
|
||||||
*
|
*
|
||||||
* Thread Safety Considerations:
|
* Thread Safety Considerations:
|
||||||
* - Multiple instances can be used safely in different threads (each instance is independent)
|
* - Multiple instances can be used safely in different threads (each instance is independent)
|
||||||
* - A single instance MUST NOT be accessed concurrently from multiple threads
|
* - A single instance MUST NOT be accessed concurrently from multiple threads
|
||||||
* - All member functions (including getters) modify or access mutable state
|
* - All member functions (including getters) modify or access mutable state
|
||||||
* - Static methods (summary, trim, split) are thread-safe as they don't access instance state
|
* - Static methods (summary, trim, split) are thread-safe as they don't access instance state
|
||||||
*
|
*
|
||||||
* Memory Safety:
|
* Memory Safety:
|
||||||
* - Built-in protection against resource exhaustion with configurable limits
|
* - Built-in protection against resource exhaustion with configurable limits
|
||||||
* - File size limit: 100 MB (DEFAULT_MAX_FILE_SIZE)
|
* - File size limit: 100 MB (DEFAULT_MAX_FILE_SIZE)
|
||||||
* - Sample count limit: 1 million samples (DEFAULT_MAX_SAMPLES)
|
* - Sample count limit: 1 million samples (DEFAULT_MAX_SAMPLES)
|
||||||
* - Feature count limit: 10,000 features (DEFAULT_MAX_FEATURES)
|
* - Feature count limit: 10,000 features (DEFAULT_MAX_FEATURES)
|
||||||
*
|
*
|
||||||
* Usage Patterns:
|
* Usage Patterns:
|
||||||
* - Single-threaded: Create one instance, call load(), then access data via getters
|
* - Single-threaded: Create one instance, call load(), then access data via getters
|
||||||
* - Multi-threaded: Create separate instances per thread, or use external synchronization
|
* - Multi-threaded: Create separate instances per thread, or use external synchronization
|
||||||
*
|
*
|
||||||
* @example
|
* @example
|
||||||
* // Thread-safe usage pattern:
|
* // Thread-safe usage pattern:
|
||||||
* void processFile(const std::string& filename) {
|
* void processFile(const std::string& filename) {
|
||||||
@@ -55,24 +57,23 @@ struct ArffSummary {
|
|||||||
* auto y = arff.getY();
|
* auto y = arff.getY();
|
||||||
* // Process data...
|
* // Process data...
|
||||||
* }
|
* }
|
||||||
*
|
*
|
||||||
* @example
|
* @example
|
||||||
* // UNSAFE usage pattern:
|
* // UNSAFE usage pattern:
|
||||||
* ArffFiles globalArff; // Global instance
|
* ArffFiles globalArff; // Global instance
|
||||||
* // Thread 1: globalArff.load("file1.arff"); // UNSAFE!
|
* // Thread 1: globalArff.load("file1.arff"); // UNSAFE!
|
||||||
* // Thread 2: globalArff.load("file2.arff"); // UNSAFE!
|
* // Thread 2: globalArff.load("file2.arff"); // UNSAFE!
|
||||||
*/
|
*/
|
||||||
class ArffFiles {
|
class ArffFiles {
|
||||||
const std::string VERSION = "1.1.0";
|
private:
|
||||||
|
|
||||||
// Memory usage limits (configurable via environment variables)
|
// Memory usage limits (configurable via environment variables)
|
||||||
static constexpr size_t DEFAULT_MAX_FILE_SIZE = 100 * 1024 * 1024; // 100 MB
|
static constexpr size_t DEFAULT_MAX_FILE_SIZE = 100 * 1024 * 1024; // 100 MB
|
||||||
static constexpr size_t DEFAULT_MAX_SAMPLES = 1000000; // 1 million samples
|
static constexpr size_t DEFAULT_MAX_SAMPLES = 1000000; // 1 million samples
|
||||||
static constexpr size_t DEFAULT_MAX_FEATURES = 10000; // 10k features
|
static constexpr size_t DEFAULT_MAX_FEATURES = 10000; // 10k features
|
||||||
|
|
||||||
public:
|
public:
|
||||||
ArffFiles() = default;
|
ArffFiles() = default;
|
||||||
|
|
||||||
// Move constructor
|
// Move constructor
|
||||||
ArffFiles(ArffFiles&& other) noexcept
|
ArffFiles(ArffFiles&& other) noexcept
|
||||||
: lines(std::move(other.lines))
|
: lines(std::move(other.lines))
|
||||||
@@ -86,7 +87,7 @@ public:
|
|||||||
{
|
{
|
||||||
// Other object is left in a valid but unspecified state
|
// Other object is left in a valid but unspecified state
|
||||||
}
|
}
|
||||||
|
|
||||||
// Move assignment operator
|
// Move assignment operator
|
||||||
ArffFiles& operator=(ArffFiles&& other) noexcept
|
ArffFiles& operator=(ArffFiles&& other) noexcept
|
||||||
{
|
{
|
||||||
@@ -102,13 +103,13 @@ public:
|
|||||||
}
|
}
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Copy constructor (explicitly defaulted)
|
// Copy constructor (explicitly deleted)
|
||||||
ArffFiles(const ArffFiles& other) = default;
|
ArffFiles(const ArffFiles& other) = delete;
|
||||||
|
|
||||||
// Copy assignment operator (explicitly defaulted)
|
// Copy assignment operator (explicitly deleted)
|
||||||
ArffFiles& operator=(const ArffFiles& other) = default;
|
ArffFiles& operator=(const ArffFiles& other) = delete;
|
||||||
|
|
||||||
// Destructor (explicitly defaulted)
|
// Destructor (explicitly defaulted)
|
||||||
~ArffFiles() = default;
|
~ArffFiles() = default;
|
||||||
void load(const std::string& fileName, bool classLast = true)
|
void load(const std::string& fileName, bool classLast = true)
|
||||||
@@ -231,7 +232,7 @@ public:
|
|||||||
const std::vector<int>& getY() const { return y; }
|
const std::vector<int>& getY() const { return y; }
|
||||||
const std::map<std::string, bool>& getNumericAttributes() const { return numeric_features; }
|
const std::map<std::string, bool>& getNumericAttributes() const { return numeric_features; }
|
||||||
const std::vector<std::pair<std::string, std::string>>& getAttributes() const { return attributes; };
|
const std::vector<std::pair<std::string, std::string>>& getAttributes() const { return attributes; };
|
||||||
|
|
||||||
// Move-enabled getters for efficient data transfer
|
// Move-enabled getters for efficient data transfer
|
||||||
// WARNING: These methods move data OUT of the object, leaving it in an empty but valid state
|
// WARNING: These methods move data OUT of the object, leaving it in an empty but valid state
|
||||||
// Use these when you want to transfer ownership of large data structures for performance
|
// Use these when you want to transfer ownership of large data structures for performance
|
||||||
@@ -241,7 +242,7 @@ public:
|
|||||||
std::map<std::string, std::vector<std::string>> moveStates() noexcept { return std::move(states); }
|
std::map<std::string, std::vector<std::string>> moveStates() noexcept { return std::move(states); }
|
||||||
std::vector<std::pair<std::string, std::string>> moveAttributes() noexcept { return std::move(attributes); }
|
std::vector<std::pair<std::string, std::string>> moveAttributes() noexcept { return std::move(attributes); }
|
||||||
std::map<std::string, bool> moveNumericAttributes() noexcept { return std::move(numeric_features); }
|
std::map<std::string, bool> moveNumericAttributes() noexcept { return std::move(numeric_features); }
|
||||||
|
|
||||||
std::vector<std::string> split(const std::string& text, char delimiter)
|
std::vector<std::string> split(const std::string& text, char delimiter)
|
||||||
{
|
{
|
||||||
std::vector<std::string> result;
|
std::vector<std::string> result;
|
||||||
@@ -252,26 +253,27 @@ public:
|
|||||||
}
|
}
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
std::string version() const { return VERSION; }
|
std::string version() const { return ARFFLIB_VERSION; }
|
||||||
|
|
||||||
private:
|
private:
|
||||||
// Helper function to validate file path for security
|
// Helper function to validate file path for security
|
||||||
static void validateFilePath(const std::string& fileName) {
|
static void validateFilePath(const std::string& fileName)
|
||||||
|
{
|
||||||
if (fileName.empty()) {
|
if (fileName.empty()) {
|
||||||
throw std::invalid_argument("File path cannot be empty");
|
throw std::invalid_argument("File path cannot be empty");
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check for path traversal attempts
|
// Check for path traversal attempts
|
||||||
if (fileName.find("..") != std::string::npos) {
|
if (fileName.find("..") != std::string::npos) {
|
||||||
throw std::invalid_argument("Path traversal detected in file path: " + fileName);
|
throw std::invalid_argument("Path traversal detected in file path: " + fileName);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check for absolute paths starting with / (Unix) or drive letters (Windows)
|
// Check for absolute paths starting with / (Unix) or drive letters (Windows)
|
||||||
if (fileName[0] == '/' || (fileName.length() >= 3 && fileName[1] == ':')) {
|
if (fileName[0] == '/' || (fileName.length() >= 3 && fileName[1] == ':')) {
|
||||||
// Allow absolute paths but log a warning - this is for user awareness
|
// Allow absolute paths but log a warning - this is for user awareness
|
||||||
// In production, you might want to restrict this based on your security requirements
|
// In production, you might want to restrict this based on your security requirements
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check for suspicious characters that could be used in path manipulation
|
// Check for suspicious characters that could be used in path manipulation
|
||||||
const std::string suspiciousChars = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0b\x0c\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f";
|
const std::string suspiciousChars = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0b\x0c\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f";
|
||||||
for (char c : suspiciousChars) {
|
for (char c : suspiciousChars) {
|
||||||
@@ -279,33 +281,35 @@ private:
|
|||||||
throw std::invalid_argument("Invalid character detected in file path");
|
throw std::invalid_argument("Invalid character detected in file path");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check for excessively long paths (potential buffer overflow attempts)
|
// Check for excessively long paths (potential buffer overflow attempts)
|
||||||
constexpr size_t MAX_PATH_LENGTH = 4096; // Common filesystem limit
|
constexpr size_t MAX_PATH_LENGTH = 4096; // Common filesystem limit
|
||||||
if (fileName.length() > MAX_PATH_LENGTH) {
|
if (fileName.length() > MAX_PATH_LENGTH) {
|
||||||
throw std::invalid_argument("File path too long (exceeds " + std::to_string(MAX_PATH_LENGTH) + " characters)");
|
throw std::invalid_argument("File path too long (exceeds " + std::to_string(MAX_PATH_LENGTH) + " characters)");
|
||||||
}
|
}
|
||||||
|
|
||||||
// Additional validation using filesystem operations when available
|
// Additional validation using filesystem operations when available
|
||||||
try {
|
try {
|
||||||
// Check if the file exists and validate its canonical path
|
// Check if the file exists and validate its canonical path
|
||||||
if (std::filesystem::exists(fileName)) {
|
if (std::filesystem::exists(fileName)) {
|
||||||
std::filesystem::path normalizedPath = std::filesystem::canonical(fileName);
|
std::filesystem::path normalizedPath = std::filesystem::canonical(fileName);
|
||||||
std::string normalizedStr = normalizedPath.string();
|
std::string normalizedStr = normalizedPath.string();
|
||||||
|
|
||||||
// Check if normalized path still contains traversal attempts
|
// Check if normalized path still contains traversal attempts
|
||||||
if (normalizedStr.find("..") != std::string::npos) {
|
if (normalizedStr.find("..") != std::string::npos) {
|
||||||
throw std::invalid_argument("Path traversal detected after normalization: " + normalizedStr);
|
throw std::invalid_argument("Path traversal detected after normalization: " + normalizedStr);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} catch (const std::filesystem::filesystem_error& e) {
|
}
|
||||||
|
catch (const std::filesystem::filesystem_error& e) {
|
||||||
// If filesystem operations fail, we can still proceed with basic validation
|
// If filesystem operations fail, we can still proceed with basic validation
|
||||||
// This ensures compatibility with systems where filesystem might not be fully available
|
// This ensures compatibility with systems where filesystem might not be fully available
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Helper function to validate resource usage limits
|
// Helper function to validate resource usage limits
|
||||||
static void validateResourceLimits(const std::string& fileName, size_t sampleCount = 0, size_t featureCount = 0) {
|
static void validateResourceLimits(const std::string& fileName, size_t sampleCount = 0, size_t featureCount = 0)
|
||||||
|
{
|
||||||
// Check file size limit
|
// Check file size limit
|
||||||
try {
|
try {
|
||||||
if (std::filesystem::exists(fileName)) {
|
if (std::filesystem::exists(fileName)) {
|
||||||
@@ -314,16 +318,17 @@ private:
|
|||||||
throw std::invalid_argument("File size (" + std::to_string(fileSize) + " bytes) exceeds maximum allowed size (" + std::to_string(DEFAULT_MAX_FILE_SIZE) + " bytes)");
|
throw std::invalid_argument("File size (" + std::to_string(fileSize) + " bytes) exceeds maximum allowed size (" + std::to_string(DEFAULT_MAX_FILE_SIZE) + " bytes)");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} catch (const std::filesystem::filesystem_error&) {
|
}
|
||||||
|
catch (const std::filesystem::filesystem_error&) {
|
||||||
// If filesystem operations fail, continue without size checking
|
// If filesystem operations fail, continue without size checking
|
||||||
// This ensures compatibility with systems where filesystem might not be available
|
// This ensures compatibility with systems where filesystem might not be available
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check sample count limit
|
// Check sample count limit
|
||||||
if (sampleCount > DEFAULT_MAX_SAMPLES) {
|
if (sampleCount > DEFAULT_MAX_SAMPLES) {
|
||||||
throw std::invalid_argument("Number of samples (" + std::to_string(sampleCount) + ") exceeds maximum allowed (" + std::to_string(DEFAULT_MAX_SAMPLES) + ")");
|
throw std::invalid_argument("Number of samples (" + std::to_string(sampleCount) + ") exceeds maximum allowed (" + std::to_string(DEFAULT_MAX_SAMPLES) + ")");
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check feature count limit
|
// Check feature count limit
|
||||||
if (featureCount > DEFAULT_MAX_FEATURES) {
|
if (featureCount > DEFAULT_MAX_FEATURES) {
|
||||||
throw std::invalid_argument("Number of features (" + std::to_string(featureCount) + ") exceeds maximum allowed (" + std::to_string(DEFAULT_MAX_FEATURES) + ")");
|
throw std::invalid_argument("Number of features (" + std::to_string(featureCount) + ") exceeds maximum allowed (" + std::to_string(DEFAULT_MAX_FEATURES) + ")");
|
||||||
@@ -352,12 +357,12 @@ private:
|
|||||||
continue;
|
continue;
|
||||||
auto values = attribute.second;
|
auto values = attribute.second;
|
||||||
std::transform(values.begin(), values.end(), values.begin(), ::toupper);
|
std::transform(values.begin(), values.end(), values.begin(), ::toupper);
|
||||||
|
|
||||||
// Enhanced attribute type detection
|
// Enhanced attribute type detection
|
||||||
bool isNumeric = values == "REAL" || values == "INTEGER" || values == "NUMERIC";
|
bool isNumeric = values == "REAL" || values == "INTEGER" || values == "NUMERIC";
|
||||||
bool isDate = values.find("DATE") != std::string::npos;
|
bool isDate = values.find("DATE") != std::string::npos;
|
||||||
bool isString = values == "STRING";
|
bool isString = values == "STRING";
|
||||||
|
|
||||||
// For now, treat DATE and STRING as categorical (non-numeric)
|
// For now, treat DATE and STRING as categorical (non-numeric)
|
||||||
// This provides basic compatibility while maintaining existing functionality
|
// This provides basic compatibility while maintaining existing functionality
|
||||||
numeric_features[feature] = isNumeric;
|
numeric_features[feature] = isNumeric;
|
||||||
@@ -490,7 +495,7 @@ private:
|
|||||||
|
|
||||||
// Validate file path for security
|
// Validate file path for security
|
||||||
validateFilePath(fileName);
|
validateFilePath(fileName);
|
||||||
|
|
||||||
// Validate file size before processing
|
// Validate file size before processing
|
||||||
validateResourceLimits(fileName);
|
validateResourceLimits(fileName);
|
||||||
|
|
||||||
@@ -507,13 +512,13 @@ private:
|
|||||||
if (line.empty() || line[0] == '%' || line == "\r" || line == " ") {
|
if (line.empty() || line[0] == '%' || line == "\r" || line == " ") {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Skip sparse data format for now (lines starting with '{')
|
// Skip sparse data format for now (lines starting with '{')
|
||||||
// Future enhancement: implement full sparse data support
|
// Future enhancement: implement full sparse data support
|
||||||
if (!line.empty() && line[0] == '{') {
|
if (!line.empty() && line[0] == '{') {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (line.find("@attribute") != std::string::npos || line.find("@ATTRIBUTE") != std::string::npos) {
|
if (line.find("@attribute") != std::string::npos || line.find("@ATTRIBUTE") != std::string::npos) {
|
||||||
std::stringstream ss(line);
|
std::stringstream ss(line);
|
||||||
ss >> keyword >> attribute;
|
ss >> keyword >> attribute;
|
||||||
@@ -564,7 +569,7 @@ private:
|
|||||||
if (lines.empty()) {
|
if (lines.empty()) {
|
||||||
throw std::invalid_argument("No data samples found in file");
|
throw std::invalid_argument("No data samples found in file");
|
||||||
}
|
}
|
||||||
|
|
||||||
// Validate loaded data dimensions against limits
|
// Validate loaded data dimensions against limits
|
||||||
validateResourceLimits(fileName, lines.size(), attributes.size());
|
validateResourceLimits(fileName, lines.size(), attributes.size());
|
||||||
|
|
||||||
@@ -621,15 +626,16 @@ private:
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Common helper function to parse ARFF file attributes and count samples
|
// Common helper function to parse ARFF file attributes and count samples
|
||||||
static int parseArffFile(const std::string& fileName,
|
static int parseArffFile(const std::string& fileName,
|
||||||
std::vector<std::pair<std::string, std::string>>& attributes,
|
std::vector<std::pair<std::string, std::string>>& attributes,
|
||||||
std::set<std::string>& uniqueClasses,
|
std::set<std::string>& uniqueClasses,
|
||||||
size_t& sampleCount,
|
size_t& sampleCount,
|
||||||
int classIndex = -1,
|
int classIndex = -1,
|
||||||
const std::string& classNameToFind = "") {
|
const std::string& classNameToFind = "")
|
||||||
|
{
|
||||||
// Validate file path for security
|
// Validate file path for security
|
||||||
validateFilePath(fileName);
|
validateFilePath(fileName);
|
||||||
|
|
||||||
std::ifstream file(fileName);
|
std::ifstream file(fileName);
|
||||||
if (!file.is_open()) {
|
if (!file.is_open()) {
|
||||||
throw std::invalid_argument("Unable to open file: " + fileName);
|
throw std::invalid_argument("Unable to open file: " + fileName);
|
||||||
@@ -645,12 +651,12 @@ private:
|
|||||||
if (line.empty() || line[0] == '%' || line == "\r" || line == " ") {
|
if (line.empty() || line[0] == '%' || line == "\r" || line == " ") {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Skip sparse data format for now (lines starting with '{')
|
// Skip sparse data format for now (lines starting with '{')
|
||||||
if (!line.empty() && line[0] == '{') {
|
if (!line.empty() && line[0] == '{') {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (line.find("@attribute") != std::string::npos || line.find("@ATTRIBUTE") != std::string::npos) {
|
if (line.find("@attribute") != std::string::npos || line.find("@ATTRIBUTE") != std::string::npos) {
|
||||||
std::stringstream ss(line);
|
std::stringstream ss(line);
|
||||||
std::string keyword, attribute, type_w;
|
std::string keyword, attribute, type_w;
|
||||||
@@ -717,7 +723,7 @@ private:
|
|||||||
// Use specific index
|
// Use specific index
|
||||||
classValue = trim(tokens[actualClassIndex]);
|
classValue = trim(tokens[actualClassIndex]);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!classValue.empty()) {
|
if (!classValue.empty()) {
|
||||||
uniqueClasses.insert(classValue);
|
uniqueClasses.insert(classValue);
|
||||||
sampleCount++;
|
sampleCount++;
|
||||||
@@ -726,7 +732,7 @@ private:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
while (getline(file, line));
|
while (getline(file, line));
|
||||||
|
|
||||||
return actualClassIndex;
|
return actualClassIndex;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -5,6 +5,14 @@ All notable changes to this project will be documented in this file.
|
|||||||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
||||||
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||||
|
|
||||||
|
## [1.2.1] 2025-07-15 Bug Fixes and Improvements
|
||||||
|
|
||||||
|
### Added
|
||||||
|
|
||||||
|
- Library version from CMake project to `ArffFiles.hpp`
|
||||||
|
- Library `catch2` as a conan test requirement
|
||||||
|
- Install target for CMake
|
||||||
|
|
||||||
## [1.2.0] 2025-06-27 Refactoring and Improvements
|
## [1.2.0] 2025-06-27 Refactoring and Improvements
|
||||||
|
|
||||||
### Added
|
### Added
|
||||||
|
@@ -1,7 +1,7 @@
|
|||||||
cmake_minimum_required(VERSION 3.20)
|
cmake_minimum_required(VERSION 3.20)
|
||||||
|
|
||||||
project(ArffFiles
|
project(ArffFiles
|
||||||
VERSION 1.2.0
|
VERSION 1.2.1
|
||||||
DESCRIPTION "Library to read Arff Files and return STL vectors with the data read."
|
DESCRIPTION "Library to read Arff Files and return STL vectors with the data read."
|
||||||
HOMEPAGE_URL "https://github.com/rmontanana/ArffFiles"
|
HOMEPAGE_URL "https://github.com/rmontanana/ArffFiles"
|
||||||
LANGUAGES CXX
|
LANGUAGES CXX
|
||||||
@@ -41,14 +41,60 @@ add_subdirectory(config)
|
|||||||
# -------
|
# -------
|
||||||
if (ENABLE_TESTING)
|
if (ENABLE_TESTING)
|
||||||
MESSAGE("Testing enabled")
|
MESSAGE("Testing enabled")
|
||||||
Include(FetchContent)
|
find_package(Catch2 REQUIRED)
|
||||||
FetchContent_Declare(Catch2
|
|
||||||
GIT_REPOSITORY https://github.com/catchorg/Catch2.git
|
|
||||||
GIT_TAG v3.3.2
|
|
||||||
)
|
|
||||||
FetchContent_MakeAvailable(Catch2)
|
|
||||||
include(CTest)
|
include(CTest)
|
||||||
add_subdirectory(tests)
|
add_subdirectory(tests)
|
||||||
endif (ENABLE_TESTING)
|
endif (ENABLE_TESTING)
|
||||||
|
|
||||||
add_library(ArffFiles INTERFACE ArffFiles.hpp)
|
add_library(ArffFiles INTERFACE ArffFiles.hpp)
|
||||||
|
|
||||||
|
target_include_directories(ArffFiles INTERFACE
|
||||||
|
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>
|
||||||
|
$<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}/configured_files/include>
|
||||||
|
$<INSTALL_INTERFACE:include>
|
||||||
|
)
|
||||||
|
# Install
|
||||||
|
# -------
|
||||||
|
install(TARGETS ArffFiles EXPORT ArffFilesTargets
|
||||||
|
INCLUDES DESTINATION include
|
||||||
|
)
|
||||||
|
install(EXPORT ArffFilesTargets
|
||||||
|
FILE ArffFilesTargets.cmake
|
||||||
|
NAMESPACE ArffFiles::
|
||||||
|
DESTINATION lib/cmake/ArffFiles
|
||||||
|
)
|
||||||
|
|
||||||
|
# Install the main header file
|
||||||
|
install(FILES ArffFiles.hpp
|
||||||
|
DESTINATION include
|
||||||
|
)
|
||||||
|
|
||||||
|
# Install the generated configuration header
|
||||||
|
install(FILES "${CMAKE_CURRENT_BINARY_DIR}/configured_files/include/arffFiles_config.h"
|
||||||
|
DESTINATION include
|
||||||
|
)
|
||||||
|
|
||||||
|
# Install documentation files
|
||||||
|
install(FILES LICENSE README.md
|
||||||
|
DESTINATION share/doc/ArffFiles
|
||||||
|
)
|
||||||
|
|
||||||
|
# Create and install package configuration files
|
||||||
|
include(CMakePackageConfigHelpers)
|
||||||
|
write_basic_package_version_file(
|
||||||
|
"${CMAKE_CURRENT_BINARY_DIR}/ArffFilesConfigVersion.cmake"
|
||||||
|
VERSION ${PROJECT_VERSION}
|
||||||
|
COMPATIBILITY AnyNewerVersion
|
||||||
|
)
|
||||||
|
|
||||||
|
configure_package_config_file(
|
||||||
|
"${CMAKE_CURRENT_SOURCE_DIR}/cmake/ArffFilesConfig.cmake.in"
|
||||||
|
"${CMAKE_CURRENT_BINARY_DIR}/ArffFilesConfig.cmake"
|
||||||
|
INSTALL_DESTINATION lib/cmake/ArffFiles
|
||||||
|
)
|
||||||
|
|
||||||
|
install(FILES
|
||||||
|
"${CMAKE_CURRENT_BINARY_DIR}/ArffFilesConfig.cmake"
|
||||||
|
"${CMAKE_CURRENT_BINARY_DIR}/ArffFilesConfigVersion.cmake"
|
||||||
|
DESTINATION lib/cmake/ArffFiles
|
||||||
|
)
|
||||||
|
11
CMakeLists_conan.txt
Normal file
11
CMakeLists_conan.txt
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
cmake_minimum_required(VERSION 3.20)
|
||||||
|
|
||||||
|
project(ArffFiles
|
||||||
|
VERSION 1.2.1
|
||||||
|
DESCRIPTION "Library to read Arff Files and return STL vectors with the data read."
|
||||||
|
HOMEPAGE_URL "https://github.com/rmontanana/ArffFiles"
|
||||||
|
LANGUAGES CXX
|
||||||
|
)
|
||||||
|
|
||||||
|
# Subdirectories
|
||||||
|
add_subdirectory(config)
|
8
Makefile
8
Makefile
@@ -25,10 +25,12 @@ clean: ## Clean the tests info
|
|||||||
@echo ">>> Done";
|
@echo ">>> Done";
|
||||||
|
|
||||||
build: ## Build a debug version of the project
|
build: ## Build a debug version of the project
|
||||||
@echo ">>> Building Debug ArffFiles...";
|
@echo ">>> Building Debug Folding...";
|
||||||
@if [ -d ./$(f_debug) ]; then rm -rf ./$(f_debug); fi
|
@if [ -d $(f_debug) ]; then rm -rf $(f_debug); fi
|
||||||
@mkdir $(f_debug);
|
@mkdir $(f_debug);
|
||||||
@cmake -S . -B $(f_debug) -D CMAKE_BUILD_TYPE=Debug -D ENABLE_TESTING=ON -D CODE_COVERAGE=ON
|
conan install . -of $(f_debug) -s build_type=Debug -b missing
|
||||||
|
cmake -B $(f_debug) -S . -DCMAKE_BUILD_TYPE=Debug -DCMAKE_TOOLCHAIN_FILE=$(f_debug)/conan_toolchain.cmake -DENABLE_TESTING=ON
|
||||||
|
cmake --build $(f_debug) -t $(test_targets) $(n_procs)
|
||||||
@echo ">>> Done";
|
@echo ">>> Done";
|
||||||
|
|
||||||
opt = ""
|
opt = ""
|
||||||
|
@@ -29,10 +29,10 @@ A modern C++17 header-only library to read **ARFF (Attribute-Relation File Forma
|
|||||||
```bash
|
```bash
|
||||||
# Add the package to your conanfile.txt
|
# Add the package to your conanfile.txt
|
||||||
[requires]
|
[requires]
|
||||||
arff-files/1.0.1
|
arff-files/1.2.1
|
||||||
|
|
||||||
# Or install directly
|
# Or install directly
|
||||||
conan install arff-files/1.0.1@
|
conan install arff-files/1.2.1@
|
||||||
```
|
```
|
||||||
|
|
||||||
### Manual Installation
|
### Manual Installation
|
||||||
|
5
cmake/ArffFilesConfig.cmake.in
Normal file
5
cmake/ArffFilesConfig.cmake.in
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
@PACKAGE_INIT@
|
||||||
|
|
||||||
|
include("${CMAKE_CURRENT_LIST_DIR}/ArffFilesTargets.cmake")
|
||||||
|
|
||||||
|
check_required_components(ArffFiles)
|
88
conanfile.py
88
conanfile.py
@@ -1,21 +1,29 @@
|
|||||||
import re
|
import re
|
||||||
from conan import ConanFile
|
from conan import ConanFile
|
||||||
from conan.tools.files import copy
|
from conan.tools.files import copy
|
||||||
|
from conan.tools.cmake import CMakeToolchain, CMakeDeps
|
||||||
|
|
||||||
|
|
||||||
class ArffFilesConan(ConanFile):
|
class ArffFilesConan(ConanFile):
|
||||||
name = "arff-files"
|
name = "arff-files"
|
||||||
version = "X.X.X"
|
version = "X.X.X"
|
||||||
description = (
|
description = "Header-only library to read ARFF (Attribute-Relation \
|
||||||
"Header-only library to read ARFF (Attribute-Relation File Format) files and return STL vectors with the data read."
|
File Format) files and return STL vectors with the data read."
|
||||||
)
|
|
||||||
url = "https://github.com/rmontanana/ArffFiles"
|
url = "https://github.com/rmontanana/ArffFiles"
|
||||||
license = "MIT"
|
license = "MIT"
|
||||||
homepage = "https://github.com/rmontanana/ArffFiles"
|
homepage = "https://github.com/rmontanana/ArffFiles"
|
||||||
topics = ("arff", "data-processing", "file-parsing", "header-only", "cpp17")
|
topics = ("arff", "data-processing", "file-parsing", "header-only", "cpp17")
|
||||||
no_copy_source = True
|
no_copy_source = True
|
||||||
exports_sources = "ArffFiles.hpp", "LICENSE", "README.md"
|
exports_sources = (
|
||||||
|
"ArffFiles.hpp",
|
||||||
|
"LICENSE",
|
||||||
|
"README.md",
|
||||||
|
"CMakeLists.txt",
|
||||||
|
"config/*",
|
||||||
|
"cmake/*",
|
||||||
|
)
|
||||||
package_type = "header-library"
|
package_type = "header-library"
|
||||||
|
settings = "build_type", "compiler", "arch", "os"
|
||||||
|
|
||||||
def init(self):
|
def init(self):
|
||||||
# Read the CMakeLists.txt file to get the version
|
# Read the CMakeLists.txt file to get the version
|
||||||
@@ -28,12 +36,76 @@ class ArffFilesConan(ConanFile):
|
|||||||
if match:
|
if match:
|
||||||
self.version = match.group(1)
|
self.version = match.group(1)
|
||||||
|
|
||||||
|
def build_requirements(self):
|
||||||
|
self.tool_requires("cmake/[>=3.15]")
|
||||||
|
self.test_requires("catch2/3.8.1")
|
||||||
|
|
||||||
|
def layout(self):
|
||||||
|
# Only use cmake_layout for conan packaging, not for development builds
|
||||||
|
# This can be detected by checking if we're in a conan cache folder
|
||||||
|
if (
|
||||||
|
hasattr(self, "folders")
|
||||||
|
and hasattr(self.folders, "base_build")
|
||||||
|
and self.folders.base_build
|
||||||
|
and ".conan2" in self.folders.base_build
|
||||||
|
):
|
||||||
|
from conan.tools.cmake import cmake_layout
|
||||||
|
|
||||||
|
cmake_layout(self)
|
||||||
|
|
||||||
|
def generate(self):
|
||||||
|
# Generate CMake toolchain file
|
||||||
|
tc = CMakeToolchain(self)
|
||||||
|
tc.generate()
|
||||||
|
|
||||||
|
# Generate CMake dependencies file (needed for test requirements like catch2)
|
||||||
|
deps = CMakeDeps(self)
|
||||||
|
deps.generate()
|
||||||
|
|
||||||
|
def build(self):
|
||||||
|
# Use CMake to generate the config file through existing config system
|
||||||
|
from conan.tools.cmake import CMake
|
||||||
|
|
||||||
|
cmake = CMake(self)
|
||||||
|
# Configure with minimal options - just enough to generate the config file
|
||||||
|
cmake.configure(
|
||||||
|
build_script_folder=None,
|
||||||
|
cli_args=["-DENABLE_TESTING=OFF", "-DCODE_COVERAGE=OFF"],
|
||||||
|
)
|
||||||
|
# No need to build anything, just configure to generate the config file
|
||||||
|
|
||||||
def package(self):
|
def package(self):
|
||||||
# Copy header file to include directory
|
# Copy header file
|
||||||
copy(self, "*.hpp", src=self.source_folder, dst=self.package_folder, keep_path=False)
|
copy(
|
||||||
|
self,
|
||||||
|
"ArffFiles.hpp",
|
||||||
|
src=self.source_folder,
|
||||||
|
dst=self.package_folder,
|
||||||
|
keep_path=False,
|
||||||
|
)
|
||||||
|
# Copy the generated config file from CMake build folder
|
||||||
|
copy(
|
||||||
|
self,
|
||||||
|
"arffFiles_config.h",
|
||||||
|
src=f"{self.build_folder}/configured_files/include",
|
||||||
|
dst=self.package_folder,
|
||||||
|
keep_path=False,
|
||||||
|
)
|
||||||
# Copy license and readme for package documentation
|
# Copy license and readme for package documentation
|
||||||
copy(self, "LICENSE", src=self.source_folder, dst=self.package_folder, keep_path=False)
|
copy(
|
||||||
copy(self, "README.md", src=self.source_folder, dst=self.package_folder, keep_path=False)
|
self,
|
||||||
|
"LICENSE",
|
||||||
|
src=self.source_folder,
|
||||||
|
dst=self.package_folder,
|
||||||
|
keep_path=False,
|
||||||
|
)
|
||||||
|
copy(
|
||||||
|
self,
|
||||||
|
"README.md",
|
||||||
|
src=self.source_folder,
|
||||||
|
dst=self.package_folder,
|
||||||
|
keep_path=False,
|
||||||
|
)
|
||||||
|
|
||||||
def package_info(self):
|
def package_info(self):
|
||||||
# Header-only library configuration
|
# Header-only library configuration
|
||||||
|
@@ -1,11 +1,10 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <string>
|
#define ARFFLIB_VERSION_MAJOR @PROJECT_VERSION_MAJOR@
|
||||||
#include <string_view>
|
#define ARFFLIB_VERSION_MINOR @PROJECT_VERSION_MINOR@
|
||||||
|
#define ARFFLIB_VERSION_PATCH @PROJECT_VERSION_PATCH@
|
||||||
|
|
||||||
#define PROJECT_VERSION_MAJOR @PROJECT_VERSION_MAJOR @
|
#define ARFFLIB_VERSION "@PROJECT_VERSION@"
|
||||||
#define PROJECT_VERSION_MINOR @PROJECT_VERSION_MINOR @
|
|
||||||
#define PROJECT_VERSION_PATCH @PROJECT_VERSION_PATCH @
|
|
||||||
|
|
||||||
static constexpr std::string_view arffFiles_project_name = "@PROJECT_NAME@";
|
static constexpr std::string_view arffFiles_project_name = "@PROJECT_NAME@";
|
||||||
static constexpr std::string_view arffFiles_project_version = "@PROJECT_VERSION@";
|
static constexpr std::string_view arffFiles_project_version = "@PROJECT_VERSION@";
|
||||||
|
@@ -3,7 +3,6 @@
|
|||||||
#include <catch2/generators/catch_generators.hpp>
|
#include <catch2/generators/catch_generators.hpp>
|
||||||
#include <catch2/matchers/catch_matchers_string.hpp>
|
#include <catch2/matchers/catch_matchers_string.hpp>
|
||||||
#include "ArffFiles.hpp"
|
#include "ArffFiles.hpp"
|
||||||
#include "arffFiles_config.h"
|
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
|
|
||||||
class Paths {
|
class Paths {
|
||||||
@@ -28,7 +27,7 @@ public:
|
|||||||
TEST_CASE("Version Test", "[ArffFiles]")
|
TEST_CASE("Version Test", "[ArffFiles]")
|
||||||
{
|
{
|
||||||
ArffFiles arff;
|
ArffFiles arff;
|
||||||
REQUIRE(arff.version() == "1.1.0");
|
REQUIRE(arff.version() == "1.2.1");
|
||||||
}
|
}
|
||||||
TEST_CASE("Load Test", "[ArffFiles]")
|
TEST_CASE("Load Test", "[ArffFiles]")
|
||||||
{
|
{
|
||||||
|
Reference in New Issue
Block a user