From f843c938fcd68ae225cc05cecfc1da54a11f384c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana?= Date: Sun, 27 Nov 2022 11:35:21 +0100 Subject: [PATCH] Build sklearn transformer --- .gitignore | 2 +- MANIFEST.in | 2 +- Makefile | 6 +- fimdlp/{FImdlp.cpp => CPPFImdlp.cpp} | 10 +-- fimdlp/{FImdlp.h => CPPFImdlp.h} | 12 +-- fimdlp/__init__.py | 4 +- fimdlp/cfimdlp.pyx | 10 +-- fimdlp/cppfimdlp.cpython-310-darwin.so | Bin 0 -> 84512 bytes fimdlp/mdlp.py | 103 +++++++++++++++++++++++++ pyproject.toml | 18 +++++ sample.py | 11 +-- setup.py | 18 +---- 12 files changed, 152 insertions(+), 44 deletions(-) rename fimdlp/{FImdlp.cpp => CPPFImdlp.cpp} (65%) rename fimdlp/{FImdlp.h => CPPFImdlp.h} (55%) create mode 100755 fimdlp/cppfimdlp.cpython-310-darwin.so create mode 100644 fimdlp/mdlp.py diff --git a/.gitignore b/.gitignore index cbabd70..0318fe0 100644 --- a/.gitignore +++ b/.gitignore @@ -4,7 +4,7 @@ __pycache__/ *$py.class # C extensions -*.so +build/**/*.so # Distribution / packaging .Python diff --git a/MANIFEST.in b/MANIFEST.in index 152f4c3..4d6a74c 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1 +1 @@ -include fimdlp/FImdlp.h +include fimdlp/CPPFImdlp.h diff --git a/Makefile b/Makefile index 80acec7..49674a6 100644 --- a/Makefile +++ b/Makefile @@ -16,9 +16,13 @@ push: ## Push code with tags build: ## Build package rm -fr dist/* rm -fr build/* - #python setup.py build_ext python -m build +buildext: ## Build extension + rm -fr dist/* + rm -fr build/* + python setup.py build_ext + audit: ## Audit pip pip-audit diff --git a/fimdlp/FImdlp.cpp b/fimdlp/CPPFImdlp.cpp similarity index 65% rename from fimdlp/FImdlp.cpp rename to fimdlp/CPPFImdlp.cpp index 19241c7..3600959 100644 --- a/fimdlp/FImdlp.cpp +++ b/fimdlp/CPPFImdlp.cpp @@ -1,13 +1,13 @@ -#include "FImdlp.h" -namespace FImdlp +#include "CPPFImdlp.h" +namespace CPPFImdlp { - FImdlp::FImdlp() + CPPFImdlp::CPPFImdlp() { } - FImdlp::~FImdlp() + CPPFImdlp::~CPPFImdlp() { } - std::vector FImdlp::cutPoints(std::vector &X, std::vector &y) + std::vector CPPFImdlp::cutPoints(std::vector &X, std::vector &y) { std::vector cutPts; int i, ant = X.at(0); diff --git a/fimdlp/FImdlp.h b/fimdlp/CPPFImdlp.h similarity index 55% rename from fimdlp/FImdlp.h rename to fimdlp/CPPFImdlp.h index d15cf8b..81f589a 100644 --- a/fimdlp/FImdlp.h +++ b/fimdlp/CPPFImdlp.h @@ -1,14 +1,14 @@ -#ifndef FIMDLP_H -#define FIMDLP_H +#ifndef CPPFIMDLP_H +#define CPPFIMDLP_H #include #include -namespace FImdlp +namespace CPPFImdlp { - class FImdlp + class CPPFImdlp { public: - FImdlp(); - ~FImdlp(); + CPPFImdlp(); + ~CPPFImdlp(); std::vector cutPoints(std::vector &, std::vector &); }; } diff --git a/fimdlp/__init__.py b/fimdlp/__init__.py index 7df9f7a..d1675e1 100644 --- a/fimdlp/__init__.py +++ b/fimdlp/__init__.py @@ -1 +1,3 @@ -from ._version import __version__ \ No newline at end of file +from ._version import __version__ + +all = ["FImdlp", "__version__"] diff --git a/fimdlp/cfimdlp.pyx b/fimdlp/cfimdlp.pyx index cfa00b2..8317808 100644 --- a/fimdlp/cfimdlp.pyx +++ b/fimdlp/cfimdlp.pyx @@ -2,15 +2,15 @@ # cython: language_level = 3 from libcpp.vector cimport vector -cdef extern from "FImdlp.h" namespace "FImdlp": - cdef cppclass FImdlp: - FImdlp() except + +cdef extern from "CPPFImdlp.h" namespace "CPPFImdlp": + cdef cppclass CPPFImdlp: + CPPFImdlp() except + vector[float] cutPoints(vector[int]&, vector[int]&) cdef class CFImdlp: - cdef FImdlp *thisptr + cdef CPPFImdlp *thisptr def __cinit__(self): - self.thisptr = new FImdlp() + self.thisptr = new CPPFImdlp() def __dealloc__(self): del self.thisptr def cut_points(self, X, y): diff --git a/fimdlp/cppfimdlp.cpython-310-darwin.so b/fimdlp/cppfimdlp.cpython-310-darwin.so new file mode 100755 index 0000000000000000000000000000000000000000..c553eccde1968d84b6f0d253514c1d2ab803ac7d GIT binary patch literal 84512 zcmeIb3wTu3xi`Ki7YNr0DuP!=B|;QKCJ@XZ(G1DJ9-KhrCSGtDCKED}%#1U8AW?(S zWR&eVEwwjl^)x;9LhGrAw)Th@Fo*(Ls}XIbUQwy;7;O$dmWlb4Qua(t*HS#l*wDuA=XY59_rXty9FNoT-K&XFXq zcd={PViq;<);-c-EQ!XfU)e*(+`<#i6cy63F2*R^>y3xj21E7U zV5lJsBBQ*5+c@VDA%L(^hAx|YvJ(x?>#cDvc6zH9%wNQ( zEYrJAtnWS{&z8lQEjufk#4-DjnOu*1{*|s8H@6PN2+DQ@*~PJKUIt~$nvNDHJ0ZUH zBP3~>!0Sg?q`ioxI*o_hIMO1iV62;Lk$TRPB#=sVaC8kvJUNVe(ir%n>##KAZE}%C zdfpfbd=?z#Q#SXrp^JY!{RgRQPJi`@2Y)%L3~@F%h{ZJ$jXsivsqH1p^xTW!@$ypy zqmWABlPuOA<4(h{X_2PJW6`Ni!Ik9v7hharFR5=ukR(bKS@gN}6HcIC)EZL)%_Xx1 z3#cXOLO9BI1@uDs6$`upn8tncGBsdoz|?@L0aF8}222f@8Zb3rYQWThsR2_1rUpz6 zm>MuOU~0hBfT;mf1EvN{4VW4*HDGGM)PSi0Qv;?3ObwVCFg0Lmz|?@L0aF8}222f@ z8Zb3rYQWThsR2_1rUpz6m>MuOU~0hBfT;mf1EvN{4VW7E-=Tpevig_hI~q!?6FOOe z{~7GO+PT!Z#JPBp-0`vOR*zb*?zWB@D({-hK7^H%Gi7y2x15X?xYYyh;v?41bMPgs zd%Kq7>^PERz2}ekme&1PKfkU>4s||7AJ^KSlhxWD`x`F%VUOzS@~BI8_f2cqaL8#L zGsNAoGp|;S7Rc%Wx%h~@{YZ|-vCn$vFTgiDnfG&N$1aP@vCGda`6}P z_Afy8uJz8fjBMo39ov;ZTDGIGhpZQEKV-cozY8B9yB%-Gf9O`<_FNqA@qiCZa>{A( zICfj_S`0>6jc@d*2j!QK%8sXH>+BwRizxX7*|FVit$tZf?2*;BE!NIIpcaR$V_ch2 zkxd@Q^VYl0g#fquM>#d?yQFOGQ+(a*NxHh^WZM=wS-a_wwa~TMqkaUgy!Ew-!=7Yw z&$=nHy33>f6*@%HL0NTea;q~8|021#}JBtUYqztsqv_P^Qf=5)n_4ni*`L^y4BZtiCk#03ON}m@Tko_ za&q~WzVqzRRHk6oZ~9U5z7$H-Bdf2-$@s=hEB5-1_xF3$9=YQg`8wzI&gIS>D@*s0D2;AyU3RWWMQ9swdup^svcVhW1s*}|( zq= z$X$s~tm_+zJXIbRV%`KHE7vhiYVd7a2p<#|HPd|k{YA?C9DqDo0pib3-wb$J!FdtGa0@^(_~lyR=aVXOKR(D$Zr z!=T41-AnrXF~qUt1XB8LLW90G6Zs`8v`R=wbSpP#B?$C>j4p@7$3>!zCpxt;lz2M& zDiR+MiMc#+qW1SokXXeN+eP9Bh=bUn+G_~(okEs?rlpoVor+@L?@{+Lqs?^ehV1zx zZJ+Y(9~ZDDKaQ-=gbGA22C65+Lng%@#{p~Sw@FdS{400?pr%<+leQk&v(dHpD1HM$ z3!}_#71lHMr%1KGA*Z&!k1B6Nn)Dd`{(^JMaNOgz{ z)=ADL0UeSBSzH4lT7;)xtfvpnq|c`GI-Y*2o_-K5%=nX)FJWrJ=zf5_RQUx&{WDNg z-TK%-cuw<7H15`^!_xDI^}njm8(wWw#Dm#*WGt=@(Fx^W|jaCkSC0JC|2<0z7RSc&Ua@k{|~OqDK2L z4*@oieuQpEGvLqglOjlpp9>BBjKrkMUt&q`Xb&QZL|j3tOrEokWm5SI=FLU*+sWi% zDNOQ4Nbc`p!5^|r@(WpM&Uu-$MF(f=%CJ4i+1_P@<@dtlE5>fEkczM7zl12~Rwr7C zWm5@LMWusTgdb96)cl7@1XcG;B1u)Q!LL4PuOZF3bq=_xPW0<87@9stihNoA@)>CP z?{v!@l{eB4JXsT!C<_Le(I9am4Kkl5%4B87TJp+YATKrc91^@3f=T4BNE^xWmswI} z4kbNG<-u?`fmRdA^8aGYl~-Uw(e6hQ4U&m6GUR0W(>(10N^9qA*3Ng)^{Ba$?KLwDz(HhhWlIL2=68B+tiDsgNHa*XyqP8Eeu|4!-#0@{1(+FS^g*1)-p_7~Bu z9`%FAAxzTdqH<3486>SALIr|SBo0FreT!*^zLnEjJEPP|l9m6#YFOSz1yvKoMhmNti?}6jW5!*17bjXl znLQ?w9z>Fp>FOF*$^4a35~%CSLx^hkKIr;;$ZQFHzy#)(@a*22C;CvR$rC>D+PNHB13l>xG5*ECeg6Fef1qH_;sHISQDo!HsB^5oNrX_Bd4`L+7 zstvQ>E2QFG`AbnNx}00DAojh-z&>ARU(VTYNVC&;&e^{McER%#@YuUMk=<6%_wK~a zxpgK?-Yr@aD@F8K?MmM4@d+sN7c_&@-fiqt5rk6ZPoR6E z+-Fh7PoaD0=0@{J;wH3MFKvlg-L-qTAZubM)WLEg!|eN}7DIw)<;0nY(jLEnvZ<_w z?Px>Jr$p9}i9bLmy#eH+RWTOU2@+pYh3*0iN{cdQmA}9DLvU!Td0KXFNE`$cwm9fK z+Wn9P!=b*B*pKKz#QdDZyZ|wJNq$8wdMYCNTD6hz?A_dm5>FrjwuVlq)j)2Ix(`F| z5>#dltXgxNmlo0KWcFOyK#EM3FF>+KeNX!ps!Mj6VD?=p^Hc{h-;5-5n?!eOL2m<+ zQBI7L=Z2BgiXw=P&IZDKZ5PF;d*#F%a`FM{o0vyBB({y(rTvl;6K_zIzO(v1qVQNi z8I83NTakaRb+V}aiZyGM9zmQ7ZM(kaC=~E_R0k%lT*Ic8$`LnFDaxmihmqhts%#Q@ zY=@>tC%Risrx7f18#rmPRRmGJQ2^)GrLaqu_SY|X+AvCc7r?o7G1=^&5KOnLM`a^g z@AcFMzd|OY2bzb>-`$0sC1zKdNx6FQ`jibz+QAztrwycC{0g@u&fq{Bk^2 zNuin1REtRIv;evnP2{u|&RJ$Hc6HSzLydT%gNH;mq67B_U!-ccw7 z3bhUG4CVffd6|_D@{V9W!TgL+j6E%2LgKTK^(m0vHxpcsF|MWKwR@3BHmv5K%Pb3P zy{hp*1)`9N>(IOUO2NC6=3khHH6c^C#%56gZ|`N)`<}KTjb#~6X`=`vnH`cHvz*4R z2e) zpTAXOt!{o58+N4><5G}vg#sqlB=c(#u2Hwc>a6SkEJ>33yuAIryc$PSVSzREO+;Y6 zSSG8k-SAhyN5i6AhK3~@?>x%_sj13i#pI2ZXaSgvZ>dqgz=&6bf{@X5zL$dlbb)1T z{{#`&O>uZ0C96A=OJ3EUIG2>Zi))jLMB2O%0(xHnNIbW$+}__eazpB7i~6GaspI&% zi_{m3kE;i_e>A~z@a3bHqq6^?y#0f`Yr*HeYgGj);)b=lFJJ0d+g5vTkqd) zd0BnF#__Z@buP7*A9a40>ReD@O^ig0CpoJC;~!S{V*|}(Y@d774lY7fXyCLaYzWbC*^H##Q}r4`Aymb^=dda=mL8~0Z*zBYexrq@A5hJ8=psD=%x zMHclX^$W*qOy7S}4{h(8U^(>imzGy#{~@Mt1^Rxc#_?C7???e=N9g;nr0+kgzprt; z-*$@Z|9wW+oeflg%ZgrZlBBrZ+3`%3+y8eGMdoombi_)z7DE*J+#?{WQHy>`Ub5(W z41G}i_PRi={~401z9fJCnn!)Q>hMmBa;7^~2c|!fFb~Xa`}=v;!n{&EG#&4;>{6zS zB-yev{k#MN*FJ0E`~%i8P~FqVF>W2eR&k%~c>cEic-Se*t-?k;>Z}43)`HSlQHv?qJV(v$u8lmdi{ge;J7Tp4ChD$3*>s^;@ffEfYYj~cumLf>6CvX%DGvk1 zth0q?mQ%)R{(Y+@X;=Pw+^f(*cIB@@2#STa8d27Y-ZCPCMN_$1=81`j(O!X=-ZK%( z8vmEln*$p?Buy^BD)IJB5VOfbWqxcuiHry2RNG!T72hYPmS~y>Kh7y9M|jw_db>qd zN6@5rD$Q5RKjd@Mx2Uj5j9c4LRC=7e7o);HJTTX^3mJDcJ*w>APzCX*vF$qN4a;}@ zf@-3Sq>g_Zh_E$TPvK@B-rKvD3|ZYl`PfN5(ni1^us_%5vqg1>@A@T*UKqd9ez^bp zrN~F$5Bvbp)E|Q6RTaY8Zkx+Cu-dy+G(JozAJEdrTj)`b@JDUp`FZ}uSQFFy4Q~6M z{tfh7wOnxgp}yJJDV;ir(jBszHxWxe`8=T1*xM(PH>wQClNuF<=dyQWfGju@+_Wh} zEA?v?$R1NcUy@b|s2u6JFBgDnd=-GU{|v~%Fm#b7Q&CfuWn6H~CP~|F$;VTnQK+0Y z=?Dr>Vt?KDbL}EfS}}n93Ej)taeQQavX)QG$x#)oDrd+^w8gIetMDxR9Sp|Ww~$Hi zpHVMRj{x;+C?u$74o>YS>W4V>a8Un(Q;!^+x}2!nIQ1}4-@>Uspmlrlgpv#;>IIy- z0MxaddduL{e;yBNdZLVS4+ZrEQSQO1A0p~Q&=#nNfcjJ9g<76HICUFQKg+4}LA`@h z4;h?#0a1USQ|HBtwC{52jnr5t*77`1_cfsyxgfoflQs`ddUza2otg_opwEfhP~y7> zC;dH<($@Y)mQHUU&O0Br@ms`e!y%w|3mZpvXkStOt+0FT5QV}N>ZMSCLhn%MItu-f zLRV4f_Y|5-q3sl^q|l=jnns~tQK*Pxu@w3yh42h*mo~-+|%_fNDU9N64j|9ecgf~zaLmb{LexMB>hA>{HVX*2#+-B{JQz+sA5`%$k zBaP;Gd*4tiRl(zcmPJ+3`cKtj?|+qE5ID1wVSTNl@0 zkT;Ly{cWs}_Ze)PJ2dm+xZL->;p5OvU!F^HoOYLJexWK)b< zdb0L8itI60Sv#L3jU?OJsWI_D)wP})(>kFFlToHU*K5y{3X*M`L1yhdFPDI83z4Sn zGn;;kb|2__>5&y)$g+IyL_RP^i?#D{9)elk!P13Uw%I&*Sxx4N9oA;*25+89E%|Kk z^I+I^`VjP%aofg`SLl4o0TuPqyN7M#5d&T9pfN5rDhBUy7?h+V^d=thU#| zCQgJL3w6bU{dJfSwm@Vewdz-WXJ&OHy`0l|IU!|t zmXxJj$`&SNA(OJp-SI3IJ)c;=u?`Oo(B0ZmP&~nLZRCC;g5t4Y>UR``X}80)H&UUt zqVG|8YQX79Rfc?$v>-L*^LeSf&lrcAXaq-JrSmDLupVn-C&snj$7pba^~q|@C{n9D zP_|Yt1fNEYk&L&ylP=b2J6EF2axz6?u|Jm`Wy+R$$;A#Vp|%%be2IS|C+8O&zWpI& zUxwR0euz2T_m9XH!{5VL+Y|LJyd$QVR5=aP zSZHiw-l*r`VWBaE=7}w+Aw7P&ANH~BLkx+1Rkg`DUi#-Zaw#5lIxSTm;1U|RguA$e zXW*qJTuTy;(4d;CoX5$IVt{(Q5E*E{{hKhn{9oIsaClnp>WQD?!4Z*;XIiq>xY|$I zcsi%xaJx|A1K_W9e5Ksr-u^+Z{f)kH+A)Y^%K9;qaAcwY$3m1V<|Xqkqs0oT%JHd! zQPas){T8+X+JB&Q(Dsh_OBnZ{%Y2o_2`DL5PV+HlpJe_T5k&;4@)4{KV-?f^`neT( zZtFwsde4ETJ;?y3l(sFs&*P~IIA^7W(F6)jU(UJaq`5vsx9s!MfXPR7Yv&zQcrj7v z^I3Yl=)0k>EJLhZqZ4Ki$2C-WF&_1m^yT#BgQKq`iw*CTTx=pbLB34$(QxQQ40G`@ zVjzoON*!wtX;_SVnJfp+KN$ZQ6&wGQ=RnTKzVzt|>x4}CkRQfBqK9+^e%Ahi&L=Hi zgy$tcy8(8Yto%=SybB~Zwv&>ur47TVZbXAFnW3osnlsE7H(>{C9W$75uOY$>k`)(ZGC z1m+h11<$u{t9A?4AQbakG`)kZmdTEKAy$hbsqcrnzV-bEuiwt)tbVV-8?1hJ6aV?h z&$;dHLF;!)cKw_q^!j}xUB9use%x|Lg)!Jt{qmqgt?c8$>o=LI!lrSmALWbs{Sf;E zy?)oQbu!g&7?tl{Bn?tOalUMsTm9U)h{9Pj6)TeTc{99exGaAWq*@d87<}lFiaWIc zlT7Ufddc+(79H$#S(hD;72DSLP0SeJ2A3KK!?W1FH^bOmU07()bG)`5cX9zbd;T#D zY1YI%jC#!SaV!i^C;(D<&q4`^tlc2z;t3A6G`o>u51xluXp+|-p}k5KhdAvJdb(5H ziI=wV(b_#^N;EjHgDkh>Eo(x-7~odl!eqCpw)ii#>fhYk`*Yl>xfb~`q|kMROlQ(U zSQKE?cfWkh?RdsxoxLw}9?k98g=N;e?6GDW%I0>wV7ve={Iyxm7w0B9XZ^{CqX)Woj&sVQ4XD>CM1(hQ2#?QtARpC>y+=gDwf zt!=$B2`eou1T`z;e!o`zd+#F{ne5$J8Qu2%zF&CM_c6iuZluR7w9<7v-!=*=-2hGY z^gct!}} zY)Fr1Y`kDJ9nUPwnjW4gg=U;j9)R3T?&q@HD<7wsBQ>f8qgsvn0gmuo-itZ04UGZI z_$3ApYv(sH@6%qo4xLBe@V`lo%FL*JAB01RUDViYkwXv6T|M^am|>DN;z=8;2+sM@ z$vepEU(!V<^oZk9W_N8p*%oyNa>6-2?cdR|X@k0KIPxKbnh6e%<9%fu-ZRvw(@=dh zA|8ag(1y)DeOqn`Q_uXV^FNI{{h!shl`0 zt4r{BJHA1|A;=f4ckQQIre^&Po}`VWVT5L-B|UvP_BU$O>EFf^NM>)RU>Ifgc2ql^ zs6fI^U}Oi(;M=^g%OQC=mQbwF=u+Laj=w8o^zjdCmt%-X+n=@b3D^#^!56h8w15MW zy_*oFO~$?ov+5@FDyGXWwn7{A3dU$TX{QZgA*zba#$xRqY?gZ4k)CA?q^{nNh==xM zpF!C;t#}-sb;J`>QBUWSJ76-rIeU9R=X~seQ})uIsy|?`ozQ z+A7g$j?5f+B~367qr}h$68Ry=f%^ltxWxI!pD`*iNA{i4mxpyH8WGm6#4UZ^kQRkQ*+C>a!8x~}V+%c?ldLL@B8TsK2=^*$=SFxk zf6GhBcmuZR@SQA8nI|W|xr|H~Pl$1B6Hj^)d)BS8)7v74^2K=NK^Z4@WyjI@0m|Sh zcH{!f*?UMOe*@8F<>gq);@ky2qr=}xQl@&6OABiu(24k&g-cc(#Fv7bo^tN z{W(uEUjkF#xpr_Z@12O{d97oIwR1Y?+}JeQVMi}3-<+Sk{>Aa`WVK$Y*7u!~?&K1v1G^exDbAk8+-zoQL8+x9$l$M!@TzM$YfiV1=A<+%o=g--9xq*ujeK#k+E< zh{v%v{f!0`^n**DSt#>L+8 zeHD<>H*KIOynZY~{_QkI=KmL48q-wVkKtRRqf<{%HtJsXfE0$fg6dX@y0ufG5w^2~ z63Rui`6oNQEWZ~*;!b*IXeHJiY)`$F@N1CQ_gn4X@`YIN$A5xsz3FTprffGLz3(2O zp7>-uW4fFXI6O(b2g5^6T2Z7%tplyBK98ldlNgFYx09_m9dc3z-Pjfo&0pqndN~vKYSLbv(68v&kIjNJR7Le z=Y>-+LA=gcN*?;zL=cdo5FCXd<@_89mdRkidEqJ46Q4jjRMbfl2Amf*tZ&8o2&&ebgr^g4JHo|zS z(nI0TwYkWjzj$*=?p-b{j`=9JyocT+t;e62Ld~|br#mkn!7+Tdb@nszc50E@f!wM35ajPyIRmVm+mA4T-dU*dj?BH;gn-1e2 z)jkDTt>rist+x|=G$z+3n+s}-Kbx1DezAM|S2?wgk7etegIFcHi{DYsCT2B`mDt;O zB0?e_fe460%*&CA7X~ zd&Xv3a?r_ewq5AGm7Wuz7MU1Y+Ty|6I7~s*4}G-SU=Pskd$dPr*$aK9mru(;9Q$CM&^>va>Y!uYt77hlbHVA`43U?Za?csz?$ z8odEuBi=M;3?7C^hc5INC016r0nY}Hp(Lc>tfLvdh_b)Y>w@VcUvxiw#gOP;_c6^p z^}EI^-{6ymYd7XNcAy8#R~VH$sqDhTxDU14@fL-aaP*l4_7zy$(z`BLE`pBQ-SIuW zA7OBk`ipEZmHwjh*?l6rU&!v0+5IARpTh1X>|V<5W$Zqk-Dj|S1-s8=_gUn_Yc@z zWA}sX{&#ji%CJeiaKPM)+q=M!|ghx1OuOz})!;k$X#t9jQl0iYQv;c>-j z1!dCN#n5fPWt4QKBwbms*&@+dB3%7HT%TjXA=XPJ>C)kQES8#rXDt@bw;=H?Oa8{3 zaqn5M(EnBr1;3Ybp(H(-Gva}v(yp8fBR8%zE-M@!w7k#CHWUbPI{JW9GFhhn~yGX^WRpXLlrjh2q)T=SjL(r39d5Pm4{ zQ)un}A>%>w$dEg+j)A-dFIn<`I~@F{KRR4`GbjJo!=-m~=$L6|-WJQgVN&~$A#V?p zemo@q&0*5dhKxmc`_TN?heTN_P`6H2r zAnQ@o9i*nPuRaik1@i#RF&qu93WgBV5Nx9E67fX?Aq6;szU5m*QL*^S=5T!+$+3Vh z>Td)yiWS7)sbyqJBWTbvO1!5Ay34cUjnN=7*)lC&}&Y*K=#gXnkBct{CCMfgjyA_dhVF}8H3Qc-{2NE0;cYl@>- z4dJM5aF#WY7mk)RM8nPANNWi!wzY-+dNP_s@$y8kNW_GpNK7Dx`LL}C^ zAm3mJ+zoM*B4d1zSjVQ95|p#3WO~e4P>LH^Qb}k}37=GVEXmt|KB^32P$y1owzZAu z73kkhn4?#rl)Ox~@X8y}Qque-9uL$Jqp5do5R+kCu?1*`rz+L8E?`vNYH&-sh@yqI z80ebn(b?AoGP*ZtbG9!QaIwE!4y&EQI(pGUOxwX&U?v+MBV^7tu4vs#(F9q#A0T4T z+MYN~XI2O&cDGF0lF;f6_BZQG1)dHcD|xysFE7itoG0a61^+6GWU*}QkR)3b<|ZV1;Pk&Y zPk065>F>FhR!dUBT1o1{l50Ht$TgT_;aB1HOGS$$bvNMKKKLublJqA0T=0&R#!4$# zDL{v)11LaYu9UMPSIXM}g3Y;-rG31VQ+I|m%yy2HyKkaY&~>4dTR24;YMX*QE|G?m zVi{63Rm#~gRm$t0Dp^+8rTn@w$>Ou*QXNp$oSZQu@(5prEO8aKQ~k+DcFd=;5Y7`m zcKHnBsRZ-!xQzLD63To`;mpS}f%$xyP}q)zH^Y2!TG)>5B*WO+F(0cE=HtN(^YI{u z`FNbcd@RYBkEgKA#}NzWj}!iQ;p4RnuEKV_)nPu~i7}trl;Y13K3*v^JVE$2;h!h` z^M!wb@FxntNcdRR;VNvOB>c(3$C43OVf#hGzgYNa5?qDtmk1vdCBtYU=98^ax?T8X z!k;F5vO!8O7yb<4Un=|x;X8ysQ}~w&f0pnog@3v5`6i*Ty-HxG@MjCZTKF}>cL{%v z@aGC&7QS2f^Mrq;@TqT5`D=xbojt<~gkL9oZit2L3k6;z{KdjwBK)PozgqaqgpaO; ztFWD~<_p`e6Zm@Jqs!qcY{#2j=JS0+VfzY!eZpTUe82GPg&z<;x-723_Eo}f6n;?n zHwu5X@SB9+Ec}r0!@`dU|0dx_g&z~XBK)}UF|6P!Y{xLd{JI^?m-aD&suTZ~yM_1c zWr9u^S)_ulO3>K^ z9Uo6lq^l5g#s7e=MbK3VI^LhM<6m)!CJdv(T(A_KOPNu(2g04r<@p1D+d9tAUk)S)7o;M4+O^hx*4`u|4 zcFRPVAyWgU222f@8Zb3rYQWThsR2_1rUpz6m>MuOU~0hBfT;mf1EvN{4VW4*HDGGM z)PSi0Qv;?3ObwVCFg0Lmz|?@L0aF8}222f@8Zb3rYQWThsR2_1rUpz6m>MuOU~0hB zfT;mf1EvN{4VW4*HDGGM)PSjhe}xA4*RB6pQfVvE@UPXgsrNlis8EV@lfQy)Uwi5K z^;8p?P13(Fj<65Q`Pb0d0`S{r?9%zN(MuO zU~0hBfT;mf1EvN{4VW4*HDGGM)PSi0Qv;?3ObwVCFg0Lmz|?@L0aF8}222f@8Zb3r zYQWThsR2_1rUpz6m>MuOU~0hBfT;mf1EvN{4VW4*HDGGM)PSi0Qv;?3{>hT_uS zF_-Z%ehd8UkLLC7wdwo%?63dX_x6SOYuW8c5Et>Ki!{HCGA8&{2 zf_njOKiqM+b1l;Gd2sb`>);-S+YPrL?%W*d_~mfd!>xnsf_oBfAKd6%>G({z5Zpa* zzk&M`+%dRw^Q7Z4Tm9j!%S}4R<|U3*7hM z9*27yZpcui!_9_U5BE6S$8d!O((x*|dbm5_{ta#q++ny2he^jha1pq>;eH19G~9kT zX*lG;&4UZU-3Rw7+{h8A7u+(qJK>&$lSWF%r@%G9-3iwP_cYvlBc+_2QIh3+xPK>q zv}9Qdw;t}7aN20;t3s>v)l9esxO?HY!TlA^Rw!8(!>xh44{lqb^wmdjM-eU7K`_C3*s`7Jne31jC_PpW<%}#Ps+%0l+i}N1J^tOJbQDToVYL zjBkP8ACE=@^*ZaqKuifoS@nq*rRDn4(-Dy=Dp6eRi$b`bOlVFf_ZGj`84W6p%>gCo z2X&a&gQ68^du3b+42Z1>M4|z|kLpK~&>T!K=-mqj5W1Tq;ixh&DpVh684yz&Xbwk7 zYng24`ZDa_nz!ts?&Own_qC>0qRiRCr(I_v9e!}W1aAmU(YJdW&;(S}eUXbks7icbl6 z$rfuU#S@GvUS}i{2(gxBP-MWuy7ha)p;caIj6B9hP)~#UytTfjrm&xq2rUd`AS+x= zpuy`71zBNu(WnS|@k9*r&DhX_Mc!B_f{vy%kW|qbt0C6_=LAE(CUnFobZggg__|4f;_GGO;Y7CymM2b6N!T=xkou7h|1E5X=v=8Vj^A=x?kJH%ENY z!0d21yB_E)hC+y#OqPoqqXA!iy7j2Mi=#e&V7AY{+A9ZmPh1?2G^GuMgF2)$VwR*g z8TnqQJ=hTBU7AIp+3MN&wImeu)0pOrEm=ILg4p#KQU2_xngG#e$52aTI=9X`=+LPr zy2(OA84?obby4KP00y2Ih9;tjcZLlPSQPsaXP>(ElBB|694zG?!7{>T7IsrJnb9_y) z09rFn9g-sEheHGK;%GdBmxR^^L-puJv3N5y>%DgVm5Y>VUa!5Z)azY?St%U#uEZ4O z^@p3A!y&h4rQPMCG^H^bUh8cNgjOky-T)0~t~LL2{%~CJh8w(5Uuac8@GnyA_NMTv zpr1>vE_2mgiSCY)A*C!k!x}qhS)@47dd=vaN`RGNu^Y5hiHcx2#)^MwgD=<=j|N;M zXt5iTN>}>oy$UoLL{}AYc2*9aZ#uH9KHTgJhK#Y<16kGsH7bo_>znGBogcGv>8Hvu72CG_Hyb*M(Fb$_l zt9MN)k;D`lU;K@$z0_6%V^+m|Q8ZI?pxGb6umQgrI>jQ{D~-sLHs6IsbPnbz4EtVx zB+?LUu5XG+`Ce(GB~RMdj?`R-Kd#h;F~-MS3x#pa;O6ZP&R=BrI+zlDFn4#*g=^6a zuXI7R)LbJaAY(Jh$RTrERfbz@p45eyJgFxge;dW$3({^Db?tmdbzR*YHx(okl8U{N z))sFwo2b0On3ow=ro?-|zc-Of!V7V2&Mz#iE46jyZyw)1vd7XhwA;GRvd?m0cwtXY zcP^5;au3kNvv67@m1nzF& z^!tI&wX{p6O&lK+=~V)kj^gq*H1qU@0@pzwbm{e7A+TQG%>w@b^dx_uz*mpv^4f3W z{I>`kiE{if;ao}D7w7mHfi;2O6!@UPeF95sIQ>W~=RZ~83k0?aJVW4Of!zXE3EU!Z zOyGM2ZWZ{)0(T1hn85c6+#~Rh1b$!OO#&YzjLqwQf%EVaWP}UW^78BH;{`rlq)!oe zvcMHBoPMUj>jm}*{HDNO!Ug!HwGK{??UXcn7{c8g-y?8D;7tN|3H%p_l{+=X#Phe>W&+kEj3j}^b;6j045qP}7p9pLd_^7~>1s;km z9_3dm@c9B)2z;r)RRVhimId|+Tqkg=z{>>ww!r-N+$8Cz0ym2E-2z7heplcYflob^ zm#3M8x4>Ttyj$SWr}6Uj z2z-ISdj(!D@IHa>7g!VccLE;}_(g$_2uy<~9-XAG50PG_B`La6_&|dUJY* zpU8-{1UFk1AxTn@J~0bZ`b0RGDZ2>IH>4APX%$cZmf+X%$@sI=iN8wlKP>old@}y* zbmFfQ{J$6cIzAbHb~^E|5d50p*RjsOIfvJe@cb-#!o;u7SJO}D`qMF`52nBDbmG_N zw_60ij!(v)olg9v)x3O93w|Bz{B=B;wKtVNJDvFT`BM}8I@b9wGVtr^x#J}1LX5Ba zcso{LeY_?9ud~ldpwG6~X~1NkZyD@!v#8(QNGJQ$v0gr#p?ultRKFgdooi3d}JDvFT^~PbruVbBG!Xwpe{_J$dFV-jH#`F5~ z-#O*^k$v@W_H_Q)>8$)>Ju^@6^WO)};?Ly*(&ZQQgo$5Y|J*J3b*z`ad+0#^>~!MS z*Gszvzm9eOeVjdA{_J$(*Vk9W&*1gf@yYnJ(}`bSkIfhSI@bB|IGA08=V#FqCVqYW zwodTtn9>L9-`VNJudnyMC-`-&^Vd-T7kQL_b~^Fv>%->-zmAEX^6#bqF7k*!JDvFT z_2kEbU&lm0SoyQliC6Cw_gsdX3=MvCdygg~vsh%AcK1{QJTz zSyBYQj&=Sn3g9A-__NcAUtbTGoyE(qW1^?}AAqAv=g&?jetrGCRPgIq=f8;pS^Rps z3i=DtxAgV)j|A4&+m8yYueWy!tgp8}6j)zx9~W3(Z;v^f%h%W269v}S+lvI&*W0TE z*4Nv&3aqcUzay}|-hN16eZ9R+V12#)2f}%H|Ff3Y{~dw#_4KC#>+9*vmq^b&z1-8{dW1lHHn4-2fXr}qe~ucyBdSYJUuub6i1)ePM0f9>e9x;LEUm>tf;3|R31(pS# zCvcs>D+OLA@T~%`5cnGcHwye~fg=JxFK~;%#{_N{_!KCM^s`>zVu3dZJXhe20{aB+ z68L6;Hwk>7z?%jB4}rG`{Jg;30`C`ix4@qZ+#~SF^LY983Oq;PeF7&0)&%}Hfe#4$ zsK7@AwnzilFSJasB2r&pV7bk(zFu4*u)e-+6WCUT0IvJsh+kiSJ}Pik6#}?kg|p(B z#@?qHl>QD6rx@^g23%&q)dqZ(0s9U3W&_?}z`r!$#|-!d1Ag0pKQ-W-+=2BQXTTR5aFqcs zGT?dxUSq&_8}RoH_#p${VZd)0@W%#x#DMcL7obVg_IjEDpJ%{R4ft{ct~2132E4|A z69)Vp1Af$idknbOfWI{0Q?X9Wu3wP>PdDJ%27J8%hYa{m1O9;lKV-mL40xvj<9Ne> z^8Uqu4;gSi_95Bz8*jiD8E};WFE`*;1Kwc3KQiF|GT@gD_>ch)!@eWCycZhq3!2=*9f%lp*i!`cOj>GX3A_)-Iw4S1CS-@N%N^25=W0rJ-x=)YyaKQZ9n8Sw87_;myR z(15?NJlq*@XTqHYcQ)KPa5&B+O@On( zod-wr^96A9%%TWxI$Sy247f|-D&QP&GvQ{zT@H5zTos%XZZ;gv;WcnBxH)h#oEr|u zrlc$3JaDyewC`B}M|;Vu;1-U+9-uZ>O@ z7~WpuHSM795dWDMnJ1G~Enc7xnh{>S^B4M=_m`QF{_1^D&Kl17_3t`SWAo^n@yMKc z^vyi_W*&VLXDriar_7^o0UYr%kG^5bV220Iqi<1t{xy%jv9rMZblm?-N8e~kCr)bG zandpCq$ZtbGahHl9N5f0peWXNClS*yP)q|3#bv}~pMDf#_=!u;NoY=Xh7(Irk>s&g zc-fLM(uxCeUOx^YD}lO8yAekn?BGI@MjMHrTG{oNv^}e zJDgx=Cz)5`q#<$*1{alj8Nsx2E=q}b>-kw)uOodd&s)vU@ePzY&7Nin1=bEmZnv}Z zrK0?LMHZFuET`iPEgc4o4B)*~w%e*S zhC`@+I)72+>5i|JfYew(!I?}P2|cNdX=Pl7KODjdS4FqN1{}&p-Mlz$na-BZbNZWb z%ohhoo0`1o^SWI9uwZH*{yoW{ABUs`=^D`6lr~QIt{`U2?Vsx&C$;OW!=lE{HNzK+ z1y_aKbvQUmeI^j{;VTMnu))2|=Zi(l&=@%C33IYv>TL)|*WxIvHwXo=!={V9E>}4Y z`^E#YsC@>}g+noRcsmFotS{Tkb!x8?^#zrfd(ov{mm?Zzz*+o|KY)(OCAikOrqhXL z7t-;cn?B5xRJ+g@jAeQgwPL3kFwF|N&Rp&KZMTXrl;drFXEBu$_Y^c zd}2fb(uLJK43AD179Vr5^V%6Tv`TZUtHnucuR_)`{e+lHPlzc$A!f!2F^&^r?9&I7 z61IiqHD$`9MLY5%$xXW#z6X z3=o;69ek4j`_PN9G9GMFFsK={DXc+H&8m?YDwsuyVOumSkMfq0e4YVyy=o7JO?>v0 zSRJ`G-sY9ZtRIjMMK!->&zvS!saE*_T=@wS+#|$c%ki3HFmE zlu1*U#89TGQG7?F_(J$MwHlvuP35brsr;*rsZ#p8i4shi|A2>dSw9t@T}4Woeg1H) zf-gmA>2t|6d#NrvBRWi_9Q5Jk*FN_A+6SiMvFG4o^$pvgdH%D{ zbWgH!|KGR)`0q@Jer_WFdb`jC!zu=Q4sY&Enj&6vXL6Fy)=yj;i{0k`rJcz?zvDjf zJ2-P|gE`II+F)xkU>0AUeip!^|37YR%#BRe`uktLkzq@AzH=Mo6H~To6H8@tqeH*K fLCe^zrM$V($$I8@f_BV}4t}eG?|{B$qw{|NEmkxJ literal 0 HcmV?d00001 diff --git a/fimdlp/mdlp.py b/fimdlp/mdlp.py new file mode 100644 index 0000000..34584d0 --- /dev/null +++ b/fimdlp/mdlp.py @@ -0,0 +1,103 @@ +import numpy as np +from .cppfimdlp import CFImdlp +from sklearn.base import BaseEstimator, TransformerMixin +from sklearn.utils.multiclass import unique_labels +from sklearn.utils.validation import check_X_y, check_array, check_is_fitted + + +class FImdlp(TransformerMixin, BaseEstimator): + """Fayyad - Irani MDLP discretization algorithm. + + Parameters + ---------- + demo_param : str, default='demo' + A parameter used for demonstation of how to pass and store paramters. + + Attributes + ---------- + n_features_ : int + The number of features of the data passed to :meth:`fit`. + """ + + def __init__(self): + pass + + def _check_params_fit(self, X, y, expected_args, kwargs): + """Check the common parameters passed to fit""" + # Check that X and y have correct shape + X, y = check_X_y(X, y) + # Store the classes seen during fit + self.classes_ = unique_labels(y) + self.n_classes_ = self.classes_.shape[0] + # Default values + self.class_name_ = "class" + self.features_ = [f"feature_{i}" for i in range(X.shape[1])] + for key, value in kwargs.items(): + if key in expected_args: + setattr(self, f"{key}_", value) + else: + raise ValueError(f"Unexpected argument: {key}") + if len(self.features_) != X.shape[1]: + raise ValueError( + "Number of features does not match the number of columns in X" + ) + return X, y + + def fit(self, X, y, **kwargs): + """A reference implementation of a fitting function for a transformer. + Parameters + ---------- + X : {array-like, sparse matrix}, shape (n_samples, n_features) + The training input samples. + y : None + There is no need of a target in a transformer, yet the pipeline API + requires this parameter. + Returns + ------- + self : object + Returns self. + """ + X, y = self._check_params_fit( + X, y, expected_args=["class_name", "features"], kwargs=kwargs + ) + + self.n_features_ = X.shape[1] + self.X_ = X + self.y_ = y + self.discretizer_ = CFImdlp() + + return self + + def transform(self, X): + """Discretize X values. + Parameters + ---------- + X : {array-like}, shape (n_samples, n_features) + The input samples. + Returns + ------- + X_transformed : array, shape (n_samples, n_features) + The array containing the discretized values of ``X``. + """ + # Check is fit had been called + check_is_fitted(self, "n_features_") + + # Input validation + X = check_array(X) + if (X != self.X_).any(): + raise ValueError( + "X values are not the same as the ones used to fit the model." + ) + + # Check that the input is of the same shape as the one passed + # during fit. + if X.shape[1] != self.n_features_: + raise ValueError( + "Shape of input is different from what was seen" "in `fit`" + ) + print("Cut points for each feature in Iris dataset:") + for i in range(0, self.n_features_): + data = np.sort(X[:, i]) + Xcutpoints = self.discretizer_.cut_points(data, self.y_) + print(f"{self.features_[i]:20s}: {Xcutpoints}") + return X diff --git a/pyproject.toml b/pyproject.toml index a706dee..6617025 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,3 +37,21 @@ classifiers = [ [project.urls] Home = "https://github.com/doctorado-ml/FImdlp" + +[tool.black] +line-length = 79 +target_version = ['py38', 'py39', 'py310'] +include = '\.pyi?$' +exclude = ''' +/( + \.git + | \.hg + | \.mypy_cache + | \.tox + | \.venv + | _build + | buck-out + | build + | dist +)/ +''' diff --git a/sample.py b/sample.py index 43df59e..85f43d8 100644 --- a/sample.py +++ b/sample.py @@ -1,14 +1,9 @@ -import numpy as np from sklearn.datasets import load_iris -from fimdlp import CFImdlp +from fimdlp.mdlp import FImdlp data = load_iris() X = data.data y = data.target features = data.feature_names -test = CFImdlp() -print("Cut points for each feature in Iris dataset:") -for i in range(0, X.shape[1]): - data = np.sort(X[:, i]) - Xcutpoints = test.cut_points(data, y) - print(f"{features[i]:20s}: {Xcutpoints}") +test = FImdlp() +Xcutpoints = test.fit(X, y, features=features).transform(X) diff --git a/setup.py b/setup.py index 66d5539..6e9a25d 100644 --- a/setup.py +++ b/setup.py @@ -9,24 +9,10 @@ from setuptools import Extension, setup setup( ext_modules=[ Extension( - name="fimdlp", - sources=["fimdlp/cfimdlp.pyx", "fimdlp/FImdlp.cpp"], + name="cppfimdlp", + sources=["fimdlp/cfimdlp.pyx", "fimdlp/CPPFImdlp.cpp"], language="c++", include_dirs=["fimdlp"], ), ] ) - -# from Cython.Build import cythonize -# setup( -# ext_modules=cythonize( -# Extension( -# "fimdlp", -# sources=["fimdlp/cfimdlp.pyx", "fimdlp/FImdlp.cpp"], -# language="c++", -# include_dirs=["fimdlp"], -# ), -# include_path=["./fimdlp"], -# ) -# ) -