
    Гh0                        S r SSKJrJr  SSKJrJrJrJrJ	r	J
r
JrJrJr  SSKrSSKrSSKJr  SSKJr  SSKJr  SS	KJrJrJr  SS
KJr  SSKJr  S\R@                  S\RB                  4S jr"\" SS5      r#\#" SSSSSS5      r$S\
\\RB                        S\
\RB                     4S jr%S\\RL                     S\\RL                  \'\(/S4   SS4S jr) " S S\5      r*S\RL                  S\4S  jr+S!\\'\	\RB                     4   S"\
\,   S#\\'\4   S$\
\   S%\\'\4   S\4S& jr-S\\RL                     S'\
\\'      S"\
\,   S(\(S)\\'\4   S*\(S+\(S\\\
\   4   4S, jr.  S1S-\S!\S.\
\   S/\(S\RB                  4
S0 jjr/g)2z*Utilities for processing spark partitions.    )defaultdict
namedtuple)	AnyCallableDictIteratorListOptionalSequenceTupleUnionN)
csr_matrix   )	ArrayLikeconcat)DataIterDMatrixQuantileDMatrix)XGBModel   )
get_loggerseriesreturnc                 P    U R                  SS9n[        R                  " U5      nU$ )zStack a series of arrays.F)copy)to_numpynpstack)r   arrays     eC:\Users\julio\OneDrive\Documentos\Trabajo\Ideas Frescas\venv\Lib\site-packages\xgboost/spark/data.pystack_seriesr"      s$    OOO'EHHUOEL    Alias)datalabelweightmarginvalidqidvaluesr&   r'   
baseMarginvalidationIndicatorr*   seqc                 (    U (       a  [        U 5      $ g)z&Concatenate the data if it's not None.Nr   )r.   s    r!   concat_or_noner0      s    
c{r#   iteratorappendc                   ^ S[         R                  S[        SS4U4S jjnSnU  H  nUc  [        R                  UR
                  ;   nUSL a   [        R                  UR
                  ;   d   eU(       aJ  UR                  U[        R                     ) SS24   nUR                  U[        R                     SS24   nOUSpeU" US5        Uc  M  U" US5        M     g)zjExtract partitions from pyspark iterator. `append` is a user defined function for
accepting new partition.partis_validr   Nc                    > T" U [         R                  U5        T" U [         R                  U5        T" U [         R                  U5        T" U [         R                  U5        T" U [         R
                  U5        g )N)aliasr%   r&   r'   r(   r*   )r4   r5   r2   s     r!   	make_blob#cache_partitions.<locals>.make_blob+   sS    tUZZ*tU[[(+tU\\8,tU\\8,tUYY)r#   TF)pd	DataFrameboolr7   r)   columnsloc)r1   r2   r8   has_validationr4   trainr)   s    `     r!   cache_partitionsrA   %   s    * * * * &*N!"[[DLL8NT!;;$,,...HHd5;;//23EHHT%++.12E5%eT" r#   c                      ^  \ rS rSrSrS\\\4   S\\	   S\
SS4U 4S jjrS\\\R                        S\\R                     4S	 jrS
\S\4S jrSS jrSrU =r$ )PartIterE   z7Iterator for creating Quantile DMatrix from partitions.r%   	device_idkwargsr   Nc                 R   > SU l         X l        Xl        X0l        [        TU ]  SS9  g )Nr   T)release_data)_iter
_device_id_data_kwargssuper__init__)selfr%   rE   rF   	__class__s       r!   rN   PartIter.__init__H   s+     
#
d+r#   c                     U(       d  g U R                   bT  SS KnSS KnUR                  R                  R                  U R                   5        UR                  XR                     5      $ XR                     $ Nr   )rJ   cudfcupycudaruntime	setDevicer;   rI   )rO   r%   rT   cps       r!   _fetchPartIter._fetchR   sW    ??& GGOO%%doo6>>$zz"233JJr#   
input_datac                    U R                   [        U R                  [        R                     5      :X  a  gU" SU R                  U R                  [        R                     5      U R                  U R                  R                  [        R                  S 5      5      U R                  U R                  R                  [        R                  S 5      5      U R                  U R                  R                  [        R                  S 5      5      U R                  U R                  R                  [        R                  S 5      5      S.U R                  D6  U =R                   S-  sl         g)NFr%   r&   r'   base_marginr*   r   T )rI   lenrK   r7   r%   rZ   getr&   r'   r(   r*   rL   )rO   r\   s     r!   nextPartIter.nexta   s    ::TZZ

344 	
TZZ

34++djjnnU[[$?@;;tzz~~ellDABDJJNN5<<$FGDJJNN599d;<	
 ll	
 	

a
r#   c                     SU l         g rS   )rI   )rO   s    r!   resetPartIter.reseto   s	    
r#   )rK   rJ   rI   rL   )r   N)__name__
__module____qualname____firstlineno____doc__r   strr	   r
   intr   rN   r   r:   r;   rZ   r   r<   rc   rf   __static_attributes____classcell__)rP   s   @r!   rC   rC   E   s~    A,dO,08,IL,	, 8HR\\$:;  @V  x D  r#   rC   r4   c                 r   / S// p2nSn[        U R                  U R                  U R                  U R                  5       H  u  pVpxUS:X  a  [        U5      n	Un
UnO0[        U5      n	[        R                  " U	[        R                  S9n
UnUS:X  a  U	nXI:X  d   eUR                  U
5        UR                  US   [        U
5      -   5        UR                  U5        M     [        R                  " U5      n[        R                  " U5      n[        R                  " U5      n[        XU4[        U 5      U4S9$ )Nr   )dtype)shape)zipfeatureVectorTypefeatureVectorSizefeatureVectorIndicesfeatureVectorValuesrn   ra   r   arangeint32r2   r    concatenater   )r4   csr_indices_listcsr_indptr_listcsr_values_list
n_featuresvec_type	vec_size_vec_indices
vec_valuesvec_sizecsr_indices
csr_valuescsr_indptr_arrcsr_indices_arrcsr_values_arrs                  r!   )_read_csr_matrix_from_unwrapped_spark_vecr   s   s+   9;aS"J8;!!  	94[ q=9~H%K#J :H))HBHH=K#J?!J%%%,r2S5EEFz*598 XXo.Nnn%56O^^O4N	.9#d)ZAX r#   r%   dev_ordinalmetarefparamsc                     U (       d  [        [        R                  " S5      US9$ [        X40 UD6n[        U40 UDSU0D6nU$ )z+Handle empty partition for QuantileDMatrix.r   r   )r   r   )r   r   emptyrC   )r%   r   r   r   r   itms          r!   make_qdmr      sE     rxx/S99	$	,t	,B.f.#.AHr#   feature_colsuse_qdmrF   enable_sparse_data_optimhas_validation_colc           	        ^^^^^ [        [        5      m[        [        5      mSmS[        R                  S[        S[
        SS4UUUU4S jjnS[        R                  S[        S[
        SS4UUU4S jjnS	[        [        [        [        R                     4   S
[        [        [        4   S[        4S jn	U(       a  Un
ST;   a	  TS   S:X  d   eOUn
S[        [        [        [        4   [        [        [        [        [        [
        4   4   4   4U4S jjnU" 5       u  pTb!  U(       a  [!        X
5        [#        TX,SU5      nOWTb  U(       d  [!        X
5        U	" TT5      nO8Tc!  U(       a  [!        X
5        [#        TX,SU5      nO[!        X
5        U	" TT5      nU(       a(  U(       a  [#        TX,X5      nOU(       a	  U	" TT5      OSnOSnUb$  UR%                  5       UR%                  5       :X  d   eX4$ )a.  Create DMatrix from spark data partitions.

Parameters
----------
iterator :
    Pyspark partition iterator.
feature_cols:
    A sequence of feature names, used only when rapids plugin is enabled.
dev_ordinal:
    Device ordinal, used when GPU is enabled.
use_qdm :
    Whether QuantileDMatrix should be used instead of DMatrix.
kwargs :
    Metainfo for DMatrix.
enable_sparse_data_optim :
    Whether sparse data should be unwrapped
has_validation:
    Whether there's validation data.

Returns
-------
Training DMatrix and an optional validation DMatrix.
r   r4   namer5   r   Nc                   > U[         R                  :X  d  XR                  ;   a  U[         R                  :X  a  Tb  U T   R                  S   S:  a  U T   nO;X   R                  S   S:  a$  X   nU[         R                  :X  a  [	        U5      nOS nU[         R                  :X  a-  Ub*  TS:X  a  UR                  S   mTUR                  S   :X  d   eUc  g U(       a  TU   R                  U5        g TU   R                  U5        g g Nr   r   )r7   r%   r=   rt   r"   r2   )r4   r   r5   r    r   r   
train_data
valid_datas       r!   append_m0create_dmatrix_from_partitions.<locals>.append_m   s    5::!5

" ,&,,Q/!3.2<.@!!!$q(
5::%(/Euzz!e&7?!&QJ!U[[^333}4 ''.4 ''.5 "6r#   c                 J  > U[         R                  :X  d  XR                  ;   a  U[         R                  :X  a6  [        U 5      nTS:X  a  UR                  S   mTUR                  S   :X  d   eOX   nU(       a  TU   R                  U5        g TU   R                  U5        g g r   )r7   r%   r=   r   rt   r2   )r4   r   r5   r    r   r   r   s       r!   append_m_sparse7create_dmatrix_from_partitions.<locals>.append_m_sparse   s     5::!5uzz!A$G?!&QJ!U[[^333
4 ''.4 ''. "6r#   r+   rF   c           	      4   [        U 5      S:X  a;  [        S5      R                  S5        [        SS[        R
                  " S5      0UD6$ [        U [        R                     5      n[        U R                  [        R                  S 5      5      n[        U R                  [        R                  S 5      5      n[        U R                  [        R                  S 5      5      n[        U R                  [        R                  S 5      5      n[        SX#XEUS.UD6$ )Nr   XGBoostPySparkz_Detected an empty partition in the training data. Consider to enable repartition_random_shuffler%   r   r^   r`   )ra   r   warningr   r   r   r0   r7   r%   rb   r&   r'   r(   r*   )r+   rF   r%   r&   r'   r(   r*   s          r!   make,create_dmatrix_from_partitions.<locals>.make  s    v;!'(00.
 ; 0;F;;fUZZ01vzz%++t<=

5<< >?

5<< >?VZZ		489 
63
RX
 	
r#   missingg        c                  f   > Sn 0 n0 nTR                  5        H  u  p4X0;   a  XAU'   M  XBU'   M     X!4$ )N)max_binr   silentnthreadenable_categorical)items)non_data_keysnon_data_paramsr   kvrF   s        r!   split_params4create_dmatrix_from_partitions.<locals>.split_params  sH    

 LLNDA!%&"Q	 #
 $$r#   )r   listr:   r;   rm   r<   r   r	   r   ndarrayr   r   r   r   rn   floatrA   r   num_col)r1   r   r   r   rF   r   r   r   r   r   	append_fnr   r   r   dtraindvalidr   r   r   s    `  `           @@@r!   create_dmatrix_from_partitionsr      s   D /:$.?J.9$.?JJ/r|| /3 /$ /4 / /</bll /# / /$ / /"
T#tBJJ//0 
$sCx. 
W 
$  #	F"vi'8C'???'?	%%S#XS%UD@P:Q5Q0R RS %*  >LDG-":{$O		!'-j&)		'-*kvF-j&) (0Kv)F 2DT*f-F~~6>>#3333>r#   modelr_   strict_shapec           
          U R                  S5      n[        UUU R                  U R                  U R                  U R
                  U R                  S9nU R                  5       R                  USSUUS9$ )z4Predict contributions with data with the full model.N)r_   r   r   feature_typesfeature_weightsr   TF)pred_contribsvalidate_featuresiteration_ranger   )	_get_iteration_ranger   r   n_jobsr   r   r   get_boosterpredict)r   r%   r_   r   r   data_dmatrixs         r!   r   r   U  s     006O))-- 33L &&'! '  r#   )NF)0rl   collectionsr   r   typingr   r   r   r   r	   r
   r   r   r   numpyr   pandasr:   scipy.sparser   _typingr   compatr   corer   r   r   sklearnr   utilsr   Seriesr   r"   r$   r7   r0   r;   rm   r<   rA   rC   r   rn   r   r   r   r`   r#   r!   <module>r      s2   0 / X X X   #   5 5   rzz  	7QRh<9NPUV"**!56 8BJJ;O #r||$#.6c47PRV7V.W#	#@+x +\(BLL (Z (V
sD$$
%# sCx. 
'		
 cN er||$e 8C=)e #	e
 e cNe #e e 7HW%%&eV (,	
 )$ 	
 ZZr#   