
    Гh,                        S r SSKrSSKJr  SSKJrJrJrJrJ	r	J
r
JrJrJrJr  SSKrSSKrSSKrSSKrSSKJr  SSKJr  SSKJr  SS	KJrJr  SS
KJ r J!r!J"r"  SSK#J$r$  \RJ                  " S5      r&\\\'\4      r(/ SQr) " S S\ 5      r*\S\RV                  S\RX                  S\
\RV                  \'4   4S j5       r-\S\RV                  SSS\
\RV                  S4   4S j5       r-S\RV                  S\	\RX                     S\
\RV                  \	\'   4   4S jr-S\'S-  S\RV                  S\RX                  S\RX                  S\	\RX                     S\	\RX                     S\
\RV                  \RX                  \RX                  \	\RX                     \	\RX                     4   4S jr.S\\   S\\'\\   4   4S jr/S\(S\\'\\   4   4S jr0S \	\(   S\14S! jr2S"\1S\Rf                  4S# jr4SS$.S%\	\   S&\	\\\\   4      S'\	\   S(\5S)\6S \	\(   S*\6S+\1S,\	\6   S-\	\!   S\"4S. jjr7S%\	\   S&\	\\\\   4      S'\	\   S(\5S)\6S+\1S \	\(   S\!4S/ jr8g)0z)Copyright 2019-2025, XGBoost contributors    N)Sequence)
AnyCallableDictListOptionalTupleTypeVarUnioncastoverload)	dataframe   )
collective)FeatureNames)concatimport_cupy)DataIterDMatrixQuantileDMatrix)
is_on_cudaz[xgboost.dask])labelweightbase_marginqidlabel_lower_boundlabel_upper_boundc                      ^  \ rS rSrSr   SS\\   S\\   S\\	\\\   4      S\\   S\\\      S	S4U 4S
 jjjr
S\S	\\   4S jrS	\4S jrSS jrS\S	\4S jrSrU =r$ )DaskPartitionIter.   z.A data iterator for the `DaskQuantileDMatrix`.Ndatafeature_namesfeature_typesfeature_weightskwargsreturnc           	      L  > [         [        S 5      4nXl        [         H;  n[	        XUR                  US 5      5        [        [        X5      U5      (       a  M;   e   X l        X0l	        X@l
        [        U R                  [         5      (       d   eSU l        [        TU ]5  SS9  g )Nr   T)release_data)r   type_datametasetattrget
isinstancegetattr_feature_names_feature_types_feature_weights_itersuper__init__)	selfr!   r"   r#   r$   r%   typesk	__class__s	           dC:\Users\julio\OneDrive\Documentos\Trabajo\Ideas Frescas\venv\Lib\site-packages\xgboost/dask/data.pyr5   DaskPartitionIter.__init__1   s     4:&
ADVZZ401gd.6666 
 ,+ /$**h////
d+    attrc                 L    [        X5      b  [        X5      U R                     $ g N)r/   r3   )r6   r=   s     r:   _getDaskPartitionIter._getJ   s$    4*4&tzz22r<   c                 4    U R                   U R                     $ )z5Utility function for obtaining current batch of data.)r*   r3   r6   s    r:   r!   DaskPartitionIter.dataO   s    zz$**%%r<   c                     SU l         g)zReset the iteratorr   N)r3   rC   s    r:   resetDaskPartitionIter.resetS   s	    
r<   
input_datac           	      >   U R                   [        U R                  5      :X  a  g[         Vs0 s H  o"U R	                  U5      _M     nnU" SU R                  5       SU R                  U R                  U R                  S.UD6  U =R                   S-  sl         gs  snf )zYield next batch of dataFN)r!   groupr"   r#   r$      T )	r3   lenr*   r+   r@   r!   r0   r1   r2   )r6   rH   r8   r%   s       r:   nextDaskPartitionIter.nextW   s    ::TZZ(+/04aTYYq\/40 	
---- 11	
 	
 	

a
 1s   B)r*   r0   r1   r2   r3   )NNN)r&   N)__name__
__module____qualname____firstlineno____doc__r   r   r   r   r   r5   strr@   r!   rF   r   boolrN   __static_attributes____classcell__)r9   s   @r:   r   r   .   s    8
 159=)-,3i,  -,  c49n 56	,
 "#, 49%, 
, ,2 # 
&c &x D  r<   r   dfcolr&   c                     g r?   rL   rY   rZ   s     r:   _add_columnr]   j   s    ORr<   c                     g r?   rL   r\   s     r:   r]   r]   n   s    KNr<   c                     Uc  X4$ SnUR                    SU 3nX0R                  ;   a'  US-  nUR                    SU 3nX0R                  ;   a  M'  U R                  " S0 X10D6n X4$ )Nr   _rK   rL   )namecolumnsassign)rY   rZ   trailsuids       r:   r]   r]   r   sx     {wFXXJax
 C

!
!F8$ 
 
	 cZ	 B7Nr<   devicer   ysample_weightr   c                   ^ [        TU5      u  mn[        TU5      u  mn[        TU5      u  mn[        TU5      u  mn	U b  U S:X  a  SOSn
[        R                  R                  SU
05         TR	                  5       mTU   R                  S5      R                  R                  5       R                  R                  TU'   TR                  US9mTR                  U5      U   R                  5       nUR                  R                  5       R                  R                  5       n[!        U5      n[#        XS   S	-   /-   5      nTR%                  US
US9R	                  5       mSSS5        TU   nTU   nU4S jX4 5       u  pEXgX4 Vs/ s H	  oc  M  UPM     nnTR'                  US	S9R	                  5       mTX#XE4$ ! , (       d  f       Nb= fs  snf )zA function to prevent query group from being scattered to different
workers. Please see the tutorial in the document for the implication for not having
partition boundary based on query groups.

Ncpup2ptaskszdataframe.shuffle.methodcategory)byrK   F)drop	divisionsc              3   f   >#    U  H&  ob  [        [        R                  TU   5      OS v   M(     g 7fr?   )r   ddSeries).0re   rY   s     r:   	<genexpr>!no_group_split.<locals>.<genexpr>   s*      "IX#ORYY3 =s   .1)axis)r]   daskconfigsetpersistastypecatas_knowncodessort_valuesgroupbycountindexcomputevaluestolistsortedtuple	set_indexrp   )rf   rY   r   rg   rh   r   qid_uidy_uidw_uidbm_uidshufflecntdivre   uidss    `             r:   no_group_splitr      s     b#&KBB"IBB.IBR-JB ~5egG	4g>	?ZZ\k((488AACGGMM7^^w^'jj!'*002ii!((//1SkCr7Q;-'(\\  
 ')	 	 
@" W+C
5	A"JO"M $E:N:CC:DN	A		&	&	(Bs}113 
@	?. Os   ,C5F=GG=
Gr%   c                    ^ ^^ T R                  S5      nUc   e[        U5      n[        US   5      (       a  SSKJm  OSSKJm  S[        S[        [        [        4   4U 4S jjmS[        S[        R                  4UU4S jjn[        U5       Vs/ s H
  oC" U5      PM     nn[        U5      nUR                  R                  (       a  T $ [        R!                  S	["        R$                  " 5       U5        [        U5      n[        U5      (       a&  ['        5       nUR)                  UR                  5      n	O [*        R(                  " UR                  5      n	UR,                  U	SS24   n[/        US
5      (       a  UR,                  U	SS24   nOXySS24   nT R1                  SU/05        [3        UR4                  5       H#  u  pJU
T ;   d   eT R1                  XU
   /05        M%     T $ s  snf )z>Sort worker-local data by query ID for learning to rank tasks.r!   Nr   )	DataFrameir&   c           
        >^  S[         [        [              S[         [           4U 4S jjn[         Vs0 s H  o"U" TR                  US5      5      _M     nnUR                  5        VVs0 s H  u  pEUc  M
  XE_M     nnnU$ s  snf s  snnf )zDReturn a dictionary containing all the meta info and all partitions.r=   r&   c                    > U b  U T   $ g r?   rL   )r=   r   s    r:   r@   0sort_data_by_qid.<locals>.get_dict.<locals>._get   s    Awr<   N)r   r   r   listr+   r-   items)r   r@   ra   data_optr8   vr!   r%   s   `      r:   get_dict"sort_data_by_qid.<locals>.get_dict   s    	xS	* 	x~ 	
 DHH44$vzz$5664H!)!1C!1Q!1C ICs   "B+	B8Bc                 $   > T" U 5      nT" U5      $ r?   rL   )r   r!   r   r   s     r:   map_fn sort_data_by_qid.<locals>.map_fn   s    {r<   a  [r%d]: Sorting data with %d partitions for ranking. This is a costly operation and will increase the memory usage significantly. To avoid this warning, sort the data based on qid before passing it into XGBoost. Alternatively, you can use set the `allow_group_split` to False.iloc)r-   rM   r   cudfr   pandasintr   rU   r   pdranger   r   is_monotonic_increasingLOGGERwarningcollget_rankr   argsortnpr   hasattrupdate	enumeraterb   )r%   
data_partsn_partsr   r   
meta_partsdfqdfxcp
sorted_idxcr   r   s   `          @@r:   sort_data_by_qidr      s   F#J!!!*oG*Q-  "$C DdO # ",,   &+7^4^&)^J4

C
ww&&
NN	T 	" 
C#]ZZ(
ZZ(

((:q=
!CsFhhz1}%a- 
MM6C5/"#++&F{{qq6(m$ ' MS 5s   G*list_of_partsc                   ^ ^ [        T [        5      (       d   e0 mS[        S[        SS 4U U4S jjn[	        T 5       H#  u  p#U" US5        [
         H  nU" X$5        M     M%     TR                  SS 5      nUb  [        S0 TD6mT$ )Nr   ra   r&   c                 t   > UTU    ;   a	  TU    U   nOS nUb   UT;  a  / TU'   TU   R                  U5        g g r?   )append)r   ra   partr   results      r:   r   !_get_worker_parts.<locals>.append  sR    =## #D)DD6!!t4L% r<   r!   r   rL   )r.   r   r   rU   r   r+   r-   r   )r   r   r   r`   r8   r   r   s   `     @r:   _get_worker_partsr      s    mT****#%F&# &S &T & & -(q&A1L  )
 **UD
!C
!+F+Mr<   partsc           	         U b  [        U S   R                  S5      5      nOSn[        [        R                  " [
        R                  " U/[
        R                  S9[        R                  R                  5      S   5      nU$ )Nr   r!   F)dtype)
r   r-   rV   r   	allreducer   arrayint32OpMAX)r   is_cudas     r:   _get_is_cudar     s]    U1X\\&124>>"((G9BHH"Etww{{STUVWGNr<   r   c                 z    U (       a  [        5       nUR                  S5      nU$ [        R                  " S5      nU$ )N)r   r   )r   emptyr   )r   r   r   s      r:   _make_emptyr      s6    ]  L  Lr<   )refr"   r#   r$   missingnthreadmax_binenable_categoricalmax_quantile_batchesr   c        
   
      
   [         R                  " 5       n
[        U5      nUc8  [        R	                  SU
R
                  5        [        [        U5      U UUU	UUS9$ [        S0 [        U5      DUU US.D6n[        UUUUU	UUS9$ )NzWorker %s has an empty DMatrix.)r"   r#   r   r   r   r   )r#   r"   r$   )r   r   r   r   r   r   rL   )
distributed
get_workerr   r   r   addressr   r   r   r   )r"   r#   r$   r   r   r   r   r   r   r   workerr   its                r:   _create_quantile_dmatrixr   )  s     ##%F5!G}8&..I ''1!5
 	
 
 

E
"
##'	
B 
-1 r<   c                    [         R                  " 5       nUn[        U5      n	Uc<  SUR                   S3n
[        R                  U
5        [        [        U	5      U UUS9nU$ [        S5      nS[        [        U      S[        U   4S jn[        U5      n0 nUR                  5        H  u  nnU" U5      nUUU'   M     [        S	0 UDUU UUUUS.D6nU$ )
zTGet data that local to worker from DaskDMatrix.

Returns
-------
A DMatrix object.

zWorker z has an empty DMatrix.)r"   r#   r   Tr!   r&   c                 H    [        S U  5       5      (       a  g [        U 5      $ )Nc              3   (   #    U  H  oS L v   M
     g 7fr?   rL   )ru   r   s     r:   rv   :_create_dmatrix.<locals>.concat_or_none.<locals>.<genexpr>x  s     -t|s   )anyr   )r!   s    r:   concat_or_none'_create_dmatrix.<locals>.concat_or_nonew  s     ----d|r<   )r   r"   r#   r   r   r$   rL   )r   r   r   r   r   r   r   r   r
   r   r   r   r   )r"   r#   r$   r   r   r   r   r   r   r   msgXyr   r   unzipped_dictconcated_dictkeyvaluer   s                      r:   _create_dmatrixr   U  s    " ##%FM5!G''=>s ''1	
 	AXhqk2 x{ 
 &m4M$&M#))+
U5!c , 
 


##-'
B Ir<   )9rT   loggingcollections.abcr   typingr   r   r   r   r   r	   r
   r   r   r   ry   r   numpyr   r   r   r   rs    r   r   _typingr   compatr   r   corer   r   r   r!   r   	getLoggerr   rU   
_DataPartsr+   r   r   rt   r]   r   r   r   rV   r   ndarrayr   floatr   r   r   rL   r<   r:   <module>r      s>   /  $         ! " ( 5 5 			+	,$sCx.!
9 9x 
 RBLL Rryy RU2<<;L5M R 
 R 
 NBLL Nt NbllD6H0I N 
 N
#BII.
2<<#&' 02$J02
02 
02 
yy	02
 BII&02 "))$02 LL"))RYY(;Xbii=PP02fEtCy ET#tCy.-A EPZ Dd3i4H 2,   "** ( ")L)) E#tCy.12) c]	)
 ) ) J) ) ) #3-) 
'	) )X6L)6 E#tCy.126 c]	6
 6 6 6 J6 6r<   