
    Гhx                        S r SSKrSSKrSSKrSSKrSSKJr  SSKJr  SSK	J
r
JrJrJrJrJrJrJrJrJrJrJr  SSKJr  SSKrSSKrSSKJ	r  SSKJr  SS	KJr  S
SK J!r!J"r"  S
SK#J$r$J%r%  S
SK&J'r'J(r(  S
SK)J*r+  \
(       a  S
SK,J-r.  O\r.\R^                  " S5      r0\0Rc                  SSS9r2S\3S\3S\\\Rh                  \Rh                  4   SS4   4S jr5S\4S jr6S\4S jr7S\SS4S jr8\2Rr                  S\\Rh                  \Rh                  4   4S j5       r:\2Rr                  S\\Rh                  \Rh                  4   4S j5       r;\2Rr                  S\\Rh                  \Rh                  4   4S j5       r<\2Rr                  S\\Rh                  \Rh                  4   4S j5       r=\2Rr                  S\\.\Rh                  4   4S j5       r>\2Rr                  S \?S\\R                  \Rh                  \Rh                  \R                  \Rh                  \Rh                  \R                  \Rh                  \Rh                  4	   4S! j5       rA SSS"S#S$.S%\3S\3S&\3S'\BS(\BS)\3S\\\Rh                     \\Rh                     \\Rh                     4   4S* jjjrC\\R                  \R                  \R                     \R                  \R                     4   rF\ " S+ S,5      5       rG " S- S.\5      rH " S/ S05      rIS1\R                  \R                     S\\R                  \R                  \R                  4   4S2 jrJ STS3\R                  S4\R                  \R                     S5\R                  \R                     S6\KS\R                  \R                     4
S7 jjrMS8\\R                  \R                  \R                     \R                  \R                     4   S9\R                  \R                     S\G4S: jrNS;\HS\\G\\G   4   4S< jrOS3\R                  S4\R                  \R                     S5\R                  \R                     S=\R                  \R                     S>\R                  \R                     S\\R                  \R                  \R                     \R                  \R                     \R                  \R                     4   4S? jrQS@\SA\\!   SB\?SS4SC jrR\2Rr                  S\3S\3SD\KSE\BS\\\R                     \Rh                  4   4
SF j5       rSSG\3SH\3S\\?   4SI jrTSJSKS"S#\R                  SL.S\3S\3SM\3SN\BSD\KSO\KSP\BS)\3SQ\R                  R                  S\\'\Rh                  4   4SR jjrVg)UzUtilities for data generation.    N)ThreadPoolExecutor)	dataclass)TYPE_CHECKINGAnyCallableDict	GeneratorList
NamedTupleOptionalSetTupleTypeUnion)request)typing)r	   )sparse   )DMatrixQuantileDMatrix)is_pd_cat_dtypepandas_pyarrow_mapper)	ArrayLike	XGBRanker)train)	DataFramejoblibz
./cachedir)verbose	n_samples
n_featuresreturnc              #     #    [         R                  " S5      n[        R                  R	                  S5      nUR                  SSX-  S9R                  X5      n[        R                  [        R                  [        R                  [        R                  [        R                  [        R                  [        R                  [        R                  [        R                  [        R                   [        R"                  [        R$                  [        R&                  [        R(                  [        R*                  [        R,                  [        R.                  [        R0                  [        R2                  [        R4                  /nU H>  n[        R6                  " XFS9nXG4v   UR9                  5       UR9                  5       4v   M@     U H>  n[        R6                  " XFS9nUR;                  U5      nUR;                  U5      n	X4v   M@     UR=                  SSX-  S	9R                  X5      n[        R>                  [@        4 H  n[        R6                  " XFS9nXG4v   M     [        R>                  [@        4 H>  n[        R6                  " XFS9nUR;                  U5      nUR;                  U5      n	X4v   M@     g
7f)z*Enumerate all supported dtypes from numpy.pandas  r      lowhighsizedtype   g      ?r)   N)!pytestimportorskipnprandomRandomStaterandintreshapeint32int64byteshortintcint_longlonguint32uint64ubyteushortuintcuint	ulonglongfloat16float32float64halfsingledoublearraytolistr   binomialbool_bool)
r   r    pdrngorigdtypesr+   Xdf_origdfs
             gC:\Users\julio\OneDrive\Documentos\Trabajo\Ideas Frescas\venv\Lib\site-packages\xgboost/testing/data.py	np_dtypesrV   -   s     
		X	&B
))


%C;;13Y-C;DLLD 	






		
		

		













		
		)F, HHT'gkkmQXXZ'' 
 HHT',,t$\\!_k	  <<3Y%;<<DDD ((D!HHT'g " ((D!HHT',,t$\\!_k	 "s   J?Kc            	   #     #    [         R                  " S5      n U R                  5       U R                  5       U R	                  5       U R                  5       U R                  5       U R                  5       U R                  5       U R                  5       /n[        R                  nU R                  SSUS/SSUS/S.[        R                  S9n[        R                  SU R                  4 H,  nU H#  nU R                  SSUS/SSUS/S.US9nX54v   M%     M.     [        R                  nU R                  5       U R!                  5       /nU R                  S	S
US/SS
US	/S.[        R                  S9n[        R                  SU R                  4 Hs  nU Hj  nU R                  S	S
US/SS
US	/S.US9nX54v   US   nUS   n[#        XpR$                  5      (       d   e[#        X`R$                  5      (       d   eXg4v   Ml     Mu     UR'                  S5      n[        R                  SU R                  4 H1  nU R                  S	S
US/SS
US	/S.U R)                  5       S9nX54v   M3     SU R                  4 Hb  nSSUS/SSUS/S.nU R                  Xc  [        R*                  OU R-                  5       S9nU R                  XR-                  5       S9nX54v   Md     g7f)z/Enumerate all supported pandas extension types.r#   r,   r         f0f1r*   N      ?g       @g      @r[   categoryTF)r.   r/   
UInt8DtypeUInt16DtypeUInt32DtypeUInt64Dtype	Int8Dtype
Int16Dtype
Int32Dtype
Int64Dtyper0   nanr   rD   NAFloat32DtypeFloat64Dtype
isinstanceSeriesastypeCategoricalDtyperL   BooleanDtype)	rN   rQ   NullrP   r+   rT   ser_origserdatas	            rU   	pd_dtypesrt   h   s    			X	&B 	






	F %'FFD<<1dAq!T1o6bjj  D ruu%E1dAq!T1o>e  B (N	  & 66Doo!23F<<S$$S#tS,AB"**  D ruu%ES$,S#tS4IJRW  B (NDzHT(Cc99----h		2222-  & ;;z"Druu%\\dC(c40EF%%'  
 h & ruuUD$/tT47PQ||DLbooFW|X\\$oo&7\8h s   KKc            	   #     #    [         R                  " S5      n [         R                  " S5      n[        nSU R                  S4 H  nU H  nUR	                  S5      (       d  UR	                  S5      (       a  M1  U R                  U5      (       d  US:X  a  UO[        R                  nU R                  SSUS	/S
S	US/S.[        R                  S9nU R                  SSUS	/S
S	US/S.US9nXg4v   M     M     U R                  S4 Hi  nU R                  SSUS/SSUS/S.U R                  5       S9nU R                  SSUS/SSUS/S.U R                  UR                  5       5      S9nXg4v   Mk     g7f)z*Pandas DataFrame with pyarrow backed type.r#   pyarrowNr   rC   rM   r,   r   rX   rY   rZ   r*   FT)r.   r/   r   rh   
startswithisnar0   rg   r   rD   ro   
ArrowDtyperL   )rN   parQ   rp   r+   	orig_nullrP   rT   s           rU   pd_arrow_dtypesr|      s    			X	&B			Y	'B #F. ruua E	**e.>.>v.F.F$&GGDMMdaiRVVI<<1i+Aq)Q3GHjj   D
 1dAq!T1o>e  B (N  !" ||%t,UD$4MN//#  
 \\%t,UD$4MN--
+  
 h s   E3E5rO   c                 ~   U R                  SS9R                  SS5      nU R                  SS9n[        R                  US'   [        R
                  " [        SS9   [        X5        SSS5        [        R
                  " [        SS9   [        X5        SSS5        g! , (       d  f       N<= f! , (       d  f       g= f)	zValidate there's no inf in X.    r-      rY   )   r   zInput data contains `inf`matchN)	r1   r4   r0   infr.   raises
ValueErrorr   r   )rO   rR   ys      rU   	check_infr      s    


##Aq)A


AffAdG	z)D	E 
F 
z)D	E 
F	E 
F	E 
F	Es   BB.
B+.
B<c                  ~    [         R                  " S5      n U R                  5       nUR                  UR                  4$ )z2Fetch the California housing dataset from sklearn.sklearn.datasets)r.   r/   fetch_california_housingrs   targetdatasetsrs   s     rU   get_california_housingr      s6     ""#56H,,.D99dkk!!    c                  ~    [         R                  " S5      n U R                  5       nUR                  UR                  4$ )z&Fetch the digits dataset from sklearn.r   )r.   r/   load_digitsrs   r   r   s     rU   
get_digitsr      s6     ""#56H!D99dkk!!r   c                  L    [         R                  " S5      n U R                  SS9$ )z-Fetch the breast cancer dataset from sklearn.r   T)
return_X_y)r.   r/   load_breast_cancer)r   s    rU   
get_cancerr      s)     ""#56H&&$&77r   c                     [         R                  " S5      n [        R                  R	                  S5      nSnSnU R                  X!S9u  pEUR                  SX4R                  5      n[        UR                  S   5       HC  n[        UR                  S   5       H$  nXgU4   (       d  M  [        R                  XGU4'   M&     ME     XE4$ )zGenerate a sparse dataset.r      i  g      ?)random_stater,   r   )
r.   r/   r0   r1   r2   make_regressionrK   shaperangerg   )	r   rO   nsparsityrR   r   flagijs	            rU   
get_sparser     s     ""#56H
))


$CAH##A#8DA<<8WW-D1771:qwwqz"AqDzz&&Q$ #  4Kr   c                    ^^^ [         (       a  SSKmO[        R                  " S5      m[        R
                  R                  S5      mSmTR                  5       n S[        [        [        [        4   [        4   S[        STR                  4UUU4S	 jjnU" S
SSSSS.S5      U S'   U" SSSS.S5      U S'   U" SSSSS.S5      U S'   U" SSS S!S"S#S$S%.S5      U S&'   U" S'S(S)S!S*.S+5      U S,'   U" S-S(S.S/S0S"S1S2S3.S5      U S4'   U" S5S6S7S8S9S:.S;5      U S<'   U" S=S>S?S@S$SA.S5      U SB'   U" SCSDSS"SE.S5      U SF'   U" S@SGSGSH.SI5      U SJ'   SK[        SL[        S[        STR                  4UUU4SM jjnU" SNSOS5      U SP'   U" SQSRS5      U SS'   U" STSUS5      U SV'   U" SWSXS5      U SY'   U" SZS[S5      U S\'   U" S]S^S5      U S_'   U" S`SaS5      U Sb'   U" ScSdS5      U Se'   U" SfSgS5      U Sh'   U" SiSjS5      U Sk'   [        U R                  5      nTR                  U5        X   n [        R                   " T4Sl9nU R                   Hu  n[#        X   R$                  TR&                  5      (       a:  X@U   R(                  R*                  R-                  [        R.                  5      -  nMd  X@U   R0                  -  nMw     USmUR3                  5       -  -  nUSnUR5                  5       -
  -  nX4$ )oaI  Get a synthetic version of the amse housing dataset.

The real one can be obtained via:

.. code-block::

    from sklearn import datasets

    datasets.fetch_openml(data_id=42165, as_frame=True, return_X_y=True)

Number of samples: 1460
Number of features: 20
Number of categorical features: 10
Number of numerical features: 10
r   Nr#   r$   i  
name_probadensityr!   c           	        > [        T	SU-
  -  5      n[        R                  " SU-
  5      S:  =(       a    US:  nU(       a  SU-
  nX@[        R                  '   [	        U R                  5       5      n[	        U R                  5       5      nUS==   S[        R                  " U5      -
  -  ss'   TR                  UT	US9nT
R                  UT
R                  [        S U5      5      S9nU$ )	Nr,   r]   ư>r   )r)   pc                 "    [        U [        5      $ N)rk   str)xs    rU   <lambda>5get_ames_housing.<locals>.synth_cat.<locals>.<lambda>B  s    As!3r   r*   )intr0   absrg   listkeysvaluessumchoicerl   rn   filter)r   r   n_nullshas_nanr   r   r   r   seriesr   rN   rO   s            rU   	synth_cat#get_ames_housing.<locals>.synth_cat0  s     i1w;/0&&w'$.>7Q;W}H!)rvvJOO%&""$%	"rvvay JJt)qJ1%%3T:  
 r   gqu ]?gqh.?gsmB<?g5C(?goEb?)1Fam2fmConDuplexTwnhsTwnhsEr]   BldgTypegwD?g. ҥ?g)$;?)UnfRFnFing_9?GarageFinishgW歺?gbFx{?gbFx{?gQfL2rf?)CornerCulDSacFR2FR3	LotConfigg?g/ؗ?gf׽?g$A
?g5e?g() l?g[iF?)TypMin2Min1ModMaj1Maj2Sev
Functionalg M?g?gMq?)NoneBrkFaceStoneBrkCmng3f?
MasVnrTypeg3f?gI/j ?g,	PS˦?ge@?gQ~?gZ	%qv?)1Story2Storyz1.5FinSLvlSFoyerz1.5Unfz2.5Unfz2.5Fin
HouseStyleg$	P?gHp?gK$?gՐ?g4*p?)GdTAFaExPogE`o?FireplaceQugș&l??皙?g5e?gunڌ`?)r   r   r   r   r   	ExterCondgn0a?g{gUId?)r   r   r   r   	ExterQualg8 nV?)r   r   r   g(xߢs?PoolQClocstdc                   > TR                  XTS9n[        TSU-
  -  5      n[        R                  " SU-
  5      S:  a)  US:  a#  TR	                  TUSS9n[        R
                  X5'   TR                  U[        R                  S9$ )	Nr   scaler)   r,   r]   r   r   Fr)   replacer*   )normalr   r0   r   r   rg   rl   rE   )	r   r   r   r   r   null_idxr   rN   rO   s	         rU   	synth_num#get_ames_housing.<locals>.synth_num  sy    JJ3	J:i1w;/066#- 4'GaKzz)'5zIH&&AKyy"**y--r   gmtF@gOfK<Q=@	3SsnPorchgݹsΝ?g2Tf?
FireplacesgR u?gP$[r?BsmtHalfBathgvS?g_-?HalfBathgbĈ#F?g+?
GarageCarsg$[Q<@g"$#e?TotRmsAbvGrdg$[Q<{@g%Ǒ|@
BsmtFinSF1ge0OFG@g*Ӛ{7*d@
BsmtFinSF2gNڭ@gCk@	GrLivAreagg6.@gK@ScreenPorchr   g(e@g.A)r   r#   r.   r/   r0   r1   default_rngr   r   r   r   floatrl   r   columnsshufflezerosrk   r+   rn   catcodesrm   rE   r   r   mean)	rT   r   r   r   r   cr   rN   rO   s	         @@@rU   get_ames_housingr    s   " }  *
))


%CI	BsEz*E12=B	 . 	
 		BzN #(;WB~  		
 	B{O !	
 	B| !		
 	B| !		
 	B| "	
 		B}  	
 		B{O  		
 	B{O 	

 	BxL.u .5 .5 .RYY . .   24EsKB{O !24FLB|"#79LcRB~24FLBzN !35GMB|"#46H#NB~ !24EsKB| !24FLB| 13DcJB{O!"46H#NB}2::GKK	B 		|$AZZbekk2#6#677A''

33AAA	  	QUUW	$$A	affh	&&A5Lr   dpathc           	      h   [         R                  " S5      nSn[        R                  R	                  U S5      n[        R                  R                  U5      (       d  [        R                  " X#S9  [        R                  " US5       nUR                  U S9  SSS5        UR                  [        R                  R	                  U S5      [        R                  R	                  U S	5      [        R                  R	                  U S
5      4SSS9u	  nnnnn	n
nnnUUUUU	U
UUU4	$ ! , (       d  f       N= f)zFetch the mq2008 dataset.r   z>https://s3-us-west-2.amazonaws.com/xgboost-examples/MQ2008.zipz
MQ2008.zip)urlfilenamer)pathNzMQ2008/Fold1/train.txtzMQ2008/Fold1/test.txtzMQ2008/Fold1/vali.txtTF)query_id
zero_based)r.   r/   osr  joinexistsr   urlretrievezipfileZipFile
extractallload_svmlight_files)r  r   srcr   fx_trainy_train	qid_trainx_testy_testqid_testx_validy_valid	qid_valids                 rU   
get_mq2008r#    s    ""#56H
JCWW\\%.F77>>&!!5		%	%  
& 	$$GGLL 89GGLL 78GGLL 78	

  	% 	
 	
 
/ 
&	%s   D##
D1Fr$   )	vary_sizer   n_samples_per_batch	n_batchesuse_cupyr$  r   c                   / n/ n/ nU(       a4  SSK n	U	R                  R                  [        R                  " U5      5      n
O[        R                  R                  U5      n
[        U5       Hy  nU(       a  XS-  -   OU nU
R                  X5      nU
R                  U5      nU
R                  SSUS9nUR                  U5        UR                  U5        UR                  U5        M{     XgU4$ )zMake batches of dense data.r   N
   r,   r&   )	cupyr1   r2   r0   r=   r   randnuniformappend)r%  r    r&  r'  r$  r   rR   r   wr*  rO   r   r   _X_y_ws                   rU   make_batchesr2    s     	A
A
Akk%%bii&=>ii##L194='b&0CV	YYy-YYy![[QQY[7			  7Nr   c                   P   \ rS rSr% Sr\R                  \S'   \R                  \
R                     \S'   \R                  \
R                     \S'   \R                  \
R                     \S'   \R                  \
R                     \S'   \R                  \
R                     \S'   S	rg
)	ClickFoldi'  zCA structure containing information about generated user-click data.rR   r   qidscoreclickpos N)__name__
__module____qualname____firstlineno____doc__r   
csr_matrix__annotations__nptNDArrayr0   r5   rD   r6   __static_attributes__r9  r   rU   r4  r4  '  sp    M
{{288	RXX	;;rzz"";;rxx  	RXX	r   r4  c                   F    \ rS rSr% Sr\\S'   \\S'   \\S'   S\4S jr	Sr
g	)
	RelDataCVi3  zPSimple data struct for holding a train-test split of a learning to rank dataset.r   testmax_relr!   c                      U R                   S:H  $ )z6Whether the label consists of binary relevance degree.r,   )rG  )selfs    rU   	is_binaryRelDataCV.is_binary:  s    ||q  r   r9  N)r:  r;  r<  r=  r>  RelDatar@  r   rM   rJ  rC  r9  r   rU   rE  rE  3  s     ZN
ML!4 !r   rE  c                       \ rS rSrSrS\SS4S jrS\R                  \	R                     S\R                  \	R                     S\R                  \	R                     4S	 jrS
rg)PBMi?  a   Simulate click data with position bias model. There are other models available in
`ULTRA <https://github.com/ULTR-Community/ULTRA.git>`_ like the cascading model.

References
----------
Unbiased LambdaMART: An Unbiased Pairwise Learning-to-Rank Algorithm

etar!   Nc                     [         R                  " / SQ5      U l        [         R                  " / SQ5      n[         R                  " X!5      U l        g )N)r   g{Gz?Q?gp=
ף?r]   )
g(\?gQ?gQ?g(\?rQ  皙?g)\(?r   g{Gz?gQ?)r0   rI   
click_probpower	exam_prob)rI  rO  rU  s      rU   __init__PBM.__init__I  s6    ((#?@HHH
	 )1r   labelspositionc                    [         R                  " USS9n[         R                  " UR                  5      nSXS:  '   SX[	        U R
                  5      :  '   U R
                  U   n[         R                  " UR                  5      nUR                  UR                  :X  d   e[         R                  " USS9nSXUU R                  R                  :  '   U R                  U   n[         R                  R                  S5      nUR                  UR                  S   [         R                  S9n[         R                  " UR                  [         R                  S9nSXXC-  :  '   U$ )	zSample clicks for one query based on input relevance degree and position.

Parameters
----------

labels :
    relevance_degree

T)copyr   r   r$   )r)   r+   r*   r,   )r0   rI   r  r   lenrS  r)   rU  r1   r   rD   r5   )	rI  rX  rY  rS  rU  ranksrO   probclickss	            rU   sample_clicks_for_queryPBM.sample_clicks_for_queryR  s    &t,XXfll+
z13T__--.__V,
HHV\\*	}}+++-.0t~~***+NN5)	ii##D)zzv||AbjjzA(*RXX(N01i,,-r   )rS  rU  )r:  r;  r<  r=  r>  r   rV  rA  rB  r0   r5   r6   r`  rC  r9  r   rU   rN  rN  ?  s^    2E 2d 2!kk"((+!7:{{2887L!	RXX	!r   rN  r   c           
         [         R                  " U 5      n U R                  n[         R                  S[         R                  " [         R
                  " U SS U SS SS9) 5      S-   4   n[         R                  " [         R                  X!4   5      nX   n[         R                  " U[         R                  " U R                  /5      5      nXSU4$ )zrRun length encoding using numpy, modified from:
https://gist.github.com/nvictus/66627b580c13068589957d6ab0919e66

r   r,   Nr   T)	equal_nan)	r0   asarrayr)   r_flatnonzeroisclosediffr-  rI   )r   r   startslengthsr   indptrs         rU   rlencoderl  v  s    
 	

1A	AUU1bnnbjj12#2$&O%OPSTTTUFggbeeFI&'GYFYYvrxx12FF""r   rR   r   r5  sample_ratec                    [         R                  R                  S5      n[        U R                  S   U-  5      n[         R
                  " SU R                  S   [         R                  S9nUR                  U5        USU nX   nX   nX&   n	[         R                  " U	5      n
Xz   nX   nX   n	[        SSS9nUR                  XxU	S9  UR                  U 5      nU$ )	zWe use XGBoost to generate the initial score instead of SVMRank for
simplicity. Sample rate is set to 0.1 by default so that we can test with small
datasets.

r$   r   r*   Nz	rank:ndcghist)	objectivetree_method)r5  )r0   r1   r   r   r   aranger=   r  argsortr   fitpredict)rR   r   r5  rm  rO   r   indexX_trainr  r  
sorted_idxltrscoress                rU   init_rank_scorer{    s     ))


%CAGGAJ,-IIIa2995EKK*9EhGhG
I I&J!G!G%I
kv
>CGGG)G, [[^FMr   foldscores_foldc                     U u  p#nUR                   [        R                  :X  d   e[        R                  " U5      n[        R                  " UR
                  4[        R                  S9n[        R                  " UR
                  4[        R                  S9n[        SS9nU Hb  n	X:H  n
U
R                  U
R                  S   5      n
X   n[        R                  " U5      SSS2   nXU
'   X:   nUR                  X5      nXU
'   Md     UR                  S   UR                  S   :X  d   UR                  UR                  45       eUR                  S   UR                  S   :X  d   UR                  UR                  45       e[        X#XAXv5      $ )zSimulate clicks for one fold.r*   r]   )rO  r   Nr   )r+   r0   r5   uniqueemptyr)   r6   rN  r4   r   rs  r`  r4  )r|  r}  X_foldy_foldqid_foldqidsrY  r_  pbmqqid_maskquery_scoresquery_positionrelevance_degreesquery_clickss                  rU   simulate_one_foldr    sV   
  $FH>>RXX%%%99XDxxbhh7HXXv{{nBHH5F
#,C =##HNN1$56",L1$B$7+",223DU'x  <<?hnnQ//O&,,1OO/<<?fll1o-Kfll/KK-VXFMMr   cv_datac           	      z  ^^^^^^ [        [        U R                  U R                  5      5      u  pn[        R
                  " S/U Vs/ s H  oDR                  S   PM     sn-   5      n[        R                  " U5      n[        U5      S:X  d   e[        R                  " U5      n[        R                  " U5      n[        R                  " U5      n[        XgU5      n	[        SUR                  5       V
s/ s H  oXZS-
     XZ    PM     nn
/ / / / / / 4u  mmmmmm[        UR                  S-
  5       H  n
[        X   X*   X:   4X   5      nTR!                  UR"                  5        TR!                  UR$                  5        TR!                  UR&                  5        TR!                  UR(                  5        TR!                  UR*                  5        TR!                  UR,                  5        M     [        UR                  S-
  5       V
s/ s H  n
TU
   PM
     nn
[        S5       H!  n
X   X   :H  R/                  5       (       a  M!   e   [        T5      S:X  a'  [1        TS   TS   TS   TS   TS   TS   5      nSnX4$ UUUUUU4S j[        [        T5      5       5       u  pX4$ s  snf s  sn
f s  sn
f )z6Simulate click data using position biased model (PBM).r   rX   r,   r   Nc           
   3   h   >#    U  H'  n[        TU   TU   TU   TU   TU   TU   5      v   M)     g 7fr   )r4  ).0r   X_lstc_lstp_lstq_lsts_lsty_lsts     rU   	<genexpr>"simulate_clicks.<locals>.<genexpr>  sB      
& eAha%(E!HeAhaQQ&s   /2)r   zipr   rF  r0   rI   r   cumsumr\  r   vstackconcatenater{  r   r)   r  r-  rR   r   r5  r6  r7  r8  allr4  )r  rR   r   r5  vrk  X_fully_fullqid_fullscores_fullr   rz  r|  scores_check_1r   rF  r  r  r  r  r  r  s                   @@@@@@rU   simulate_clicksr    s]   S56IA# XXqc3AWWQZ334FYYvFv;%]]1F^^AF~~c"H "&(;K>CAv{{>ST>S&Q-&)4>SFT/12r2r2/E,E5%u6;;?# !$cf!5vyATVVTVVTXXTZZ TZZ TXX $ ).fkkAo(>?(>1eAh(>N?1X!VY.335555  5zQ%(E!HeAha%(ERSHU ;	
 
3u:&
 ;G 4 U @s   	J.
7J3J8r_  r8  c           
         [         R                  " U5      nX   n X5   nX%   nXE   n[        U5      u  n  n[        SUR                  5       GH  nXhS-
     n	Xh   n
X:  d   X45       e[         R
                  " X)U
 5      R                  S:X  d   X45       eXIU
 nUR                  5       S:X  d   UR                  5       5       eUR                  5       UR                  S-
  :  d9   UR                  5       UR                  U[         R
                  " X)U
 5      45       e[         R                  " U5      nX	U
 U   X	U
& X9U
 U   X9U
& XU
 U   XU
& X)U
 U   X)U
& GM     XX4nU$ )z,Sort data based on query index and position.r,   r   )r0   rs  rl  r   r)   r  minmax)rR   r   r5  r_  r8  rx  rk  _r   begend	query_posrs   s                rU   sort_ltr_samplesr    sz    CJ	AF
/C
/CC=LFAq1fkk"Umiy$3*$yyy&++q0<3*<0CL	}}!#4Y]]_4#}})..1"44 	
MMONNIIccl#	7
 	
4 ZZ	*
3Z
+c
 S/*533Z
+c
s|J/+ #. aDKr   DTypeDMatrixTdevicec                    [         R                  R                  5       nU " UR                  SSSS9R	                  [         R
                  5      R                  SS5      5      n[        US5      (       a  UR                  SS2S4   nO	USS2S4   nUnU" XEUS	9n[        R                  " [        S
S9   [        SUS.U5        SSS5        [        US5      (       Gd  U " UR                  5       R                  SS5      5      nX:H  R                  5       (       d   eUR                  R                   R"                  SL d   eUR                  R                   R$                  SL d   eUR'                  UR                  S	9  U " UR                  5       R                  SS5      5      nXR                  :H  R                  5       (       d   eUnUR)                  U5        UR                  5       n	UR)                  UR                  SUR*                  5      5        UR                  5       n
X:H  R                  5       (       d   eUR	                  [         R,                  5      nUR)                  U5        UR                  5       nX:H  R                  5       (       d   eUR                  SSSS5      n[        R                  " [        S
S9   UR)                  U5        SSS5        gg! , (       d  f       GN%= f! , (       d  f       g= f)zRun tests for base margin.r   r]   d   r-   2   r   ilocN)base_marginz.*base_margin.*r   ro  )rq  r  FTr,   r   )r0   r1   r   r   rm   rD   r4   hasattrr  r.   r   r   train_fnget_base_marginr  Tflagsc_contiguousf_contiguousset_infoset_base_marginr)   rE   )r  r  r  rO   rR   r   r  Xygotbm_colbm_rowbm_f64s               rU   run_base_margin_infor  (  s   
))


!CcjjCcj*11"**=EEb!LMAq&FF1a4LadGK	!K	0B	z);	<6:B? 
= 1fB&&(00Q78"''))))}}""//5888}}""//4777
.B&&(00B78}}$))++++ 
;'##%
;..q+2B2BCD##% %%'''' "((4
;'##% %%'''' ii1a+]]:-?@{+ A@7  
=	<< A@s   3K!K3!
K03
Lr   as_densec                   ^ ^^^ [        [        R                  S5      (       dN  [        R                  R                  S5      n[        R                  " T TST-
  USS9nUR                  SST S9nXV4$ [        [        R                  " 5       T5      mS[        S	[        R                  4UU UU4S
 jjn/ n[        TS9 n	[        T5       H#  n
UR                  U	R                  Xz5      5        M%     SSS5        / n/ nU H7  nUR                  5       u  pVUR                  U5        UR                  U5        M9     [!        U5      T:X  d   e[        R"                  " USS9n[        R$                  " U5      nUR'                  UR(                  S   UR(                  S   45      R*                  n[        R,                  " USS9nUR(                  S   T :X  d   eUR(                  S   T:X  d   eUR(                  S   T :X  d   eU(       aR  UR/                  5       nUR(                  S   T :X  d   eUR(                  S   T:X  d   e[        R0                  XS:H  '   X4$ X4$ ! , (       d  f       GNs= f)z|Make sparse matrix.

Parameters
----------

as_dense:

  Return the matrix as np.ndarray with missing values filled by NaN

r   r$   r]   csr)mr   r   r   format        r   t_idr!   c                   > [         R                  R                  SU -  5      nTT
-  nU T
S-
  :X  a  TX-  -
  nOUn[        R                  " T	UST-
  US9R	                  5       n[         R
                  " T	S45      n[        UR                  S   5       H]  nUR                  US-      UR                  U   -
  nUS:w  d  M-  XTS S 2U4   R                  5       UR                  T	S45      -  S-  -  nM_     XE4$ )Nr$   r,   r]   )r  r   r   r   r   rR  )
r0   r1   r   r   tocscr  r   r   rk  toarray)r  rO   thread_sizen_features_tlocrR   r   r   r)   r    r   	n_threadsr   s           rU   
random_csc*make_sparse_regression.<locals>.random_cscu  s    ii##D4K0 I-9q= (4+==O)OMM(N	

 %' 	
 HHi^$qwwqz"A88AE?QXXa[0Dqyq!tW__&YN)CCcII #
 tr   )max_workersN)r  r   r,   )axis)r  r0   r1   r2   r   r   r  multiprocessing	cpu_countr   
csc_matrixr   r   r-  submitresultr\  hstackrd  r4   r   r  r   r  rg   )r   r    r   r  rO   rR   r   r  futuresexecutorr   	X_results	y_resultsr  r  arrr  s   ```             @rU   make_sparse_regressionr  W  s#    299m,,ii##D)MM(N
 JJ3c	J:t O--/<I !2!2  . G			2hy!ANN8??:9: " 
3 IIxxz 
 y>Y&&&#]]9UCC


9A			1771:qwwqz*+--A
qqA99Q<9$$$99Q<:%%%771:"""kkmyy|y(((yy|z)))1Hv6M; 
3	2s   =3I
I+	n_stringsseedc                 >   Sn[        5       n[        R                  R                  U5      n[	        U5      U :  aY  SR                  UR                  [        [        R                  5      USS95      nUR                  U5        [	        U5      U :  a  MY  [        U5      $ )zGenerate n unique strings.r    Tr   )setr0   r1   r   r\  r  r   r   stringascii_lettersadd)r  r  name_lenunique_stringsrO   
random_strs         rU   unique_random_stringsr    s    H"uN
))


%C
n
	
)WWJJtF001$JO

 	:&	 n
	
) r   r  r]   )r   	cat_ratior  r   	cat_dtypen_categoriesonehotr  r  r  c          	      @   [         R                  " S5      n	[        R                  R	                  U5      n
U	R                  5       n[        U5       GH&  nU
R                  SUSS9S   nUS:X  a  [        R                  " U[        R                  5      (       a0  [        R                  " [        X,5      5      nU
R                  XSS9nO'[        R                  " SU5      nU
R                  SX S9nU	R                  USS	9U[!        U5      '   U[!        U5         R"                  R%                  U5      U[!        U5      '   M  U
R                  SX S9nU	R                  UUR&                  S	9U[!        U5      '   GM)     [        R(                  " U 4S
9nUR*                   HQ  n[-        UU   R&                  U	R.                  5      (       a  UUU   R"                  R0                  -  nMI  UUU   -  nMS     US-  nUS:  a  [        U5       H  nU
R                  SU S-
  [3        X-  5      S9n[        R4                  UR6                  UU4'   [9        UR:                  R6                  U   5      (       d  Mj  U[        R<                  " UR:                  R6                  U   R>                  5      R@                  :X  a  M   e   URB                  S   U:X  d   eU(       a  U	RE                  U5      nU(       a+  [G        UR*                  5      nU
RI                  U5        UU   nUU4$ )a  Generate categorical features for test.

Parameters
----------
n_categories:
    Number of categories for categorical features.
onehot:
    Should we apply one-hot encoding to the data?
sparsity:
    The ratio of the amount of missing values over the number of all entries.
cat_ratio:
    The ratio of features that are categorical.
shuffle:
    Whether we should shuffle the columns.
cat_dtype :
    The dtype for categorical features, might be string or numeric.

Returns
-------
X, y
r#   r,   r-   r   Tr   r&   r^   r*   r   r  )%r.   r/   r0   r1   r2   r   r   rK   
issubdtypestr_rI   r  r   rr  r3   rl   r   r  set_categoriesr+   r  r   rk   rn   r  r   rg   r  r   rQ   r  
categoriesr)   r   get_dummiesr   r  )r   r    r  r  r   r  r  r   r  rN   rO   rT   r   r   r  r  numlabelcolrv  r   s                        rU   make_categoricalr    s   B 
		X	&B
))


-C	B:a3A6Q;}}Y00XX&;L&LM
JJz4JHYYq,7
KKALKI1J7Bs1vJCF66zBBs1vJ++!,+GC3cii8Bs1vJ   HHI<(EzzbgmmR%8%899RW[[&&&ERWE	 
 
QJE#~z"AKKIMI4H0I   E !#BGGE1Hryy~~a011#ryy1B1M1M'N'S'SSSS # 88A;*$$$^^Brzz"G[u9r   )F)r   )Wr>  r  r  r  r  concurrent.futuresr   dataclassesr   r   r   r   r   r   r	   r
   r   r   r   r   r   r   urllibr   numpyr0   r.   rA  numpy.randomRNGscipyr   corer   r   rs   r   r   sklearnr   r   trainingr   r  compatr   
DataFrameTr/   r   Memorymemoryr   ndarrayrV   rt   r|   r   cacher   r   r   r   r  r   r?  r#  rM   r2  rB  r5   rL  r4  rE  rN  rl  r   rD   r{  r  r  r6   r  r  r  r  	DTypeLiker  r9  r   rU   <module>r     sP   $  	   1 !        )  + 9 * (0J			X	&	|Q	/88 #8uRZZ+,dD898v<9 <~7 7t
3 
4 
 "bjj"**&< = " " "E"**bjj01 " " 8E"**bjj01 8 8 E"**bjj01    s%
BJJ 67 s sl 55

JJJJ
JJJJ
JJJJ
5 5x 	   	   4

T"**-tBJJ/??@< !!3;;rxx#8#++bhh:OO
P   	!
 	!4 4n#BHH% #%S[[#++0U*V #& 	!!
{{288! 
RXX	! 	!
 	[[!HN
!!3;;rxx#8#++bhh:OO
PNRZZ(N NF(Y (5HY<O1O+P (V..
{{288. 
RXX	. KK!	.
 
RXX	. 
KKKKKK.b+, +,DM +,3 +,SW +,^ RR #R/4R@DR
5""#RZZ/0R Rj S    S	  , %'XXPPP P
 P P P P P yy""P 9bjj !Pr   