
    'h6j                      S r SSKJr  SSKrSSKrSSKrSSKJr  SSKJ	r	J
r
JrJrJr  SSKJrJr  SSKJr  SSKJrJrJrJrJrJrJrJrJrJr  SSKJr   SS	K!J"r#  SS
K$J%r%J&r&J'r'  SSK(J)r)J*r*J+r+J,r,J-r-J.r.J/r/J0r0  SSK1J2r2  SSK3J4r4J5r5J6r6  SSK7J8r8  SSK9J:r:  SSK;J<r<J=r=  SSK>J?r?  SSK@JArAJBrB  SSKCJDrDJErEJFrFJGrGJHrHJIrIJJrJJKrK  SSKLJMrMJNrNJOrO  SSKPJQrQ  SSKRJSrSJTrTJUrU  SSKVJWrW  SSKXJYrYJZrZJ[r[J\r\J]r]J^r^J_r_J`r`JaraJbrbJcrcJdrdJere  SSKfJgrg  SSKhJiriJjrjJkrkJlrlJmrmJnrnJoroJprpJqrqJrrrJsrsJtrt  SSKhJurv  SSKhJwrx  SSKhJyrz  SSK{J|r|J}r}J~r~JrJr  SSKJrJr  SS KJr  SS!KJr  SS"KJrJr  \GR                  " \5         SS#KJr  SS$KJr  SS%KJr  SSS5        \(       Ga  SSKrSS&KJrJrJr  SS'KJr  SS(KJr  SS)KJr  SSKrSSKrSSKJr  SSKrSS*KrJr  SS+KJr  SS,KJr  SS-K!JrJrJrJr  SS.K$JrJrJrJrJrJrJrJrJrJrJrJrJrJrJrJrJrJrJrJrJrJrJrJrJrJrJrJrJrJrJrJrJrJrJrJrJrJrJrJrJrJrJrJrJr  SS/KCJr  SS0KJr  SS1KJr  SS2KJr  \GR                  S3:  a	  SS4KJrJr  OSS4KJrJr  \GR                  S5:  a  SS6KJ5r5  OSS6KJ5r5  \" S75      r\" S85      r " S9 S:5      rS<S=S; jjrg! , (       d  f       GN?= f)>z4Module containing logic related to eager DataFrames.    )annotationsN)defaultdict)	GeneratorIterableMappingSequenceSized)BytesIOStringIO)Path)
IOTYPE_CHECKINGAnyCallableClassVarNoReturnTypeVarcastget_argsoverload)	functions)DbWriteModeJaxExportTypeTorchExportType)arrow_to_pydfdataframe_to_pydfdict_to_pydfiterable_to_pydfnumpy_to_pydfpandas_to_pydfsequence_to_pydfseries_to_pydf)parse_as_duration_string)deprecate_renamed_parameter
deprecatedissue_deprecation_warningget_df_item_by_key)parse_into_expression)is_pycapsulepycapsule_to_frame)serialize_polars_object)issue_unstable_warningunstable)is_bool_sequence
no_defaultnormalize_filepathparse_versionqualified_type_namerequire_same_typescale_byteswarn_null_comparison)	wrap_exprwrap_ldfwrap_s)NotebookFormatter)DynamicGroupByGroupByRollingGroupBy)DataFramePlot)N_INFER_DEFAULTBooleanFloat32Float64Int32Int64NullObjectStringStructUInt16UInt32UInt64)INTEGER_DTYPES)_ALTAIR_AVAILABLE_GREAT_TABLES_AVAILABLE_PANDAS_AVAILABLE_PYARROW_AVAILABLE_check_for_numpy_check_for_pandas_check_for_pyarrow_check_for_torchaltairgreat_tablesimport_optionaltorch)numpy)pandas)pyarrow)ColumnNotFoundErrorInvalidOperationErrorModuleUpgradeRequiredErrorNoRowsReturnedErrorTooManyRowsReturnedError)collit)CompatLevel)Schema)_expand_selector_dicts_expand_selectors)PyDataFrame)dtype_str_repr)write_clipboard_string)
CollectionIteratorr   )	timedelta)IOBase)Literal)GT)Workbook)	Worksheet)DataTypeExpr	LazyFrameSeries)-AsofJoinStrategyAvroCompressionClosedIntervalColumnFormatDictColumnNameOrSelectorColumnTotalsDefinitionColumnWidthsDefinitionComparisonOperatorConditionalFormatDictConnectionOrCursorCsvQuoteStyleDbWriteEngine
EngineTypeFillNullStrategyFrameInitTypes
IndexOrderIntoExprIntoExprColumnIpcCompressionJoinStrategyJoinValidationLabelMaintainOrderJoinMultiColSelectorMultiIndexSelectorOneOrMoreDataTypesOrientationParquetCompressionParquetMetadataPartitioningSchemePivotAggPolarsDataTypePythonDataTypeQuantileMethodRowTotalsDefinitionSchemaDefinition
SchemaDictSelectorTypeSerializationFormatSingleColSelectorSingleIndexSelectorSizeUnitStartByUniqueKeepStrategyUnstackDirection)	NoDefaultPolarsDataFrame)CredentialProviderFunctionPolarsDataset)   
   )Concatenate	ParamSpec)r      )r%   TPc                     \ rS rSr% SrS\S'   SS1rS\S'     GSgS	S
S	\SS.               GShS jjjr\	SS.     GSiS jj5       r
\	GSjS j5       r\	 GSkS	S
S.         GSlS jjj5       r\	 GSkS	S
S
SS.             GSmS jjj5       rGSnS jr\	GSoS j5       r\\" 5       GSpS j5       5       r\\" 5       GSqS j5       5       r\GSrS j5       r\GSsS j5       r\GSsS j5       r\GStS j5       r\R0                  GSuS j5       r\GSvS  j5       r\GSwS! j5       r\GSxS" j5       r GSg     GSyS# jjr  GSz     GS{S$ jjrGS|S% jr      GS}S& jr      GS|S' jr GS~S( jr!        GSS) jr"GSS* jr#GSS+ jr$GSS, jr%GSS- jr&GSS. jr'GSS/ jr(GSS0 jr)GSS1 jr*GSS2 jr+GSS3 jr,GSS4 jr-GSS5 jr.GSS6 jr/    GSS7 jr0    GSS8 jr1GSS9 jr2GSS: jr3GSS; jr4GSS< jr5GSS= jr6GSS> jr7GSS? jr8\9    GSS@ j5       r:\9    GSSA j5       r:\9    GSSB j5       r:    GSSC jr:      GSSD jr;GSsSE jr<GSSF jr=GSkGSSG jjr>GStSH jr?GSkGSSI jjr@SSJ.GSSK jjrAGSxSL jrBGSgGSSM jjrC\D" SNSOSPSQ9S	SR.GSSS jj5       rE\9STSU.GSSV jj5       rF\9GSSW j5       rF\9    GSSX j5       rFS
SU.   GSSY jjrFGSSZ jrGS[SS
SS	S\.           GSS] jjrH\9 GSSTSTSTSTSTS^.             GSS_ jjj5       rI\9STSTSTSTSTS^.             GSS` jj5       rI\" 5        GSS	S	S	S	S[S^.             GSSa jjj5       rI\9 GSSTSTSTSb.         GSSc jjj5       rJ\9STSTSTSb.         GSSd jj5       rJ\9STSTSTSb.         GSSe jj5       rJ\" 5        GSS	S	S	Sb.         GSSf jjj5       rJSSg.     GSSh jjrK      GSSi jrL        GSSj jrMGSGSSk jjrNGSGSSl jjrO\9 GSSTS.     GSSm jjj5       rP\9GSGSSn jj5       rP\9STS.     GSSo jj5       rP GSkSS.     GSSp jjjrP\9GSGSSq jj5       rQ\9GSSr j5       rQGSkGSSs jjrQ\9GSkGSSt jj5       rR\9GSSu j5       rR GSk   GSSv jjrR\9 GSkSTSTSTSTSTSTSTSTSTSTSTSTSTSTSTSTSTSw.                                     GSSx jjj5       rS\9STSTSTSTSTSTSTSTSTSTSTSTSTSTSTSTSTSw.                                     GSSy jj5       rS GSkSS
SzS{S|S}S	S	S	S	S	SS	S	S	S~SSw.                                     GSS jjjrSSS.GSS jjrT  GS       GSS jjrU  GSgSS	S	S	S	S	S	S	S	S	S	S	S	SS
S
SS	SS	S	S.                                               GSS jjjrV\9SS	S	S~SS.             GSS jj5       rW\9SS	S	S~SS.             GSS jj5       rW\D" SNSOSPSQ9SS	S	S~SS.             GSS jj5       rW\9SS	S.       GSS jj5       rX\9SS	S.       GSS jj5       rX\D" SNSOSPSQ9SS	S.       GSS jj5       rXSS	S
S	S	SS	S	SS	S~SS	SS.                               GSS jjrYSS	S	S.           GSS jjrZ\" 5             GSS j5       r[\9STSTSTSTSTS.             GSS jj5       r\\9STSTSTS.             GSS jj5       r\SS	S	S~S	S	S.               GSS jjr\GSGSS jjr]SSS	S.       GSS jjr^GSS jr_S
S.     GSS jjr`GSS jra      GSS jrb      GSS jrc\9STSTSTS.       GSS jj5       rd\9STSTS.       GSS jj5       rd\9STSTS.       GSS jj5       rdSSSS.       GSS jjrd GSSS.     GSS jjjreGSS jrfGSS jrgSSS
SS.             GSS jjrhSS.GSS jjri\D" SSSSQ9SS.       GSS jj5       rj\D" SSSSQ9SS.       GSS jj5       rkS
S.GSS jjrlGSkGSS jjrmGSGSS jjrnGSGSS jjroGSGSS jjrp GSk   GSS jjrq GSk   GSS jjrr        GSS jrs          GSS jrtGSGSS jjru\v" S5      GSGSS jj5       rwSS.       GSS jjrx\D" SSSSQ9S	SS	S.           GSS jj5       ry\D" SSSSQ9S	S	SSSS	SS.                   GSS jj5       rz\D" SSSSQ9S	SS.         GSS jj5       r{S	S	S	S	S	S	SSS	S
SS
S
S
S.                               GSS jjr|\D" SSSSQ9  GSS	S	SSSS	S	S.                     GSS jjj5       r}\" 5       SS.       GSS jj5       r~ GSkSS.       GSS jjjrSS.     GSS jjrSS.GSS jjrGSS jrS
S.     GSS jjrGSS jrS
S.     GSS jjrGSGSS jjrGSS jrGSS jr\9STS.GSS jj5       r\9GSS j5       r\S.     GSS jjr   GSS
S.         GSS jjjrGSS jr      GSS jr\D" SSSSQ9S	S	S	S
SGS GS.               GSGS jj5       r GSkS	S	S	GS.         GSGS jjjrGSS	S	GS.         GSGS jjr\9STSTSTGS.           GSGS	 jj5       r\9STSTGS
.           GSGS jj5       r\9STSTGS
.           GSGS jj5       rS
S
SGS.           GSGS jjrGSS	GS.GSGS jjjrGS GS jrGS GS jrGSGS jr      GSGS jr      GSGS jr      GSGS jr      GSGS jr\9GSGSGS jj5       r\9GSGS j5       rGSGSGS jjrGSGS jrGS GS jrGSGS jrGS GS jrGSGS jrS
GS.GSGS  jjrGSGS! jrS
GS.GSGS" jjrGSGSGS# jjrGSGSGS$ jjrGSGS% jrGSGS& jr GS	     GS
GS' jjr GSkGS SSGS(.         GSGS) jjjr GSkGS*SGS+.       GSGS, jjjrGSkGSGS- jjr\v" GS.5      GSGS/ j5       rGSGS0 jrGSGS1 jr GSkS	SSS	GS2.           GSGS3 jjjrGSGS4 jr\9 GSSTSTGS5.       GSGS6 jjj5       r\9 GSSTGS7.       GSGS8 jjj5       r GSkS	SGS5.       GSGS9 jjjr\9STGS:.GSGS; jj5       r\9GSGS< j5       rSGS:.   GSGS= jjr\9STSTSTGS>.         GSGS? jj5       r\9STSTGS@.         GSGSA jj5       r\9STSTGSB.         GSGSC jj5       r\9STGSD.         GSGSE jj5       rSSSGS>.         GSGSF jjr\9STSTGSG.     GSGSH jj5       r\9STGSI.     GSGSJ jj5       rSGSKGSG.     GSGSL jjrGSGSM jrGSGSGSN jjrSS.GS GSO jjrGSGS!GSP jjr    GS"         GS#GSQ jjrGSGSR jrGS$GSS jrGS%GSGST jjr      GS&GSU jrGS'GSV jrGS(GSW jrSGSX.     GS)GSY jjr\" 5         GS*S	S	SSGSZ.               GS+GS[ jjj5       rGSGS\ jr\v" GS]5          GS,         GS-GS^ jj5       r\" 5       GS_GS_GS_GS_GS`GS`GSa.               GS.GSb jj5       r  GSg     GS/GSc jjrSS	S	GSd.       GS0GSe jjrGSfrg	(1  	DataFrame   u  
Two-dimensional data structure representing data as a table with rows and columns.

Parameters
----------
data : dict, Sequence, ndarray, Series, or pandas.DataFrame
    Two-dimensional data in various forms; dict input must contain Sequences,
    Generators, or a `range`. Sequence may contain Series or other Sequences.
schema : Sequence of str, (str,DataType) pairs, or a {str:DataType,} dict
    The schema of the resulting DataFrame. The schema may be declared in several
    ways:

    * As a dict of {name:type} pairs; if type is None, it will be auto-inferred.
    * As a list of column names; in this case types are automatically inferred.
    * As a list of (name,type) pairs; this is equivalent to the dictionary form.

    If you supply a list of column names that does not match the names in the
    underlying data, the names given here will overwrite them. The number
    of names given in the schema should match the underlying data dimensions.

    If set to `None` (default), the schema is inferred from the data.
schema_overrides : dict, default None
    Support type specification or override of one or more columns; note that
    any dtypes inferred from the schema param will be overridden.

    The number of entries in the schema should match the underlying data
    dimensions, unless a sequence of dictionaries is being passed, in which case
    a *partial* schema can be declared to prevent specific fields from being loaded.
strict : bool, default True
    Throw an error if any `data` value does not exactly match the given or inferred
    data type for that column. If set to `False`, values that do not match the data
    type are cast to that data type or, if casting is not possible, set to null
    instead.
orient : {'col', 'row'}, default None
    Whether to interpret two-dimensional data as columns or as rows. If None,
    the orientation is inferred by matching the columns and data dimensions. If
    this does not yield conclusive results, column orientation is used.
infer_schema_length : int or None
    The maximum number of rows to scan for schema inference. If set to `None`, the
    full data may be scanned *(this can be slow)*. This parameter only applies if
    the input data is a sequence or generator of rows; other input is read as-is.
nan_to_null : bool, default False
    If the data comes from one or more numpy arrays, can optionally convert input
    data np.nan values to null instead. This is a no-op for all other input data.

Notes
-----
Polars explicitly does not support subclassing of its core data types. See
the following GitHub issue for possible workarounds:
https://github.com/pola-rs/polars/issues/2846#issuecomment-1711799869

Examples
--------
Constructing a DataFrame from a dictionary:

>>> data = {"a": [1, 2], "b": [3, 4]}
>>> df = pl.DataFrame(data)
>>> df
shape: (2, 2)
┌─────┬─────┐
│ a   ┆ b   │
│ --- ┆ --- │
│ i64 ┆ i64 │
╞═════╪═════╡
│ 1   ┆ 3   │
│ 2   ┆ 4   │
└─────┴─────┘

Notice that the dtypes are automatically inferred as polars Int64:

>>> df.dtypes
[Int64, Int64]

To specify a more detailed/specific frame schema you can supply the `schema`
parameter with a dictionary of (name,dtype) pairs...

>>> data = {"col1": [0, 2], "col2": [3, 7]}
>>> df2 = pl.DataFrame(data, schema={"col1": pl.Float32, "col2": pl.Int64})
>>> df2
shape: (2, 2)
┌──────┬──────┐
│ col1 ┆ col2 │
│ ---  ┆ ---  │
│ f32  ┆ i64  │
╞══════╪══════╡
│ 0.0  ┆ 3    │
│ 2.0  ┆ 7    │
└──────┴──────┘

...a sequence of (name,dtype) pairs...

>>> data = {"col1": [1, 2], "col2": [3, 4]}
>>> df3 = pl.DataFrame(data, schema=[("col1", pl.Float32), ("col2", pl.Int64)])
>>> df3
shape: (2, 2)
┌──────┬──────┐
│ col1 ┆ col2 │
│ ---  ┆ ---  │
│ f32  ┆ i64  │
╞══════╪══════╡
│ 1.0  ┆ 3    │
│ 2.0  ┆ 4    │
└──────┴──────┘

...or a list of typed Series.

>>> data = [
...     pl.Series("col1", [1, 2], dtype=pl.Float32),
...     pl.Series("col2", [3, 4], dtype=pl.Int64),
... ]
>>> df4 = pl.DataFrame(data)
>>> df4
shape: (2, 2)
┌──────┬──────┐
│ col1 ┆ col2 │
│ ---  ┆ ---  │
│ f32  ┆ i64  │
╞══════╪══════╡
│ 1.0  ┆ 3    │
│ 2.0  ┆ 4    │
└──────┴──────┘

Constructing a DataFrame from a numpy ndarray, specifying column names:

>>> import numpy as np
>>> data = np.array([(1, 2), (3, 4)], dtype=np.int64)
>>> df5 = pl.DataFrame(data, schema=["a", "b"], orient="col")
>>> df5
shape: (2, 2)
┌─────┬─────┐
│ a   ┆ b   │
│ --- ┆ --- │
│ i64 ┆ i64 │
╞═════╪═════╡
│ 1   ┆ 3   │
│ 2   ┆ 4   │
└─────┴─────┘

Constructing a DataFrame from a list of lists, row orientation specified:

>>> data = [[1, 2, 3], [4, 5, 6]]
>>> df6 = pl.DataFrame(data, schema=["a", "b", "c"], orient="row")
>>> df6
shape: (2, 3)
┌─────┬─────┬─────┐
│ a   ┆ b   ┆ c   │
│ --- ┆ --- ┆ --- │
│ i64 ┆ i64 ┆ i64 │
╞═════╪═════╪═════╡
│ 1   ┆ 2   ┆ 3   │
│ 4   ┆ 5   ┆ 6   │
└─────┴─────┴─────┘
rg   _dfplotstylezClassVar[set[str]]
_accessorsNTF)schema_overridesstrictorientinfer_schema_lengthnan_to_nullc          
     
   Uc  [        0 X#S9U l        g [        U[        5      (       a  [        UUUUUS9U l        g [        U[        [
        [        45      (       a  [        UUUUUUUS9U l        g [        U[        R                  5      (       a  [        XX4S9U l        g [        U5      (       a3  [        U[        R                  5      (       a  [        UUUUUUS9U l        g [        U5      (       a/  [        U[         R"                  5      (       a  [%        XX4S9U l        g ['        U5      (       a/  [        U[(        R*                  5      (       a  [-        XX4S9U l        g [/        U5      (       a@  [        U[0        R2                  5      (       a!  [        UR5                  SS9UUUUUS9U l        g [7        US5      (       dD  [        U[8        5      (       d/  [        U[:        [<        45      (       a  [?        UUUUUUS	9U l        g [        U[        R*                  5      (       a  [A        XX4S9U l        g [C        U5      (       a  [E        UUUS9R                  U l        g S
[G        U5      RH                  < S3n[K        U5      e)N)schemar   )r   r   r   r   )r   r   r   r   r   r   )r   r   r   )r   r   r   r   r   F)force__arrow_c_stream__)r   r   r   r   r   z3DataFrame constructor called with unsupported type z for the `data` parameter)&r   r   
isinstancedictlisttupler   r!   plru   r"   rQ   npndarrayr   rS   paTabler   rR   pdr   r    rT   rX   TensorrY   hasattrr	   r   r   r   r   r*   r+   type__name__	TypeError)	selfdatar   r   r   r   r   r   msgs	            iC:\Users\julio\OneDrive\Documentos\Trabajo\Ideas Frescas\venv\Lib\site-packages\polars/dataframe/frame.py__init__DataFrame.__init__g  sK    <#6DH d###!1'DH tUH566'!1$7'DH bii((%6FDH d##
4(D(D$!1'DH  %%*T288*D*D$6FDH t$$D",,)G)G%6FDH d##
4(F(F$


'!1'DH 233tU++4)X!677'!1$7DH bll++(6FDH $)!1 c	 H Fd4jFYFYE\+,  C.     binary)formatc               t   [        U[        5      (       a(  [        UR                  5       R	                  5       5      nO&[        U[
        [        45      (       a  [        U5      nUS:X  a  [        R                  nO(US:X  a  [        R                  nOSU< 3n[        U5      eU R                  U" U5      5      $ )uJ  
Read a serialized DataFrame from a file.

Parameters
----------
source
    Path to a file or a file-like object (by file-like object, we refer to
    objects that have a `read()` method, such as a file handler (e.g.
    via builtin `open` function) or `BytesIO`).
format
    The format with which the DataFrame was serialized. Options:

    - `"binary"`: Deserialize from binary format (bytes). This is the default.
    - `"json"`: Deserialize from JSON format (string).

See Also
--------
DataFrame.serialize

Notes
-----
Serialization is not stable across Polars versions: a LazyFrame serialized
in one Polars version may not be deserializable in another Polars version.

Examples
--------
>>> import io
>>> df = pl.DataFrame({"a": [1, 2, 3], "b": [4.0, 5.0, 6.0]})
>>> bytes = df.serialize()
>>> pl.DataFrame.deserialize(io.BytesIO(bytes))
shape: (3, 2)
┌─────┬─────┐
│ a   ┆ b   │
│ --- ┆ --- │
│ i64 ┆ f64 │
╞═════╪═════╡
│ 1   ┆ 4.0 │
│ 2   ┆ 5.0 │
│ 3   ┆ 6.0 │
└─────┴─────┘
r   json0`format` must be one of {'binary', 'json'}, got )r   r   r
   getvalueencodestrr   r1   rg   deserialize_binarydeserialize_json
ValueError
_from_pydf)clssourcer   deserializerr   s        r   deserializeDataFrame.deserialize  s    Z fh''V__.5578Fd,,'/FX&99Lv&77LFvjQCS/!~~l6233r   c                4    U R                  U 5      nXl        U$ )z7Construct Polars DataFrame from FFI PyDataFrame object.)__new__r   )r   py_dfdfs      r   r   DataFrame._from_pydf
  s     [[	r   )r   rechunkc          	     8    U R                  [        UUUUS95      $ )a  
Construct a DataFrame from an Arrow table.

This operation will be zero copy for the most part. Types that are not
supported by Polars may be cast to the closest supported type.

Parameters
----------
data : arrow Table, RecordBatch, or sequence of sequences
    Data representing an Arrow Table or RecordBatch.
schema : Sequence of str, (str,DataType) pairs, or a {str:DataType,} dict
    The DataFrame schema may be declared in several ways:

    * As a dict of {name:type} pairs; if type is None, it will be auto-inferred.
    * As a list of column names; in this case types are automatically inferred.
    * As a list of (name,type) pairs; this is equivalent to the dictionary form.

    If you supply a list of column names that does not match the names in the
    underlying data, the names given here will overwrite them. The number
    of names given in the schema should match the underlying data dimensions.
schema_overrides : dict, default None
    Support type specification or override of one or more columns; note that
    any dtypes inferred from the columns param will be overridden.
rechunk : bool, default True
    Make sure that all data is in contiguous memory.
)r   r   r   )r   r   )r   r   r   r   r   s        r   _from_arrowDataFrame._from_arrow  s*    F ~~!1	
 	
r   )r   r   r   include_indexc               <    U R                  [        UUUUUUS95      $ )a  
Construct a Polars DataFrame from a pandas DataFrame.

Parameters
----------
data : pandas DataFrame
    Two-dimensional data represented as a pandas DataFrame.
schema : Sequence of str, (str,DataType) pairs, or a {str:DataType,} dict
    The DataFrame schema may be declared in several ways:

    * As a dict of {name:type} pairs; if type is None, it will be auto-inferred.
    * As a list of column names; in this case types are automatically inferred.
    * As a list of (name,type) pairs; this is equivalent to the dictionary form.

    If you supply a list of column names that does not match the names in the
    underlying data, the names given here will overwrite them. The number
    of names given in the schema should match the underlying data dimensions.
schema_overrides : dict, default None
    Support type specification or override of one or more columns; note that
    any dtypes inferred from the columns param will be overridden.
rechunk : bool, default True
    Make sure that all data is in contiguous memory.
nan_to_null : bool, default True
    If the data contains NaN values they will be converted to null/None.
include_index : bool, default False
    Load any non-default pandas indexes as columns.
)r   r   r   r   r   )r   r    )r   r   r   r   r   r   r   s          r   _from_pandasDataFrame._from_pandas=  s0    L ~~!1'+	
 		
r   columnc                P    U R                   R                  XR                  5        U $ )z,Replace a column by a new Series (in place).)r   replace_s)r   r   
new_columns      r   _replaceDataFrame._replacen  s    /r   c                L    U R                  [        R                  " X5      5      $ N)r   rg   _import_columns)r   pointerwidths      r   r   DataFrame._import_columnss  s    ~~k99'IJJr   c                    [         (       a  [        [        R                  5      S:  a  Sn[	        U5      e[        U 5      $ )a   
Create a plot namespace.

.. warning::
    This functionality is currently considered **unstable**. It may be
    changed at any point without it being considered a breaking change.

.. versionchanged:: 1.6.0
    In prior versions of Polars, HvPlot was the plotting backend. If you would
    like to restore the previous plotting functionality, all you need to do
    is add `import hvplot.polars` at the top of your script and replace
    `df.plot` with `df.hvplot`.

Polars does not implement plotting logic itself, but instead defers to
`Altair <https://altair-viz.github.io/>`_:

- `df.plot.line(**kwargs)`
  is shorthand for
  `alt.Chart(df).mark_line(tooltip=True).encode(**kwargs).interactive()`
- `df.plot.point(**kwargs)`
  is shorthand for
  `alt.Chart(df).mark_point(tooltip=True).encode(**kwargs).interactive()` (and
  `plot.scatter` is provided as an alias)
- `df.plot.bar(**kwargs)`
  is shorthand for
  `alt.Chart(df).mark_bar(tooltip=True).encode(**kwargs).interactive()`
- for any other attribute `attr`, `df.plot.attr(**kwargs)`
  is shorthand for
  `alt.Chart(df).mark_attr(tooltip=True).encode(**kwargs).interactive()`

For configuration, we suggest reading
`Chart Configuration <https://altair-viz.github.io/altair-tutorial/notebooks/08-Configuration.html>`_.
For example, you can:

- Change the width/height/title with
  ``.properties(width=500, height=350, title="My amazing plot")``.
- Change the x-axis label rotation with ``.configure_axisX(labelAngle=30)``.
- Change the opacity of the points in your scatter plot with
  ``.configure_point(opacity=.5)``.

Examples
--------
Scatter plot:

>>> df = pl.DataFrame(
...     {
...         "length": [1, 4, 6],
...         "width": [4, 5, 6],
...         "species": ["setosa", "setosa", "versicolor"],
...     }
... )
>>> df.plot.point(x="length", y="width", color="species")  # doctest: +SKIP

Set the x-axis title by using ``altair.X``:

>>> import altair as alt
>>> df.plot.point(
...     x=alt.X("length", title="Length"), y="width", color="species"
... )  # doctest: +SKIP

Line plot:

>>> from datetime import date
>>> df = pl.DataFrame(
...     {
...         "date": [date(2020, 1, 2), date(2020, 1, 3), date(2020, 1, 4)] * 2,
...         "price": [1, 4, 6, 1, 5, 2],
...         "stock": ["a", "a", "a", "b", "b", "b"],
...     }
... )
>>> df.plot.line(x="date", y="price", color="stock")  # doctest: +SKIP

Bar plot:

>>> df = pl.DataFrame(
...     {
...         "day": ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"] * 2,
...         "group": ["a"] * 7 + ["b"] * 7,
...         "value": [1, 3, 2, 4, 5, 6, 1, 1, 3, 2, 4, 5, 1, 2],
...     }
... )
>>> df.plot.bar(
...     x="day", y="value", color="day", column="group"
... )  # doctest: +SKIP

Or, to make a stacked version of the plot above:

>>> df.plot.bar(x="day", y="value", color="group")  # doctest: +SKIP
)      r   z%altair>=5.4.0 is required for `.plot`)rM   r2   rU   __version__r^   r>   r   r   s     r   r   DataFrame.plotw  s8    x ! M&2D2D$E	$Q9C,S11T""r   c                ^    [         (       d  Sn[        U5      e[        R                  " U 5      $ )a  
Create a Great Table for styling.

.. warning::
    This functionality is currently considered **unstable**. It may be
    changed at any point without it being considered a breaking change.

Polars does not implement styling logic itself, but instead defers to
the Great Tables package. Please see the `Great Tables reference <https://posit-dev.github.io/great-tables/reference/>`_
for more information and documentation.

Examples
--------
Import some styling helpers, and create example data:

>>> import polars.selectors as cs
>>> from great_tables import loc, style
>>> df = pl.DataFrame(
...     {
...         "site_id": [0, 1, 2],
...         "measure_a": [5, 4, 6],
...         "measure_b": [7, 3, 3],
...     }
... )

Emphasize the site_id as row names:

>>> df.style.tab_stub(rowname_col="site_id")  # doctest: +SKIP

Fill the background for the highest measure_a value row:

>>> df.style.tab_style(
...     style.fill("yellow"),
...     loc.body(rows=pl.col("measure_a") == pl.col("measure_a").max()),
... )  # doctest: +SKIP

Put a spanner (high-level label) over measure columns:

>>> df.style.tab_spanner(
...     "Measures", cs.starts_with("measure")
... )  # doctest: +SKIP

Format measure_b values to two decimal places:

>>> df.style.fmt_number("measure_b", decimals=2)  # doctest: +SKIP
z%great_tables is required for `.style`)rN   ModuleNotFoundErrorrV   ro   r  s     r   r   DataFrame.style  s*    b '&9C%c**t$$r   c                6    U R                   R                  5       $ )zx
Get the shape of the DataFrame.

Examples
--------
>>> df = pl.DataFrame({"foo": [1, 2, 3, 4, 5]})
>>> df.shape
(5, 1)
)r   shaper   s    r   r  DataFrame.shape  s     xx~~r   c                6    U R                   R                  5       $ )z
Get the number of rows.

Returns
-------
int

Examples
--------
>>> df = pl.DataFrame({"foo": [1, 2, 3, 4, 5]})
>>> df.height
5
)r   heightr  s    r   r  DataFrame.height  s     xx  r   c                6    U R                   R                  5       $ )z
Get the number of columns.

Returns
-------
int

Examples
--------
>>> df = pl.DataFrame(
...     {
...         "foo": [1, 2, 3],
...         "bar": [4, 5, 6],
...     }
... )
>>> df.width
2
)r   r   r  s    r   r   DataFrame.width-  s    ( xx~~r   c                6    U R                   R                  5       $ )u^  
Get or set column names.

Returns
-------
list of str
    A list containing the name of each column in order.

Examples
--------
>>> df = pl.DataFrame(
...     {
...         "foo": [1, 2, 3],
...         "bar": [6, 7, 8],
...         "ham": ["a", "b", "c"],
...     }
... )
>>> df.columns
['foo', 'bar', 'ham']

Set column names:

>>> df.columns = ["apple", "banana", "orange"]
>>> df
shape: (3, 3)
┌───────┬────────┬────────┐
│ apple ┆ banana ┆ orange │
│ ---   ┆ ---    ┆ ---    │
│ i64   ┆ i64    ┆ str    │
╞═══════╪════════╪════════╡
│ 1     ┆ 6      ┆ a      │
│ 2     ┆ 7      ┆ b      │
│ 3     ┆ 8      ┆ c      │
└───────┴────────┴────────┘
)r   columnsr  s    r   r  DataFrame.columnsC  s    J xx!!r   c                :    U R                   R                  U5        g)z
Change the column names of the `DataFrame`.

Parameters
----------
names
    A list with new names for the `DataFrame`.
    The length of the list should be equal to the width of the `DataFrame`.
N)r   set_column_names)r   namess     r   r  r  j  s     	!!%(r   c                6    U R                   R                  5       $ )u"  
Get the column data types.

The data types can also be found in column headers when printing the DataFrame.

Returns
-------
list of DataType
    A list containing the data type of each column in order.

See Also
--------
schema

Examples
--------
>>> df = pl.DataFrame(
...     {
...         "foo": [1, 2, 3],
...         "bar": [6.0, 7.0, 8.0],
...         "ham": ["a", "b", "c"],
...     }
... )
>>> df.dtypes
[Int64, Float64, String]
>>> df
shape: (3, 3)
┌─────┬─────┬─────┐
│ foo ┆ bar ┆ ham │
│ --- ┆ --- ┆ --- │
│ i64 ┆ f64 ┆ str │
╞═════╪═════╪═════╡
│ 1   ┆ 6.0 ┆ a   │
│ 2   ┆ 7.0 ┆ b   │
│ 3   ┆ 8.0 ┆ c   │
└─────┴─────┴─────┘
)r   dtypesr  s    r   r  DataFrame.dtypesw  s    N xx  r   c                ^    U R                    Vs0 s H  oX   R                  _M     sn$ s  snf )z
Get flags that are set on the columns of this DataFrame.

Returns
-------
dict
    Mapping from column names to column flags.
)r  flagsr   names     r   r  DataFrame.flags  s+     48<<@<4dj&&&<@@@s   *c                R    [        [        U R                  U R                  5      SS9$ )a*  
Get an ordered mapping of column names to their data type.

Examples
--------
>>> df = pl.DataFrame(
...     {
...         "foo": [1, 2, 3],
...         "bar": [6.0, 7.0, 8.0],
...         "ham": ["a", "b", "c"],
...     }
... )
>>> df.schema
Schema({'foo': Int64, 'bar': Float64, 'ham': String})
F)check_dtypes)rd   zipr  r  r  s    r   r   DataFrame.schema  s     " c$,,45IIr   c                
   Uc  Su  p4O%USL a  Su  p4OUSL a  Su  p4OSU< 3n[        U5      eU R                  X4S9nUbC  XR                  :w  a4  USL a  SUR                   S	U S
3n[        U5      eUR	                  U5      nU$ )a7  
Return a NumPy ndarray with the given data type.

This method ensures a Polars DataFrame can be treated as a NumPy ndarray.
It enables `np.asarray` and NumPy universal functions.

See the NumPy documentation for more information:
https://numpy.org/doc/stable/user/basics.interoperability.html#the-array-method
FTT)TTF)FFzinvalid input for `copy`: writable
allow_copyzcopy not allowed: cast from z to z prohibited)r   to_numpydtypeRuntimeError	__array__)r   r+  copyr(  r)  r   arrs          r   r-  DataFrame.__array__  s     <#. HjT\#- HjU]#/ Hj.th7CC. mmXmE))!3u}4SYYKtE7+V"3''--&C
r   c                B    U(       a  Sn[        U5      eSSKJn  U" XS9$ )a  
Convert to a dataframe object implementing the dataframe interchange protocol.

Parameters
----------
nan_as_null
    Overwrite null values in the data with `NaN`.

    .. warning::
        This functionality has not been implemented and the parameter will be
        removed in a future version.
        Setting this to `True` will raise a `NotImplementedError`.
allow_copy
    Allow memory to be copied to perform the conversion. If set to `False`,
    causes conversions that are not zero-copy to fail.

Notes
-----
Details on the Python dataframe interchange protocol:
https://data-apis.org/dataframe-protocol/latest/index.html

Examples
--------
Convert a Polars DataFrame to a generic dataframe object and access some
properties.

>>> df = pl.DataFrame({"a": [1, 2], "b": [3.0, 4.0], "c": ["x", "y"]})
>>> dfi = df.__dataframe__()
>>> dfi.num_rows()
2
>>> dfi.get_column(1).dtype
(<DtypeKind.FLOAT: 2>, 64, 'g', '=')
zfunctionality for `nan_as_null` has not been implemented and the parameter will be removed in a future version

Use the default `nan_as_null=False`.r   r   )r)  )NotImplementedErrorpolars.interchange.dataframer   )r   nan_as_nullr)  r   r   s        r   __dataframe__DataFrame.__dataframe__  s-    L ; 
 &c**@t;;r   c                p    [        U[        5      (       a  U R                  X5      $ U R                  X5      $ )z(Compare a DataFrame with another object.)r   r   _compare_to_other_df_compare_to_non_df)r   otherops      r   _compDataFrame._comp  s1    eY'',,U77**555r   c                   U R                   UR                   :w  a  Sn[        U5      eU R                  UR                  :w  a  Sn[        U5      eSnUR                  [        R
                  " 5       R                  R                  U5      5      n[        R                  " X/SS9nUS:X  aM  U R                    Vs/ s H4  n[        R                  " U5      [        R                  " U U 35      :H  PM6     nnGOUS:X  aM  U R                    Vs/ s H4  n[        R                  " U5      [        R                  " U U 35      :g  PM6     nnGOZUS:X  aM  U R                    Vs/ s H4  n[        R                  " U5      [        R                  " U U 35      :  PM6     nnGOUS	:X  aL  U R                    Vs/ s H4  n[        R                  " U5      [        R                  " U U 35      :  PM6     nnOUS
:X  aL  U R                    Vs/ s H4  n[        R                  " U5      [        R                  " U U 35      :  PM6     nnOcUS:X  aL  U R                    Vs/ s H4  n[        R                  " U5      [        R                  " U U 35      :*  PM6     nnOSU< 3n[        U5      eUR                  U5      $ s  snf s  snf s  snf s  snf s  snf s  snf )z+Compare a DataFrame with another DataFrame.zDataFrame columns do not matchz!DataFrame dimensions do not match__POLARS_CMP_OTHER
horizontalhoweqneqgtltgt_eqlt_equnexpected comparison operator )
r  r   r  selectFallr  suffixconcatra   )	r   r:  r;  r   rM  other_renamedcombinednexprs	            r   r8  DataFrame._compare_to_other_df  s1    <<5==(2CS/!::$5CS/!%QUUW\\%8%8%@A88T1|D:?C||L|!AEE!HF8n 55|DLD5[?C||L|!AEE!HF8n 55|DLD4Z>BllKlAEE!Hquus6(^44lDKD4Z>BllKlAEE!Hquus6(^44lDKD7]?C||L|!AEE!HF8n 55|DLD7]?C||L|!AEE!HF8n 55|DLD3B6:CS/!t$$ MLKKLLs$   8;J5;J:;J?1;K;K	;Kc                V   [        U5        US:X  a'  U R                  [        R                  " 5       U:H  5      $ US:X  a'  U R                  [        R                  " 5       U:g  5      $ US:X  a'  U R                  [        R                  " 5       U:  5      $ US:X  a'  U R                  [        R                  " 5       U:  5      $ US:X  a'  U R                  [        R                  " 5       U:  5      $ US:X  a'  U R                  [        R                  " 5       U:*  5      $ SU< 3n[	        U5      e)z0Compare a DataFrame with a non-DataFrame object.rC  rD  rE  rF  rG  rH  rI  )r6   rJ  rK  rL  r   )r   r:  r;  r   s       r   r9  DataFrame._compare_to_non_df=  s     	U#:;;quuw%/005[;;quuw%/004Z;;quuw//4Z;;quuw//7];;quuw%/007];;quuw%/003B6:CS/!r   c          	        [        U[        R                  5      (       ag  U(       a0  U R                  [        R
                  " 5       [        U5      -  5      $ U R                  [        R
                  " 5       [        U5      -  5      $ [        U[        5      (       dS  [        XR                  S9n[        [        U R                  5       Vs/ s H  oCR                  SU 35      PM     sn5      nUR                  nU R                  U[        [         5      nU R#                  U R$                  R'                  UR$                  5      5      nU(       d  UOOUR)                  U Vs/ s H3  o3R*                  R-                  5       (       d  M#  UR/                  5       PM5     sn5      nU(       a  [1        U R2                  R5                  5       5       VVVs/ s H^  u  nu  pxUR7                  5       (       d  M  XT   R7                  5       (       d  XT   [8        :X  d  MD  [;        U5      R=                  U5      PM`     n	nnnU	(       a  UR)                  U	5      $ U$ s  snf s  snf s  snnnf )N)lengthrQ  )r   r   ru   rJ  rK  rL  rb   r   _prepare_other_argr  ranger   aliasr  _cast_all_from_torL   rB   r   r   div_dfwith_columnsr+  is_floatfloor	enumerater   items
is_integerrE   ra   r   )
r   r:  floordivsiorig_dtypesr   r   tp	int_castss
             r   _divDataFrame._divT  s   eRYY''{{1557c%j#899;;quuwU344E9--"5=Atzz9JK9JAww1#w/9JKLEll&&ungF__TXX__UYY78  R!NR77;K;K;M)!'')R!NO 	
  (11B1B1D'E'EOA|==? % !^..00KNd4J %F  $'E   y11	+ L "Os$   I""III3!IIc                    U Vs/ s H>  oDR                   U;   d  M  UR                  U5      R                  UR                  5      PM@     nnU(       a  UR	                  U5      $ U$ s  snf r   )r+  r   rZ  r  r]  )r   r   from_tord  castss         r   r[  DataFrame._cast_all_from_tos  sW     46J2aE9I)!!!&&)2J).ru%6B6 Ks
   A&.A&c                "    U R                  USS9$ )NTrc  ri  r   r:  s     r   __floordiv__DataFrame.__floordiv__y  s    yyy..r   c                "    U R                  USS9$ )NFrq  rr  rs  s     r   __truediv__DataFrame.__truediv__|  s    yyy//r   c                    Sn[        U5      e)Nzqthe truth value of a DataFrame is ambiguous

Hint: to check if a DataFrame contains any values, use `is_empty()`.)r   r  s     r   __bool__DataFrame.__bool__  s    W 	 nr   c                &    U R                  US5      $ )NrC  r<  rs  s     r   __eq__DataFrame.__eq__      zz%&&r   c                &    U R                  US5      $ )NrD  r}  rs  s     r   __ne__DataFrame.__ne__  s    zz%''r   c                &    U R                  US5      $ )NrE  r}  rs  s     r   __gt__DataFrame.__gt__  r  r   c                &    U R                  US5      $ )NrF  r}  rs  s     r   __lt__DataFrame.__lt__  r  r   c                &    U R                  US5      $ )NrG  r}  rs  s     r   __ge__DataFrame.__ge__      zz%))r   c                &    U R                  US5      $ )NrH  r}  rs  s     r   __le__DataFrame.__le__  r  r   c                "    U R                  5       $ r   )	serializer  s    r   __getstate__DataFrame.__getstate__  s    ~~r   c                V    U R                  [        U5      5      R                  U l        g r   )r   r
   r   )r   states     r   __setstate__DataFrame.__setstate__  s    ##GEN377r   c                   [        U[        5      (       a4  U R                  U R                  R	                  UR                  5      5      $ [        U5      nU R                  U R                  R                  UR                  5      5      $ r   )r   r   r   r   mul_dfrX  mulr   rs  s     r   __mul__DataFrame.__mul__  sY    eY''??488??599#=>>"5)txx||EHH566r   c                
    X-  $ r    rs  s     r   __rmul__DataFrame.__rmul__  s
    |r   c                   [        U[        5      (       a4  U R                  U R                  R	                  UR                  5      5      $ [        U5      nU R                  U R                  R                  UR                  5      5      $ r   )r   r   r   r   add_dfrX  addr   rs  s     r   __add__DataFrame.__add__  s[     eY''??488??599#=>>"5)txx||EHH566r   c                    [        U[        5      (       aI  U R                  [        U5      [        R
                  " S5      -   R                  R                  5       5      $ X-   $ )N*)r   r   rJ  rb   rK  ra   r  keeprs  s     r   __radd__DataFrame.__radd__  sH     eS!!;;E
QUU3Z 7==BBDEE|r   c                   [        U[        5      (       a4  U R                  U R                  R	                  UR                  5      5      $ [        U5      nU R                  U R                  R                  UR                  5      5      $ r   )r   r   r   r   sub_dfrX  subr   rs  s     r   __sub__DataFrame.__sub__  Y    eY''??488??599#=>>"5)txx||EHH566r   c                   [        U[        5      (       a4  U R                  U R                  R	                  UR                  5      5      $ [        U5      nU R                  U R                  R                  UR                  5      5      $ r   )r   r   r   r   rem_dfrX  remr   rs  s     r   __mod__DataFrame.__mod__  r  r   c                6    U R                   R                  5       $ r   )r   as_strr  s    r   __str__DataFrame.__str__  s    xx  r   c                "    U R                  5       $ r   )r  r  s    r   __repr__DataFrame.__repr__  s    ||~r   c                    XR                   ;   $ r   r  r   keys     r   __contains__DataFrame.__contains__  s    ll""r   c                "    U R                  5       $ r   )iter_columnsr  s    r   __iter__DataFrame.__iter__  s      ""r   c                4    [        U R                  5       5      $ r   )reversedget_columnsr  s    r   __reversed__DataFrame.__reversed__  s    ((*++r   c                    g r   r  r  s     r   __getitem__DataFrame.__getitem__  s     r   c                    g r   r  r  s     r   r  r    s     r   c                    g r   r  r  s     r   r  r         r   c                    [        X5      $ )u  
Get part of the DataFrame as a new DataFrame, Series, or scalar.

Parameters
----------
key
    Rows / columns to select. This is easiest to explain via example. Suppose
    we have a DataFrame with columns `'a'`, `'d'`, `'c'`, `'d'`. Here is what
    various types of `key` would do:

    - `df[0, 'a']` extracts the first element of column `'a'` and returns a
      scalar.
    - `df[0]` extracts the first row and returns a Dataframe.
    - `df['a']` extracts column `'a'` and returns a Series.
    - `df[0:2]` extracts the first two rows and returns a Dataframe.
    - `df[0:2, 'a']` extracts the first two rows from column `'a'` and returns
      a Series.
    - `df[0:2, 0]` extracts the first two rows from the first column and returns
      a Series.
    - `df[[0, 1], [0, 1, 2]]` extracts the first two rows and the first three
      columns and returns a Dataframe.
    - `df[0: 2, ['a', 'c']]` extracts the first two rows from columns `'a'` and
      `'c'` and returns a Dataframe.
    - `df[:, 0: 2]` extracts all rows from the first two columns and returns a
      Dataframe.
    - `df[:, 'a': 'c']` extracts all rows and all columns positioned between
      `'a'` and `'c'` *inclusive* and returns a Dataframe. In our example,
      that would extract columns `'a'`, `'d'`, and `'c'`.

Returns
-------
DataFrame, Series, or scalar, depending on `key`.

Examples
--------
>>> df = pl.DataFrame(
...     {"a": [1, 2, 3], "d": [4, 5, 6], "c": [1, 3, 2], "b": [7, 8, 9]}
... )
>>> df[0]
shape: (1, 4)
┌─────┬─────┬─────┬─────┐
│ a   ┆ d   ┆ c   ┆ b   │
│ --- ┆ --- ┆ --- ┆ --- │
│ i64 ┆ i64 ┆ i64 ┆ i64 │
╞═════╪═════╪═════╪═════╡
│ 1   ┆ 4   ┆ 1   ┆ 7   │
└─────┴─────┴─────┴─────┘
>>> df[0, "a"]
1
>>> df["a"]
shape: (3,)
Series: 'a' [i64]
[
    1
    2
    3
]
>>> df[0:2]
shape: (2, 4)
┌─────┬─────┬─────┬─────┐
│ a   ┆ d   ┆ c   ┆ b   │
│ --- ┆ --- ┆ --- ┆ --- │
│ i64 ┆ i64 ┆ i64 ┆ i64 │
╞═════╪═════╪═════╪═════╡
│ 1   ┆ 4   ┆ 1   ┆ 7   │
│ 2   ┆ 5   ┆ 3   ┆ 8   │
└─────┴─────┴─────┴─────┘
>>> df[0:2, "a"]
shape: (2,)
Series: 'a' [i64]
[
    1
    2
]
>>> df[0:2, 0]
shape: (2,)
Series: 'a' [i64]
[
    1
    2
]
>>> df[[0, 1], [0, 1, 2]]
shape: (2, 3)
┌─────┬─────┬─────┐
│ a   ┆ d   ┆ c   │
│ --- ┆ --- ┆ --- │
│ i64 ┆ i64 ┆ i64 │
╞═════╪═════╪═════╡
│ 1   ┆ 4   ┆ 1   │
│ 2   ┆ 5   ┆ 3   │
└─────┴─────┴─────┘
>>> df[0:2, ["a", "c"]]
shape: (2, 2)
┌─────┬─────┐
│ a   ┆ c   │
│ --- ┆ --- │
│ i64 ┆ i64 │
╞═════╪═════╡
│ 1   ┆ 1   │
│ 2   ┆ 3   │
└─────┴─────┘
>>> df[:, 0:2]
shape: (3, 2)
┌─────┬─────┐
│ a   ┆ d   │
│ --- ┆ --- │
│ i64 ┆ i64 │
╞═════╪═════╡
│ 1   ┆ 4   │
│ 2   ┆ 5   │
│ 3   ┆ 6   │
└─────┴─────┘
>>> df[:, "a":"c"]
shape: (3, 3)
┌─────┬─────┬─────┐
│ a   ┆ d   ┆ c   │
│ --- ┆ --- ┆ --- │
│ i64 ┆ i64 ┆ i64 │
╞═════╪═════╪═════╡
│ 1   ┆ 4   ┆ 1   │
│ 2   ┆ 5   ┆ 3   │
│ 3   ┆ 6   ┆ 2   │
└─────┴─────┴─────┘
r'   r  s     r   r  r    s    R "$,,r   c           
        [        U[        5      (       a  Sn[        U5      e[        U[        5      (       a  [        R
                  " U5      nUR                  S:w  a  Sn[        U5      eUR                  S   [        U5      :w  a  Sn[        U5      e/ n[        U5       H1  u  pVUR                  [        R                  " XbSS2U4   5      5        M3     U R                  U5      R                  U l        g[        U[         5      (       a  Uu  px[        U[        R                  5      (       a  UR"                  [$        :X  d  ['        U5      (       a  Sn[        U5      e[        U[        5      (       a  U R)                  U5      n	O0[        U[*        5      (       a
  U SS2U4   n	OSU< 3n[        U5      eX)U'   [        U[*        5      (       a  U R-                  X5        g[        U[        5      (       a  U R/                  X5        ggS	U< S
[1        U5      R2                  < SU< S
[1        U5      R2                  < 3n[        U5      e)ug
  
Modify DataFrame elements in place, using assignment syntax.

Parameters
----------
key : str | Sequence[int] | Sequence[str] | tuple[Any, str | int]
    Specifies the location(s) within the DataFrame to assign new values.
    The behavior varies based on the type of `key`:

    - Str: `df["a"] = value`:
        Not supported. Raises a `TypeError`. Use `df.with_columns(...)`
        to add or modify columns.

    - Sequence[str]: `df[["a", "b"]] = value`:
        Assigns multiple columns at once. `value` must be a 2D array-like
        structure with the same number of columns as the list
        of column names provided.

    - tuple[Any, str | int]: `df[row_idx, "a"] = value`:
        Assigns a new value to a specific element in the DataFrame, where
        `row_idx` specifies the row and `"a"` specifies the column.

    - `df[row_idx, col_idx] = value`:
        Similar to the above, but `col_idx` is the integer index of the column.

value : Any
    The new value(s) to assign. The expected structure of `value` depends on the
    form of `key`:

    - For multiple column assignment (`df[["a", "b"]] = value`), `value` should
      be a 2D array-like object with shape (n_rows, n_columns).

    - For single element assignment (`df[row_idx, "a"] = value`), `value` should
      be a scalar.

Raises
------
TypeError
    If an unsupported assignment is attempted, such as assigning a Series
    directly to a column using `df["a"] = series`.

ValueError
    If the shape of `value` does not match the expected shape based on `key`.

Examples
--------
Sequence[str] :  `df[["a", "b"]] = value`:

>>> import numpy as np
>>> df = pl.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
>>> df[["a", "b"]] = np.array([[10, 40], [20, 50], [30, 60]])
>>> df
shape: (3, 2)
┌─────┬─────┐
│ a   ┆ b   │
│ --- ┆ --- │
│ i64 ┆ i64 │
╞═════╪═════╡
│ 10  ┆ 40  │
│ 20  ┆ 50  │
│ 30  ┆ 60  │
└─────┴─────┘

tuple[Any, str | int] : `df[row_idx, "a"] = value`:

>>> df[1, "a"] = 100
>>> df
shape: (3, 2)
┌─────┬─────┐
│ a   ┆ b   │
│ --- ┆ --- │
│ i64 ┆ i64 │
╞═════╪═════╡
│ 10  ┆ 40  │
│ 100 ┆ 50  │
│ 30  ┆ 60  │
└─────┴─────┘

`df[row_idx, col_idx] = value`:

>>> df[0, 1] = 30
>>> df
shape: (3, 2)
┌─────┬─────┐
│ a   ┆ b   │
│ --- ┆ --- │
│ i64 ┆ i64 │
╞═════╪═════╡
│ 10  ┆ 30  │
│ 100 ┆ 50  │
│ 30  ┆ 60  │
└─────┴─────┘
z]DataFrame object does not support `Series` assignment by index

Use `DataFrame.with_columns`.   z,can only set multiple columns with 2D matrix   zEmatrix columns should be equal to list used to determine column namesNzjnot allowed to set DataFrame by boolean mask in the row position

Consider using `DataFrame.with_columns`.zunexpected column selection z/cannot use `__setitem__` on DataFrame with key z	 of type z and value )r   r   r   r   r   arrayndimr   r  lenr`  appendr   ru   r]  r   r   r+  r@   r/   r  intreplace_columnr   r   r   )
r   r  valuer   r  re  r  row_selectioncol_selectionrd  s
             r   __setitem__DataFrame.__setitem__u  s   F c34  C.  T""HHUOEzzQD o%{{1~S)] o% G$S>ryyQT{;< *((155DH U##+.(M ="))449L9LPW9W!-00C   n$ ---$$]3M3//M)*4]4EFn$  %m ---##M5M3//m/ 0 G9T#Y-?-?,BeYiU0D0D/GI 
 C. r   c                    U R                   $ r   )r  r  s    r   __len__DataFrame.__len__  s    {{r   c                "    U R                  5       $ r   cloner  s    r   __copy__DataFrame.__copy__      zz|r   c                "    U R                  5       $ r   r  )r   memos     r   __deepcopy__DataFrame.__deepcopy__  r  r   c                    U R                   $ r   r  r  s    r   _ipython_key_completions_#DataFrame._ipython_key_completions_!  s    ||r   c                8    U R                   R                  U5      $ )z
Export a DataFrame via the Arrow PyCapsule Interface.

https://arrow.apache.org/docs/dev/format/CDataInterface/PyCapsuleInterface.html
)r   r   )r   requested_schemas     r   r   DataFrame.__arrow_c_stream__$  s     xx**+;<<r   )_from_seriesc          	     8   [        [        R                  R                  SSS95      nUS:  a  U R                  n[        [        R                  R                  SSS95      nUS:  a  U R
                  nSR                  [        U UUUS9R                  5       5      $ )	z
Format output data in HTML for display in Jupyter Notebooks.

Output rows and columns can be modified by setting the following ENVIRONMENT
variables:

* POLARS_FMT_MAX_COLS: set the number of columns
* POLARS_FMT_MAX_ROWS: set the number of rows
POLARS_FMT_MAX_COLSK   defaultr   POLARS_FMT_MAX_ROWSr    )max_colsmax_rowsfrom_series)	r  osenvirongetr   r  joinr:   render)r   r  r  r  s       r   _repr_html_DataFrame._repr_html_,  s     rzz~~&;R~HIa<zzHrzz~~&;R~HIa<{{Hww!!(	
 fh
 	
r   c                    U R                   $ )a  
Get an ordered mapping of column names to their data type.

This is an alias for the :attr:`schema` property.

See Also
--------
schema

Notes
-----
This method is included to facilitate writing code that is generic for both
DataFrame and LazyFrame.

Examples
--------
Determine the schema.

>>> df = pl.DataFrame(
...     {
...         "foo": [1, 2, 3],
...         "bar": [6.0, 7.0, 8.0],
...         "ham": ["a", "b", "c"],
...     }
... )
>>> df.collect_schema()
Schema({'foo': Int64, 'bar': Float64, 'ham': String})

Access various properties of the schema using the :class:`Schema` object.

>>> schema = df.collect_schema()
>>> schema["bar"]
Float64
>>> schema.names()
['foo', 'bar', 'ham']
>>> schema.dtypes()
[Int64, Float64, String]
>>> schema.len()
3
r   r  s    r   collect_schemaDataFrame.collect_schemaG  s    R {{r   c                   UcX  UcU  U R                   S:w  a  SU R                   < 3n[        U5      eU R                  R                  S5      R	                  S5      $ Ub  Uc  Sn[        U5      e[        U[        5      (       a  U R                  R                  U5      OU R                  R                  U5      nUR                  U5      $ )aY  
Return the DataFrame as a scalar, or return the element at the given row/column.

Parameters
----------
row
    Optional row index.
column
    Optional column index or name.

See Also
--------
row : Get the values of a single row, either by index or by predicate.

Notes
-----
If row/col not provided, this is equivalent to `df[0,0]`, with a check that
the shape is (1,1). With row/col, this is equivalent to `df[row,col]`.

Examples
--------
>>> df = pl.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
>>> df.select((pl.col("a") * pl.col("b")).sum()).item()
32
>>> df.item(1, 1)
5
>>> df.item(2, "b")
6
)r  r  zycan only call `.item()` if the dataframe is of shape (1, 1), or if explicit row/col values are provided; frame has shape r   z8cannot call `.item()` with only one of `row` or `column`)	r  r   r   	to_series	get_indexr   r  
get_columnget_index_signed)r   rowr   r   rd  s        r   itemDataFrame.itemr  s    < ;6>zzV#((,

~7 
 !o%88%%a(22155[FNLCS/! &#&& HHv&$$V, 	

 !!#&&r   futurecompat_levelz1.1versionr  c                  U R                   (       d  [        R                  " 0 5      $ Uc  SnO![        U[        5      (       a  UR
                  nU R                  R                  W5      n[        R                  R                  U5      $ )aP  
Collect the underlying arrow arrays in an Arrow Table.

This operation is mostly zero copy.

Data types that do copy:
    - CategoricalType

.. versionchanged:: 1.1
    The `future` parameter was renamed `compat_level`.

Parameters
----------
compat_level
    Use a specific compatibility level
    when exporting Polars' internal data structures.

Examples
--------
>>> df = pl.DataFrame(
...     {"foo": [1, 2, 3, 4, 5, 6], "bar": ["a", "b", "c", "d", "e", "f"]}
... )
>>> df.to_arrow()
pyarrow.Table
foo: int64
bar: large_string
----
foo: [[1,2,3,4,5,6]]
bar: [["a","b","c","d","e","f"]]
F)
r   r   tabler   rc   _versionr   to_arrowr   from_batches)r   r  compat_level_pyrecord_batchess       r   r  DataFrame.to_arrow  sh    @ zz88B< #Ok22*33O**?;xx$$^44r   .)	as_seriesc                   g r   r  r   r   s     r   to_dictDataFrame.to_dict  s    ORr   c                   g r   r  r"  s     r   r#  r$        MPr   c                   g r   r  r"  s     r   r#  r$    s     47r   c                   U(       a  U  Vs0 s H  o"R                   U_M     sn$ U  Vs0 s H  o"R                   UR                  5       _M     sn$ s  snf s  snf )u*  
Convert DataFrame to a dictionary mapping column name to values.

Parameters
----------
as_series
    True -> Values are Series
    False -> Values are List[Any]

See Also
--------
rows_by_key
to_dicts

Examples
--------
>>> df = pl.DataFrame(
...     {
...         "A": [1, 2, 3, 4, 5],
...         "fruits": ["banana", "banana", "apple", "apple", "banana"],
...         "B": [5, 4, 3, 2, 1],
...         "cars": ["beetle", "audi", "beetle", "beetle", "beetle"],
...         "optional": [28, 300, None, 2, -30],
...     }
... )
>>> df
shape: (5, 5)
┌─────┬────────┬─────┬────────┬──────────┐
│ A   ┆ fruits ┆ B   ┆ cars   ┆ optional │
│ --- ┆ ---    ┆ --- ┆ ---    ┆ ---      │
│ i64 ┆ str    ┆ i64 ┆ str    ┆ i64      │
╞═════╪════════╪═════╪════════╪══════════╡
│ 1   ┆ banana ┆ 5   ┆ beetle ┆ 28       │
│ 2   ┆ banana ┆ 4   ┆ audi   ┆ 300      │
│ 3   ┆ apple  ┆ 3   ┆ beetle ┆ null     │
│ 4   ┆ apple  ┆ 2   ┆ beetle ┆ 2        │
│ 5   ┆ banana ┆ 1   ┆ beetle ┆ -30      │
└─────┴────────┴─────┴────────┴──────────┘
>>> df.to_dict(as_series=False)
{'A': [1, 2, 3, 4, 5],
'fruits': ['banana', 'banana', 'apple', 'apple', 'banana'],
'B': [5, 4, 3, 2, 1],
'cars': ['beetle', 'audi', 'beetle', 'beetle', 'beetle'],
'optional': [28, 300, None, 2, -30]}
>>> df.to_dict(as_series=True)
{'A': shape: (5,)
Series: 'A' [i64]
[
    1
    2
    3
    4
    5
], 'fruits': shape: (5,)
Series: 'fruits' [str]
[
    "banana"
    "banana"
    "apple"
    "apple"
    "banana"
], 'B': shape: (5,)
Series: 'B' [i64]
[
    5
    4
    3
    2
    1
], 'cars': shape: (5,)
Series: 'cars' [str]
[
    "beetle"
    "audi"
    "beetle"
    "beetle"
    "beetle"
], 'optional': shape: (5,)
Series: 'optional' [i64]
[
    28
    300
    null
    2
    -30
]}
)r  to_list)r   r   rd  s      r   r#  r$    sM    t '+,t!FFAIt,,156AFFAIIK'66 -6s
   A$Ac                     U R                  SS9$ )u  
Convert every row to a dictionary of Python-native values.

Notes
-----
If you have `ns`-precision temporal values you should be aware that Python
natively only supports up to `μs`-precision; `ns`-precision values will be
truncated to microseconds on conversion to Python. If this matters to your
use-case you should export to a different format (such as Arrow or NumPy).

Examples
--------
>>> df = pl.DataFrame({"foo": [1, 2, 3], "bar": [4, 5, 6]})
>>> df.to_dicts()
[{'foo': 1, 'bar': 4}, {'foo': 2, 'bar': 5}, {'foo': 3, 'bar': 6}]
Tnamed)rowsr  s    r   to_dictsDataFrame.to_dicts;  s    " yyty$$r   fortran)orderr(  r)  
structureduse_pyarrowc                  Ub
  [        SSS9  U(       GaY  U(       d"  U R                  5       (       d  Sn[        U5      e/ n/ nU R                  5        H  n	U	R                  [
        :X  a*  U	R                  R                  5       R                  SSUS9n
OU	R                  US9n
U	R                  [        :X  a)  U	R                  5       (       d  U
R                  [        S	S
9n
UR                  U
5        UR                  U	R                  U
R                  U
R                  SS 45        M     [         R"                  " U R$                  US9n['        U R(                  5       H  u  pX|   X'   M     U$ U R*                  R                  XUS9$ )a  
Convert this DataFrame to a NumPy ndarray.

This operation copies data only when necessary. The conversion is zero copy when
all of the following hold:

- The DataFrame is fully contiguous in memory, with all Series back-to-back and
  all Series consisting of a single chunk.
- The data type is an integer or float.
- The DataFrame contains no null values.
- The `order` parameter is set to `fortran` (default).
- The `writable` parameter is set to `False` (default).

Parameters
----------
order
    The index order of the returned NumPy array, either C-like or
    Fortran-like. In general, using the Fortran-like index order is faster.
    However, the C-like order might be more appropriate to use for downstream
    applications to prevent cloning data, e.g. when reshaping into a
    one-dimensional array.
writable
    Ensure the resulting array is writable. This will force a copy of the data
    if the array was created without copy, as the underlying Arrow data is
    immutable.
allow_copy
    Allow memory to be copied to perform the conversion. If set to `False`,
    causes conversions that are not zero-copy to fail.
structured
    Return a `structured array`_ with a data type that corresponds to the
    DataFrame schema. If set to `False` (default), a 2D ndarray is
    returned instead.

    .. _structured array: https://numpy.org/doc/stable/user/basics.rec.html

use_pyarrow
    Use `pyarrow.Array.to_numpy
    <https://arrow.apache.org/docs/python/generated/pyarrow.Array.html#pyarrow.Array.to_numpy>`_

    function for the conversion to NumPy if necessary.

    .. deprecated:: 0.20.28
        Polars now uses its native engine by default for conversion to NumPy.

Examples
--------
Numeric data without nulls can be converted without copying data in some cases.
The resulting array will not be writable.

>>> df = pl.DataFrame({"a": [1, 2, 3]})
>>> arr = df.to_numpy()
>>> arr
array([[1],
       [2],
       [3]])
>>> arr.flags.writeable
False

Set `writable=True` to force data copy to make the array writable.

>>> df.to_numpy(writable=True).flags.writeable
True

If the DataFrame contains different numeric data types, the resulting data type
will be the supertype. This requires data to be copied. Integer types with
nulls are cast to a float type with `nan` representing a null value.

>>> df = pl.DataFrame({"a": [1, 2, None], "b": [4.0, 5.0, 6.0]})
>>> df.to_numpy()
array([[ 1.,  4.],
       [ 2.,  5.],
       [nan,  6.]])

Set `allow_copy=False` to raise an error if data would be copied.

>>> s.to_numpy(allow_copy=False)  # doctest: +SKIP
Traceback (most recent call last):
...
RuntimeError: copy not allowed: cannot convert to a NumPy array without copying data

Polars defaults to F-contiguous order. Use `order="c"` to force the resulting
array to be C-contiguous.

>>> df.to_numpy(order="c").flags.c_contiguous
True

DataFrames with mixed types will result in an array with an object dtype.

>>> df = pl.DataFrame(
...     {
...         "foo": [1, 2, 3],
...         "bar": [6.5, 7.0, 8.5],
...         "ham": ["a", "b", "c"],
...     },
...     schema_overrides={"foo": pl.UInt8, "bar": pl.Float32},
... )
>>> df.to_numpy()
array([[1, 6.5, 'a'],
       [2, 7.0, 'b'],
       [3, 8.5, 'c']], dtype=object)

Set `structured=True` to convert to a structured array, which can better
preserve individual column data such as name and data type.

>>> df.to_numpy(structured=True)
array([(1, 6.5, 'a'), (2, 7. , 'b'), (3, 8.5, 'c')],
      dtype=[('foo', 'u1'), ('bar', '<f4'), ('ham', '<U1')])
Nzthe `use_pyarrow` parameter for `DataFrame.to_numpy` is deprecated. Polars now uses its native engine by default for conversion to NumPy.z0.20.28r  zEcopy not allowed: cannot create structured array without copying dataT)r2  r)  r3  )r3  F)r.  r  )r+  r'  )r&   is_emptyr,  r  r+  rH   structunnestr*  rG   	has_nullsastyper   r  r  r  r   emptyr  r`  r  r   )r   r1  r(  r)  r2  r3  r   arraysstruct_dtyperd  r/  outidxcs                 r   r*  DataFrame.to_numpyN  sP   j "%Y! dmmoo]"3''FL&&(77f$((//+44#'#'$/ 5 C ***=C77f$Q[[]]**Su*5Cc"##QVVSYY		!"$FG ) ((4;;l;C#DLL1 2Jxx  j QQr   )devicelabelfeaturesr+  r1  c                   g r   r  r   return_typerA  rB  rC  r+  r1  s          r   to_jaxDataFrame.to_jax  r  r   c                   g r   r  rE  s          r   rG  rH    s      #r   c                  US:w  a  Uc  Ub  Sn[        U5      eUS:X  a  Uc  Ub  Sn[        U5      e[        SSS9nUR                  R                  =(       d2    [	        [        [        R                  R                  SS	5      5      5      n	U(       a  U R                  U5      n
O9U	(       d0  U R                  [        [        [        [        [        [        05      n
OU n
[!        U["        5      (       a  UR%                  U5      S
   nUc  [&        R(                  " 5       OUR+                  U5         US:X  a2  S
SKJn  U" U
USSS9nUR0                  R3                  USS9sSSS5        $ US:X  a  Ubh  U
R5                  U5      nUb  U
R5                  U5      OU
R6                  " UR8                  6 nUR;                  5       UR;                  5       S.sSSS5        $ U
 Vs0 s H  oR<                  UR;                  5       _M     snsSSS5        $ SR?                  [A        [B        5      5      nSU< SU 3n[        U5      es  snf ! , (       d  f       g= f)a&  
Convert DataFrame to a Jax Array, or dict of Jax Arrays.

.. versionadded:: 0.20.27

.. warning::
    This functionality is currently considered **unstable**. It may be
    changed at any point without it being considered a breaking change.

Parameters
----------
return_type : {"array", "dict"}
    Set return type; a Jax Array, or dict of Jax Arrays.
device
    Specify the jax `Device` on which the array will be created; can provide
    a string (such as "cpu", "gpu", or "tpu") in which case the device is
    retrieved as `jax.devices(string)[0]`. For more specific control you
    can supply the instantiated `Device` directly. If None, arrays are
    created on the default device.
label
    One or more column names, expressions, or selectors that label the feature
    data; results in a `{"label": ..., "features": ...}` dict being returned
    when `return_type` is "dict" instead of a `{"col": array, }` dict.
features
    One or more column names, expressions, or selectors that contain the feature
    data; if omitted, all columns that are not designated as part of the label
    are used. Only applies when `return_type` is "dict".
dtype
    Unify the dtype of all returned arrays; this casts any column that is
    not already of the required dtype before converting to Array. Note that
    export will be single-precision (32bit) unless the Jax config/environment
    directs otherwise (eg: "jax_enable_x64" was set True in the config object
    at startup, or "JAX_ENABLE_X64" is set to "1" in the environment).
order : {"c", "fortran"}
    The index order of the returned Jax array, either C-like (row-major) or
    Fortran-like (column-major).

See Also
--------
to_dummies
to_numpy
to_torch

Examples
--------
>>> df = pl.DataFrame(
...     {
...         "lbl": [0, 1, 2, 3],
...         "feat1": [1, 0, 0, 1],
...         "feat2": [1.5, -0.5, 0.0, -2.25],
...     }
... )

Standard return type (2D Array), on the standard device:

>>> df.to_jax()
Array([[ 0.  ,  1.  ,  1.5 ],
       [ 1.  ,  0.  , -0.5 ],
       [ 2.  ,  0.  ,  0.  ],
       [ 3.  ,  1.  , -2.25]], dtype=float32)

Create the Array on the default GPU device:

>>> a = df.to_jax(device="gpu")  # doctest: +SKIP
>>> a.device()  # doctest: +SKIP
GpuDevice(id=0, process_index=0)

Create the Array on a specific GPU device:

>>> gpu_device = jax.devices("gpu")[1]  # doctest: +SKIP
>>> a = df.to_jax(device=gpu_device)  # doctest: +SKIP
>>> a.device()  # doctest: +SKIP
GpuDevice(id=1, process_index=0)

As a dictionary of individual Arrays:

>>> df.to_jax("dict")
{'lbl': Array([0, 1, 2, 3], dtype=int32),
 'feat1': Array([1, 0, 0, 1], dtype=int32),
 'feat2': Array([ 1.5 , -0.5 ,  0.  , -2.25], dtype=float32)}

As a "label" and "features" dictionary; note that as "features" is not
declared, it defaults to all the columns that are not in "label":

>>> df.to_jax("dict", label="lbl")
{'label': Array([[0],
        [1],
        [2],
        [3]], dtype=int32),
 'features': Array([[ 1.  ,  1.5 ],
        [ 0.  , -0.5 ],
        [ 0.  ,  0.  ],
        [ 1.  , -2.25]], dtype=float32)}

As a "label" and "features" dictionary where each is designated using
a col or selector expression (which can also be used to cast the data
if the label and features are better-represented with different dtypes):

>>> import polars.selectors as cs
>>> df.to_jax(
...     return_type="dict",
...     features=cs.float(),
...     label=pl.col("lbl").cast(pl.UInt8),
... )
{'label': Array([[0],
        [1],
        [2],
        [3]], dtype=uint8),
 'features': Array([[ 1.5 ],
        [-0.5 ],
        [ 0.  ],
        [-2.25]], dtype=float32)}
r   Nz>`label` and `features` only apply when `return_type` is 'dict'B`label` is required if setting `features` when `return_type='dict'jaxzPlease see `https://jax.readthedocs.io/en/latest/installation.html` for specific installation recommendations for the Jax package)install_messageJAX_ENABLE_X640r   r  frame_to_numpyFz	Jax Array)r   r1  r(  targetK)ar1  rB  rC  , invalid `return_type`: 
Expected one of: )"r   rW   configjax_enable_x64boolr  r   r  r  r   rB   rA   rD   rC   rK   rJ   r   r   devices
contextlibnullcontextdefault_devicepolars.ml.utilitiesrQ  rY   asarrayrJ  dropr  rG  r  r  r   r   )r   rF  rA  rB  rC  r+  r1  r   jxenabled_double_precisionframerQ  r/  label_framefeatures_framesrsvalid_jax_typess                    r   rG  rH    s+   x & e&78;ORCS/!F"u}9MVCS/!L

 $&99#;#; $
t

/56@
  IIe$E)IIwvvNOEEfc""ZZ'*F)/Z##%R=N=Nv=VVg%>$"&	 xx''#S'9 WV &$"',,u"5K $/ X."ZZ)<)<= # "-!3!3!5$2$9$9$;- WV8 ?DDesHHcjjl2eD9 WV< #'))H],C"D/>QRaQbc o%	 E9 WVs+   0/I)A'II$I I7I
I)rB  rC  r+  c                   g r   r  r   rF  rB  rC  r+  s        r   to_torchDataFrame.to_torch  s     r   c                   g r   r  rk  s        r   rl  rm    s     r   c                   g r   r  rk  s        r   rl  rm    s     #&r   c               <   US;  a  Uc  Ub  Sn[        U5      eUS:X  a  Uc  Ub  Sn[        U5      e[        S5      nU[        [        [        4;   a  SU 3n[        U5      eU=(       d     [        [
        [        [        [        [        0nUb  U R                  U5      n[        U[        R                  5      (       d  UR                  U5      nUb  U R                  U5      OU R                  " UR                  6 R                  U5      n	[        R                  " X/SS	9n
O%Ub  U R                  U5      OU R                  U5      n
US
:X  a   SSKJn  U" U
SSS9nUR$                  " U5      $ US:X  aO  Ub!  WR'                  5       W	R'                  5       S.$ U
 Vs0 s H  oR(                  UR'                  5       _M     sn$ US:X  a  SSKJn  Uc  SOWR                  nU" XUS9$ SR/                  [1        [2        5      5      nSU< SU 3n[        U5      es  snf )a  
Convert DataFrame to a PyTorch Tensor, Dataset, or dict of Tensors.

.. versionadded:: 0.20.23

.. warning::
    This functionality is currently considered **unstable**. It may be
    changed at any point without it being considered a breaking change.

Parameters
----------
return_type : {"tensor", "dataset", "dict"}
    Set return type; a PyTorch Tensor, PolarsDataset (a frame-specialized
    TensorDataset), or dict of Tensors.
label
    One or more column names, expressions, or selectors that label the feature
    data; when `return_type` is "dataset", the PolarsDataset will return
    `(features, label)` tensor tuples for each row. Otherwise, it returns
    `(features,)` tensor tuples where the feature contains all the row data.
features
    One or more column names, expressions, or selectors that contain the feature
    data; if omitted, all columns that are not designated as part of the label
    are used.
dtype
    Unify the dtype of all returned tensors; this casts any column that is
    not of the required dtype before converting to Tensor. This includes
    the label column *unless* the label is an expression (such as
    `pl.col("label_column").cast(pl.Int16)`).

See Also
--------
to_dummies
to_jax
to_numpy

Examples
--------
>>> df = pl.DataFrame(
...     {
...         "lbl": [0, 1, 2, 3],
...         "feat1": [1, 0, 0, 1],
...         "feat2": [1.5, -0.5, 0.0, -2.25],
...     }
... )

Standard return type (Tensor), with f32 supertype:

>>> df.to_torch(dtype=pl.Float32)
tensor([[ 0.0000,  1.0000,  1.5000],
        [ 1.0000,  0.0000, -0.5000],
        [ 2.0000,  0.0000,  0.0000],
        [ 3.0000,  1.0000, -2.2500]])

As a dictionary of individual Tensors:

>>> df.to_torch("dict")
{'lbl': tensor([0, 1, 2, 3]),
 'feat1': tensor([1, 0, 0, 1]),
 'feat2': tensor([ 1.5000, -0.5000,  0.0000, -2.2500], dtype=torch.float64)}

As a "label" and "features" dictionary; note that as "features" is not
declared, it defaults to all the columns that are not in "label":

>>> df.to_torch("dict", label="lbl", dtype=pl.Float32)
{'label': tensor([[0.],
         [1.],
         [2.],
         [3.]]),
 'features': tensor([[ 1.0000,  1.5000],
         [ 0.0000, -0.5000],
         [ 0.0000,  0.0000],
         [ 1.0000, -2.2500]])}

As a PolarsDataset, with f64 supertype:

>>> ds = df.to_torch("dataset", dtype=pl.Float64)
>>> ds[3]
(tensor([ 3.0000,  1.0000, -2.2500], dtype=torch.float64),)
>>> ds[:2]
(tensor([[ 0.0000,  1.0000,  1.5000],
         [ 1.0000,  0.0000, -0.5000]], dtype=torch.float64),)
>>> ds[[0, 3]]
(tensor([[ 0.0000,  1.0000,  1.5000],
         [ 3.0000,  1.0000, -2.2500]], dtype=torch.float64),)

As a convenience the PolarsDataset can opt in to half-precision data
for experimentation (usually this would be set on the model/pipeline):

>>> list(ds.half())
[(tensor([0.0000, 1.0000, 1.5000], dtype=torch.float16),),
 (tensor([ 1.0000,  0.0000, -0.5000], dtype=torch.float16),),
 (tensor([2., 0., 0.], dtype=torch.float16),),
 (tensor([ 3.0000,  1.0000, -2.2500], dtype=torch.float16),)]

Pass PolarsDataset to a DataLoader, designating the label:

>>> from torch.utils.data import DataLoader
>>> ds = df.to_torch("dataset", label="lbl")
>>> dl = DataLoader(ds, batch_size=2)
>>> batches = list(dl)
>>> batches[0]
[tensor([[ 1.0000,  1.5000],
         [ 0.0000, -0.5000]], dtype=torch.float64), tensor([0, 1])]

Note that labels can be given as expressions, allowing them to have
a dtype independent of the feature columns (multi-column labels are
supported).

>>> ds = df.to_torch(
...     return_type="dataset",
...     dtype=pl.Float32,
...     label=pl.col("lbl").cast(pl.Int16),
... )
>>> ds[:2]
(tensor([[ 1.0000,  1.5000],
         [ 0.0000, -0.5000]]), tensor([0, 1], dtype=torch.int16))

Easily integrate with (for example) scikit-learn and other datasets:

>>> from sklearn.datasets import fetch_california_housing  # doctest: +SKIP
>>> housing = fetch_california_housing()  # doctest: +SKIP
>>> df = pl.DataFrame(
...     data=housing.data,
...     schema=housing.feature_names,
... ).with_columns(
...     Target=housing.target,
... )  # doctest: +SKIP
>>> train = df.to_torch("dataset", label="Target")  # doctest: +SKIP
>>> loader = DataLoader(
...     train,
...     shuffle=True,
...     batch_size=64,
... )  # doctest: +SKIP
)datasetr   NzK`label` and `features` only apply when `return_type` is 'dataset' or 'dict'r   rK  rX   z8PyTorch does not support u16, u32, or u64 dtypes; given r@  rA  tensorr   rP  Tr   )r(  rR  rU  rq  r   rV  rW  rX  )r   rW   rI   rJ   rK   rC   rD   rJ  r   r   rs   r   rb  r  rK  rN  r`  rQ  
from_numpyrl  r  polars.ml.torchr   r  r   r   )r   rF  rB  rC  r+  r   rX   to_dtyperf  rg  re  rQ  r/  rh  r   	pds_labelvalid_torch_typess                    r   rl  rm    s   ^ 11!5_CS/!F"u}9MVCS/!( VVV,,LUGTCS/!IVUFE65I++e,KeRWW--)..x8 ' H%YY 3 34d8n	 
 HHk:ME.6.BT[[*RRE (": hGC##C((F"  )113 . 7 7 9  =BBES#,,.0EBBI%5 %;3F3FI (KK $		(?*C D+K?:MN_M`aCS/! Cs   $Huse_pyarrow_extension_arrayc                  U(       a  [        [        R                  5      S:  a  S[        R                  < 3n[        U5      e[        (       a  [        [
        R                  5      S:  a;  Sn[        (       a#  US[
        R                  < S3-  n[        U5      e[        U5      e[        U R                  ;   a  U R                  " SSU0UD6$ U R                  " U 4SU0UD6$ )	a  
Convert this DataFrame to a pandas DataFrame.

This operation copies data if `use_pyarrow_extension_array` is not enabled.

Parameters
----------
use_pyarrow_extension_array
    Use PyArrow-backed extension arrays instead of NumPy arrays for the columns
    of the pandas DataFrame. This allows zero copy operations and preservation
    of null values. Subsequent operations on the resulting pandas DataFrame may
    trigger conversion to NumPy if those operations are not supported by PyArrow
    compute functions.
**kwargs
    Additional keyword arguments to be passed to
    :meth:`pyarrow.Table.to_pandas`.

Returns
-------
:class:`pandas.DataFrame`

Notes
-----
This operation requires that both :mod:`pandas` and :mod:`pyarrow` are
installed.

Examples
--------
>>> df = pl.DataFrame(
...     {
...         "foo": [1, 2, 3],
...         "bar": [6.0, 7.0, 8.0],
...         "ham": ["a", "b", "c"],
...     }
... )
>>> df.to_pandas()
   foo  bar ham
0    1  6.0   a
1    2  7.0   b
2    3  8.0   c

Null values in numeric columns are converted to `NaN`.

>>> df = pl.DataFrame(
...     {
...         "foo": [1, 2, None],
...         "bar": [6.0, None, 8.0],
...         "ham": [None, "b", "c"],
...     }
... )
>>> df.to_pandas()
   foo  bar   ham
0  1.0  6.0  None
1  2.0  NaN     b
2  NaN  8.0     c

Pass `use_pyarrow_extension_array=True` to get a pandas DataFrame with columns
backed by PyArrow extension arrays. This will preserve null values.

>>> df.to_pandas(use_pyarrow_extension_array=True)
    foo   bar   ham
0     1   6.0  <NA>
1     2  <NA>     b
2  <NA>   8.0     c
>>> _.dtypes
foo           int64[pyarrow]
bar          double[pyarrow]
ham    large_string[pyarrow]
dtype: object
r  r  z\pandas>=1.5.0 is required for `to_pandas("use_pyarrow_extension_array=True")`, found Pandas )   r   zLpyarrow>=8.0.0 is required for `to_pandas(use_pyarrow_extension_array=True)`z, found pyarrow .ry  r  )r2   r   r  r^   rP   r   r  rF   r  _to_pandas_with_object_columns!_to_pandas_without_object_columns)r   ry  kwargsr   s       r   	to_pandasDataFrame.to_pandas	  s    X 'R^^,v5tuw  vD  vD  uG  H055%%r~~)F)Od%%-bnn-?qAAC4S99-c22 T[[ 66 ,GKQ  55
.I
MS
 	
r   c                  / n/ n[        U R                  5       H>  u  pVUR                  5       (       a  UR                  U5        M-  UR                  U5        M@     U(       a  U S S 2U4   nU R                  " U4SU0UD6nO[
        R                  " 5       nU HA  nU R                  U   n	UR                  XYU R                  U5      R                  5       5        MC     U$ )Nry  )r`  r  	is_objectr  r  r   r   r  insertr  r  )
r   ry  r  object_columnsnot_object_columnsre  r+  df_without_objects	pandas_dfr  s
             r   r~  (DataFrame._to_pandas_with_object_columns
  s     !$++.HA  %%a("))!,	 / !%a);&;!<>>",G I I  A<<?DQdnnQ&7&A&A&CD   r   c               H   UR                   (       d  [        R                  " 5       $ UR                  R	                  5       n[
        R                  R                  U5      nU(       a  UR                  " SSSS S.UD6$ UR                  SS5      nUR                  " SSU0UD6$ )NTc                .    [         R                  " U 5      $ r   )r   
ArrowDtype)pa_dtypes    r   <lambda>=DataFrame._to_pandas_without_object_columns.<locals>.<lambda>:
  s    bmmH.Er   )self_destructsplit_blockstypes_mapperdate_as_objectFr  )	r   r   r   r   r  r   r   r  pop)r   r   ry  r  r  tblr  s          r   r  +DataFrame._to_pandas_without_object_columns*
  s     xx<<>!))+hh##N3&== "!E 	   $4e<}}ENEfEEr   c                J    [        U R                  R                  U5      5      $ )a|  
Select column as Series at index location.

Parameters
----------
index
    Location of selection.

See Also
--------
get_column

Examples
--------
>>> df = pl.DataFrame(
...     {
...         "foo": [1, 2, 3],
...         "bar": [6, 7, 8],
...         "ham": ["a", "b", "c"],
...     }
... )
>>> df.to_series(1)
shape: (3,)
Series: 'bar' [i64]
[
        6
        7
        8
]
)r9   r   r  )r   indexs     r   r  DataFrame.to_seriesA
  s    > dhh((/00r   c                T   [        5       nUR                  S5        [        U R                  5       HT  nUR                  S5        UR                  U R	                  U5      R                  U5      5        UR                  S5        MV     UR                  S5        UR                  5       $ )u;  
Convert DataFrame to instantiable string representation.

Parameters
----------
n
    Only use first n rows.

See Also
--------
polars.Series.to_init_repr
polars.from_repr

Examples
--------
>>> df = pl.DataFrame(
...     [
...         pl.Series("foo", [1, 2, 3], dtype=pl.UInt8),
...         pl.Series("bar", [6.0, 7.0, 8.0], dtype=pl.Float32),
...         pl.Series("ham", ["a", "b", "c"], dtype=pl.String),
...     ]
... )
>>> print(df.to_init_repr())
pl.DataFrame(
    [
        pl.Series('foo', [1, 2, 3], dtype=pl.UInt8),
        pl.Series('bar', [6.0, 7.0, 8.0], dtype=pl.Float32),
        pl.Series('ham', ['a', 'b', 'c'], dtype=pl.String),
    ]
)

>>> df_from_str_repr = eval(df.to_init_repr())
>>> df_from_str_repr
shape: (3, 3)
┌─────┬─────┬─────┐
│ foo ┆ bar ┆ ham │
│ --- ┆ --- ┆ --- │
│ u8  ┆ f32 ┆ str │
╞═════╪═════╪═════╡
│ 1   ┆ 6.0 ┆ a   │
│ 2   ┆ 7.0 ┆ b   │
│ 3   ┆ 8.0 ┆ c   │
└─────┴─────┴─────┘
zpl.DataFrame(
    [
z        z,
z    ]
)
)r   writerY  r   r  to_init_reprr   )r   rQ  outputre  s       r   r  DataFrame.to_init_reprb
  s    Z -.tzz"ALL$LL*77:;LL #
 	\"  r   c                   g r   r  r   filer   s      r   r  DataFrame.serialize
  s     r   c                   g r   r  r  s      r   r  r  
  s    NQr   c                   g r   r  r  s      r   r  r  
  s     r   c                   US:X  a  U R                   R                  nO.US:X  a  U R                   R                  nOSU< 3n[        U5      e[	        X1U5      $ )u  
Serialize this DataFrame to a file or string in JSON format.

Parameters
----------
file
    File path or writable file-like object to which the result will be written.
    If set to `None` (default), the output is returned as a string instead.
format
    The format in which to serialize. Options:

    - `"binary"`: Serialize to binary format (bytes). This is the default.
    - `"json"`: Serialize to JSON format (string).

Notes
-----
Serialization is not stable across Polars versions: a LazyFrame serialized
in one Polars version may not be deserializable in another Polars version.

Examples
--------
Serialize the DataFrame into a binary representation.

>>> df = pl.DataFrame(
...     {
...         "foo": [1, 2, 3],
...         "bar": [6, 7, 8],
...     }
... )
>>> bytes = df.serialize()
>>> type(bytes)
<class 'bytes'>

The bytes can later be deserialized back into a DataFrame.

>>> import io
>>> pl.DataFrame.deserialize(io.BytesIO(bytes))
shape: (3, 2)
┌─────┬─────┐
│ foo ┆ bar │
│ --- ┆ --- │
│ i64 ┆ i64 │
╞═════╪═════╡
│ 1   ┆ 6   │
│ 2   ┆ 7   │
│ 3   ┆ 8   │
└─────┴─────┘
r   r   r   )r   serialize_binaryserialize_jsonr   r,   )r   r  r   
serializerr   s        r   r  r  
  sV    l X22Jv00JFvjQCS/!&z@@r   c                    g r   r  r   r  s     r   
write_jsonDataFrame.write_json
  s    36r   c                    g r   r  r  s     r   r  r  
      =@r   c                @  ^  SU 4S jjnUc  U" 5       $ [        U[        5      (       a  U" 5       nUR                  U5        g[        U[        [        45      (       a'  [        U5      nT R                  R                  U5        gT R                  R                  U5        g)a  
Serialize to JSON representation.

Parameters
----------
file
    File path or writable file-like object to which the result will be written.
    If set to `None` (default), the output is returned as a string instead.

See Also
--------
DataFrame.write_ndjson

Examples
--------
>>> df = pl.DataFrame(
...     {
...         "foo": [1, 2, 3],
...         "bar": [6, 7, 8],
...     }
... )
>>> df.write_json()
'[{"foo":1,"bar":6},{"foo":2,"bar":7},{"foo":3,"bar":8}]'
c                    > [        5        n TR                  R                  U 5        U R                  5       nS S S 5        WR	                  S5      $ ! , (       d  f       N= f)Nutf8)r
   r   r  r   decode)buf
json_bytesr   s     r   write_json_to_string2DataFrame.write_json.<locals>.write_json_to_string  sG    c##C( \\^
  $$V,, s   ,A
ANreturnr   )r   r   r  r   r   r1   r   r  )r   r  r  json_strs   `   r   r  r  
  s    4	- <'))h''+-HJJx sDk**%d+DHH%HH%r   c                    g r   r  r  s     r   write_ndjsonDataFrame.write_ndjson  s    69r   c                    g r   r  r  s     r   r  r    s    LOr   c                J   SnUc  [        S[        5       5      nSnO3[        U[        [        R
                  45      (       a  [        U5      nOUnSnSSKJn  U R                  5       R                  UUR                  5       US9  U(       a  [        UR                  5       S	S
9$ g)a  
Serialize to newline delimited JSON representation.

Parameters
----------
file
    File path or writable file-like object to which the result will be written.
    If set to `None` (default), the output is returned as a string instead.

Examples
--------
>>> df = pl.DataFrame(
...     {
...         "foo": [1, 2, 3],
...         "bar": [6, 7, 8],
...     }
... )
>>> df.write_ndjson()
'{"foo":1,"bar":6}\n{"foo":2,"bar":7}\n{"foo":3,"bar":8}\n'
FN	IO[bytes]T	in-memoryr   QueryOptFlags)optimizationsengineutf-8encoding)r   r
   r   r   r   PathLiker1   polars.lazyframe.opt_flagsr  lazysink_ndjson_eagerr   )r   r  should_return_bufferrR  r  r  s         r   r  r  "  s    .  %<+wy1F#' sBKK011'-FF(<		'..0 	  	
  v(7;;r   )include_bominclude_header	separatorline_terminator
quote_char
batch_sizedatetime_formatdate_formattime_formatfloat_scientificfloat_precisiondecimal_comma
null_valuequote_stylestorage_optionscredential_providerretriesc                   g r   r  r   r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  s                      r   	write_csvDataFrame.write_csvR  s    , r   c                   g r   r  r  s                      r   r  r  j  s    , r   ,
"i   autor  c                  SSK Jn  U" SUSS9  U" SUSS9  U(       d  SnSnUc  [        S	[        5       5      nSnO3[	        U[
        [        R                  45      (       a  [        U5      nOUnS
nSSK	J
n  U R                  5       R                  UUUUUUUUU	U
UUUUUUUUUR                  5       US9  U(       a  [        UR                  5       SS9$ g)a$  
Write to comma-separated values (CSV) file.

Parameters
----------
file
    File path or writable file-like object to which the result will be written.
    If set to `None` (default), the output is returned as a string instead.
include_bom
    Whether to include UTF-8 BOM in the CSV output.
include_header
    Whether to include header in the CSV output.
separator
    Separate CSV fields with this symbol.
line_terminator
    String used to end each row.
quote_char
    Byte to use as quoting character.
batch_size
    Number of rows that will be processed per thread.
datetime_format
    A format string, with the specifiers defined by the
    `chrono <https://docs.rs/chrono/latest/chrono/format/strftime/index.html>`_
    Rust crate. If no format specified, the default fractional-second
    precision is inferred from the maximum timeunit found in the frame's
    Datetime cols (if any).
date_format
    A format string, with the specifiers defined by the
    `chrono <https://docs.rs/chrono/latest/chrono/format/strftime/index.html>`_
    Rust crate.
time_format
    A format string, with the specifiers defined by the
    `chrono <https://docs.rs/chrono/latest/chrono/format/strftime/index.html>`_
    Rust crate.
float_scientific
    Whether to use scientific form always (true), never (false), or
    automatically (None) for `Float32` and `Float64` datatypes.
float_precision
    Number of decimal places to write, applied to both `Float32` and
    `Float64` datatypes.
decimal_comma
    Use a comma as the decimal separator instead of a point in standard
    notation. Floats will be encapsulated in quotes if necessary; set the
    field separator to override.
null_value
    A string representing null values (defaulting to the empty string).
quote_style : {'necessary', 'always', 'non_numeric', 'never'}
    Determines the quoting strategy used.

    - necessary (default): This puts quotes around fields only when necessary.
      They are necessary when fields contain a quote,
      separator or record terminator.
      Quotes are also necessary when writing an empty record
      (which is indistinguishable from a record with one empty field).
      This is the default.
    - always: This puts quotes around every field. Always.
    - never: This never puts quotes around fields, even if that results in
      invalid CSV data (e.g.: by not quoting strings containing the separator).
    - non_numeric: This puts quotes around all fields that are non-numeric.
      Namely, when writing a field that does not parse as a valid float
      or integer, then quotes will be used even if they aren`t strictly
      necessary.
storage_options
    Options that indicate how to connect to a cloud provider.

    The cloud providers currently supported are AWS, GCP, and Azure.
    See supported keys here:

    * `aws <https://docs.rs/object_store/latest/object_store/aws/enum.AmazonS3ConfigKey.html>`_
    * `gcp <https://docs.rs/object_store/latest/object_store/gcp/enum.GoogleConfigKey.html>`_
    * `azure <https://docs.rs/object_store/latest/object_store/azure/enum.AzureConfigKey.html>`_
    * Hugging Face (`hf://`): Accepts an API key under the `token` parameter:             `{'token': '...'}`, or by setting the `HF_TOKEN` environment variable.

    If `storage_options` is not provided, Polars will try to infer the
    information from environment variables.
credential_provider
    Provide a function that can be called to provide cloud storage
    credentials. The function is expected to return a dictionary of
    credential keys along with an optional credential expiry time.

    .. warning::
        This functionality is considered **unstable**. It may be changed
        at any point without it being considered a breaking change.
retries
    Number of retries if accessing a cloud instance fails.

Examples
--------
>>> import pathlib
>>>
>>> df = pl.DataFrame(
...     {
...         "foo": [1, 2, 3, 4, 5],
...         "bar": [6, 7, 8, 9, 10],
...         "ham": ["a", "b", "c", "d", "e"],
...     }
... )
>>> path: pathlib.Path = dirpath / "new_file.csv"
>>> df.write_csv(path, separator=",")
r   )_check_arg_is_1byter  F)can_be_emptyr  TNr  r  r  )r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  )polars.io.csv._utilsr  r   r
   r   r   r   r  r1   r  r  r  sink_csvr  r   )r   r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  rR  r  r  s                           r   r  r    s    z 	=KGL*4HJ$<+wy1F#' sBKK011'-FF(<		#)+!!+##-+'!#+ 3'..0) 	 	
.  v(7;;r   	)r  c               D    U R                   " SSUS.UD6n[        U5        g)a  
Copy `DataFrame` in csv format to the system clipboard with `write_csv`.

Useful for pasting into Excel or other similar spreadsheet software.

Parameters
----------
separator
    Separate CSV fields with this symbol.
kwargs
    Additional arguments to pass to `write_csv`.

See Also
--------
polars.read_clipboard: Read a DataFrame from the clipboard.
write_csv: Write to comma-separated values (CSV) file.
N)r  r  r  )r  _write_clipboard_string)r   r  r  results       r   write_clipboardDataFrame.write_clipboard0  s$    $ nnN$)NvN'r   uncompressedc                    Uc  Sn[        U[        [        45      (       a  [        U5      nUc  SnU R                  R                  XU5        g)a;  
Write to Apache Avro file.

Parameters
----------
file
    File path or writable file-like object to which the data will be written.
compression : {'uncompressed', 'snappy', 'deflate'}
    Compression method. Defaults to "uncompressed".
name
    Schema name. Defaults to empty string.

Examples
--------
>>> import pathlib
>>>
>>> df = pl.DataFrame(
...     {
...         "foo": [1, 2, 3, 4, 5],
...         "bar": [6, 7, 8, 9, 10],
...         "ham": ["a", "b", "c", "d", "e"],
...     }
... )
>>> path: pathlib.Path = dirpath / "new_file.avro"
>>> df.write_avro(path)
Nr  r  )r   r   r   r1   r   
write_avro)r   r  compressionr  s       r   r  DataFrame.write_avroE  sI    @ (KdS$K((%d+D<DDt4r   A1r   )positiontable_style
table_namecolumn_formatsdtype_formatsconditional_formatsheader_formatcolumn_totalscolumn_widths
row_totalsrow_heights
sparklinesformulasr  r  
autofilterautofithidden_columnshide_gridlines
sheet_zoomfreeze_panesc               |   SSK JnJnJnJnJnJnJnJn  [        SSS9n SSK
Jn!  U" X5      u  n"n#n$X R                  5       n&n%U%n'U" U"5      n(U=(       d    0 nU" U5      u  nn)U=(       d    U" U"5      nU" U%U(UU
UU	UUUUUS9u  n*nn%[        U[        5      (       a  U!" U5      OUn+U+S   U%R                  -   [!        U&5      -   [!        U(       + 5      -
  [!        [#        U
5      5      -   U+S   U%R$                  -   S-
  4n,S	n-S
n.U,S   U-:  d	  U,S   U.:  a2  SU%R                   SU%R$                   SU< SU- SU. S3n/['        U/5      eU&(       a  U(       aW  U#R(                  " / U+QU,QU%R+                  5       UU*UU[#        U
5      =(       a    U&(       + US.U)EP76   U(       a  U" U%U#UU+UU(S9  Uc  [-        5       n0O.[        U[        5      (       a  U1n0O[-        [/        U'U5      5      n0U(       aB  U&(       d;  U R0                  n1[3        U15      S:  a  SU1 3n/[5        U/5      eU#R7                  5         [        U[         5      (       a!  [8        R;                  U%R<                  U5      nO[?        U'USSS9nU" U=(       d    0 5      nU%R<                   Hd  n2U2U0;   a  SS0O0 n3U+S   U%RA                  U25      -   n4U2U;   a  U#RC                  U4U4UU2   SU35        MF  U3(       d  MO  U#RE                  U4U4SSU35        Mf     U=(       d    0 RG                  5        H  u  n2n5U" U#U%U+U2UU5S9  M     U(       a  U#RI                  S5        U(       a  U#RK                  U5        U(       a  [        U[         5      (       a/  [M        U+S   U,S   S-   5       H  n6U#RO                  U6U5        M     OG[        U[8        5      (       a2  U" U5      RG                  5        H  u  n6n7U#RO                  U6U75        M     U(       a6  [        U[        5      (       a  U#RQ                  U5        OU#RP                  " U6   U$(       a  U"RS                  5         U"$ )a?  
Write frame data to a table in an Excel workbook/worksheet.

Parameters
----------
workbook : {str, Workbook}
    String name or path of the workbook to create, BytesIO object, file opened
    in binary-mode, or an `xlsxwriter.Workbook` object that has not been closed.
    If None, writes to a `dataframe.xlsx` workbook in the working directory.
worksheet : {str, Worksheet}
    Name of target worksheet or an `xlsxwriter.Worksheet` object (in which
    case `workbook` must be the parent `xlsxwriter.Workbook` object); if None,
    writes to "Sheet1" when creating a new workbook (note that writing to an
    existing workbook requires a valid existing -or new- worksheet name).
position : {str, tuple}
    Table position in Excel notation (eg: "A1"), or a (row,col) integer tuple.
table_style : {str, dict}
    A named Excel table style, such as "Table Style Medium 4", or a dictionary
    of `{"key":value,}` options containing one or more of the following keys:
    "style", "first_column", "last_column", "banded_columns, "banded_rows".
table_name : str
    Name of the output table object in the worksheet; can then be referred to
    in the sheet by formulae/charts, or by subsequent `xlsxwriter` operations.
column_formats : dict
    A `{colname(s):str,}` or `{selector:str,}` dictionary for applying an
    Excel format string to the given columns. Formats defined here (such as
    "dd/mm/yyyy", "0.00%", etc) will override any defined in `dtype_formats`.
dtype_formats : dict
    A `{dtype:str,}` dictionary that sets the default Excel format for the
    given dtype. (This can be overridden on a per-column basis by the
    `column_formats` param).
conditional_formats : dict
    A dictionary of colname (or selector) keys to a format str, dict, or list
    that defines conditional formatting options for the specified columns.

    * If supplying a string typename, should be one of the valid `xlsxwriter`
      types such as "3_color_scale", "data_bar", etc.
    * If supplying a dictionary you can make use of any/all `xlsxwriter`
      supported options, including icon sets, formulae, etc.
    * Supplying multiple columns as a tuple/key will apply a single format
      across all columns - this is effective in creating a heatmap, as the
      min/max values will be determined across the entire range, not per-column.
    * Finally, you can also supply a list made up from the above options
      in order to apply *more* than one conditional format to the same range.
header_format : dict
    A `{key:value,}` dictionary of `xlsxwriter` format options to apply
    to the table header row, such as `{"bold":True, "font_color":"#702963"}`.
column_totals : {bool, list, dict}
    Add a column-total row to the exported table.

    * If True, all numeric columns will have an associated total using "sum".
    * If passing a string, it must be one of the valid total function names
      and all numeric columns will have an associated total using that function.
    * If passing a list of colnames, only those given will have a total.
    * For more control, pass a `{colname:funcname,}` dict.

    Valid column-total function names are "average", "count_nums", "count",
    "max", "min", "std_dev", "sum", and "var".
column_widths : {dict, int}
    A `{colname:int,}` or `{selector:int,}` dict or a single integer that
    sets (or overrides if autofitting) table column widths, in integer pixel
    units. If given as an integer the same value is used for all table columns.
row_totals : {dict, list, bool}
    Add a row-total column to the right-hand side of the exported table.

    * If True, a column called "total" will be added at the end of the table
      that applies a "sum" function row-wise across all numeric columns.
    * If passing a list/sequence of column names, only the matching columns
      will participate in the sum.
    * Can also pass a `{colname:columns,}` dictionary to create one or
      more total columns with distinct names, referencing different columns.
row_heights : {dict, int}
    An int or `{row_index:int,}` dictionary that sets the height of the given
    rows (if providing a dictionary) or all rows (if providing an integer) that
    intersect with the table body (including any header and total row) in
    integer pixel units. Note that `row_index` starts at zero and will be
    the header row (unless `include_header` is False).
sparklines : dict
    A `{colname:list,}` or `{colname:dict,}` dictionary defining one or more
    sparklines to be written into a new column in the table.

    * If passing a list of colnames (used as the source of the sparkline data)
      the default sparkline settings are used (eg: line chart with no markers).
    * For more control an `xlsxwriter`-compliant options dict can be supplied,
      in which case three additional polars-specific keys are available:
      "columns", "insert_before", and "insert_after". These allow you to define
      the source columns and position the sparkline(s) with respect to other
      table columns. If no position directive is given, sparklines are added to
      the end of the table (eg: to the far right) in the order they are given.
formulas : dict
    A `{colname:formula,}` or `{colname:dict,}` dictionary defining one or
    more formulas to be written into a new column in the table. Note that you
    are strongly advised to use structured references in your formulae wherever
    possible to make it simple to reference columns by name.

    * If providing a string formula (such as "=[@colx]*[@coly]") the column will
      be added to the end of the table (eg: to the far right), after any default
      sparklines and before any row_totals.
    * For the most control supply an options dictionary with the following keys:
      "formula" (mandatory), one of "insert_before" or "insert_after", and
      optionally "return_dtype". The latter is used to appropriately format the
      output of the formula and allow it to participate in row/column totals.
float_precision : int
    Default number of decimals displayed for floating point columns (note that
    this is purely a formatting directive; the actual values are not rounded).
include_header : bool
    Indicate if the table should be created with a header row.
autofilter : bool
    If the table has headers, provide autofilter capability.
autofit : bool
    Calculate individual column widths from the data.
hidden_columns : str | list
     A column name, list of column names, or a selector representing table
     columns to mark as hidden in the output worksheet.
hide_gridlines : bool
    Do not display any gridlines on the output worksheet.
sheet_zoom : int
    Set the default zoom level of the output worksheet.
freeze_panes : str | (str, int, int) | (int, int) | (int, int, int, int)
    Freeze workbook panes.

    * If (row, col) is supplied, panes are split at the top-left corner of the
      specified cell, which are 0-indexed. Thus, to freeze only the top row,
      supply (1, 0).
    * Alternatively, cell notation can be used to supply the cell. For example,
      "A2" indicates the split occurs at the top-left of cell A2, which is the
      equivalent of (1, 0).
    * If (row, col, top_row, top_col) are supplied, the panes are split based on
      the `row` and `col`, and the scrolling region is initialized to begin at
      the `top_row` and `top_col`. Thus, to freeze only the top row and have the
      scrolling region begin at row 10, column D (5th col), supply (1, 0, 9, 4).
      Using cell notation for (row, col), supplying ("A2", 9, 4) is equivalent.

Notes
-----
* A list of compatible `xlsxwriter` format property names can be found here:
  https://xlsxwriter.readthedocs.io/format.html#format-methods-and-format-properties

* Conditional formatting dictionaries should provide xlsxwriter-compatible
  definitions; polars will take care of how they are applied on the worksheet
  with respect to the relative sheet/column position. For supported options,
  see: https://xlsxwriter.readthedocs.io/working_with_conditional_formats.html

* Similarly, sparkline option dictionaries should contain xlsxwriter-compatible
  key/values, as well as a mandatory polars "columns" key that defines the
  sparkline source data; these source columns should all be adjacent. Two other
  polars-specific keys are available to help define where the sparkline appears
  in the table: "insert_after", and "insert_before". The value associated with
  these keys should be the name of a column in the exported table.
  https://xlsxwriter.readthedocs.io/working_with_sparklines.html

* Formula dictionaries *must* contain a key called "formula", and then optional
  "insert_after", "insert_before", and/or "return_dtype" keys. These additional
  keys allow the column to be injected into the table at a specific location,
  and/or to define the return type of the formula (eg: "Int64", "Float64", etc).
  Formulas that refer to table columns should use Excel's structured references
  syntax to ensure the formula is applied correctly and is table-relative.
  https://support.microsoft.com/en-us/office/using-structured-references-with-excel-tables-f5ed2452-2337-4f71-bed3-c8ae6d2b276e

Examples
--------
Instantiate a basic DataFrame:

>>> from random import uniform
>>> from datetime import date
>>>
>>> df = pl.DataFrame(
...     {
...         "dtm": [date(2023, 1, 1), date(2023, 1, 2), date(2023, 1, 3)],
...         "num": [uniform(-500, 500), uniform(-500, 500), uniform(-500, 500)],
...         "val": [10_000, 20_000, 30_000],
...     }
... )

Export to "dataframe.xlsx" (the default workbook name, if not specified) in the
working directory, add column totals ("sum" by default) on all numeric columns,
then autofit:

>>> df.write_excel(column_totals=True, autofit=True)  # doctest: +SKIP

Write frame to a specific location on the sheet, set a named table style,
apply US-style date formatting, increase default float precision, apply a
non-default total function to a single column, autofit:

>>> df.write_excel(  # doctest: +SKIP
...     position="B4",
...     table_style="Table Style Light 16",
...     dtype_formats={pl.Date: "mm/dd/yyyy"},
...     column_totals={"num": "average"},
...     float_precision=6,
...     autofit=True,
... )

Write the same frame to a named worksheet twice, applying different styles
and conditional formatting to each table, adding table titles using explicit
xlsxwriter integration:

>>> from xlsxwriter import Workbook
>>> with Workbook("multi_frame.xlsx") as wb:  # doctest: +SKIP
...     # basic/default conditional formatting
...     df.write_excel(
...         workbook=wb,
...         worksheet="data",
...         position=(3, 1),  # specify position as (row,col) coordinates
...         conditional_formats={"num": "3_color_scale", "val": "data_bar"},
...         table_style="Table Style Medium 4",
...     )
...
...     # advanced conditional formatting, custom styles
...     df.write_excel(
...         workbook=wb,
...         worksheet="data",
...         position=(df.height + 7, 1),
...         table_style={
...             "style": "Table Style Light 4",
...             "first_column": True,
...         },
...         conditional_formats={
...             "num": {
...                 "type": "3_color_scale",
...                 "min_color": "#76933c",
...                 "mid_color": "#c4d79b",
...                 "max_color": "#ebf1de",
...             },
...             "val": {
...                 "type": "data_bar",
...                 "data_bar_2010": True,
...                 "bar_color": "#9bbb59",
...                 "bar_negative_color_same": True,
...                 "bar_negative_border_color_same": True,
...             },
...         },
...         column_formats={"num": "#,##0.000;[White]-#,##0.000"},
...         column_widths={"val": 125},
...         autofit=True,
...     )
...
...     # add some table titles (with a custom format)
...     ws = wb.get_worksheet_by_name("data")
...     fmt_title = wb.add_format(
...         {
...             "font_color": "#4f6228",
...             "font_size": 12,
...             "italic": True,
...             "bold": True,
...         }
...     )
...     ws.write(2, 1, "Basic/default conditional formatting", fmt_title)
...     ws.write(
...         df.height + 6, 1, "Customised conditional formatting", fmt_title
...     )

Export a table containing two different types of sparklines. Use default
options for the "trend" sparkline and customized options (and positioning)
for the "+/-" win_loss sparkline, with non-default integer dtype formatting,
column totals, a subtle two-tone heatmap and hidden worksheet gridlines:

>>> df = pl.DataFrame(
...     {
...         "id": ["aaa", "bbb", "ccc", "ddd", "eee"],
...         "q1": [100, 55, -20, 0, 35],
...         "q2": [30, -10, 15, 60, 20],
...         "q3": [-50, 0, 40, 80, 80],
...         "q4": [75, 55, 25, -10, -55],
...     }
... )
>>> df.write_excel(  # doctest: +SKIP
...     table_style="Table Style Light 2",
...     # apply accounting format to all flavours of integer
...     dtype_formats={dt: "#,##0_);(#,##0)" for dt in [pl.Int32, pl.Int64]},
...     sparklines={
...         # default options; just provide source cols
...         "trend": ["q1", "q2", "q3", "q4"],
...         # customized sparkline type, with positioning directive
...         "+/-": {
...             "columns": ["q1", "q2", "q3", "q4"],
...             "insert_after": "id",
...             "type": "win_loss",
...         },
...     },
...     conditional_formats={
...         # create a unified multi-column heatmap
...         ("q1", "q2", "q3", "q4"): {
...             "type": "2_color_scale",
...             "min_color": "#95b3d7",
...             "max_color": "#ffffff",
...         },
...     },
...     column_totals=["q1", "q2", "q3", "q4"],
...     row_totals=True,
...     hide_gridlines=True,
... )

Export a table containing an Excel formula-based column that calculates a
standardised Z-score, showing use of structured references in conjunction
with positioning directives, column totals, and custom formatting.

>>> df = pl.DataFrame(
...     {
...         "id": ["a123", "b345", "c567", "d789", "e101"],
...         "points": [99, 45, 50, 85, 35],
...     }
... )
>>> df.write_excel(  # doctest: +SKIP
...     table_style={
...         "style": "Table Style Medium 15",
...         "first_column": True,
...     },
...     column_formats={
...         "id": {"font": "Consolas"},
...         "points": {"align": "center"},
...         "z-score": {"align": "center"},
...     },
...     column_totals="average",
...     formulas={
...         "z-score": {
...             # use structured references to refer to the table columns and 'totals' row
...             "formula": "=STANDARDIZE([@points], [[#Totals],[points]], STDEV([points]))",
...             "insert_after": "points",
...             "return_dtype": pl.Float64,
...         }
...     },
...     hide_gridlines=True,
...     sheet_zoom=125,
... )

Create and reference a Worksheet object directly, adding a basic chart.
Taking advantage of structured references to set chart series values and
categories is strongly recommended so that you do not have to calculate
cell positions with respect to the frame data and worksheet:

>>> with Workbook("basic_chart.xlsx") as wb:  # doctest: +SKIP
...     # create worksheet object and write frame data to it
...     ws = wb.add_worksheet("demo")
...     df.write_excel(
...         workbook=wb,
...         worksheet=ws,
...         table_name="DataTable",
...         table_style="Table Style Medium 26",
...         hide_gridlines=True,
...     )
...     # create chart object, point to the written table
...     # data using structured references, and style it
...     chart = wb.add_chart({"type": "column"})
...     chart.set_title({"name": "Example Chart"})
...     chart.set_legend({"none": True})
...     chart.set_style(38)
...     chart.add_series(
...         {  # note the use of structured references
...             "values": "=DataTable[points]",
...             "categories": "=DataTable[id]",
...             "data_labels": {"value": True},
...         }
...     )
...     # add chart to the worksheet
...     ws.insert_chart("D1", chart)
r   )_unpack_multi_column_dict_xl_apply_conditional_formats_xl_inject_sparklines_xl_setup_table_columns_xl_setup_table_options_xl_setup_workbook_xl_unique_table_name_XLFormatCache
xlsxwriterzExcel export requires)
err_prefix)xl_cell_to_rowcol)r   format_cacher  r  r  r   r  r  r  r  r  r  i i @  zwriting xz
 frame at z& does not fit worksheet dimensions of z
 rows and z columns)r   r   r  
header_rowr  	total_rowr  )r   wsr  table_startr  r  N)r   r   r|  z:`autofit=True` requires xlsxwriter 3.0.8 or higher, found TF)expand_keysexpand_valueshidden)r  paramsr  )*"polars.io.spreadsheet._write_utilsr  r  r  r  r  r  r  r  rW   xlsxwriter.utilityr  r5  r   r   r  r  r[  r   r]   	add_tabler-  setrf   r  r2   r^   r  r   fromkeysr  re   get_column_indexset_column_pixels
set_columnra  r
  set_zoomrY  set_row_pixelsr  close)8r   workbook	worksheetr  r  r  r  r  r  r   r  r  r  r  r  r  r  r  r  r  r	  r
  r  r  r  r  r  r  r  r  r  r  r  r  wbr  	can_closer   r5  df_original	fmt_cachetable_optionstable_columnsr  table_finishexcel_max_valid_rowsexcel_max_valid_colsr   r!  xlvr   optionscol_idxr"  r>  r  s8                                                           r   write_excelDataFrame.write_exceln  s   L		
 		
 		
 %\>UV
8 /xCB	]]_H
  #2&	'-2%<[%I"]<#8#<
,C")'''+#!!-
)~r" ,6h+D+Dh'( 	 Nii(m n$%& $}%&	'
 NRXX%)
  '$ O22A!55RYYKq
*XLHn  pD  oE  EO  Pd  Oe  em  nC',, >LL  GGI(,"0",!%m!4!EX&	 $	  #-(; +#1!* !UF,,$%F*;GHF 8((CS!I-RSVRWX055JJLmS)) MM"**mDM2]EM 2-2E2FjjF*0F*:x&G!!nr':':6'BBG&$$!&) gwdGD !  */R668NFF!- 9 a KK
#+s++ Qa11DEC%%c;7 FK..#<[#I#O#O#QKC%%c62 $R ,,,-.HHJ	r   )r  r  r  r  r  c                   g r   r  r   r  r  r  r  r  r  s          r   	write_ipcDataFrame.write_ipc  s     r   c                   g r   r  r?  s          r   r@  rA    s     r   c                   USL nUc  [        5       nOUnSSKJn	  U R                  5       R	                  UUUUUUU	R                  5       SS9  U(       a  U$ S$ )a  
Write to Arrow IPC binary stream or Feather file.

See "File or Random Access format" in https://arrow.apache.org/docs/python/ipc.html.

.. versionchanged:: 1.1
    The `future` parameter was renamed `compat_level`.

Parameters
----------
file
    Path or writable file-like object to which the IPC data will be
    written. If set to `None`, the output is returned as a BytesIO object.
compression : {'uncompressed', 'lz4', 'zstd'}
    Compression method. Defaults to "uncompressed".
compat_level
    Use a specific compatibility level
    when exporting Polars' internal data structures.
storage_options
    Options that indicate how to connect to a cloud provider.

    The cloud providers currently supported are AWS, GCP, and Azure.
    See supported keys here:

    * `aws <https://docs.rs/object_store/latest/object_store/aws/enum.AmazonS3ConfigKey.html>`_
    * `gcp <https://docs.rs/object_store/latest/object_store/gcp/enum.GoogleConfigKey.html>`_
    * `azure <https://docs.rs/object_store/latest/object_store/azure/enum.AzureConfigKey.html>`_
    * Hugging Face (`hf://`): Accepts an API key under the `token` parameter:             `{'token': '...'}`, or by setting the `HF_TOKEN` environment variable.

    If `storage_options` is not provided, Polars will try to infer the
    information from environment variables.
credential_provider
    Provide a function that can be called to provide cloud storage
    credentials. The function is expected to return a dictionary of
    credential keys along with an optional credential expiry time.

    .. warning::
        This functionality is considered **unstable**. It may be changed
        at any point without it being considered a breaking change.
retries
    Number of retries if accessing a cloud instance fails.

Examples
--------
>>> import pathlib
>>>
>>> df = pl.DataFrame(
...     {
...         "foo": [1, 2, 3, 4, 5],
...         "bar": [6, 7, 8, 9, 10],
...         "ham": ["a", "b", "c", "d", "e"],
...     }
... )
>>> path: pathlib.Path = dirpath / "new_file.arrow"
>>> df.write_ipc(path)
Nr   r  r  )r  r  r  r  r  r  r  )r
   r  r  r  sink_ipcr  )
r   r  r  r  r  r  r  return_bytesrR  r  s
             r   r@  rA    sk    L t|<YFF<		#%+ 3'..0 	 		
 &v/4/r   )r  r  c                   g r   r  r   r  r  r  s       r   write_ipc_streamDataFrame.write_ipc_stream  s     r   c                   g r   r  rG  s       r   rH  rI         r   c                   USL nU(       a  [        5       nO&[        U[        [        45      (       a  [	        U5      nUc  SnO![        U[
        5      (       a  UR                  nUc  SnU R                  R                  XW5        U(       a  U$ S$ )aj  
Write to Arrow IPC record batch stream.

See "Streaming format" in https://arrow.apache.org/docs/python/ipc.html.

.. versionchanged:: 1.1
    The `future` parameter was renamed `compat_level`.

Parameters
----------
file
    Path or writable file-like object to which the IPC record batch data will
    be written. If set to `None`, the output is returned as a BytesIO object.
compression : {'uncompressed', 'lz4', 'zstd'}
    Compression method. Defaults to "uncompressed".
compat_level
    Use a specific compatibility level
    when exporting Polars' internal data structures.

Examples
--------
>>> import pathlib
>>>
>>> df = pl.DataFrame(
...     {
...         "foo": [1, 2, 3, 4, 5],
...         "bar": [6, 7, 8, 9, 10],
...         "ham": ["a", "b", "c", "d", "e"],
...     }
... )
>>> path: pathlib.Path = dirpath / "new_file.arrow"
>>> df.write_ipc_stream(path)
NTr  )	r
   r   r   r   r1   rc   r  r   rH  )r   r  r  r  rE  r  s         r   rH  rI  '  s    R t|9DsDk**%d+D "Ok22*33O(K!!$_E#t--r   zstdl        )r  compression_level
statisticsrow_group_sizedata_page_sizer3  pyarrow_optionspartition_bypartition_chunk_size_bytesr  r  r  metadatamkdirc               (   Uc  Sn[        U[        [        45      (       a2  U	c  Ub!  UR                  S5      (       a  [	        USS9nO[	        U5      nU(       GaB  US:X  d  [        U[
        5      (       a  Sn[        U5      eUb  Sn[        U5      eU(       a  S	n[        U5      eU R                  5       n0 n[        U5       H)  u  nnUR                  c  S
U 3OUR                  nUUU'   M+     [        R                  " U5      nSSKnUc  0 nUS:X  a  SOUUS'   X8S'   XHS'   XXS'   XhS'   UR                  S5      (       a-  [        R                  R                  " SUUS.U=(       d    0 D6  g[        R                  R                  " SUUS.U=(       d    0 D6  gUnSnU	b2  [        U[        5      (       d  Sn[!        U5      eSSKJn  U" XS9nSnSnSSKJn  U R+                  5       R-                  UUUUUUUUUUUUUR/                  5       S9  g)aP  
Write to Apache Parquet file.

Parameters
----------
file
    File path or writable file-like object to which the result will be written.
    This should be a path to a directory if writing a partitioned dataset.
compression : {'lz4', 'uncompressed', 'snappy', 'gzip', 'lzo', 'brotli', 'zstd'}
    Choose "zstd" for good compression performance.
    Choose "lz4" for fast compression/decompression.
    Choose "snappy" for more backwards compatibility guarantees
    when you deal with older parquet readers.
compression_level
    The level of compression to use. Higher compression means smaller files on
    disk.

    - "gzip" : min-level: 0, max-level: 9.
    - "brotli" : min-level: 0, max-level: 11.
    - "zstd" : min-level: 1, max-level: 22.

statistics
    Write statistics to the parquet headers. This is the default behavior.

    Possible values:

    - `True`: enable default set of statistics (default). Some
      statistics may be disabled.
    - `False`: disable all statistics
    - "full": calculate and write all available statistics. Cannot be
      combined with `use_pyarrow`.
    - `{ "statistic-key": True / False, ... }`. Cannot be combined with
      `use_pyarrow`. Available keys:

      - "min": column minimum value (default: `True`)
      - "max": column maximum value (default: `True`)
      - "distinct_count": number of unique column values (default: `False`)
      - "null_count": number of null values in column (default: `True`)
row_group_size
    Size of the row groups in number of rows. Defaults to 512^2 rows.
data_page_size
    Size of the data page in bytes. Defaults to 1024^2 bytes.
use_pyarrow
    Use C++ parquet implementation vs Rust parquet implementation.
    At the moment C++ supports more features.
pyarrow_options
    Arguments passed to `pyarrow.parquet.write_table`.

    If you pass `partition_cols` here, the dataset will be written
    using `pyarrow.parquet.write_to_dataset`.
    The `partition_cols` parameter leads to write the dataset to a directory.
    Similar to Spark's partitioned datasets.
partition_by
    Column(s) to partition by. A partitioned dataset will be written if this is
    specified. This parameter is considered unstable and is subject to change.
partition_chunk_size_bytes
    Approximate size to split DataFrames within a single partition when
    writing. Note this is calculated using the size of the DataFrame in
    memory - the size of the output file may differ depending on the
    file format / compression.
storage_options
    Options that indicate how to connect to a cloud provider.

    The cloud providers currently supported are AWS, GCP, and Azure.
    See supported keys here:

    * `aws <https://docs.rs/object_store/latest/object_store/aws/enum.AmazonS3ConfigKey.html>`_
    * `gcp <https://docs.rs/object_store/latest/object_store/gcp/enum.GoogleConfigKey.html>`_
    * `azure <https://docs.rs/object_store/latest/object_store/azure/enum.AzureConfigKey.html>`_
    * Hugging Face (`hf://`): Accepts an API key under the `token` parameter:             `{'token': '...'}`, or by setting the `HF_TOKEN` environment variable.

    If `storage_options` is not provided, Polars will try to infer the
    information from environment variables.
credential_provider
    Provide a function that can be called to provide cloud storage
    credentials. The function is expected to return a dictionary of
    credential keys along with an optional credential expiry time.

    .. warning::
        This functionality is considered **unstable**. It may be changed
        at any point without it being considered a breaking change.
retries
    Number of retries if accessing a cloud instance fails.
metadata
    A dictionary or callback to add key-values to the file-level Parquet
    metadata.

    .. warning::
        This functionality is considered **experimental**. It may be removed or
        changed at any point without it being considered a breaking change.
mkdir: bool
    Recursively create all the directories in the path.

    .. warning::
        This functionality is considered **unstable**. It may be changed at any
        point without it being considered a breaking change.

Examples
--------
>>> import pathlib
>>>
>>> df = pl.DataFrame(
...     {
...         "foo": [1, 2, 3, 4, 5],
...         "bar": [6, 7, 8, 9, 10],
...         "ham": ["a", "b", "c", "d", "e"],
...     }
... )
>>> path: pathlib.Path = dirpath / "new_file.parquet"
>>> df.write_parquet(path)

We can use pyarrow with use_pyarrow_write_to_dataset=True
to write partitioned datasets. The following example will
write the first row to ../watermark=1/*.parquet and the
other rows to ../watermark=2/*.parquet.

>>> df = pl.DataFrame({"a": [1, 2, 3], "watermark": [1, 2, 2]})
>>> path: pathlib.Path = dirpath / "partitioned_object"
>>> df.write_parquet(
...     path,
...     use_pyarrow=True,
...     pyarrow_options={"partition_cols": ["watermark"]},
... )
Nr  partition_colsF)check_not_directoryfullzQwrite_parquet with `use_pyarrow=True` allows only boolean values for `statistics`zHwrite_parquet with `use_pyarrow=True` cannot be combined with `metadata`zEwrite_parquet with `use_pyarrow=True` cannot be combined with `mkdir`column_r   r  rN  write_statisticsrP  rQ  )r  	root_path)r  wherer  z5expected file to be a `str` since partition-by is set)PartitionByKey)byT	streamingr  )r  rN  rO  rP  rQ  r  r  r  rU  r  rV  r  r  )r   r   r   r  r1   r   r   r  r`  _namer   r  pyarrow.parquetparquetwrite_to_datasetwrite_tabler   	polars.ior_  r  r  r  sink_parquetr  )r   r  r  rN  rO  rP  rQ  r3  rR  rS  rT  r  r  r  rU  rV  r   r  r   re  r   r  r[   rR  r  r_  r  s                              r   write_parquetDataFrame.write_parquetb  sG   d (KdS$K(('+0C0CDT0U0U)$EJ)$/V#z*d'C'Ci o%#` o%] o%--/CD&s^	6(.(<}&,,#T
	 , ((4.C #&"$#~5; M* 4E/02<./0>,-0>,-""#344

++ " ',"  

&&  '," >B(#dC((Mn$0#D:FE F<		  #/!))+ 3'..0 	! 	
r   fail)if_table_existsr  engine_optionsc          
     	   U[        [        5      =n;  a,  SR                  S U 5       5      nSU SU< 3n[        U5      e[	        U5      R
                  R                  SS5      S   n	Uc6  [        U[        5      (       d  U	S	:X  a  S	nOU	R                  S
5      (       a  S
nS9S jn
US
:X  Ga;  SSK
JnJnJn  [        U[        5      (       a
  U" U5      S4OUS4u  p[        S5      n[        UUR                  R                   5      (       d  SU< 3n[#        U5      e[%        USS5      n['        U5      nUS:X  a  SnO:US:X  a  US:  a  SU 3n[)        U5      eSnOUS:X  a  SnOSU< S3n[        U5      eU(       a  UO[*        R,                  " 5          UR/                  5        nU
" U5      u  nnn[        U[        5      (       a  U" U5      OU	nU" USS9n[%        USS5      n['        U5      nUR                  S5      S   S:X  a,  USnnUS:  a"  US:  a  US:X  a  UR1                  S U 35        SnUS!:  a  U OU R3                  5       nUS:  a(  US":  a"  UR4                  " U4UUUUS#.U=(       d    0 D6nOJUb(  UR7                  SS$5      nS%U S&U S'U 3n[)        U5      eUR4                  " S:UUUS(.U=(       d    0 D6nUR9                  5         SSS5        SSS5        W$ US	:X  Ga6  [:        (       d  S)n[=        U5      e['        [>        R@                  5      =nS*:  a  S+[>        R@                  < 3n[)        U5      e[        S	US,:  a  S-OS.S/S09  SS1K!J"n J#n!  SS2K$J%n"  [        U[        5      (       a	  U!" U5      n#OG[        UU"5      (       a  URM                  5       n#O%[        UU 5      (       a  Un#OSU< 3n[#        U5      eU
" U5      u  nnnU(       a  S3U< S43n[        U5      eU RO                  SS59RP                  " S:UUU#USS6.U=(       d    0 D6n$U$c  S$ U$$ [        U[        5      (       a  S7U< S83n[        U5      eSU< 3n[#        U5      e! , (       d  f       GN= f! , (       d  f       W$ = f);a?	  
Write the data in a Polars DataFrame to a database.

.. versionadded:: 0.20.26
    Support for instantiated connection objects in addition to URI strings, and
    a new `engine_options` parameter.

Parameters
----------
table_name
    Schema-qualified name of the table to create or append to in the target
    SQL database. If your table name contains special characters, it should
    be quoted.
connection
    An existing SQLAlchemy or ADBC connection against the target database, or
    a URI string that will be used to instantiate such a connection, such as:

    * "postgresql://user:pass@server:port/database"
    * "sqlite:////path/to/database.db"
if_table_exists : {'append', 'replace', 'fail'}
    The insert mode:

    * 'replace' will create a new database table, overwriting an existing one.
    * 'append' will append to an existing table.
    * 'fail' will fail if table already exists.
engine : {'sqlalchemy', 'adbc'}
    Select the engine to use for writing frame data; only necessary when
    supplying a URI string (defaults to 'sqlalchemy' if unset)
engine_options
    Additional options to pass to the insert method associated with the engine
    specified by the option `engine`.

    * Setting `engine` to "sqlalchemy" currently inserts using Pandas' `to_sql`
      method (though this will eventually be phased out in favor of a native
      solution).
    * Setting `engine` to "adbc" inserts using the ADBC cursor's `adbc_ingest`
      method.

Examples
--------
Insert into a temporary table using a PostgreSQL URI and the ADBC engine:

>>> df.write_database(
...     table_name="target_table",
...     connection="postgresql://user:pass@server:port/database",
...     engine="adbc",
...     engine_options={"temporary": True},
... )  # doctest: +SKIP

Insert into a table using a `pyodbc` SQLAlchemy connection to SQL Server
that was instantiated with "fast_executemany=True" to improve performance:

>>> pyodbc_uri = (
...     "mssql+pyodbc://user:pass@server:1433/test?"
...     "driver=ODBC+Driver+18+for+SQL+Server"
... )
>>> engine = create_engine(pyodbc_uri, fast_executemany=True)  # doctest: +SKIP
>>> df.write_database(
...     table_name="target_table",
...     connection=engine,
... )  # doctest: +SKIP

Returns
-------
int
    The number of rows affected, if the driver provides this information.
    Otherwise, returns -1.
rV  c              3  8   #    U  H  n[        U5      v   M     g 7fr   )repr).0ms     r   	<genexpr>+DataFrame.write_database.<locals>.<genexpr>  s     C1BAQ1Bs   z1write_database `if_table_exists` must be one of {z}, got r}  r  r   N
sqlalchemyadbcc                    SSK Jn  [        U" U /SS95      n[        U5      S:  a  SU  S3n[	        U5      eS/S[        U5      -
  -  U-   u  pEnXEU4$ )	zEUnpack optionally qualified table name to catalog/schema/table tuple.r   )readerr}  )	delimiterr   z%`table_name` appears to be invalid: ''N)csvrx  nextr  r   )r  delimited_read
componentsr   catalogr   r  s          r   unpack_table_name3DataFrame.write_database.<locals>.unpack_table_name  sg    4+/vQT0U+VJ:"=dV1E o%%)Fa#j/.A$Bj#P GSC''r   )_get_adbc_module_name_from_uri_import_optional_adbc_driver_open_adbc_connectionTFadbc_driver_managerzunrecognised connection type r  z0.0rk  creater   )r      zB`if_table_exists = 'replace'` requires ADBC version >= 0.7, found r  z(unexpected value for `if_table_exists`: z-

Choose one of {'fail', 'replace', 'append'})dbapi_submodule_sqlite)r      zDROP TABLE IF EXISTS )r     )r   r|  )r   modecatalog_namedb_schema_name-zYuse of schema-qualified table names requires adbc-driver-manager version >= 0.7.0, found z and z version >= 0.8.0, found )r  r   r  z]writing with 'sqlalchemy' engine currently requires pandas.

Install with: pip install pandasr{  z?writing with 'sqlalchemy' engine requires pandas >= 1.5; found )r  r  z2.0z1.4zpandas >= 2.2 requires)module_namemin_versionmin_err_prefix)Connectablecreate_engine)Sessionz@Unexpected three-part table name; provide the database/catalog (z) on the connection URIrx  )r  r   con	if_existsr  zengine z is not supported)r  r   r  z"tuple[str | None, str | None, str]r  ))r   r   r  r   r   
__module__splitr   r   
startswithpolars.io.database._utilsr  r  r  rW   dbapi
Connectionr   getattrr2   r^   r]  r^  cursorexecuter  adbc_ingestr   commitrO   r  r   r  sqlalchemy.enginer  r  sqlalchemy.ormr  
connectionr  to_sql)%r   r  r  rl  r  rm  valid_write_modesallowedr   connection_module_rootr  r  r  r  conncan_close_conndriver_managerdriver_manager_str_versiondriver_manager_versionr  r  r  	db_schemaunpacked_table_nameadbc_module_nameadbc_driveradbc_driver_str_versionadbc_driver_versionr   n_rowsadbc_driver_pypi_name
pd_versionr  r  r  	sa_objectress%                                        r   write_databaseDataFrame.write_databaseP  sH   Z 8M#M#4NiiC1BCCGFwixXgWjkCS/!!%j!1!<!<!B!B3!J1!M>*c**.D.T%'226::		( V  j#.. 'z2D9 %( !D --BCN dN$8$8$C$CDD5j^Dn$)0PU)V&%23M%N"&(   I-)F2!!; <>  5S99  H, ?>QGI  !o% 'J,B,B,DD:KJ:W7$7
 "*c22 3:>/ !
 ;$e +2+}e*T'&34K&L##))#.r2h>)2DYG
 /&8/'9+y8)>zl'KL'  6?tT]]_
 *V38Kv8U#//+!!%,'0 */RF *,<,D,DS#,N)G56e<Q;R S33J2KM  5   $// #6!! */R	F C  EF M|#$$w)#.. -bnn ==*GWXZXfXfWij055(&0F&:U7 E. *c**)*5	J00&113	J44&	5j^Dn$6G
6S3GY 3XY`Xccz{ o% #nn,0 - f 	 ) )	 "'R	C 2-#-$$F:%67CS/!1*@CC. q  EDF Ms%   Q1)D/QQ1
Q.	)Q11
R c                   SSK Jn  [        U[        5      (       a  U" 5       nUR	                  U5      nOUnU R                  [        R                  " 5       S9nUS:X  a  UR                  U5        gUR                  U5        g)a  
Write DataFrame to an Iceberg table.

.. warning::
    This functionality is currently considered **unstable**. It may be
    changed at any point without it being considered a breaking change.

Parameters
----------
target
    Name of the table or the Table object representing an Iceberg table.
mode : {'append', 'overwrite'}
    How to handle existing data.

    - If 'append', will add new data.
    - If 'overwrite', will replace table with new data.

r   )load_catalogr  r  N)
pyiceberg.catalogr  r   r   
load_tabler  rc   oldestr  	overwrite)r   rR  r  r  r  r  r   s          r   write_icebergDataFrame.write_iceberg]  si    0 	3fc"""nG&&v.EE}}+*<*<*>}?8LLOOD!r   )r  overwrite_schemar  r  delta_write_optionsc                   g r   r  )r   rR  r  r  r  r  r  s          r   write_deltaDataFrame.write_delta  s     r   )r  r  r  c                   g r   r  )r   rR  r  r  r  r  delta_merge_optionss          r   r  r    s     '*r   error)r  r  r  r  r  r  c                  Ub
  [        SSS9  SSKJnJn	Jn
  U	" 5         SSKJnJn  U" U R                  5        [        U[        [        45      (       a  U
" [        U5      SS	9nSS
KJn  SSKJn  [        X5      (       d  U" XQUS5      nOUb  US:w  a  Sn[!        U5      eSnA0 nU(       a(  UR#                  5       =n(       a  U" U5      =(       d    0 nUc  Ub  0 U=(       d    0 EUEOSnUS:X  aA  Uc  Sn[!        U5      e[        U[        5      (       a  U" XS9nOUnUR$                  " U 40 UD6$ Uc  0 nU(       a  SUS'   U" SUU UUS.UD6  g)aP  
Write DataFrame as delta table.

Parameters
----------
target
    URI of a table or a DeltaTable object.
mode : {'error', 'append', 'overwrite', 'ignore', 'merge'}
    How to handle existing data.

    - If 'error', throw an error if the table already exists (default).
    - If 'append', will add new data.
    - If 'overwrite', will replace table with new data.
    - If 'ignore', will not write anything if table already exists.
    - If 'merge', return a `TableMerger` object to merge data from the DataFrame
      with the existing data.
overwrite_schema
    If True, allows updating the schema of the table.

    .. deprecated:: 0.20.14
        Use the parameter `delta_write_options` instead and pass
        `{"schema_mode": "overwrite"}`.
storage_options
    Extra options for the storage backends supported by `deltalake`.
    For cloud storages, this may include configurations for authentication etc.

    - See a list of supported storage options for S3 `here <https://docs.rs/object_store/latest/object_store/aws/enum.AmazonS3ConfigKey.html#variants>`__.
    - See a list of supported storage options for GCS `here <https://docs.rs/object_store/latest/object_store/gcp/enum.GoogleConfigKey.html#variants>`__.
    - See a list of supported storage options for Azure `here <https://docs.rs/object_store/latest/object_store/azure/enum.AzureConfigKey.html#variants>`__.
credential_provider
    Provide a function that can be called to provide cloud storage
    credentials. The function is expected to return a dictionary of
    credential keys along with an optional credential expiry time.

    .. warning::
        This functionality is considered **unstable**. It may be changed
        at any point without it being considered a breaking change.
delta_write_options
    Additional keyword arguments while writing a Delta lake Table.
    See a list of supported write options `here <https://delta-io.github.io/delta-rs/api/delta_writer/#deltalake.write_deltalake>`__.
delta_merge_options
    Keyword arguments which are required to `MERGE` a Delta lake Table.
    See a list of supported merge options `here <https://delta-io.github.io/delta-rs/api/delta_table/#deltalake.DeltaTable.merge>`__.

Raises
------
TypeError
    If the DataFrame contains unsupported data types.
ArrowInvalidError
    If the DataFrame contains data types that could not be cast to their
    primitive type.
TableNotFoundError
    If the delta table doesn't exist and MERGE action is triggered

Notes
-----
The Polars data types :class:`Null` and :class:`Time` are not supported
by the delta protocol specification and will raise a TypeError. Columns
using The :class:`Categorical` data type will be converted to
normal (non-categorical) strings when written.

Polars columns are always nullable. To write data to a delta table with
non-nullable columns, a custom pyarrow schema has to be passed to the
`delta_write_options`. See the last example below.

Examples
--------
Write a dataframe to the local filesystem as a Delta Lake table.

>>> df = pl.DataFrame(
...     {
...         "foo": [1, 2, 3, 4, 5],
...         "bar": [6, 7, 8, 9, 10],
...         "ham": ["a", "b", "c", "d", "e"],
...     }
... )
>>> table_path = "/path/to/delta-table/"
>>> df.write_delta(table_path)  # doctest: +SKIP

Append data to an existing Delta Lake table on the local filesystem.
Note that this will fail if the schema of the new data does not match the
schema of the existing table.

>>> df.write_delta(table_path, mode="append")  # doctest: +SKIP

Overwrite a Delta Lake table as a new version.
If the schemas of the new and old data are the same, specifying the
`schema_mode` is not required.

>>> existing_table_path = "/path/to/delta-table/"
>>> df.write_delta(
...     existing_table_path,
...     mode="overwrite",
...     delta_write_options={"schema_mode": "overwrite"},
... )  # doctest: +SKIP

Write a DataFrame as a Delta Lake table to a cloud object store like S3.

>>> table_path = "s3://bucket/prefix/to/delta-table/"
>>> df.write_delta(
...     table_path,
...     storage_options={
...         "AWS_REGION": "THE_AWS_REGION",
...         "AWS_ACCESS_KEY_ID": "THE_AWS_ACCESS_KEY_ID",
...         "AWS_SECRET_ACCESS_KEY": "THE_AWS_SECRET_ACCESS_KEY",
...     },
... )  # doctest: +SKIP

Write DataFrame as a Delta Lake table with non-nullable columns.

>>> import pyarrow as pa
>>> existing_table_path = "/path/to/delta-table/"
>>> df.write_delta(
...     existing_table_path,
...     delta_write_options={
...         "schema": pa.schema([pa.field("foo", pa.int64(), nullable=False)])
...     },
... )  # doctest: +SKIP

Write DataFrame as a Delta Lake table with zstd compression.
For all `delta_write_options` keyword arguments, check the deltalake docs
`here
<https://delta-io.github.io/delta-rs/api/delta_writer/#deltalake.write_deltalake>`__,
and for Writer Properties in particular `here
<https://delta-io.github.io/delta-rs/api/delta_writer/#deltalake.WriterProperties>`__.

>>> import deltalake
>>> df.write_delta(
...     table_path,
...     delta_write_options={
...         "writer_properties": deltalake.WriterProperties(compression="zstd"),
...     },
... )  # doctest: +SKIP

Merge the DataFrame with an existing Delta Lake table.
For all `TableMerger` methods, check the deltalake docs
`here <https://delta-io.github.io/delta-rs/api/delta_table/delta_table_merger/>`__.

>>> df = pl.DataFrame(
...     {
...         "foo": [1, 2, 3, 4, 5],
...         "bar": [6, 7, 8, 9, 10],
...         "ham": ["a", "b", "c", "d", "e"],
...     }
... )
>>> table_path = "/path/to/delta-table/"
>>> (
...     df.write_delta(
...         "table_path",
...         mode="merge",
...         delta_merge_options={
...             "predicate": "s.foo = t.foo",
...             "source_alias": "s",
...             "target_alias": "t",
...         },
...     )
...     .when_matched_update_all()
...     .when_not_matched_insert_all()
...     .execute()
... )  # doctest: +SKIP
Nzthe parameter `overwrite_schema` for `write_delta` is deprecated. Use the parameter `delta_write_options` instead and pass `{"schema_mode": "overwrite"}`.0.20.14r  r   )_check_for_unsupported_types_check_if_delta_available_resolve_delta_lake_uri)
DeltaTablewrite_deltalakeFr   )!_init_credential_provider_builder)+_get_credentials_from_provider_expiry_awarer  r  z?cannot use credential_provider when passing a DeltaTable objectmergezYyou need to pass delta_merge_options with at least a given predicate for `MERGE` to work.)	table_urir  r  schema_mode)table_or_urir   r  r  r  )r&   polars.io.deltar  r  r  	deltalaker  r  r  r   r   r   ,polars.io.cloud.credential_provider._builderr  .polars.io.cloud.credential_provider._providersr  r   build_credential_providerr  )r   rR  r  r  r  r  r  r  r  r  r  r  r  r  r  credential_provider_builderr   credential_provider_credsproviderdts                       r   r  r    s   \ '%l!	
 	
 	"#9$T[[1fsDk**,S[GF	
	
 &--*K#_m+' !,1D1NSCS/!*.'$&!&3MMOOHO <HEK & *.I.U E%2D*CD 	 7?"*q o%&#&&&R88D8$788 #*&(#5@#M2 # /	
 & r   c                L    U R                   R                  5       n[        X!5      $ )a)  
Return an estimation of the total (heap) allocated size of the `DataFrame`.

Estimated size is given in the specified unit (bytes by default).

This estimation is the sum of the size of its buffers, validity, including
nested arrays. Multiple arrays may share buffers and bitmaps. Therefore, the
size of 2 arrays is not the sum of the sizes computed from this function. In
particular, [`StructArray`]'s size is an upper bound.

When an array is sliced, its allocated size remains constant because the buffer
unchanged. However, this function will yield a smaller number. This is because
this function returns the visible size of the buffer, not its total capacity.

FFI buffers are included in this estimation.

Notes
-----
For data with Object dtype, the estimated size only reports the pointer
size, which is a huge underestimation.

Parameters
----------
unit : {'b', 'kb', 'mb', 'gb', 'tb'}
    Scale the returned size to the given unit.

Examples
--------
>>> df = pl.DataFrame(
...     {
...         "x": list(reversed(range(1_000_000))),
...         "y": [v / 1000 for v in range(1_000_000)],
...         "z": [str(v) for v in range(1_000_000)],
...     },
...     schema=[("x", pl.UInt32), ("y", pl.Float64), ("z", pl.String)],
... )
>>> df.estimated_size()
17888890
>>> df.estimated_size("mb")
17.0601749420166
)r   estimated_sizer5   )r   unitszs      r   r  DataFrame.estimated_size  s"    T XX$$&2$$r   )r  header_namecolumn_namesc                  U(       a  UOSn[        U[        5      (       a/  [        U R                  5       Vs/ s H  n[	        U5      PM     nnOUnU R                  U R                  R                  XF5      5      $ s  snf )u  
Transpose a DataFrame over the diagonal.

Parameters
----------
include_header
    If set, the column names will be added as first column.
header_name
    If `include_header` is set, this determines the name of the column that will
    be inserted.
column_names
    Optional iterable yielding strings or a string naming an existing column.
    These will name the value (non-header) columns in the transposed data.

Notes
-----
This is a very expensive operation. Perhaps you can do it differently.

Returns
-------
DataFrame

Examples
--------
>>> df = pl.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
>>> df.transpose(include_header=True)
shape: (2, 4)
┌────────┬──────────┬──────────┬──────────┐
│ column ┆ column_0 ┆ column_1 ┆ column_2 │
│ ---    ┆ ---      ┆ ---      ┆ ---      │
│ str    ┆ i64      ┆ i64      ┆ i64      │
╞════════╪══════════╪══════════╪══════════╡
│ a      ┆ 1        ┆ 2        ┆ 3        │
│ b      ┆ 4        ┆ 5        ┆ 6        │
└────────┴──────────┴──────────┴──────────┘

Replace the auto-generated column names with a list

>>> df.transpose(include_header=False, column_names=["x", "y", "z"])
shape: (2, 3)
┌─────┬─────┬─────┐
│ x   ┆ y   ┆ z   │
│ --- ┆ --- ┆ --- │
│ i64 ┆ i64 ┆ i64 │
╞═════╪═════╪═════╡
│ 1   ┆ 2   ┆ 3   │
│ 4   ┆ 5   ┆ 6   │
└─────┴─────┴─────┘

Include the header as a separate column

>>> df.transpose(
...     include_header=True, header_name="foo", column_names=["x", "y", "z"]
... )
shape: (2, 4)
┌─────┬─────┬─────┬─────┐
│ foo ┆ x   ┆ y   ┆ z   │
│ --- ┆ --- ┆ --- ┆ --- │
│ str ┆ i64 ┆ i64 ┆ i64 │
╞═════╪═════╪═════╪═════╡
│ a   ┆ 1   ┆ 2   ┆ 3   │
│ b   ┆ 4   ┆ 5   ┆ 6   │
└─────┴─────┴─────┴─────┘

Replace the auto-generated column with column names from a generator function

>>> def name_generator():
...     base_name = "my_column_"
...     count = 0
...     while True:
...         yield f"{base_name}{count}"
...         count += 1
>>> df.transpose(include_header=False, column_names=name_generator())
shape: (2, 3)
┌─────────────┬─────────────┬─────────────┐
│ my_column_0 ┆ my_column_1 ┆ my_column_2 │
│ ---         ┆ ---         ┆ ---         │
│ i64         ┆ i64         ┆ i64         │
╞═════════════╪═════════════╪═════════════╡
│ 1           ┆ 2           ┆ 3           │
│ 4           ┆ 5           ┆ 6           │
└─────────────┴─────────────┴─────────────┘

Use an existing column as the new column names

>>> df = pl.DataFrame(dict(id=["i", "j", "k"], a=[1, 2, 3], b=[4, 5, 6]))
>>> df.transpose(column_names="id")
shape: (2, 3)
┌─────┬─────┬─────┐
│ i   ┆ j   ┆ k   │
│ --- ┆ --- ┆ --- │
│ i64 ┆ i64 ┆ i64 │
╞═════╪═════╪═════╡
│ 1   ┆ 2   ┆ 3   │
│ 4   ┆ 5   ┆ 6   │
└─────┴─────┴─────┘
>>> df.transpose(include_header=True, header_name="new_id", column_names="id")
shape: (2, 4)
┌────────┬─────┬─────┬─────┐
│ new_id ┆ i   ┆ j   ┆ k   │
│ ---    ┆ --- ┆ --- ┆ --- │
│ str    ┆ i64 ┆ i64 ┆ i64 │
╞════════╪═════╪═════╪═════╡
│ a      ┆ 1   ┆ 2   ┆ 3   │
│ b      ┆ 4   ┆ 5   ┆ 6   │
└────────┴─────┴─────┴─────┘
N)r   r   rY  r  r|  r   r   	transpose)r   r  r  r  keep_names_asr  column_names_s          r   r  DataFrame.transpose  sj    d (64lI..9>t{{9KL9KAT,/9KMLM(Mtxx11-OPP Ms   A<c                h    U R                  [        R                  " S5      R                  5       5      $ )u  
Reverse the DataFrame.

Examples
--------
>>> df = pl.DataFrame(
...     {
...         "key": ["a", "b", "c"],
...         "val": [1, 2, 3],
...     }
... )
>>> df.reverse()
shape: (3, 2)
┌─────┬─────┐
│ key ┆ val │
│ --- ┆ --- │
│ str ┆ i64 │
╞═════╪═════╡
│ c   ┆ 3   │
│ b   ┆ 2   │
│ a   ┆ 1   │
└─────┴─────┘
r  )rJ  rK  ra   reverser  s    r   r  DataFrame.reverseF  s$    0 {{155:--/00r   r  c               ~    SSK Jn  U R                  5       R                  XS9R	                  UR                  5       S9$ )u/  
Rename column names.

Parameters
----------
mapping
    Key value pairs that map from old name to new name, or a function
    that takes the old name as input and returns the new name.
strict
    Validate that all column names exist in the current schema,
    and throw an exception if any do not. (Note that this parameter
    is a no-op when passing a function to `mapping`).

Examples
--------
>>> df = pl.DataFrame(
...     {"foo": [1, 2, 3], "bar": [6, 7, 8], "ham": ["a", "b", "c"]}
... )
>>> df.rename({"foo": "apple"})
shape: (3, 3)
┌───────┬─────┬─────┐
│ apple ┆ bar ┆ ham │
│ ---   ┆ --- ┆ --- │
│ i64   ┆ i64 ┆ str │
╞═══════╪═════╪═════╡
│ 1     ┆ 6   ┆ a   │
│ 2     ┆ 7   ┆ b   │
│ 3     ┆ 8   ┆ c   │
└───────┴─────┴─────┘
>>> df.rename(lambda column_name: "c" + column_name[1:])
shape: (3, 3)
┌─────┬─────┬─────┐
│ coo ┆ car ┆ cam │
│ --- ┆ --- ┆ --- │
│ i64 ┆ i64 ┆ str │
╞═════╪═════╪═════╡
│ 1   ┆ 6   ┆ a   │
│ 2   ┆ 7   ┆ b   │
│ 3   ┆ 8   ┆ c   │
└─────┴─────┴─────┘
r   r  r  r  )r  r  r  renamecollectr  )r   mappingr   r  s       r   r  DataFrame.rename`  s9    X 	= IIKVGV+W=#7#7#9W:	
r   c                   U=nS:  a4  U R                   U-   nUS:  a  SU SU R                    S3n[        U5      eO-XR                   :  a  SU SU R                    S3n[        U5      e[        U[        R                  5      (       a'  U R
                  R                  XR                  5        U $ [        U[        5      (       a  [        R                  " U5      n[        U[        R                  5      (       a?  U R                  nUR                  X5        U R                  U5      R
                  U l        U $ SU< S[        U5       S3n[!        U5      e)u  
Insert a Series (or expression) at a certain column index.

This operation is in place.

Parameters
----------
index
    Index at which to insert the new column.
column
    `Series` or expression to insert.

Examples
--------
Insert a new Series column at the given index:

>>> df = pl.DataFrame({"foo": [1, 2, 3], "bar": [4, 5, 6]})
>>> s = pl.Series("baz", [97, 98, 99])
>>> df.insert_column(1, s)
shape: (3, 3)
┌─────┬─────┬─────┐
│ foo ┆ baz ┆ bar │
│ --- ┆ --- ┆ --- │
│ i64 ┆ i64 ┆ i64 │
╞═════╪═════╪═════╡
│ 1   ┆ 97  ┆ 4   │
│ 2   ┆ 98  ┆ 5   │
│ 3   ┆ 99  ┆ 6   │
└─────┴─────┴─────┘

Insert a new expression column at the given index:

>>> df = pl.DataFrame(
...     {"a": [2, 4, 2], "b": [0.5, 4, 10], "c": ["xx", "yy", "zz"]}
... )
>>> expr = (pl.col("b") / pl.col("a")).alias("b_div_a")
>>> df.insert_column(2, expr)
shape: (3, 4)
┌─────┬──────┬─────────┬─────┐
│ a   ┆ b    ┆ b_div_a ┆ c   │
│ --- ┆ ---  ┆ ---     ┆ --- │
│ i64 ┆ f64  ┆ f64     ┆ str │
╞═════╪══════╪═════════╪═════╡
│ 2   ┆ 0.5  ┆ 0.25    ┆ xx  │
│ 4   ┆ 4.0  ┆ 1.0     ┆ yy  │
│ 2   ┆ 10.0 ┆ 5.0     ┆ zz  │
└─────┴──────┴─────────┴─────┘
r   zcolumn index z is out of range (frame has z	 columns)z%column must be a Series or Expr, got z (type=))r   
IndexErrorr   r   ru   r   insert_columnr   r   rK  ra   rs   r  r  rJ  r3   r   )r   r  r   original_indexr   colss         r   r  DataFrame.insert_column  s<   b $#Nq(JJ&Eqy%n%55QRVR\R\Q]]fg o%  ZZ!.!11Mdjj\YbcCS/!fbii((HH""5))4  &#&&v&"''**||E*;;t,00  >fZwObciOjNkklmn$r   c                    SSK Jn  U R                  5       R                  " U0 UD6R	                  UR                  5       S9$ )u  
Filter rows, retaining those that match the given predicate expression(s).

The original order of the remaining rows is preserved.

Only rows where the predicate resolves as True are retained; when the
predicate result is False (or null), the row is discarded.

Parameters
----------
predicates
    Expression(s) that evaluate to a boolean Series.
constraints
    Column filters; use `name = value` to filter columns by the supplied value.
    Each constraint will behave the same as `pl.col(name).eq(value)`, and
    be implicitly joined with the other filter conditions using `&`.

Notes
-----
If you are transitioning from Pandas, and performing filter operations based on
the comparison of two or more columns, please note that in Polars any comparison
involving `null` values will result in a `null` result, *not* boolean True or
False. As a result, these rows will not be retained. Ensure that null values
are handled appropriately to avoid unexpected behaviour (see examples below).

See Also
--------
remove

Examples
--------
>>> df = pl.DataFrame(
...     {
...         "foo": [1, 2, 3, None, 4, None, 0],
...         "bar": [6, 7, 8, None, None, 9, 0],
...         "ham": ["a", "b", "c", None, "d", "e", "f"],
...     }
... )

Filter rows matching a condition:

>>> df.filter(pl.col("foo") > 1)
shape: (3, 3)
┌─────┬──────┬─────┐
│ foo ┆ bar  ┆ ham │
│ --- ┆ ---  ┆ --- │
│ i64 ┆ i64  ┆ str │
╞═════╪══════╪═════╡
│ 2   ┆ 7    ┆ b   │
│ 3   ┆ 8    ┆ c   │
│ 4   ┆ null ┆ d   │
└─────┴──────┴─────┘

Filter on multiple conditions, combined with and/or operators:

>>> df.filter(
...     (pl.col("foo") < 3) & (pl.col("ham") == "a"),
... )
shape: (1, 3)
┌─────┬─────┬─────┐
│ foo ┆ bar ┆ ham │
│ --- ┆ --- ┆ --- │
│ i64 ┆ i64 ┆ str │
╞═════╪═════╪═════╡
│ 1   ┆ 6   ┆ a   │
└─────┴─────┴─────┘

>>> df.filter(
...     (pl.col("foo") == 1) | (pl.col("ham") == "c"),
... )
shape: (2, 3)
┌─────┬─────┬─────┐
│ foo ┆ bar ┆ ham │
│ --- ┆ --- ┆ --- │
│ i64 ┆ i64 ┆ str │
╞═════╪═════╪═════╡
│ 1   ┆ 6   ┆ a   │
│ 3   ┆ 8   ┆ c   │
└─────┴─────┴─────┘

Provide multiple filters using `*args` syntax:

>>> df.filter(
...     pl.col("foo") <= 2,
...     ~pl.col("ham").is_in(["b", "c"]),
... )
shape: (2, 3)
┌─────┬─────┬─────┐
│ foo ┆ bar ┆ ham │
│ --- ┆ --- ┆ --- │
│ i64 ┆ i64 ┆ str │
╞═════╪═════╪═════╡
│ 1   ┆ 6   ┆ a   │
│ 0   ┆ 0   ┆ f   │
└─────┴─────┴─────┘

Provide multiple filters using `**kwargs` syntax:

>>> df.filter(foo=2, ham="b")
shape: (1, 3)
┌─────┬─────┬─────┐
│ foo ┆ bar ┆ ham │
│ --- ┆ --- ┆ --- │
│ i64 ┆ i64 ┆ str │
╞═════╪═════╪═════╡
│ 2   ┆ 7   ┆ b   │
└─────┴─────┴─────┘

Filter by comparing two columns against each other:

>>> df.filter(
...     pl.col("foo") == pl.col("bar"),
... )
shape: (1, 3)
┌─────┬─────┬─────┐
│ foo ┆ bar ┆ ham │
│ --- ┆ --- ┆ --- │
│ i64 ┆ i64 ┆ str │
╞═════╪═════╪═════╡
│ 0   ┆ 0   ┆ f   │
└─────┴─────┴─────┘

>>> df.filter(
...     pl.col("foo") != pl.col("bar"),
... )
shape: (3, 3)
┌─────┬─────┬─────┐
│ foo ┆ bar ┆ ham │
│ --- ┆ --- ┆ --- │
│ i64 ┆ i64 ┆ str │
╞═════╪═════╪═════╡
│ 1   ┆ 6   ┆ a   │
│ 2   ┆ 7   ┆ b   │
│ 3   ┆ 8   ┆ c   │
└─────┴─────┴─────┘

Notice how the row with `None` values is filtered out. In order to keep the
same behavior as pandas, use:

>>> df.filter(
...     pl.col("foo").ne_missing(pl.col("bar")),
... )
shape: (5, 3)
┌──────┬──────┬─────┐
│ foo  ┆ bar  ┆ ham │
│ ---  ┆ ---  ┆ --- │
│ i64  ┆ i64  ┆ str │
╞══════╪══════╪═════╡
│ 1    ┆ 6    ┆ a   │
│ 2    ┆ 7    ┆ b   │
│ 3    ┆ 8    ┆ c   │
│ 4    ┆ null ┆ d   │
│ null ┆ 9    ┆ e   │
└──────┴──────┴─────┘
r   r  r  )r  r  r  filterr  r  r   
predicatesconstraintsr  s       r   r  DataFrame.filter  sG    L 	= IIKV0#.0W=#7#7#9W:	
r   c                    SSK Jn  U R                  5       R                  " U0 UD6R	                  UR                  5       S9$ )u(  
Remove rows, dropping those that match the given predicate expression(s).

The original order of the remaining rows is preserved.

Rows where the filter predicate does not evaluate to True are retained
(this includes rows where the predicate evaluates as `null`).

Parameters
----------
predicates
    Expression that evaluates to a boolean Series.
constraints
    Column filters; use `name = value` to filter columns using the supplied
    value. Each constraint behaves the same as `pl.col(name).eq(value)`,
    and is implicitly joined with the other filter conditions using `&`.

Notes
-----
If you are transitioning from Pandas, and performing filter operations based on
the comparison of two or more columns, please note that in Polars any comparison
involving `null` values will result in a `null` result, *not* boolean True or
False. As a result, these rows will not be removed. Ensure that null values
are handled appropriately to avoid unexpected behaviour (see examples below).

See Also
--------
filter

Examples
--------
>>> df = pl.DataFrame(
...     {
...         "foo": [2, 3, None, 4, 0],
...         "bar": [5, 6, None, None, 0],
...         "ham": ["a", "b", None, "c", "d"],
...     }
... )

Remove rows matching a condition:

>>> df.remove(pl.col("bar") >= 5)
shape: (3, 3)
┌──────┬──────┬──────┐
│ foo  ┆ bar  ┆ ham  │
│ ---  ┆ ---  ┆ ---  │
│ i64  ┆ i64  ┆ str  │
╞══════╪══════╪══════╡
│ null ┆ null ┆ null │
│ 4    ┆ null ┆ c    │
│ 0    ┆ 0    ┆ d    │
└──────┴──────┴──────┘

Discard rows based on multiple conditions, combined with and/or operators:

>>> df.remove(
...     (pl.col("foo") >= 0) & (pl.col("bar") >= 0),
... )
shape: (2, 3)
┌──────┬──────┬──────┐
│ foo  ┆ bar  ┆ ham  │
│ ---  ┆ ---  ┆ ---  │
│ i64  ┆ i64  ┆ str  │
╞══════╪══════╪══════╡
│ null ┆ null ┆ null │
│ 4    ┆ null ┆ c    │
└──────┴──────┴──────┘

>>> df.remove(
...     (pl.col("foo") >= 0) | (pl.col("bar") >= 0),
... )
shape: (1, 3)
┌──────┬──────┬──────┐
│ foo  ┆ bar  ┆ ham  │
│ ---  ┆ ---  ┆ ---  │
│ i64  ┆ i64  ┆ str  │
╞══════╪══════╪══════╡
│ null ┆ null ┆ null │
└──────┴──────┴──────┘

Provide multiple constraints using `*args` syntax:

>>> df.remove(
...     pl.col("ham").is_not_null(),
...     pl.col("bar") >= 0,
... )
shape: (2, 3)
┌──────┬──────┬──────┐
│ foo  ┆ bar  ┆ ham  │
│ ---  ┆ ---  ┆ ---  │
│ i64  ┆ i64  ┆ str  │
╞══════╪══════╪══════╡
│ null ┆ null ┆ null │
│ 4    ┆ null ┆ c    │
└──────┴──────┴──────┘

Provide constraints(s) using `**kwargs` syntax:

>>> df.remove(foo=0, bar=0)
shape: (4, 3)
┌──────┬──────┬──────┐
│ foo  ┆ bar  ┆ ham  │
│ ---  ┆ ---  ┆ ---  │
│ i64  ┆ i64  ┆ str  │
╞══════╪══════╪══════╡
│ 2    ┆ 5    ┆ a    │
│ 3    ┆ 6    ┆ b    │
│ null ┆ null ┆ null │
│ 4    ┆ null ┆ c    │
└──────┴──────┴──────┘

Remove rows by comparing two columns against each other:

>>> df.remove(
...     pl.col("foo").ne_missing(pl.col("bar")),
... )
shape: (2, 3)
┌──────┬──────┬──────┐
│ foo  ┆ bar  ┆ ham  │
│ ---  ┆ ---  ┆ ---  │
│ i64  ┆ i64  ┆ str  │
╞══════╪══════╪══════╡
│ null ┆ null ┆ null │
│ 0    ┆ 0    ┆ d    │
└──────┴──────┴──────┘
r   r  r  )r  r  r  remover  r  r  s       r   r  DataFrame.remove  sG    R 	= IIKV0#.0W=#7#7#9W:	
r   )max_items_per_columnmax_colname_lengthreturn_as_stringc                   g r   r  r   r
  r  r  s       r   glimpseDataFrame.glimpse  rK  r   )r
  r  c                   g r   r  r  s       r   r  r  $  s     r   c                   g r   r  r  s       r   r  r  -  s     r   r   2   c          
     2  ^ ^^^ [        UT R                  5      mT R                  mSUUUU 4S jjnT R                  R                  5        VVs/ s H  u  pVU" XV5      PM     nnn[	        S U 5       5      n[	        S U 5       5      n	[        5       n
U
R                  ST R                   ST R                   S35        U H)  u  pnU
R                  SUSU 3 S	US
U	 3 S	U S35        M+     U
R                  5       nU(       a  U$ [        USS9  gs  snnf )ah  
Return a dense preview of the DataFrame.

The formatting shows one line per column so that wide dataframes display
cleanly. Each line shows the column name, the data type, and the first
few values.

Parameters
----------
max_items_per_column
    Maximum number of items to show per column.
max_colname_length
    Maximum length of the displayed column names; values that exceed this
    value are truncated with a trailing ellipsis.
return_as_string
    If True, return the preview as a string instead of printing to stdout.

See Also
--------
describe, head, tail

Examples
--------
>>> from datetime import date
>>> df = pl.DataFrame(
...     {
...         "a": [1.0, 2.8, 3.0],
...         "b": [4, 5, None],
...         "c": [True, False, True],
...         "d": [None, "b", "c"],
...         "e": ["usd", "eur", None],
...         "f": [date(2020, 1, 1), date(2021, 1, 2), date(2022, 1, 1)],
...     }
... )
>>> df.glimpse()
Rows: 3
Columns: 6
$ a  <f64> 1.0, 2.8, 3.0
$ b  <i64> 4, 5, None
$ c <bool> True, False, True
$ d  <str> None, 'b', 'c'
$ e  <str> 'usd', 'eur', None
$ f <date> 2020-01-01, 2021-01-02, 2022-01-01
c                   >^ TU    [         :X  a  [        O[        mTS T2U 4   R                  5       nSR	                  U4S jU 5       5      n[        U 5      T:  a  U S TS-
   S-   n U S[        U5       S3U4$ )NrV  c              3  4   >#    U  H  nT" U5      v   M     g 7fr   r  )rq  vfns     r   rs  ;DataFrame.glimpse.<locals>._parse_column.<locals>.<genexpr>q  s     6v!1vs   r  u   …<>)rG   rp  r   r)  r  r  _dtype_str_repr)	col_namer+  valuesval_strr  r  max_n_valuesr   r   s	       @r   _parse_column(DataFrame.glimpse.<locals>._parse_columnn  s    )V3B-<-12::<Fii6v66G8}11#$>'9A'=?%Gq!7 8:GCCr   c              3  @   #    U  H  u  n  n[        U5      v   M     g 7fr   r  )rq  r  r  s      r   rs  $DataFrame.glimpse.<locals>.<genexpr>y  s     Enh1CMMs   c              3  >   #    U  H  u  pn[        U5      v   M     g 7fr   r$  )rq  r  	dtype_strs      r   rs  r%  z  s     H4aS^^4s   zRows: z

Columns: r  z$ r   r  N)end)r  r   r+  r   r  ztuple[str, str, str])
minr  r   ra  maxr   r  r   r   print)r   r
  r  r  r!  rd  r+  r   max_col_namemax_col_dtyper  r  r'  r  r   r   s   ` `           @@r   r  r  6  s   j /=	D 	D 9=8I8I8KL8KHAa'8KL EEFH4HI vdkk]+djj\DE -1(HLLXa~-.a	!M?9J/K1WIUWX -1
 OOHaT+ Ms   Dnearest)interpolationc               x    U R                   (       d  Sn[        U5      eU R                  5       R                  XS9$ )u6  
Summary statistics for a DataFrame.

Parameters
----------
percentiles
    One or more percentiles to include in the summary statistics.
    All values must be in the range `[0, 1]`.

interpolation : {'nearest', 'higher', 'lower', 'midpoint', 'linear', 'equiprobable'}
    Interpolation method used when calculating percentiles.

Notes
-----
The median is included by default as the 50% percentile.

Warnings
--------
We do not guarantee the output of `describe` to be stable. It will show
statistics that we deem informative, and may be updated in the future.
Using `describe` programmatically (versus interactive exploration) is
not recommended for this reason.

See Also
--------
glimpse

Examples
--------
>>> from datetime import date, time
>>> df = pl.DataFrame(
...     {
...         "float": [1.0, 2.8, 3.0],
...         "int": [40, 50, None],
...         "bool": [True, False, True],
...         "str": ["zz", "xx", "yy"],
...         "date": [date(2020, 1, 1), date(2021, 7, 5), date(2022, 12, 31)],
...         "time": [time(10, 20, 30), time(14, 45, 50), time(23, 15, 10)],
...     }
... )

Show default frame statistics:

>>> df.describe()
shape: (9, 7)
┌────────────┬──────────┬──────────┬──────────┬──────┬─────────────────────┬──────────┐
│ statistic  ┆ float    ┆ int      ┆ bool     ┆ str  ┆ date                ┆ time     │
│ ---        ┆ ---      ┆ ---      ┆ ---      ┆ ---  ┆ ---                 ┆ ---      │
│ str        ┆ f64      ┆ f64      ┆ f64      ┆ str  ┆ str                 ┆ str      │
╞════════════╪══════════╪══════════╪══════════╪══════╪═════════════════════╪══════════╡
│ count      ┆ 3.0      ┆ 2.0      ┆ 3.0      ┆ 3    ┆ 3                   ┆ 3        │
│ null_count ┆ 0.0      ┆ 1.0      ┆ 0.0      ┆ 0    ┆ 0                   ┆ 0        │
│ mean       ┆ 2.266667 ┆ 45.0     ┆ 0.666667 ┆ null ┆ 2021-07-02 16:00:00 ┆ 16:07:10 │
│ std        ┆ 1.101514 ┆ 7.071068 ┆ null     ┆ null ┆ null                ┆ null     │
│ min        ┆ 1.0      ┆ 40.0     ┆ 0.0      ┆ xx   ┆ 2020-01-01          ┆ 10:20:30 │
│ 25%        ┆ 2.8      ┆ 40.0     ┆ null     ┆ null ┆ 2021-07-05          ┆ 14:45:50 │
│ 50%        ┆ 2.8      ┆ 50.0     ┆ null     ┆ null ┆ 2021-07-05          ┆ 14:45:50 │
│ 75%        ┆ 3.0      ┆ 50.0     ┆ null     ┆ null ┆ 2022-12-31          ┆ 23:15:10 │
│ max        ┆ 3.0      ┆ 50.0     ┆ 1.0      ┆ zz   ┆ 2022-12-31          ┆ 23:15:10 │
└────────────┴──────────┴──────────┴──────────┴──────┴─────────────────────┴──────────┘

Customize which percentiles are displayed, applying linear interpolation:

>>> with pl.Config(tbl_rows=12):
...     df.describe(
...         percentiles=[0.1, 0.3, 0.5, 0.7, 0.9],
...         interpolation="linear",
...     )
shape: (11, 7)
┌────────────┬──────────┬──────────┬──────────┬──────┬─────────────────────┬──────────┐
│ statistic  ┆ float    ┆ int      ┆ bool     ┆ str  ┆ date                ┆ time     │
│ ---        ┆ ---      ┆ ---      ┆ ---      ┆ ---  ┆ ---                 ┆ ---      │
│ str        ┆ f64      ┆ f64      ┆ f64      ┆ str  ┆ str                 ┆ str      │
╞════════════╪══════════╪══════════╪══════════╪══════╪═════════════════════╪══════════╡
│ count      ┆ 3.0      ┆ 2.0      ┆ 3.0      ┆ 3    ┆ 3                   ┆ 3        │
│ null_count ┆ 0.0      ┆ 1.0      ┆ 0.0      ┆ 0    ┆ 0                   ┆ 0        │
│ mean       ┆ 2.266667 ┆ 45.0     ┆ 0.666667 ┆ null ┆ 2021-07-02 16:00:00 ┆ 16:07:10 │
│ std        ┆ 1.101514 ┆ 7.071068 ┆ null     ┆ null ┆ null                ┆ null     │
│ min        ┆ 1.0      ┆ 40.0     ┆ 0.0      ┆ xx   ┆ 2020-01-01          ┆ 10:20:30 │
│ 10%        ┆ 1.36     ┆ 41.0     ┆ null     ┆ null ┆ 2020-04-20          ┆ 11:13:34 │
│ 30%        ┆ 2.08     ┆ 43.0     ┆ null     ┆ null ┆ 2020-11-26          ┆ 12:59:42 │
│ 50%        ┆ 2.8      ┆ 45.0     ┆ null     ┆ null ┆ 2021-07-05          ┆ 14:45:50 │
│ 70%        ┆ 2.88     ┆ 47.0     ┆ null     ┆ null ┆ 2022-02-07          ┆ 18:09:34 │
│ 90%        ┆ 2.96     ┆ 49.0     ┆ null     ┆ null ┆ 2022-09-13          ┆ 21:33:18 │
│ max        ┆ 3.0      ┆ 50.0     ┆ 1.0      ┆ zz   ┆ 2022-12-31          ┆ 23:15:10 │
└────────────┴──────────┴──────────┴──────────┴──────┴─────────────────────┴──────────┘
z/cannot describe a DataFrame that has no columns)percentilesr0  )r  r   r  describe)r   r2  r0  r   s       r   r3  DataFrame.describe  s<    z ||CCC. yy{### $ 
 	
r   c                8    U R                   R                  U5      $ )aK  
Find the index of a column by name.

Parameters
----------
name
    Name of the column to find.

Examples
--------
>>> df = pl.DataFrame(
...     {"foo": [1, 2, 3], "bar": [6, 7, 8], "ham": ["a", "b", "c"]}
... )
>>> df.get_column_index("ham")
2
>>> df.get_column_index("sandwich")  # doctest: +SKIP
ColumnNotFoundError: sandwich
)r   r(  r  s     r   r(  DataFrame.get_column_index  s    & xx((..r   c                z    US:  a  U R                   U-   nU R                  R                  XR                  5        U $ )u  
Replace a column at an index location.

This operation is in place.

Parameters
----------
index
    Column index.
column
    Series that will replace the column.

Examples
--------
>>> df = pl.DataFrame(
...     {
...         "foo": [1, 2, 3],
...         "bar": [6, 7, 8],
...         "ham": ["a", "b", "c"],
...     }
... )
>>> s = pl.Series("apple", [10, 20, 30])
>>> df.replace_column(0, s)
shape: (3, 3)
┌───────┬─────┬─────┐
│ apple ┆ bar ┆ ham │
│ ---   ┆ --- ┆ --- │
│ i64   ┆ i64 ┆ str │
╞═══════╪═════╪═════╡
│ 10    ┆ 6   ┆ a   │
│ 20    ┆ 7   ┆ b   │
│ 30    ┆ 8   ┆ c   │
└───────┴─────┴─────┘
r   )r   r   r  r   )r   r  r   s      r   r  DataFrame.replace_column  s5    F 19JJ&Eyy1r   
descending
nulls_lastmultithreadedmaintain_orderr`  r:  c                   SSK Jn  U R                  5       R                  " U/UQ7UUUUS.6R	                  UR                  5       S9$ )u
  
Sort the dataframe by the given columns.

Parameters
----------
by
    Column(s) to sort by. Accepts expression input, including selectors. Strings
    are parsed as column names.
*more_by
    Additional columns to sort by, specified as positional arguments.
descending
    Sort in descending order. When sorting by multiple columns, can be specified
    per column by passing a sequence of booleans.
nulls_last
    Place null values last; can specify a single boolean applying to all columns
    or a sequence of booleans for per-column control.
multithreaded
    Sort using multiple threads.
maintain_order
    Whether the order should be maintained if elements are equal.

Examples
--------
Pass a single column name to sort by that column.

>>> df = pl.DataFrame(
...     {
...         "a": [1, 2, None],
...         "b": [6.0, 5.0, 4.0],
...         "c": ["a", "c", "b"],
...     }
... )
>>> df.sort("a")
shape: (3, 3)
┌──────┬─────┬─────┐
│ a    ┆ b   ┆ c   │
│ ---  ┆ --- ┆ --- │
│ i64  ┆ f64 ┆ str │
╞══════╪═════╪═════╡
│ null ┆ 4.0 ┆ b   │
│ 1    ┆ 6.0 ┆ a   │
│ 2    ┆ 5.0 ┆ c   │
└──────┴─────┴─────┘

Sorting by expressions is also supported.

>>> df.sort(pl.col("a") + pl.col("b") * 2, nulls_last=True)
shape: (3, 3)
┌──────┬─────┬─────┐
│ a    ┆ b   ┆ c   │
│ ---  ┆ --- ┆ --- │
│ i64  ┆ f64 ┆ str │
╞══════╪═════╪═════╡
│ 2    ┆ 5.0 ┆ c   │
│ 1    ┆ 6.0 ┆ a   │
│ null ┆ 4.0 ┆ b   │
└──────┴─────┴─────┘

Sort by multiple columns by passing a list of columns.

>>> df.sort(["c", "a"], descending=True)
shape: (3, 3)
┌──────┬─────┬─────┐
│ a    ┆ b   ┆ c   │
│ ---  ┆ --- ┆ --- │
│ i64  ┆ f64 ┆ str │
╞══════╪═════╪═════╡
│ 2    ┆ 5.0 ┆ c   │
│ null ┆ 4.0 ┆ b   │
│ 1    ┆ 6.0 ┆ a   │
└──────┴─────┴─────┘

Or use positional arguments to sort by multiple columns in the same way.

>>> df.sort("c", "a", descending=[False, True])
shape: (3, 3)
┌──────┬─────┬─────┐
│ a    ┆ b   ┆ c   │
│ ---  ┆ --- ┆ --- │
│ i64  ┆ f64 ┆ str │
╞══════╪═════╪═════╡
│ 1    ┆ 6.0 ┆ a   │
│ null ┆ 4.0 ┆ b   │
│ 2    ┆ 5.0 ┆ c   │
└──────┴─────┴─────┘
r   r  r9  r  )polars.lazyframer  r  sortr  r  )r   r`  r:  r;  r<  r=  more_byr  s           r   r@  DataFrame.sort/  s^    ~ 	3 IIKT  &%+- W=#7#7#9W:	
r   r   )r  c                   SSK Jn  [        S5        U" SSS9 nU(       a  UOSnUR                  XPS9  UR	                  U5      sS	S	S	5        $ ! , (       d  f       g	= f)
u~
  
Execute a SQL query against the DataFrame.

.. versionadded:: 0.20.24

.. warning::
    This functionality is considered **unstable**, although it is close to
    being considered stable. It may be changed at any point without it being
    considered a breaking change.

Parameters
----------
query
    SQL query to execute.
table_name
    Optionally provide an explicit name for the table that represents the
    calling frame (defaults to "self").

Notes
-----
* The calling frame is automatically registered as a table in the SQL context
  under the name "self". If you want access to the DataFrames and LazyFrames
  found in the current globals, use the top-level :meth:`pl.sql <polars.sql>`.
* More control over registration and execution behaviour is available by
  using the :class:`SQLContext` object.
* The SQL query executes in lazy mode before being collected and returned
  as a DataFrame.

See Also
--------
SQLContext

Examples
--------
>>> from datetime import date
>>> df1 = pl.DataFrame(
...     {
...         "a": [1, 2, 3],
...         "b": ["zz", "yy", "xx"],
...         "c": [date(1999, 12, 31), date(2010, 10, 10), date(2077, 8, 8)],
...     }
... )

Query the DataFrame using SQL:

>>> df1.sql("SELECT c, b FROM self WHERE a > 1")
shape: (2, 2)
┌────────────┬─────┐
│ c          ┆ b   │
│ ---        ┆ --- │
│ date       ┆ str │
╞════════════╪═════╡
│ 2010-10-10 ┆ yy  │
│ 2077-08-08 ┆ xx  │
└────────────┴─────┘

Apply transformations to a DataFrame using SQL, aliasing "self" to "frame".

>>> df1.sql(
...     query='''
...         SELECT
...             a,
...             (a % 2 == 0) AS a_is_even,
...             CONCAT_WS(':', b, b) AS b_b,
...             EXTRACT(year FROM c) AS year,
...             0::float4 AS "zero",
...         FROM frame
...     ''',
...     table_name="frame",
... )
shape: (3, 5)
┌─────┬───────────┬───────┬──────┬──────┐
│ a   ┆ a_is_even ┆ b_b   ┆ year ┆ zero │
│ --- ┆ ---       ┆ ---   ┆ ---  ┆ ---  │
│ i64 ┆ bool      ┆ str   ┆ i32  ┆ f32  │
╞═════╪═══════════╪═══════╪══════╪══════╡
│ 1   ┆ false     ┆ zz:zz ┆ 1999 ┆ 0.0  │
│ 2   ┆ true      ┆ yy:yy ┆ 2010 ┆ 0.0  │
│ 3   ┆ false     ┆ xx:xx ┆ 2077 ┆ 0.0  │
└─────┴───────────┴───────┴──────┴──────┘
r   )
SQLContextzS`sql` is considered **unstable** (although it is close to being considered stable).FT)register_globalseagerr   )r  re  N)
polars.sqlrD  r-   registerr  )r   queryr  rD  ctxr  s         r   sqlDataFrame.sql  sP    d 	*a	
 d;s!+:DLLdL/;;u% <;;s   +A
Ar  z1.0.0)r  c          	     r    SSK Jn  U R                  5       R                  XUS9R	                  U" SSSSS9S9$ )uS  
Return the `k` largest rows.

Non-null elements are always preferred over null elements, regardless of
the value of `reverse`. The output is not guaranteed to be in any
particular order, call :func:`sort` after this function if you wish the
output to be sorted.

.. versionchanged:: 1.0.0
    The `descending` parameter was renamed `reverse`.

Parameters
----------
k
    Number of rows to return.
by
    Column(s) used to determine the top rows.
    Accepts expression input. Strings are parsed as column names.
reverse
    Consider the `k` smallest elements of the `by` column(s) (instead of the `k`
    largest). This can be specified per column by passing a sequence of
    booleans.

See Also
--------
bottom_k

Examples
--------
>>> df = pl.DataFrame(
...     {
...         "a": ["a", "b", "a", "b", "b", "c"],
...         "b": [2, 1, 1, 3, 2, 1],
...     }
... )

Get the rows which contain the 4 largest values in column b.

>>> df.top_k(4, by="b")
shape: (4, 2)
┌─────┬─────┐
│ a   ┆ b   │
│ --- ┆ --- │
│ str ┆ i64 │
╞═════╪═════╡
│ b   ┆ 3   │
│ a   ┆ 2   │
│ b   ┆ 2   │
│ b   ┆ 1   │
└─────┴─────┘

Get the rows which contain the 4 largest values when sorting on column b and a.

>>> df.top_k(4, by=["b", "a"])
shape: (4, 2)
┌─────┬─────┐
│ a   ┆ b   │
│ --- ┆ --- │
│ str ┆ i64 │
╞═════╪═════╡
│ b   ┆ 3   │
│ b   ┆ 2   │
│ a   ┆ 2   │
│ c   ┆ 1   │
└─────┴─────┘
r   r  r`  r  FTprojection_pushdownpredicate_pushdowncomm_subplan_elimslice_pushdownr  )r  r  r  top_kr  r   kr`  r  r  s        r   rT  DataFrame.top_k  sI    T 	= IIKU1WU-W+(-',&+#'	  	
r   c          	     r    SSK Jn  U R                  5       R                  XUS9R	                  U" SSSSS9S9$ )u\  
Return the `k` smallest rows.

Non-null elements are always preferred over null elements, regardless of
the value of `reverse`. The output is not guaranteed to be in any
particular order, call :func:`sort` after this function if you wish the
output to be sorted.

.. versionchanged:: 1.0.0
    The `descending` parameter was renamed `reverse`.

Parameters
----------
k
    Number of rows to return.
by
    Column(s) used to determine the bottom rows.
    Accepts expression input. Strings are parsed as column names.
reverse
    Consider the `k` largest elements of the `by` column(s) (instead of the `k`
    smallest). This can be specified per column by passing a sequence of
    booleans.

See Also
--------
top_k

Examples
--------
>>> df = pl.DataFrame(
...     {
...         "a": ["a", "b", "a", "b", "b", "c"],
...         "b": [2, 1, 1, 3, 2, 1],
...     }
... )

Get the rows which contain the 4 smallest values in column b.

>>> df.bottom_k(4, by="b")
shape: (4, 2)
┌─────┬─────┐
│ a   ┆ b   │
│ --- ┆ --- │
│ str ┆ i64 │
╞═════╪═════╡
│ b   ┆ 1   │
│ a   ┆ 1   │
│ c   ┆ 1   │
│ a   ┆ 2   │
└─────┴─────┘

Get the rows which contain the 4 smallest values when sorting on column a and b.

>>> df.bottom_k(4, by=["a", "b"])
shape: (4, 2)
┌─────┬─────┐
│ a   ┆ b   │
│ --- ┆ --- │
│ str ┆ i64 │
╞═════╪═════╡
│ a   ┆ 1   │
│ a   ┆ 2   │
│ b   ┆ 1   │
│ b   ┆ 2   │
└─────┴─────┘
r   r  rN  FTrO  r  )r  r  r  bottom_kr  rU  s        r   rY  DataFrame.bottom_kR  sI    T 	= IIKXaX0W+(-',&+#'	  	
r   
null_equalc               `    [        X5        U R                  R                  UR                  US9$ )aQ  
Check whether the DataFrame is equal to another DataFrame.

Parameters
----------
other
    DataFrame to compare with.
null_equal
    Consider null values as equal.

See Also
--------
polars.testing.assert_frame_equal

Examples
--------
>>> df1 = pl.DataFrame(
...     {
...         "foo": [1, 2, 3],
...         "bar": [6.0, 7.0, 8.0],
...         "ham": ["a", "b", "c"],
...     }
... )
>>> df2 = pl.DataFrame(
...     {
...         "foo": [3, 2, 1],
...         "bar": [8.0, 7.0, 6.0],
...         "ham": ["c", "b", "a"],
...     }
... )
>>> df1.equals(df1)
True
>>> df1.equals(df2)
False
r[  )r4   r   equals)r   r:  r\  s      r   r^  DataFrame.equals  s(    H 	$&xxuyyZ@@r   c                    Ub  US:  a  U R                   U-
  U-   nU R                  U R                  R                  X5      5      $ )u  
Get a slice of this DataFrame.

Parameters
----------
offset
    Start index. Negative indexing is supported.
length
    Length of the slice. If set to `None`, all rows starting at the offset
    will be selected.

Examples
--------
>>> df = pl.DataFrame(
...     {
...         "foo": [1, 2, 3],
...         "bar": [6.0, 7.0, 8.0],
...         "ham": ["a", "b", "c"],
...     }
... )
>>> df.slice(1, 2)
shape: (2, 3)
┌─────┬─────┬─────┐
│ foo ┆ bar ┆ ham │
│ --- ┆ --- ┆ --- │
│ i64 ┆ f64 ┆ str │
╞═════╪═════╪═════╡
│ 2   ┆ 7.0 ┆ b   │
│ 3   ┆ 8.0 ┆ c   │
└─────┴─────┴─────┘
r   )r  r   r   slice)r   offsetrW  s      r   ra  DataFrame.slice  s@    @ FQJ[[6)F2Ftxx~~f=>>r   c                    US:  a  [        SU R                  U-   5      nU R                  U R                  R	                  U5      5      $ )u  
Get the first `n` rows.

Parameters
----------
n
    Number of rows to return. If a negative value is passed, return all rows
    except the last `abs(n)`.

See Also
--------
tail, glimpse, slice

Examples
--------
>>> df = pl.DataFrame(
...     {
...         "foo": [1, 2, 3, 4, 5],
...         "bar": [6, 7, 8, 9, 10],
...         "ham": ["a", "b", "c", "d", "e"],
...     }
... )
>>> df.head(3)
shape: (3, 3)
┌─────┬─────┬─────┐
│ foo ┆ bar ┆ ham │
│ --- ┆ --- ┆ --- │
│ i64 ┆ i64 ┆ str │
╞═════╪═════╪═════╡
│ 1   ┆ 6   ┆ a   │
│ 2   ┆ 7   ┆ b   │
│ 3   ┆ 8   ┆ c   │
└─────┴─────┴─────┘

Pass a negative value to get all rows `except` the last `abs(n)`.

>>> df.head(-3)
shape: (2, 3)
┌─────┬─────┬─────┐
│ foo ┆ bar ┆ ham │
│ --- ┆ --- ┆ --- │
│ i64 ┆ i64 ┆ str │
╞═════╪═════╪═════╡
│ 1   ┆ 6   ┆ a   │
│ 2   ┆ 7   ┆ b   │
└─────┴─────┴─────┘
r   )r+  r  r   r   headr   rQ  s     r   re  DataFrame.head  <    ` q5At{{Q'Atxx}}Q/00r   c                    US:  a  [        SU R                  U-   5      nU R                  U R                  R	                  U5      5      $ )u  
Get the last `n` rows.

Parameters
----------
n
    Number of rows to return. If a negative value is passed, return all rows
    except the first `abs(n)`.

See Also
--------
head, slice

Examples
--------
>>> df = pl.DataFrame(
...     {
...         "foo": [1, 2, 3, 4, 5],
...         "bar": [6, 7, 8, 9, 10],
...         "ham": ["a", "b", "c", "d", "e"],
...     }
... )
>>> df.tail(3)
shape: (3, 3)
┌─────┬─────┬─────┐
│ foo ┆ bar ┆ ham │
│ --- ┆ --- ┆ --- │
│ i64 ┆ i64 ┆ str │
╞═════╪═════╪═════╡
│ 3   ┆ 8   ┆ c   │
│ 4   ┆ 9   ┆ d   │
│ 5   ┆ 10  ┆ e   │
└─────┴─────┴─────┘

Pass a negative value to get all rows `except` the first `abs(n)`.

>>> df.tail(-3)
shape: (2, 3)
┌─────┬─────┬─────┐
│ foo ┆ bar ┆ ham │
│ --- ┆ --- ┆ --- │
│ i64 ┆ i64 ┆ str │
╞═════╪═════╪═════╡
│ 4   ┆ 9   ┆ d   │
│ 5   ┆ 10  ┆ e   │
└─────┴─────┴─────┘
r   )r+  r  r   r   tailrf  s     r   rj  DataFrame.tail*  rh  r   c                $    U R                  U5      $ )u-  
Get the first `n` rows.

Alias for :func:`DataFrame.head`.

Parameters
----------
n
    Number of rows to return. If a negative value is passed, return all rows
    except the last `abs(n)`.

See Also
--------
head

Examples
--------
Get the first 3 rows of a DataFrame.

>>> df = pl.DataFrame(
...     {
...         "foo": [1, 2, 3, 4, 5],
...         "bar": [6, 7, 8, 9, 10],
...         "ham": ["a", "b", "c", "d", "e"],
...     }
... )
>>> df.limit(3)
shape: (3, 3)
┌─────┬─────┬─────┐
│ foo ┆ bar ┆ ham │
│ --- ┆ --- ┆ --- │
│ i64 ┆ i64 ┆ str │
╞═════╪═════╪═════╡
│ 1   ┆ 6   ┆ a   │
│ 2   ┆ 7   ┆ b   │
│ 3   ┆ 8   ┆ c   │
└─────┴─────┴─────┘
)re  rf  s     r   limitDataFrame.limit^  s    N yy|r   c                    SSK Jn  U R                  5       R                  U5      R	                  UR                  5       S9$ )u	  
Drop all rows that contain one or more NaN values.

The original order of the remaining rows is preserved.

Parameters
----------
subset
    Column name(s) for which NaN values are considered; if set to `None`
    (default), use all columns (note that only floating-point columns
    can contain NaNs).

See Also
--------
drop_nulls

Notes
-----
A NaN value is not the same as a null value.
To drop null values, use :func:`drop_nulls`.

Examples
--------
>>> df = pl.DataFrame(
...     {
...         "foo": [-20.5, float("nan"), 80.0],
...         "bar": [float("nan"), 110.0, 25.5],
...         "ham": ["xxx", "yyy", None],
...     }
... )

The default behavior of this method is to drop rows where any single
value in the row is NaN:

>>> df.drop_nans()
shape: (1, 3)
┌──────┬──────┬──────┐
│ foo  ┆ bar  ┆ ham  │
│ ---  ┆ ---  ┆ ---  │
│ f64  ┆ f64  ┆ str  │
╞══════╪══════╪══════╡
│ 80.0 ┆ 25.5 ┆ null │
└──────┴──────┴──────┘

This behaviour can be constrained to consider only a subset of columns, as
defined by name, or with a selector. For example, dropping rows only if
there is a NaN in the "bar" column:

>>> df.drop_nans(subset=["bar"])
shape: (2, 3)
┌──────┬───────┬──────┐
│ foo  ┆ bar   ┆ ham  │
│ ---  ┆ ---   ┆ ---  │
│ f64  ┆ f64   ┆ str  │
╞══════╪═══════╪══════╡
│ NaN  ┆ 110.0 ┆ yyy  │
│ 80.0 ┆ 25.5  ┆ null │
└──────┴───────┴──────┘

Dropping a row only if *all* values are NaN requires a different formulation:

>>> df = pl.DataFrame(
...     {
...         "a": [float("nan"), float("nan"), float("nan"), float("nan")],
...         "b": [10.0, 2.5, float("nan"), 5.25],
...         "c": [65.75, float("nan"), float("nan"), 10.5],
...     }
... )
>>> df.filter(~pl.all_horizontal(pl.all().is_nan()))
shape: (3, 3)
┌─────┬──────┬───────┐
│ a   ┆ b    ┆ c     │
│ --- ┆ ---  ┆ ---   │
│ f64 ┆ f64  ┆ f64   │
╞═════╪══════╪═══════╡
│ NaN ┆ 10.0 ┆ 65.75 │
│ NaN ┆ 2.5  ┆ NaN   │
│ NaN ┆ 5.25 ┆ 10.5  │
└─────┴──────┴───────┘
r   r  r  )r  r  r  	drop_nansr  r  r   subsetr  s      r   rp  DataFrame.drop_nans  s:    h 	= IIK!!&)11@T@T@V1W	
r   c                    SSK Jn  U R                  5       R                  U5      R	                  UR                  5       S9$ )uW  
Drop all rows that contain one or more null values.

The original order of the remaining rows is preserved.

Parameters
----------
subset
    Column name(s) for which null values are considered.
    If set to `None` (default), use all columns.

See Also
--------
drop_nans

Notes
-----
A null value is not the same as a NaN value.
To drop NaN values, use :func:`drop_nans`.

Examples
--------
>>> df = pl.DataFrame(
...     {
...         "foo": [1, 2, 3],
...         "bar": [6, None, 8],
...         "ham": ["a", "b", None],
...     }
... )

The default behavior of this method is to drop rows where any single
value of the row is null.

>>> df.drop_nulls()
shape: (1, 3)
┌─────┬─────┬─────┐
│ foo ┆ bar ┆ ham │
│ --- ┆ --- ┆ --- │
│ i64 ┆ i64 ┆ str │
╞═════╪═════╪═════╡
│ 1   ┆ 6   ┆ a   │
└─────┴─────┴─────┘

This behaviour can be constrained to consider only a subset of columns, as
defined by name or with a selector. For example, dropping rows if there is
a null in any of the integer columns:

>>> import polars.selectors as cs
>>> df.drop_nulls(subset=cs.integer())
shape: (2, 3)
┌─────┬─────┬──────┐
│ foo ┆ bar ┆ ham  │
│ --- ┆ --- ┆ ---  │
│ i64 ┆ i64 ┆ str  │
╞═════╪═════╪══════╡
│ 1   ┆ 6   ┆ a    │
│ 3   ┆ 8   ┆ null │
└─────┴─────┴──────┘

Below are some additional examples that show how to drop null
values based on other conditions.

>>> df = pl.DataFrame(
...     {
...         "a": [None, None, None, None],
...         "b": [1, 2, None, 1],
...         "c": [1, None, None, 1],
...     }
... )
>>> df
shape: (4, 3)
┌──────┬──────┬──────┐
│ a    ┆ b    ┆ c    │
│ ---  ┆ ---  ┆ ---  │
│ null ┆ i64  ┆ i64  │
╞══════╪══════╪══════╡
│ null ┆ 1    ┆ 1    │
│ null ┆ 2    ┆ null │
│ null ┆ null ┆ null │
│ null ┆ 1    ┆ 1    │
└──────┴──────┴──────┘

Drop a row only if all values are null:

>>> df.filter(~pl.all_horizontal(pl.all().is_null()))
shape: (3, 3)
┌──────┬─────┬──────┐
│ a    ┆ b   ┆ c    │
│ ---  ┆ --- ┆ ---  │
│ null ┆ i64 ┆ i64  │
╞══════╪═════╪══════╡
│ null ┆ 1   ┆ 1    │
│ null ┆ 2   ┆ null │
│ null ┆ 1   ┆ 1    │
└──────┴─────┴──────┘

Drop a column if all values are null:

>>> df[[s.name for s in df if not (s.null_count() == df.height)]]
shape: (4, 2)
┌──────┬──────┐
│ b    ┆ c    │
│ ---  ┆ ---  │
│ i64  ┆ i64  │
╞══════╪══════╡
│ 1    ┆ 1    │
│ 2    ┆ null │
│ null ┆ null │
│ 1    ┆ 1    │
└──────┴──────┘
r   r  r  )r  r  r  
drop_nullsr  r  rq  s      r   ru  DataFrame.drop_nulls  s:    f 	= IIK""6*22AUAUAW2X	
r   c                    U" U /UQ70 UD6$ )u=  
Offers a structured way to apply a sequence of user-defined functions (UDFs).

Parameters
----------
function
    Callable; will receive the frame as the first parameter,
    followed by any given args/kwargs.
*args
    Arguments to pass to the UDF.
**kwargs
    Keyword arguments to pass to the UDF.

Notes
-----
It is recommended to use LazyFrame when piping operations, in order
to fully take advantage of query optimization and parallelization.
See :meth:`df.lazy() <polars.DataFrame.lazy>`.

Examples
--------
>>> def cast_str_to_int(data, col_name):
...     return data.with_columns(pl.col(col_name).cast(pl.Int64))
>>> df = pl.DataFrame({"a": [1, 2, 3, 4], "b": ["10", "20", "30", "40"]})
>>> df.pipe(cast_str_to_int, col_name="b")
shape: (4, 2)
┌─────┬─────┐
│ a   ┆ b   │
│ --- ┆ --- │
│ i64 ┆ i64 │
╞═════╪═════╡
│ 1   ┆ 10  │
│ 2   ┆ 20  │
│ 3   ┆ 30  │
│ 4   ┆ 40  │
└─────┴─────┘

>>> df = pl.DataFrame({"b": [1, 2], "a": [3, 4]})
>>> df
shape: (2, 2)
┌─────┬─────┐
│ b   ┆ a   │
│ --- ┆ --- │
│ i64 ┆ i64 │
╞═════╪═════╡
│ 1   ┆ 3   │
│ 2   ┆ 4   │
└─────┴─────┘
>>> df.pipe(lambda tdf: tdf.select(sorted(tdf.columns)))
shape: (2, 2)
┌─────┬─────┐
│ a   ┆ b   │
│ --- ┆ --- │
│ i64 ┆ i64 │
╞═════╪═════╡
│ 3   ┆ 1   │
│ 4   ┆ 2   │
└─────┴─────┘
r  )r   functionargsr  s       r   pipeDataFrame.pipeZ  s    B .t.v..r   c                @   [        U[        R                  [        R                  45      (       a  SSKJn  [        U" X5      5      nO$[        U[        5      (       a  U/nO[        U5      nU R                  " S0 U Vs0 s H  owU" X   /UQ70 UD6_M     snD6$ s  snf )u	  
Apply eager functions to columns of a DataFrame.

Users should always prefer :meth:`with_columns` unless they are using
expressions that are only possible on `Series` and not on `Expr`. This is almost
never the case, except for a very select few functions that cannot know the
output datatype without looking at the data.

Parameters
----------
column_names
    The columns to apply the UDF to.
function
    Callable; will receive a column series as the first parameter,
    followed by any given args/kwargs.
*args
    Arguments to pass to the UDF.
**kwargs
    Keyword arguments to pass to the UDF.

Examples
--------
>>> df = pl.DataFrame({"a": [1, 2, 3, 4], "b": ["10", "20", "30", "40"]})
>>> df.map_columns("a", lambda s: s.shrink_dtype())
shape: (4, 2)
┌─────┬─────┐
│ a   ┆ b   │
│ --- ┆ --- │
│ i8  ┆ str │
╞═════╪═════╡
│ 1   ┆ 10  │
│ 2   ┆ 20  │
│ 3   ┆ 30  │
│ 4   ┆ 40  │
└─────┴─────┘

>>> df = pl.DataFrame(
...     {
...         "a": ['{"x":"a"}', None, '{"x":"b"}', None],
...         "b": ['{"a":1, "b": true}', None, '{"a":2, "b": false}', None],
...     }
... )
>>> df.map_columns(["a", "b"], lambda s: s.str.json_decode())
shape: (4, 2)
┌───────────┬───────────┐
│ a         ┆ b         │
│ ---       ┆ ---       │
│ struct[1] ┆ struct[2] │
╞═══════════╪═══════════╡
│ {"a"}     ┆ {1,true}  │
│ null      ┆ null      │
│ {"b"}     ┆ {2,false} │
│ null      ┆ null      │
└───────────┴───────────┘
>>> import polars.selectors as cs
>>> df.map_columns(cs.all(), lambda s: s.str.json_decode())
shape: (4, 2)
┌───────────┬───────────┐
│ a         ┆ b         │
│ ---       ┆ ---       │
│ struct[1] ┆ struct[2] │
╞═══════════╪═══════════╡
│ {"a"}     ┆ {1,true}  │
│ null      ┆ null      │
│ {"b"}     ┆ {2,false} │
│ null      ┆ null      │
└───────────┴───────────┘

See Also
--------
with_columns
r   )expand_selectorr  )	r   r   Selectorrs   polars.selectorsr}  r   r   r]  )r   r  rx  ry  r  r}  c_namesr?  s           r   map_columnsDataFrame.map_columns  s    ` lR[["''$:;;8?4>?Gc**#nG<(G   
>EFg(474T4V44gF
 	
Fs   >Bc                     U R                  U R                  R                  X5      5      $ ! [         a    US:  a  SOSnSU SU 3n[	        U5      Sef = f)u=  
Add a row index as the first column in the DataFrame.

Parameters
----------
name
    Name of the index column.
offset
    Start the index at this offset. Cannot be negative.

Notes
-----
The resulting column does not have any special properties. It is a regular
column of type `UInt32` (or `UInt64` in `polars-u64-idx`).

Examples
--------
>>> df = pl.DataFrame(
...     {
...         "a": [1, 3, 5],
...         "b": [2, 4, 6],
...     }
... )
>>> df.with_row_index()
shape: (3, 3)
┌───────┬─────┬─────┐
│ index ┆ a   ┆ b   │
│ ---   ┆ --- ┆ --- │
│ u32   ┆ i64 ┆ i64 │
╞═══════╪═════╪═════╡
│ 0     ┆ 1   ┆ 2   │
│ 1     ┆ 3   ┆ 4   │
│ 2     ┆ 5   ┆ 6   │
└───────┴─────┴─────┘
>>> df.with_row_index("id", offset=1000)
shape: (3, 3)
┌──────┬─────┬─────┐
│ id   ┆ a   ┆ b   │
│ ---  ┆ --- ┆ --- │
│ u32  ┆ i64 ┆ i64 │
╞══════╪═════╪═════╡
│ 1000 ┆ 1   ┆ 2   │
│ 1001 ┆ 3   ┆ 4   │
│ 1002 ┆ 5   ┆ 6   │
└──────┴─────┴─────┘

An index column can also be created using the expressions :func:`int_range`
and :func:`len`.

>>> df.select(
...     pl.int_range(pl.len(), dtype=pl.UInt32).alias("index"),
...     pl.all(),
... )
shape: (3, 3)
┌───────┬─────┬─────┐
│ index ┆ a   ┆ b   │
│ ---   ┆ --- ┆ --- │
│ u32   ┆ i64 ┆ i64 │
╞═══════╪═════╪═════╡
│ 0     ┆ 1   ┆ 2   │
│ 1     ┆ 3   ┆ 4   │
│ 2     ┆ 5   ┆ 6   │
└───────┴─────┴─────┘
r   negativez$greater than the maximum index valuez.`offset` input for `with_row_index` cannot be z, got N)r   r   with_row_indexOverflowErrorr   )r   r  rb  issuer   s        r   r  DataFrame.with_row_index  sb    B	,??488#:#:4#HII 	,"(1*J2XEB5'PVxXCS/t+	,s	   ), )Az`DataFrame.with_row_count` is deprecated; use `with_row_index` instead. Note that the default column name has changed from 'row_nr' to 'index'.c                $    U R                  X5      $ )us  
Add a column at index 0 that counts the rows.

.. deprecated:: 0.20.4
    Use the :meth:`with_row_index` method instead.
    Note that the default column name has changed from 'row_nr' to 'index'.

Parameters
----------
name
    Name of the column to add.
offset
    Start the row count at this offset. Default = 0

Examples
--------
>>> df = pl.DataFrame(
...     {
...         "a": [1, 3, 5],
...         "b": [2, 4, 6],
...     }
... )
>>> df.with_row_count()  # doctest: +SKIP
shape: (3, 3)
┌────────┬─────┬─────┐
│ row_nr ┆ a   ┆ b   │
│ ---    ┆ --- ┆ --- │
│ u32    ┆ i64 ┆ i64 │
╞════════╪═════╪═════╡
│ 0      ┆ 1   ┆ 2   │
│ 1      ┆ 3   ┆ 4   │
│ 2      ┆ 5   ┆ 6   │
└────────┴─────┴─────┘
)r  )r   r  rb  s      r   with_row_countDataFrame.with_row_countB  s    N ""400r   r=  c                  UR                  5        HY  n[        U[        [        R                  [        R
                  45      (       a  M9  S[        U5       SU< SU< S3n[        U5      e   [        U /UQ70 UDSU0D6$ )u  
Start a group by operation.

Parameters
----------
*by
    Column(s) to group by. Accepts expression input. Strings are parsed as
    column names.
maintain_order
    Ensure that the order of the groups is consistent with the input data.
    This is slower than a default group by.
    Settings this to `True` blocks the possibility
    to run on the streaming engine.

    .. note::
        Within each group, the order of rows is always preserved, regardless
        of this argument.
**named_by
    Additional columns to group by, specified as keyword arguments.
    The columns will be renamed to the keyword used.

Returns
-------
GroupBy
    Object which can be used to perform aggregations.

Examples
--------
Group by one column and call `agg` to compute the grouped sum of another
column.

>>> df = pl.DataFrame(
...     {
...         "a": ["a", "b", "a", "b", "c"],
...         "b": [1, 2, 1, 3, 3],
...         "c": [5, 4, 3, 2, 1],
...     }
... )
>>> df.group_by("a").agg(pl.col("b").sum())  # doctest: +IGNORE_RESULT
shape: (3, 2)
┌─────┬─────┐
│ a   ┆ b   │
│ --- ┆ --- │
│ str ┆ i64 │
╞═════╪═════╡
│ a   ┆ 2   │
│ b   ┆ 5   │
│ c   ┆ 3   │
└─────┴─────┘

Set `maintain_order=True` to ensure the order of the groups is consistent with
the input.

>>> df.group_by("a", maintain_order=True).agg(pl.col("c"))
shape: (3, 2)
┌─────┬───────────┐
│ a   ┆ c         │
│ --- ┆ ---       │
│ str ┆ list[i64] │
╞═════╪═══════════╡
│ a   ┆ [5, 3]    │
│ b   ┆ [4, 2]    │
│ c   ┆ [1]       │
└─────┴───────────┘

Group by multiple columns by passing a list of column names.

>>> df.group_by(["a", "b"]).agg(pl.max("c"))  # doctest: +IGNORE_RESULT
shape: (4, 3)
┌─────┬─────┬─────┐
│ a   ┆ b   ┆ c   │
│ --- ┆ --- ┆ --- │
│ str ┆ i64 ┆ i64 │
╞═════╪═════╪═════╡
│ a   ┆ 1   ┆ 5   │
│ b   ┆ 2   ┆ 4   │
│ b   ┆ 3   ┆ 2   │
│ c   ┆ 3   ┆ 1   │
└─────┴─────┴─────┘

Or use positional arguments to group by multiple columns in the same way.
Expressions are also accepted.

>>> df.group_by("a", pl.col("b") // 2).agg(pl.col("c").mean())  # doctest: +SKIP
shape: (3, 3)
┌─────┬─────┬─────┐
│ a   ┆ b   ┆ c   │
│ --- ┆ --- ┆ --- │
│ str ┆ i64 ┆ f64 │
╞═════╪═════╪═════╡
│ a   ┆ 0   ┆ 4.0 │
│ b   ┆ 1   ┆ 3.0 │
│ c   ┆ 1   ┆ 1.0 │
└─────┴─────┴─────┘

The `GroupBy` object returned by this method is iterable, returning the name
and data of each group.

>>> for name, data in df.group_by("a"):  # doctest: +SKIP
...     print(name)
...     print(data)
('a',)
shape: (2, 3)
┌─────┬─────┬─────┐
│ a   ┆ b   ┆ c   │
│ --- ┆ --- ┆ --- │
│ str ┆ i64 ┆ i64 │
╞═════╪═════╪═════╡
│ a   ┆ 1   ┆ 5   │
│ a   ┆ 1   ┆ 3   │
└─────┴─────┴─────┘
('b',)
shape: (2, 3)
┌─────┬─────┬─────┐
│ a   ┆ b   ┆ c   │
│ --- ┆ --- ┆ --- │
│ str ┆ i64 ┆ i64 │
╞═════╪═════╪═════╡
│ b   ┆ 2   ┆ 4   │
│ b   ┆ 3   ┆ 2   │
└─────┴─────┴─────┘
('c',)
shape: (1, 3)
┌─────┬─────┬─────┐
│ a   ┆ b   ┆ c   │
│ --- ┆ --- ┆ --- │
│ str ┆ i64 ┆ i64 │
╞═════╪═════╪═════╡
│ c   ┆ 3   ┆ 1   │
└─────┴─────┴─────┘
z=Expected Polars expression or object convertible to one, got z&.

Hint: if you tried
    group_by(by=z;)
then you probably want to use this instead:
    group_by(r  r=  )	r  r   r   r   rs   ru   r   r   r<   )r   r=  r`  named_byr  r   s         r   group_byDataFrame.group_byk  s    R __&Eec277BII%>??STXY^T_S` a'',i 0$$)9A	/   n$ ' tLbLHL^LLr   r  r  right)rb  closedr  c          	         [        U UUUUUS9$ )ua  
Create rolling groups based on a temporal or integer column.

Different from a `group_by_dynamic` the windows are now determined by the
individual values and are not of constant intervals. For constant intervals use
:func:`DataFrame.group_by_dynamic`.

If you have a time series `<t_0, t_1, ..., t_n>`, then by default the
windows created will be

    * (t_0 - period, t_0]
    * (t_1 - period, t_1]
    * ...
    * (t_n - period, t_n]

whereas if you pass a non-default `offset`, then the windows will be

    * (t_0 + offset, t_0 + offset + period]
    * (t_1 + offset, t_1 + offset + period]
    * ...
    * (t_n + offset, t_n + offset + period]

The `period` and `offset` arguments are created either from a timedelta, or
by using the following string language:

- 1ns   (1 nanosecond)
- 1us   (1 microsecond)
- 1ms   (1 millisecond)
- 1s    (1 second)
- 1m    (1 minute)
- 1h    (1 hour)
- 1d    (1 calendar day)
- 1w    (1 calendar week)
- 1mo   (1 calendar month)
- 1q    (1 calendar quarter)
- 1y    (1 calendar year)
- 1i    (1 index count)

Or combine them:
"3d12h4m25s" # 3 days, 12 hours, 4 minutes, and 25 seconds

By "calendar day", we mean the corresponding time on the next day (which may
not be 24 hours, due to daylight savings). Similarly for "calendar week",
"calendar month", "calendar quarter", and "calendar year".

.. versionchanged:: 0.20.14
    The `by` parameter was renamed `group_by`.

Parameters
----------
index_column
    Column used to group based on the time window.
    Often of type Date/Datetime.
    This column must be sorted in ascending order (or, if `group_by` is
    specified, then it must be sorted in ascending order within each group).

    In case of a rolling operation on indices, dtype needs to be one of
    {UInt32, UInt64, Int32, Int64}. Note that the first three get temporarily
    cast to Int64, so if performance matters use an Int64 column.
period
    Length of the window - must be non-negative.
offset
    Offset of the window. Default is `-period`.
closed : {'right', 'left', 'both', 'none'}
    Define which sides of the temporal interval are closed (inclusive).
group_by
    Also group by this column/these columns

Returns
-------
RollingGroupBy
    Object you can call `.agg` on to aggregate by groups, the result
    of which will be sorted by `index_column` (but note that if `group_by`
    columns are passed, it will only be sorted within each group).

See Also
--------
group_by_dynamic

Examples
--------
>>> dates = [
...     "2020-01-01 13:45:48",
...     "2020-01-01 16:42:13",
...     "2020-01-01 16:45:09",
...     "2020-01-02 18:12:48",
...     "2020-01-03 19:45:32",
...     "2020-01-08 23:16:43",
... ]
>>> df = pl.DataFrame({"dt": dates, "a": [3, 7, 5, 9, 2, 1]}).with_columns(
...     pl.col("dt").str.strptime(pl.Datetime).set_sorted()
... )
>>> out = df.rolling(index_column="dt", period="2d").agg(
...     [
...         pl.sum("a").alias("sum_a"),
...         pl.min("a").alias("min_a"),
...         pl.max("a").alias("max_a"),
...     ]
... )
>>> assert out["sum_a"].to_list() == [3, 10, 15, 24, 11, 1]
>>> assert out["max_a"].to_list() == [3, 7, 7, 9, 9, 1]
>>> assert out["min_a"].to_list() == [3, 3, 3, 3, 2, 1]
>>> out
shape: (6, 4)
┌─────────────────────┬───────┬───────┬───────┐
│ dt                  ┆ sum_a ┆ min_a ┆ max_a │
│ ---                 ┆ ---   ┆ ---   ┆ ---   │
│ datetime[μs]        ┆ i64   ┆ i64   ┆ i64   │
╞═════════════════════╪═══════╪═══════╪═══════╡
│ 2020-01-01 13:45:48 ┆ 3     ┆ 3     ┆ 3     │
│ 2020-01-01 16:42:13 ┆ 10    ┆ 3     ┆ 7     │
│ 2020-01-01 16:45:09 ┆ 15    ┆ 3     ┆ 7     │
│ 2020-01-02 18:12:48 ┆ 24    ┆ 3     ┆ 9     │
│ 2020-01-03 19:45:32 ┆ 11    ┆ 2     ┆ 9     │
│ 2020-01-08 23:16:43 ┆ 1     ┆ 1     ┆ 1     │
└─────────────────────┴───────┴───────┴───────┘

If you use an index count in `period` or `offset`, then it's based on the
values in `index_column`:

>>> df = pl.DataFrame({"int": [0, 4, 5, 6, 8], "value": [1, 4, 2, 4, 1]})
>>> df.rolling("int", period="3i").agg(pl.col("int").alias("aggregated"))
shape: (5, 2)
┌─────┬────────────┐
│ int ┆ aggregated │
│ --- ┆ ---        │
│ i64 ┆ list[i64]  │
╞═════╪════════════╡
│ 0   ┆ [0]        │
│ 4   ┆ [4]        │
│ 5   ┆ [4, 5]     │
│ 6   ┆ [4, 5, 6]  │
│ 8   ┆ [6, 8]     │
└─────┴────────────┘

If you want the index count to be based on row number, then you may want to
combine `rolling` with :meth:`.with_row_index`.
)index_columnperiodrb  r  r  )r=   )r   r  r  rb  r  r  s         r   rollingDataFrame.rolling   s$    h %
 	
r   leftwindow)r  rb  include_boundariesr  rB  r  start_byc               &    [        U UUUUUUUUU	S9
$ )u5  
Group based on a time value (or index value of type Int32, Int64).

Time windows are calculated and rows are assigned to windows. Different from a
normal group by is that a row can be member of multiple groups.
By default, the windows look like:

- [start, start + period)
- [start + every, start + every + period)
- [start + 2*every, start + 2*every + period)
- ...

where `start` is determined by `start_by`, `offset`, `every`, and the earliest
datapoint. See the `start_by` argument description for details.

.. warning::
    The index column must be sorted in ascending order. If `group_by` is passed, then
    the index column must be sorted in ascending order within each group.

.. versionchanged:: 0.20.14
    The `by` parameter was renamed `group_by`.

Parameters
----------
index_column
    Column used to group based on the time window.
    Often of type Date/Datetime.
    This column must be sorted in ascending order (or, if `group_by` is specified,
    then it must be sorted in ascending order within each group).

    In case of a dynamic group by on indices, dtype needs to be one of
    {Int32, Int64}. Note that Int32 gets temporarily cast to Int64, so if
    performance matters use an Int64 column.
every
    interval of the window
period
    length of the window, if None it will equal 'every'
offset
    offset of the window, does not take effect if `start_by` is 'datapoint'.
    Defaults to zero.
include_boundaries
    Add the lower and upper bound of the window to the "_lower_boundary" and
    "_upper_boundary" columns. This will impact performance because it's harder to
    parallelize
closed : {'left', 'right', 'both', 'none'}
    Define which sides of the temporal interval are closed (inclusive).
label : {'left', 'right', 'datapoint'}
    Define which label to use for the window:

    - 'left': lower boundary of the window
    - 'right': upper boundary of the window
    - 'datapoint': the first value of the index column in the given window.
      If you don't need the label to be at one of the boundaries, choose this
      option for maximum performance
group_by
    Also group by this column/these columns
start_by : {'window', 'datapoint', 'monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday'}
    The strategy to determine the start of the first window by.

    * 'window': Start by taking the earliest timestamp, truncating it with
      `every`, and then adding `offset`.
      Note that weekly windows start on Monday.
    * 'datapoint': Start from the first encountered data point.
    * a day of the week (only takes effect if `every` contains `'w'`):

      * 'monday': Start the window on the Monday before the first data point.
      * 'tuesday': Start the window on the Tuesday before the first data point.
      * ...
      * 'sunday': Start the window on the Sunday before the first data point.

      The resulting window is then shifted back until the earliest datapoint
      is in or in front of it.

Returns
-------
DynamicGroupBy
    Object you can call `.agg` on to aggregate by groups, the result
    of which will be sorted by `index_column` (but note that if `group_by` columns are
    passed, it will only be sorted within each group).

See Also
--------
rolling

Notes
-----
1) If you're coming from pandas, then

   .. code-block:: python

       # polars
       df.group_by_dynamic("ts", every="1d").agg(pl.col("value").sum())

   is equivalent to

   .. code-block:: python

       # pandas
       df.set_index("ts").resample("D")["value"].sum().reset_index()

   though note that, unlike pandas, polars doesn't add extra rows for empty
   windows. If you need `index_column` to be evenly spaced, then please combine
   with :func:`DataFrame.upsample`.

2) The `every`, `period` and `offset` arguments are created with
   the following string language:

   - 1ns   (1 nanosecond)
   - 1us   (1 microsecond)
   - 1ms   (1 millisecond)
   - 1s    (1 second)
   - 1m    (1 minute)
   - 1h    (1 hour)
   - 1d    (1 calendar day)
   - 1w    (1 calendar week)
   - 1mo   (1 calendar month)
   - 1q    (1 calendar quarter)
   - 1y    (1 calendar year)
   - 1i    (1 index count)

   Or combine them (except in `every`):
   "3d12h4m25s" # 3 days, 12 hours, 4 minutes, and 25 seconds

   By "calendar day", we mean the corresponding time on the next day (which may
   not be 24 hours, due to daylight savings). Similarly for "calendar week",
   "calendar month", "calendar quarter", and "calendar year".

   In case of a group_by_dynamic on an integer column, the windows are defined by:

   - "1i"      # length 1
   - "10i"     # length 10

Examples
--------
>>> from datetime import datetime
>>> df = pl.DataFrame(
...     {
...         "time": pl.datetime_range(
...             start=datetime(2021, 12, 16),
...             end=datetime(2021, 12, 16, 3),
...             interval="30m",
...             eager=True,
...         ),
...         "n": range(7),
...     }
... )
>>> df
shape: (7, 2)
┌─────────────────────┬─────┐
│ time                ┆ n   │
│ ---                 ┆ --- │
│ datetime[μs]        ┆ i64 │
╞═════════════════════╪═════╡
│ 2021-12-16 00:00:00 ┆ 0   │
│ 2021-12-16 00:30:00 ┆ 1   │
│ 2021-12-16 01:00:00 ┆ 2   │
│ 2021-12-16 01:30:00 ┆ 3   │
│ 2021-12-16 02:00:00 ┆ 4   │
│ 2021-12-16 02:30:00 ┆ 5   │
│ 2021-12-16 03:00:00 ┆ 6   │
└─────────────────────┴─────┘

Group by windows of 1 hour.

>>> df.group_by_dynamic("time", every="1h", closed="right").agg(pl.col("n"))
shape: (4, 2)
┌─────────────────────┬───────────┐
│ time                ┆ n         │
│ ---                 ┆ ---       │
│ datetime[μs]        ┆ list[i64] │
╞═════════════════════╪═══════════╡
│ 2021-12-15 23:00:00 ┆ [0]       │
│ 2021-12-16 00:00:00 ┆ [1, 2]    │
│ 2021-12-16 01:00:00 ┆ [3, 4]    │
│ 2021-12-16 02:00:00 ┆ [5, 6]    │
└─────────────────────┴───────────┘

The window boundaries can also be added to the aggregation result

>>> df.group_by_dynamic(
...     "time", every="1h", include_boundaries=True, closed="right"
... ).agg(pl.col("n").mean())
shape: (4, 4)
┌─────────────────────┬─────────────────────┬─────────────────────┬─────┐
│ _lower_boundary     ┆ _upper_boundary     ┆ time                ┆ n   │
│ ---                 ┆ ---                 ┆ ---                 ┆ --- │
│ datetime[μs]        ┆ datetime[μs]        ┆ datetime[μs]        ┆ f64 │
╞═════════════════════╪═════════════════════╪═════════════════════╪═════╡
│ 2021-12-15 23:00:00 ┆ 2021-12-16 00:00:00 ┆ 2021-12-15 23:00:00 ┆ 0.0 │
│ 2021-12-16 00:00:00 ┆ 2021-12-16 01:00:00 ┆ 2021-12-16 00:00:00 ┆ 1.5 │
│ 2021-12-16 01:00:00 ┆ 2021-12-16 02:00:00 ┆ 2021-12-16 01:00:00 ┆ 3.5 │
│ 2021-12-16 02:00:00 ┆ 2021-12-16 03:00:00 ┆ 2021-12-16 02:00:00 ┆ 5.5 │
└─────────────────────┴─────────────────────┴─────────────────────┴─────┘

When closed="left", the window excludes the right end of interval:
[lower_bound, upper_bound)

>>> df.group_by_dynamic("time", every="1h", closed="left").agg(pl.col("n"))
shape: (4, 2)
┌─────────────────────┬───────────┐
│ time                ┆ n         │
│ ---                 ┆ ---       │
│ datetime[μs]        ┆ list[i64] │
╞═════════════════════╪═══════════╡
│ 2021-12-16 00:00:00 ┆ [0, 1]    │
│ 2021-12-16 01:00:00 ┆ [2, 3]    │
│ 2021-12-16 02:00:00 ┆ [4, 5]    │
│ 2021-12-16 03:00:00 ┆ [6]       │
└─────────────────────┴───────────┘

When closed="both" the time values at the window boundaries belong to 2 groups.

>>> df.group_by_dynamic("time", every="1h", closed="both").agg(pl.col("n"))
shape: (4, 2)
┌─────────────────────┬───────────┐
│ time                ┆ n         │
│ ---                 ┆ ---       │
│ datetime[μs]        ┆ list[i64] │
╞═════════════════════╪═══════════╡
│ 2021-12-16 00:00:00 ┆ [0, 1, 2] │
│ 2021-12-16 01:00:00 ┆ [2, 3, 4] │
│ 2021-12-16 02:00:00 ┆ [4, 5, 6] │
│ 2021-12-16 03:00:00 ┆ [6]       │
└─────────────────────┴───────────┘

Dynamic group bys can also be combined with grouping on normal keys

>>> df = df.with_columns(groups=pl.Series(["a", "a", "a", "b", "b", "a", "a"]))
>>> df
shape: (7, 3)
┌─────────────────────┬─────┬────────┐
│ time                ┆ n   ┆ groups │
│ ---                 ┆ --- ┆ ---    │
│ datetime[μs]        ┆ i64 ┆ str    │
╞═════════════════════╪═════╪════════╡
│ 2021-12-16 00:00:00 ┆ 0   ┆ a      │
│ 2021-12-16 00:30:00 ┆ 1   ┆ a      │
│ 2021-12-16 01:00:00 ┆ 2   ┆ a      │
│ 2021-12-16 01:30:00 ┆ 3   ┆ b      │
│ 2021-12-16 02:00:00 ┆ 4   ┆ b      │
│ 2021-12-16 02:30:00 ┆ 5   ┆ a      │
│ 2021-12-16 03:00:00 ┆ 6   ┆ a      │
└─────────────────────┴─────┴────────┘
>>> df.group_by_dynamic(
...     "time",
...     every="1h",
...     closed="both",
...     group_by="groups",
...     include_boundaries=True,
... ).agg(pl.col("n"))
shape: (6, 5)
┌────────┬─────────────────────┬─────────────────────┬─────────────────────┬───────────┐
│ groups ┆ _lower_boundary     ┆ _upper_boundary     ┆ time                ┆ n         │
│ ---    ┆ ---                 ┆ ---                 ┆ ---                 ┆ ---       │
│ str    ┆ datetime[μs]        ┆ datetime[μs]        ┆ datetime[μs]        ┆ list[i64] │
╞════════╪═════════════════════╪═════════════════════╪═════════════════════╪═══════════╡
│ a      ┆ 2021-12-16 00:00:00 ┆ 2021-12-16 01:00:00 ┆ 2021-12-16 00:00:00 ┆ [0, 1, 2] │
│ a      ┆ 2021-12-16 01:00:00 ┆ 2021-12-16 02:00:00 ┆ 2021-12-16 01:00:00 ┆ [2]       │
│ a      ┆ 2021-12-16 02:00:00 ┆ 2021-12-16 03:00:00 ┆ 2021-12-16 02:00:00 ┆ [5, 6]    │
│ a      ┆ 2021-12-16 03:00:00 ┆ 2021-12-16 04:00:00 ┆ 2021-12-16 03:00:00 ┆ [6]       │
│ b      ┆ 2021-12-16 01:00:00 ┆ 2021-12-16 02:00:00 ┆ 2021-12-16 01:00:00 ┆ [3, 4]    │
│ b      ┆ 2021-12-16 02:00:00 ┆ 2021-12-16 03:00:00 ┆ 2021-12-16 02:00:00 ┆ [4]       │
└────────┴─────────────────────┴─────────────────────┴─────────────────────┴───────────┘

Dynamic group by on an index column

>>> df = pl.DataFrame(
...     {
...         "idx": pl.int_range(0, 6, eager=True),
...         "A": ["A", "A", "B", "B", "B", "C"],
...     }
... )
>>> (
...     df.group_by_dynamic(
...         "idx",
...         every="2i",
...         period="3i",
...         include_boundaries=True,
...         closed="right",
...     ).agg(pl.col("A").alias("A_agg_list"))
... )
shape: (4, 4)
┌─────────────────┬─────────────────┬─────┬─────────────────┐
│ _lower_boundary ┆ _upper_boundary ┆ idx ┆ A_agg_list      │
│ ---             ┆ ---             ┆ --- ┆ ---             │
│ i64             ┆ i64             ┆ i64 ┆ list[str]       │
╞═════════════════╪═════════════════╪═════╪═════════════════╡
│ -2              ┆ 1               ┆ -2  ┆ ["A", "A"]      │
│ 0               ┆ 3               ┆ 0   ┆ ["A", "B", "B"] │
│ 2               ┆ 5               ┆ 2   ┆ ["B", "B", "C"] │
│ 4               ┆ 7               ┆ 4   ┆ ["C"]           │
└─────────────────┴─────────────────┴─────┴─────────────────┘
)	r  everyr  rb  rB  r  r  r  r  )r;   )
r   r  r  r  rb  r  r  rB  r  r  s
             r   group_by_dynamicDataFrame.group_by_dynamic  s0    f	 %1
 	
r   )r  r=  c                   Uc  / n[        U[        5      (       a  U/n[        U5      nU R                  U R                  R                  X1X$5      5      $ )u
  
Upsample a DataFrame at a regular frequency.

The `every` argument is created with the following string language:

- 1ns   (1 nanosecond)
- 1us   (1 microsecond)
- 1ms   (1 millisecond)
- 1s    (1 second)
- 1m    (1 minute)
- 1h    (1 hour)
- 1d    (1 calendar day)
- 1w    (1 calendar week)
- 1mo   (1 calendar month)
- 1q    (1 calendar quarter)
- 1y    (1 calendar year)
- 1i    (1 index count)

Or combine them:

- "3d12h4m25s" # 3 days, 12 hours, 4 minutes, and 25 seconds

By "calendar day", we mean the corresponding time on the next day (which may
not be 24 hours, due to daylight savings). Similarly for "calendar week",
"calendar month", "calendar quarter", and "calendar year".

.. versionchanged:: 0.20.14
    The `by` parameter was renamed `group_by`.

Parameters
----------
time_column
    Time column will be used to determine a date_range.
    Note that this column has to be sorted for the output to make sense.
every
    Interval will start 'every' duration.
group_by
    First group by these columns and then upsample for every group.
maintain_order
    Keep the ordering predictable. This is slower.

Returns
-------
DataFrame
    Result will be sorted by `time_column` (but note that if `group_by` columns
    are passed, it will only be sorted within each group).

Examples
--------
Upsample a DataFrame by a certain interval.

>>> from datetime import datetime
>>> df = pl.DataFrame(
...     {
...         "time": [
...             datetime(2021, 2, 1),
...             datetime(2021, 4, 1),
...             datetime(2021, 5, 1),
...             datetime(2021, 6, 1),
...         ],
...         "groups": ["A", "B", "A", "B"],
...         "values": [0, 1, 2, 3],
...     }
... ).set_sorted("time")
>>> df.upsample(
...     time_column="time", every="1mo", group_by="groups", maintain_order=True
... ).select(pl.all().fill_null(strategy="forward"))
shape: (7, 3)
┌─────────────────────┬────────┬────────┐
│ time                ┆ groups ┆ values │
│ ---                 ┆ ---    ┆ ---    │
│ datetime[μs]        ┆ str    ┆ i64    │
╞═════════════════════╪════════╪════════╡
│ 2021-02-01 00:00:00 ┆ A      ┆ 0      │
│ 2021-03-01 00:00:00 ┆ A      ┆ 0      │
│ 2021-04-01 00:00:00 ┆ A      ┆ 0      │
│ 2021-05-01 00:00:00 ┆ A      ┆ 2      │
│ 2021-04-01 00:00:00 ┆ B      ┆ 1      │
│ 2021-05-01 00:00:00 ┆ B      ┆ 1      │
│ 2021-06-01 00:00:00 ┆ B      ┆ 3      │
└─────────────────────┴────────┴────────┘
)r   r   r#   r   r   upsample)r   time_columnr  r  r=  s        r   r  DataFrame.upsample  sS    v Hh$$ zH(/HHhUK
 	
r   backward_rightleft_onright_ononby_leftby_rightr`  strategyrM  	toleranceallow_parallelforce_parallelcoalesceallow_exact_matchescheck_sortednessr  c               N   [        X5        Ub@  [        U[        [        R                  45      (       d  S[        U5      < 3n[        U5      eO~[        U[        [        R                  45      (       d  S[        U5      < 3n[        U5      e[        U[        [        R                  45      (       d  S[        U5      < 3n[        U5      eSSKJn  U R                  5       R                  UR                  5       UUUUUUUU	U
UUUUUS9R                  UR                  5       S9$ )u0  
Perform an asof join.

This is similar to a left-join except that we match on nearest key rather than
equal keys.

Both DataFrames must be sorted by the `on` key (within each `by` group, if
specified).

For each row in the left DataFrame:

  - A "backward" search selects the last row in the right DataFrame whose
    'on' key is less than or equal to the left's key.

  - A "forward" search selects the first row in the right DataFrame whose
    'on' key is greater than or equal to the left's key.

  - A "nearest" search selects the last row in the right DataFrame whose value
    is nearest to the left's key. String keys are not currently supported for a
    nearest search.

The default is "backward".

Parameters
----------
other
    Lazy DataFrame to join with.
left_on
    Join column of the left DataFrame.
right_on
    Join column of the right DataFrame.
on
    Join column of both DataFrames. If set, `left_on` and `right_on` should be
    None.
by
    Join on these columns before doing asof join
by_left
    Join on these columns before doing asof join
by_right
    Join on these columns before doing asof join
strategy : {'backward', 'forward', 'nearest'}
    Join strategy.
suffix
    Suffix to append to columns with a duplicate name.
tolerance
    Numeric tolerance. By setting this the join will only be done if the near
    keys are within this distance. If an asof join is done on columns of dtype
    "Date", "Datetime", "Duration" or "Time", use either a datetime.timedelta
    object or the following string language:

        - 1ns   (1 nanosecond)
        - 1us   (1 microsecond)
        - 1ms   (1 millisecond)
        - 1s    (1 second)
        - 1m    (1 minute)
        - 1h    (1 hour)
        - 1d    (1 calendar day)
        - 1w    (1 calendar week)
        - 1mo   (1 calendar month)
        - 1q    (1 calendar quarter)
        - 1y    (1 calendar year)

        Or combine them:
        "3d12h4m25s" # 3 days, 12 hours, 4 minutes, and 25 seconds

        By "calendar day", we mean the corresponding time on the next day
        (which may not be 24 hours, due to daylight savings). Similarly for
        "calendar week", "calendar month", "calendar quarter", and
        "calendar year".

allow_parallel
    Allow the physical plan to optionally evaluate the computation of both
    DataFrames up to the join in parallel.
force_parallel
    Force the physical plan to evaluate the computation of both DataFrames up to
    the join in parallel.
coalesce
    Coalescing behavior (merging of `on` / `left_on` / `right_on` columns):

    - *True*: Always coalesce join columns.
    - *False*: Never coalesce join columns.

    Note that joining on any other expressions than `col`
    will turn off coalescing.
allow_exact_matches
    Whether exact matches are valid join predicates.

    - If True, allow matching with the same ``on`` value
        (i.e. less-than-or-equal-to / greater-than-or-equal-to)
    - If False, don't match the same ``on`` value
        (i.e., strictly less-than / strictly greater-than).
check_sortedness
    Check the sortedness of the asof keys. If the keys are not sorted Polars
    will error. Currently, sortedness cannot be checked if 'by' groups are
    provided.

Examples
--------
>>> from datetime import date
>>> gdp = pl.DataFrame(
...     {
...         "date": pl.date_range(
...             date(2016, 1, 1),
...             date(2020, 1, 1),
...             "1y",
...             eager=True,
...         ),
...         "gdp": [4164, 4411, 4566, 4696, 4827],
...     }
... )
>>> gdp
shape: (5, 2)
┌────────────┬──────┐
│ date       ┆ gdp  │
│ ---        ┆ ---  │
│ date       ┆ i64  │
╞════════════╪══════╡
│ 2016-01-01 ┆ 4164 │
│ 2017-01-01 ┆ 4411 │
│ 2018-01-01 ┆ 4566 │
│ 2019-01-01 ┆ 4696 │
│ 2020-01-01 ┆ 4827 │
└────────────┴──────┘

>>> population = pl.DataFrame(
...     {
...         "date": [date(2016, 3, 1), date(2018, 8, 1), date(2019, 1, 1)],
...         "population": [82.19, 82.66, 83.12],
...     }
... ).sort("date")
>>> population
shape: (3, 2)
┌────────────┬────────────┐
│ date       ┆ population │
│ ---        ┆ ---        │
│ date       ┆ f64        │
╞════════════╪════════════╡
│ 2016-03-01 ┆ 82.19      │
│ 2018-08-01 ┆ 82.66      │
│ 2019-01-01 ┆ 83.12      │
└────────────┴────────────┘

Note how the dates don't quite match. If we join them using `join_asof` and
`strategy='backward'`, then each date from `population` which doesn't have an
exact match is matched with the closest earlier date from `gdp`:

>>> population.join_asof(gdp, on="date", strategy="backward")
shape: (3, 3)
┌────────────┬────────────┬──────┐
│ date       ┆ population ┆ gdp  │
│ ---        ┆ ---        ┆ ---  │
│ date       ┆ f64        ┆ i64  │
╞════════════╪════════════╪══════╡
│ 2016-03-01 ┆ 82.19      ┆ 4164 │
│ 2018-08-01 ┆ 82.66      ┆ 4566 │
│ 2019-01-01 ┆ 83.12      ┆ 4696 │
└────────────┴────────────┴──────┘

Note how:

- date `2016-03-01` from `population` is matched with `2016-01-01` from `gdp`;
- date `2018-08-01` from `population` is matched with `2018-01-01` from `gdp`.

You can verify this by passing `coalesce=False`:

>>> population.join_asof(gdp, on="date", strategy="backward", coalesce=False)
shape: (3, 4)
┌────────────┬────────────┬────────────┬──────┐
│ date       ┆ population ┆ date_right ┆ gdp  │
│ ---        ┆ ---        ┆ ---        ┆ ---  │
│ date       ┆ f64        ┆ date       ┆ i64  │
╞════════════╪════════════╪════════════╪══════╡
│ 2016-03-01 ┆ 82.19      ┆ 2016-01-01 ┆ 4164 │
│ 2018-08-01 ┆ 82.66      ┆ 2018-01-01 ┆ 4566 │
│ 2019-01-01 ┆ 83.12      ┆ 2019-01-01 ┆ 4696 │
└────────────┴────────────┴────────────┴──────┘

If we instead use `strategy='forward'`, then each date from `population` which
doesn't have an exact match is matched with the closest later date from `gdp`:

>>> population.join_asof(gdp, on="date", strategy="forward")
shape: (3, 3)
┌────────────┬────────────┬──────┐
│ date       ┆ population ┆ gdp  │
│ ---        ┆ ---        ┆ ---  │
│ date       ┆ f64        ┆ i64  │
╞════════════╪════════════╪══════╡
│ 2016-03-01 ┆ 82.19      ┆ 4411 │
│ 2018-08-01 ┆ 82.66      ┆ 4696 │
│ 2019-01-01 ┆ 83.12      ┆ 4696 │
└────────────┴────────────┴──────┘

Note how:

- date `2016-03-01` from `population` is matched with `2017-01-01` from `gdp`;
- date `2018-08-01` from `population` is matched with `2019-01-01` from `gdp`.

Finally, `strategy='nearest'` gives us a mix of the two results above, as each
date from `population` which doesn't have an exact match is matched with the
closest date from `gdp`, regardless of whether it's earlier or later:

>>> population.join_asof(gdp, on="date", strategy="nearest")
shape: (3, 3)
┌────────────┬────────────┬──────┐
│ date       ┆ population ┆ gdp  │
│ ---        ┆ ---        ┆ ---  │
│ date       ┆ f64        ┆ i64  │
╞════════════╪════════════╪══════╡
│ 2016-03-01 ┆ 82.19      ┆ 4164 │
│ 2018-08-01 ┆ 82.66      ┆ 4696 │
│ 2019-01-01 ┆ 83.12      ┆ 4696 │
└────────────┴────────────┴──────┘

Note how:

- date `2016-03-01` from `population` is matched with `2016-01-01` from `gdp`;
- date `2018-08-01` from `population` is matched with `2019-01-01` from `gdp`.

They `by` argument allows joining on another column first, before the asof join.
In this example we join by `country` first, then asof join by date, as above.

>>> gdp_dates = pl.date_range(  # fmt: skip
...     date(2016, 1, 1), date(2020, 1, 1), "1y", eager=True
... )
>>> gdp2 = pl.DataFrame(
...     {
...         "country": ["Germany"] * 5 + ["Netherlands"] * 5,
...         "date": pl.concat([gdp_dates, gdp_dates]),
...         "gdp": [4164, 4411, 4566, 4696, 4827, 784, 833, 914, 910, 909],
...     }
... ).sort("country", "date")
>>>
>>> gdp2
shape: (10, 3)
┌─────────────┬────────────┬──────┐
│ country     ┆ date       ┆ gdp  │
│ ---         ┆ ---        ┆ ---  │
│ str         ┆ date       ┆ i64  │
╞═════════════╪════════════╪══════╡
│ Germany     ┆ 2016-01-01 ┆ 4164 │
│ Germany     ┆ 2017-01-01 ┆ 4411 │
│ Germany     ┆ 2018-01-01 ┆ 4566 │
│ Germany     ┆ 2019-01-01 ┆ 4696 │
│ Germany     ┆ 2020-01-01 ┆ 4827 │
│ Netherlands ┆ 2016-01-01 ┆ 784  │
│ Netherlands ┆ 2017-01-01 ┆ 833  │
│ Netherlands ┆ 2018-01-01 ┆ 914  │
│ Netherlands ┆ 2019-01-01 ┆ 910  │
│ Netherlands ┆ 2020-01-01 ┆ 909  │
└─────────────┴────────────┴──────┘
>>> pop2 = pl.DataFrame(
...     {
...         "country": ["Germany"] * 3 + ["Netherlands"] * 3,
...         "date": [
...             date(2016, 3, 1),
...             date(2018, 8, 1),
...             date(2019, 1, 1),
...             date(2016, 3, 1),
...             date(2018, 8, 1),
...             date(2019, 1, 1),
...         ],
...         "population": [82.19, 82.66, 83.12, 17.11, 17.32, 17.40],
...     }
... ).sort("country", "date")
>>>
>>> pop2
shape: (6, 3)
┌─────────────┬────────────┬────────────┐
│ country     ┆ date       ┆ population │
│ ---         ┆ ---        ┆ ---        │
│ str         ┆ date       ┆ f64        │
╞═════════════╪════════════╪════════════╡
│ Germany     ┆ 2016-03-01 ┆ 82.19      │
│ Germany     ┆ 2018-08-01 ┆ 82.66      │
│ Germany     ┆ 2019-01-01 ┆ 83.12      │
│ Netherlands ┆ 2016-03-01 ┆ 17.11      │
│ Netherlands ┆ 2018-08-01 ┆ 17.32      │
│ Netherlands ┆ 2019-01-01 ┆ 17.4       │
└─────────────┴────────────┴────────────┘
>>> pop2.join_asof(gdp2, by="country", on="date", strategy="nearest")
shape: (6, 4)
┌─────────────┬────────────┬────────────┬──────┐
│ country     ┆ date       ┆ population ┆ gdp  │
│ ---         ┆ ---        ┆ ---        ┆ ---  │
│ str         ┆ date       ┆ f64        ┆ i64  │
╞═════════════╪════════════╪════════════╪══════╡
│ Germany     ┆ 2016-03-01 ┆ 82.19      ┆ 4164 │
│ Germany     ┆ 2018-08-01 ┆ 82.66      ┆ 4696 │
│ Germany     ┆ 2019-01-01 ┆ 83.12      ┆ 4696 │
│ Netherlands ┆ 2016-03-01 ┆ 17.11      ┆ 784  │
│ Netherlands ┆ 2018-08-01 ┆ 17.32      ┆ 910  │
│ Netherlands ┆ 2019-01-01 ┆ 17.4       ┆ 910  │
└─────────────┴────────────┴────────────┴──────┘
z%expected `on` to be str or Expr, got z*expected `left_on` to be str or Expr, got z+expected `right_on` to be str or Expr, got r   r  r  r  )r4   r   r   r   rs   r3   r   r  r  r  	join_asofr  r  )r   r:  r  r  r  r  r  r`  r  rM  r  r  r  r  r  r  r   r  s                     r   r  DataFrame.join_asofC  s#   r	 	$&>b3.11;<OPR<S;VW   n$	 2 gRWW~66BCVW^C_Bbcn$3.99CDWX`DaCden$< IIKY

!!!#--!$7!1  " W=#7#7#9W:'	
r   
join_nullsnulls_equalz1.24zm:m)r  r  rM  validater  r  r=  c                   [        X5        SSKJn  U R                  5       R	                  UR                  5       UUUUUUUU	U
S9
R                  UR                  5       S9$ )u  
Join in SQL-like fashion.

.. versionchanged:: 1.24
    The `join_nulls` parameter was renamed `nulls_equal`.

Parameters
----------
other
    DataFrame to join with.
on
    Name(s) of the join columns in both DataFrames. If set, `left_on` and
    `right_on` should be None. This should not be specified if `how='cross'`.
how : {'inner', 'left', 'right', 'full', 'semi', 'anti', 'cross'}
    Join strategy.

    .. list-table ::
       :header-rows: 0

       * - **inner**
         - *(Default)* Returns rows that have matching values in both tables.
       * - **left**
         - Returns all rows from the left table, and the matched rows from
           the right table.
       * - **full**
         - Returns all rows when there is a match in either left or right.
       * - **cross**
         - Returns the Cartesian product of rows from both tables
       * - **semi**
         - Returns rows from the left table that have a match in the right
           table.
       * - **anti**
         - Returns rows from the left table that have no match in the right
           table.

left_on
    Name(s) of the left join column(s).
right_on
    Name(s) of the right join column(s).
suffix
    Suffix to append to columns with a duplicate name.
validate: {'m:m', 'm:1', '1:m', '1:1'}
    Checks if join is of specified type.

    .. list-table ::
       :header-rows: 0

       * - **m:m**
         - *(Default)* Many-to-many (default). Does not result in checks.
       * - **1:1**
         - One-to-one. Checks if join keys are unique in both left and
           right datasets.
       * - **1:m**
         - One-to-many. Checks if join keys are unique in left dataset.
       * - **m:1**
         - Many-to-one. Check if join keys are unique in right dataset.

    .. note::
        This is currently not supported by the streaming engine.

nulls_equal
    Join on null values. By default null values will never produce matches.
coalesce
    Coalescing behavior (merging of join columns).

    .. list-table ::
       :header-rows: 0

       * - **None**
         - *(Default)* Coalesce unless `how='full'` is specified.
       * - **True**
         - Always coalesce join columns.
       * - **False**
         - Never coalesce join columns.

    .. note::
        Joining on any other expressions than `col`
        will turn off coalescing.
maintain_order : {'none', 'left', 'right', 'left_right', 'right_left'}
    Which DataFrame row order to preserve, if any.
    Do not rely on any observed ordering without explicitly setting this
    parameter, as your code may break in a future release.
    Not specifying any ordering can improve performance.
    Supported for inner, left, right and full joins

    .. list-table ::
       :header-rows: 0

       * - **none**
         - *(Default)* No specific ordering is desired. The ordering might
           differ across Polars versions or even between different runs.
       * - **left**
         - Preserves the order of the left DataFrame.
       * - **right**
         - Preserves the order of the right DataFrame.
       * - **left_right**
         - First preserves the order of the left DataFrame, then the right.
       * - **right_left**
         - First preserves the order of the right DataFrame, then the left.

See Also
--------
join_asof

Examples
--------
>>> df = pl.DataFrame(
...     {
...         "foo": [1, 2, 3],
...         "bar": [6.0, 7.0, 8.0],
...         "ham": ["a", "b", "c"],
...     }
... )
>>> other_df = pl.DataFrame(
...     {
...         "apple": ["x", "y", "z"],
...         "ham": ["a", "b", "d"],
...     }
... )
>>> df.join(other_df, on="ham")
shape: (2, 4)
┌─────┬─────┬─────┬───────┐
│ foo ┆ bar ┆ ham ┆ apple │
│ --- ┆ --- ┆ --- ┆ ---   │
│ i64 ┆ f64 ┆ str ┆ str   │
╞═════╪═════╪═════╪═══════╡
│ 1   ┆ 6.0 ┆ a   ┆ x     │
│ 2   ┆ 7.0 ┆ b   ┆ y     │
└─────┴─────┴─────┴───────┘

>>> df.join(other_df, on="ham", how="full")
shape: (4, 5)
┌──────┬──────┬──────┬───────┬───────────┐
│ foo  ┆ bar  ┆ ham  ┆ apple ┆ ham_right │
│ ---  ┆ ---  ┆ ---  ┆ ---   ┆ ---       │
│ i64  ┆ f64  ┆ str  ┆ str   ┆ str       │
╞══════╪══════╪══════╪═══════╪═══════════╡
│ 1    ┆ 6.0  ┆ a    ┆ x     ┆ a         │
│ 2    ┆ 7.0  ┆ b    ┆ y     ┆ b         │
│ null ┆ null ┆ null ┆ z     ┆ d         │
│ 3    ┆ 8.0  ┆ c    ┆ null  ┆ null      │
└──────┴──────┴──────┴───────┴───────────┘

>>> df.join(other_df, on="ham", how="full", coalesce=True)
shape: (4, 4)
┌──────┬──────┬─────┬───────┐
│ foo  ┆ bar  ┆ ham ┆ apple │
│ ---  ┆ ---  ┆ --- ┆ ---   │
│ i64  ┆ f64  ┆ str ┆ str   │
╞══════╪══════╪═════╪═══════╡
│ 1    ┆ 6.0  ┆ a   ┆ x     │
│ 2    ┆ 7.0  ┆ b   ┆ y     │
│ null ┆ null ┆ d   ┆ z     │
│ 3    ┆ 8.0  ┆ c   ┆ null  │
└──────┴──────┴─────┴───────┘

>>> df.join(other_df, on="ham", how="left")
shape: (3, 4)
┌─────┬─────┬─────┬───────┐
│ foo ┆ bar ┆ ham ┆ apple │
│ --- ┆ --- ┆ --- ┆ ---   │
│ i64 ┆ f64 ┆ str ┆ str   │
╞═════╪═════╪═════╪═══════╡
│ 1   ┆ 6.0 ┆ a   ┆ x     │
│ 2   ┆ 7.0 ┆ b   ┆ y     │
│ 3   ┆ 8.0 ┆ c   ┆ null  │
└─────┴─────┴─────┴───────┘

>>> df.join(other_df, on="ham", how="semi")
shape: (2, 3)
┌─────┬─────┬─────┐
│ foo ┆ bar ┆ ham │
│ --- ┆ --- ┆ --- │
│ i64 ┆ f64 ┆ str │
╞═════╪═════╪═════╡
│ 1   ┆ 6.0 ┆ a   │
│ 2   ┆ 7.0 ┆ b   │
└─────┴─────┴─────┘

>>> df.join(other_df, on="ham", how="anti")
shape: (1, 3)
┌─────┬─────┬─────┐
│ foo ┆ bar ┆ ham │
│ --- ┆ --- ┆ --- │
│ i64 ┆ f64 ┆ str │
╞═════╪═════╪═════╡
│ 3   ┆ 8.0 ┆ c   │
└─────┴─────┴─────┘

>>> df.join(other_df, how="cross")
shape: (9, 5)
┌─────┬─────┬─────┬───────┬───────────┐
│ foo ┆ bar ┆ ham ┆ apple ┆ ham_right │
│ --- ┆ --- ┆ --- ┆ ---   ┆ ---       │
│ i64 ┆ f64 ┆ str ┆ str   ┆ str       │
╞═════╪═════╪═════╪═══════╪═══════════╡
│ 1   ┆ 6.0 ┆ a   ┆ x     ┆ a         │
│ 1   ┆ 6.0 ┆ a   ┆ y     ┆ b         │
│ 1   ┆ 6.0 ┆ a   ┆ z     ┆ d         │
│ 2   ┆ 7.0 ┆ b   ┆ x     ┆ a         │
│ 2   ┆ 7.0 ┆ b   ┆ y     ┆ b         │
│ 2   ┆ 7.0 ┆ b   ┆ z     ┆ d         │
│ 3   ┆ 8.0 ┆ c   ┆ x     ┆ a         │
│ 3   ┆ 8.0 ┆ c   ┆ y     ┆ b         │
│ 3   ┆ 8.0 ┆ c   ┆ z     ┆ d         │
└─────┴─────┴─────┴───────┴───────────┘

Notes
-----
For joining on columns with categorical data, see :class:`polars.StringCache`.
r   r  )
r:  r  r  r  rB  rM  r  r  r  r=  r  )r4   r  r  r  r  r  r  )r   r:  r  rB  r  r  rM  r  r  r  r=  r  s               r   r  DataFrame.join  sj    D 	$&< IIKTjjl!!'!-   W=#7#7#9W:	
r   )rM  c                   [        X5        SSKJn  U R                  5       R                  " UR                  5       /UQ7SU06R                  UR                  5       S9$ )u  
Perform a join based on one or multiple (in)equality predicates.

This performs an inner join, so only rows where all predicates are true
are included in the result, and a row from either DataFrame may be included
multiple times in the result.

.. note::
    The row order of the input DataFrames is not preserved.

.. warning::
    This functionality is experimental. It may be
    changed at any point without it being considered a breaking change.

Parameters
----------
other
    DataFrame to join with.
*predicates
    (In)Equality condition to join the two tables on.
    When a column name occurs in both tables, the proper suffix must
    be applied in the predicate.
suffix
    Suffix to append to columns with a duplicate name.

Examples
--------
Join two dataframes together based on two predicates which get AND-ed together.

>>> east = pl.DataFrame(
...     {
...         "id": [100, 101, 102],
...         "dur": [120, 140, 160],
...         "rev": [12, 14, 16],
...         "cores": [2, 8, 4],
...     }
... )
>>> west = pl.DataFrame(
...     {
...         "t_id": [404, 498, 676, 742],
...         "time": [90, 130, 150, 170],
...         "cost": [9, 13, 15, 16],
...         "cores": [4, 2, 1, 4],
...     }
... )
>>> east.join_where(
...     west,
...     pl.col("dur") < pl.col("time"),
...     pl.col("rev") < pl.col("cost"),
... )
shape: (5, 8)
┌─────┬─────┬─────┬───────┬──────┬──────┬──────┬─────────────┐
│ id  ┆ dur ┆ rev ┆ cores ┆ t_id ┆ time ┆ cost ┆ cores_right │
│ --- ┆ --- ┆ --- ┆ ---   ┆ ---  ┆ ---  ┆ ---  ┆ ---         │
│ i64 ┆ i64 ┆ i64 ┆ i64   ┆ i64  ┆ i64  ┆ i64  ┆ i64         │
╞═════╪═════╪═════╪═══════╪══════╪══════╪══════╪═════════════╡
│ 100 ┆ 120 ┆ 12  ┆ 2     ┆ 498  ┆ 130  ┆ 13   ┆ 2           │
│ 100 ┆ 120 ┆ 12  ┆ 2     ┆ 676  ┆ 150  ┆ 15   ┆ 1           │
│ 100 ┆ 120 ┆ 12  ┆ 2     ┆ 742  ┆ 170  ┆ 16   ┆ 4           │
│ 101 ┆ 140 ┆ 14  ┆ 8     ┆ 676  ┆ 150  ┆ 15   ┆ 1           │
│ 101 ┆ 140 ┆ 14  ┆ 8     ┆ 742  ┆ 170  ┆ 16   ┆ 4           │
└─────┴─────┴─────┴───────┴──────┴──────┴──────┴─────────────┘

To OR them together, use a single expression and the `|` operator.

>>> east.join_where(
...     west,
...     (pl.col("dur") < pl.col("time")) | (pl.col("rev") < pl.col("cost")),
... )
shape: (6, 8)
┌─────┬─────┬─────┬───────┬──────┬──────┬──────┬─────────────┐
│ id  ┆ dur ┆ rev ┆ cores ┆ t_id ┆ time ┆ cost ┆ cores_right │
│ --- ┆ --- ┆ --- ┆ ---   ┆ ---  ┆ ---  ┆ ---  ┆ ---         │
│ i64 ┆ i64 ┆ i64 ┆ i64   ┆ i64  ┆ i64  ┆ i64  ┆ i64         │
╞═════╪═════╪═════╪═══════╪══════╪══════╪══════╪═════════════╡
│ 100 ┆ 120 ┆ 12  ┆ 2     ┆ 498  ┆ 130  ┆ 13   ┆ 2           │
│ 100 ┆ 120 ┆ 12  ┆ 2     ┆ 676  ┆ 150  ┆ 15   ┆ 1           │
│ 100 ┆ 120 ┆ 12  ┆ 2     ┆ 742  ┆ 170  ┆ 16   ┆ 4           │
│ 101 ┆ 140 ┆ 14  ┆ 8     ┆ 676  ┆ 150  ┆ 15   ┆ 1           │
│ 101 ┆ 140 ┆ 14  ┆ 8     ┆ 742  ┆ 170  ┆ 16   ┆ 4           │
│ 102 ┆ 160 ┆ 16  ┆ 4     ┆ 742  ┆ 170  ┆ 16   ┆ 4           │
└─────┴─────┴─────┴───────┴──────┴──────┴──────┴─────────────┘
r   r  rM  r  )r4   r  r  r  
join_wherer  r  )r   r:  rM  r  r  s        r   r  DataFrame.join_where  sc    t 	$&< IIKZ

  	 W=#7#7#9W:	
r      )inference_sizec                   U R                   R                  XU5      u  pEU(       a  U R                  U5      $ [        U5      R	                  5       $ )u&  
Apply a custom/user-defined function (UDF) over the rows of the DataFrame.

.. warning::
    This method is much slower than the native expressions API.
    Only use it if you cannot implement your logic otherwise.

The UDF will receive each row as a tuple of values: `udf(row)`.

Implementing logic using a Python function is almost always *significantly*
slower and more memory intensive than implementing the same logic using
the native expression API because:

- The native expression engine runs in Rust; UDFs run in Python.
- Use of Python UDFs forces the DataFrame to be materialized in memory.
- Polars-native expressions can be parallelised (UDFs typically cannot).
- Polars-native expressions can be logically optimised (UDFs cannot).

Wherever possible you should strongly prefer the native expression API
to achieve the best performance.

Parameters
----------
function
    Custom function or lambda.
return_dtype
    Output type of the operation. If none given, Polars tries to infer the type.
inference_size
    Only used in the case when the custom function returns rows.
    This uses the first `n` rows to determine the output schema.

Notes
-----
* The frame-level `map_rows` cannot track column names (as the UDF is a
  black-box that may arbitrarily drop, rearrange, transform, or add new
  columns); if you want to apply a UDF such that column names are preserved,
  you should use the expression-level `map_elements` syntax instead.

* If your function is expensive and you don't want it to be called more than
  once for a given input, consider applying an `@lru_cache` decorator to it.
  If your data is suitable you may achieve *significant* speedups.

Examples
--------
>>> df = pl.DataFrame({"foo": [1, 2, 3], "bar": [-1, 5, 8]})

Return a DataFrame by mapping each row to a tuple:

>>> df.map_rows(lambda t: (t[0] * 2, t[1] * 3))
shape: (3, 2)
┌──────────┬──────────┐
│ column_0 ┆ column_1 │
│ ---      ┆ ---      │
│ i64      ┆ i64      │
╞══════════╪══════════╡
│ 2        ┆ -3       │
│ 4        ┆ 15       │
│ 6        ┆ 24       │
└──────────┴──────────┘

However, it is much better to implement this with a native expression:

>>> df.select(
...     pl.col("foo") * 2,
...     pl.col("bar") * 3,
... )  # doctest: +IGNORE_RESULT

Return a DataFrame with a single column by mapping each row to a scalar:

>>> df.map_rows(lambda t: (t[0] * 2 + t[1]))
shape: (3, 1)
┌─────┐
│ map │
│ --- │
│ i64 │
╞═════╡
│ 1   │
│ 9   │
│ 14  │
└─────┘

In this case it is better to use the following native expression:

>>> df.select(pl.col("foo") * 2 + pl.col("bar"))  # doctest: +IGNORE_RESULT
)r   map_rowsr   r9   to_frame)r   rx  return_dtyper  r=  is_dfs         r   r  DataFrame.map_rows   sB    @ XX&&x~N
??3''#;''))r   )in_placer  c               d   [        U[        5      (       d  UR                  5       nU(       a7  U R                  R	                  U Vs/ s H  o3R
                  PM     sn5        U $ U R                  U R                  R                  U Vs/ s H  o3R
                  PM     sn5      5      $ s  snf s  snf )uh  
Return a new DataFrame grown horizontally by stacking multiple Series to it.

Parameters
----------
columns
    Series to stack.
in_place
    Modify in place.

Examples
--------
>>> df = pl.DataFrame(
...     {
...         "foo": [1, 2, 3],
...         "bar": [6, 7, 8],
...         "ham": ["a", "b", "c"],
...     }
... )
>>> x = pl.Series("apple", [10, 20, 30])
>>> df.hstack([x])
shape: (3, 4)
┌─────┬─────┬─────┬───────┐
│ foo ┆ bar ┆ ham ┆ apple │
│ --- ┆ --- ┆ --- ┆ ---   │
│ i64 ┆ i64 ┆ str ┆ i64   │
╞═════╪═════╪═════╪═══════╡
│ 1   ┆ 6   ┆ a   ┆ 10    │
│ 2   ┆ 7   ┆ b   ┆ 20    │
│ 3   ┆ 8   ┆ c   ┆ 30    │
└─────┴─────┴─────┴───────┘
)r   r   r  r   
hstack_mutr   r   hstack)r   r  r  rd  s       r   r  DataFrame.hstacki   s    F '4(())+GHHw 7w!w 78K??488??'3J'QDD'3J#KLL !8 4Ks   B(B-c                   [        X5        U(       a'  U R                  R                  UR                  5        U $ U R                  U R                  R	                  UR                  5      5      $ )ua  
Grow this DataFrame vertically by stacking a DataFrame to it.

Parameters
----------
other
    DataFrame to stack.
in_place
    Modify in place.

See Also
--------
extend

Examples
--------
>>> df1 = pl.DataFrame(
...     {
...         "foo": [1, 2],
...         "bar": [6, 7],
...         "ham": ["a", "b"],
...     }
... )
>>> df2 = pl.DataFrame(
...     {
...         "foo": [3, 4],
...         "bar": [8, 9],
...         "ham": ["c", "d"],
...     }
... )
>>> df1.vstack(df2)
shape: (4, 3)
┌─────┬─────┬─────┐
│ foo ┆ bar ┆ ham │
│ --- ┆ --- ┆ --- │
│ i64 ┆ i64 ┆ str │
╞═════╪═════╪═════╡
│ 1   ┆ 6   ┆ a   │
│ 2   ┆ 7   ┆ b   │
│ 3   ┆ 8   ┆ c   │
│ 4   ┆ 9   ┆ d   │
└─────┴─────┴─────┘
)r4   r   
vstack_mutr   vstack)r   r:  r  s      r   r  DataFrame.vstack   sK    X 	$&HH		*Ktxxuyy9::r   c                f    [        X5        U R                  R                  UR                  5        U $ )u  
Extend the memory backed by this `DataFrame` with the values from `other`.

Different from `vstack` which adds the chunks from `other` to the chunks of
this `DataFrame`, `extend` appends the data from `other` to the underlying
memory locations and thus may cause a reallocation.

If this does not cause a reallocation, the resulting data structure will not
have any extra chunks and thus will yield faster queries.

Prefer `extend` over `vstack` when you want to do a query after a single
append. For instance, during online operations where you add `n` rows and rerun
a query.

Prefer `vstack` over `extend` when you want to append many times before
doing a query. For instance, when you read in multiple files and want to store
them in a single `DataFrame`. In the latter case, finish the sequence of
`vstack` operations with a `rechunk`.

Parameters
----------
other
    DataFrame to vertically add.

Warnings
--------
This method modifies the dataframe in-place. The dataframe is returned for
convenience only.

See Also
--------
vstack

Examples
--------
>>> df1 = pl.DataFrame({"foo": [1, 2, 3], "bar": [4, 5, 6]})
>>> df2 = pl.DataFrame({"foo": [10, 20, 30], "bar": [40, 50, 60]})
>>> df1.extend(df2)
shape: (6, 2)
┌─────┬─────┐
│ foo ┆ bar │
│ --- ┆ --- │
│ i64 ┆ i64 │
╞═════╪═════╡
│ 1   ┆ 4   │
│ 2   ┆ 5   │
│ 3   ┆ 6   │
│ 10  ┆ 40  │
│ 20  ┆ 50  │
│ 30  ┆ 60  │
└─────┴─────┘
)r4   r   extendrs  s     r   r  DataFrame.extend   s&    j 	$&		"r   c                   SSK Jn  U R                  5       R                  " USU06R	                  UR                  5       S9$ )u  
Remove columns from the dataframe.

Parameters
----------
*columns
    Names of the columns that should be removed from the dataframe.
    Accepts column selector input.
strict
    Validate that all column names exist in the current schema,
    and throw an exception if any do not.

Examples
--------
Drop a single column by passing the name of that column.

>>> df = pl.DataFrame(
...     {
...         "foo": [1, 2, 3],
...         "bar": [6.0, 7.0, 8.0],
...         "ham": ["a", "b", "c"],
...     }
... )
>>> df.drop("ham")
shape: (3, 2)
┌─────┬─────┐
│ foo ┆ bar │
│ --- ┆ --- │
│ i64 ┆ f64 │
╞═════╪═════╡
│ 1   ┆ 6.0 │
│ 2   ┆ 7.0 │
│ 3   ┆ 8.0 │
└─────┴─────┘

Drop multiple columns by passing a list of column names.

>>> df.drop(["bar", "ham"])
shape: (3, 1)
┌─────┐
│ foo │
│ --- │
│ i64 │
╞═════╡
│ 1   │
│ 2   │
│ 3   │
└─────┘

Drop multiple columns by passing a selector.

>>> import polars.selectors as cs
>>> df.drop(cs.numeric())
shape: (3, 1)
┌─────┐
│ ham │
│ --- │
│ str │
╞═════╡
│ a   │
│ b   │
│ c   │
└─────┘

Use positional arguments to drop multiple columns.

>>> df.drop("foo", "ham")
shape: (3, 1)
┌─────┐
│ bar │
│ --- │
│ f64 │
╞═════╡
│ 6.0 │
│ 7.0 │
│ 8.0 │
└─────┘
r   r  r   r  )r  r  r  rb  r  r  )r   r   r  r  s       r   rb  DataFrame.drop !  sG    f 	= IIKT+#)+W=#7#7#9W:	
r   c                J    [        U R                  R                  U5      5      $ )a  
Drop a single column in-place and return the dropped column.

Parameters
----------
name
    Name of the column to drop.

Returns
-------
Series
    The dropped column.

Examples
--------
>>> df = pl.DataFrame(
...     {
...         "foo": [1, 2, 3],
...         "bar": [6, 7, 8],
...         "ham": ["a", "b", "c"],
...     }
... )
>>> df.drop_in_place("ham")
shape: (3,)
Series: 'ham' [str]
[
    "a"
    "b"
    "c"
]
)r9   r   drop_in_placer  s     r   r  DataFrame.drop_in_place[!  s    @ dhh,,T233r   c               ~    SSK Jn  U R                  5       R                  XS9R	                  UR                  5       S9$ )uc
  
Cast DataFrame column(s) to the specified dtype(s).

Parameters
----------
dtypes
    Mapping of column names (or selector) to dtypes, or a single dtype
    to which all columns will be cast.
strict
    Raise if cast is invalid on rows after predicates are pushed down.
    If `False`, invalid casts will produce null values.

Examples
--------
>>> from datetime import date
>>> df = pl.DataFrame(
...     {
...         "foo": [1, 2, 3],
...         "bar": [6.0, 7.0, 8.0],
...         "ham": [date(2020, 1, 2), date(2021, 3, 4), date(2022, 5, 6)],
...     }
... )

Cast specific frame columns to the specified dtypes:

>>> df.cast({"foo": pl.Float32, "bar": pl.UInt8})
shape: (3, 3)
┌─────┬─────┬────────────┐
│ foo ┆ bar ┆ ham        │
│ --- ┆ --- ┆ ---        │
│ f32 ┆ u8  ┆ date       │
╞═════╪═════╪════════════╡
│ 1.0 ┆ 6   ┆ 2020-01-02 │
│ 2.0 ┆ 7   ┆ 2021-03-04 │
│ 3.0 ┆ 8   ┆ 2022-05-06 │
└─────┴─────┴────────────┘

Cast all frame columns matching one dtype (or dtype group) to another dtype:

>>> df.cast({pl.Date: pl.Datetime})
shape: (3, 3)
┌─────┬─────┬─────────────────────┐
│ foo ┆ bar ┆ ham                 │
│ --- ┆ --- ┆ ---                 │
│ i64 ┆ f64 ┆ datetime[μs]        │
╞═════╪═════╪═════════════════════╡
│ 1   ┆ 6.0 ┆ 2020-01-02 00:00:00 │
│ 2   ┆ 7.0 ┆ 2021-03-04 00:00:00 │
│ 3   ┆ 8.0 ┆ 2022-05-06 00:00:00 │
└─────┴─────┴─────────────────────┘

Use selectors to define the columns being cast:

>>> import polars.selectors as cs
>>> df.cast({cs.numeric(): pl.UInt32, cs.temporal(): pl.String})
shape: (3, 3)
┌─────┬─────┬────────────┐
│ foo ┆ bar ┆ ham        │
│ --- ┆ --- ┆ ---        │
│ u32 ┆ u32 ┆ str        │
╞═════╪═════╪════════════╡
│ 1   ┆ 6   ┆ 2020-01-02 │
│ 2   ┆ 7   ┆ 2021-03-04 │
│ 3   ┆ 8   ┆ 2022-05-06 │
└─────┴─────┴────────────┘

Cast all frame columns to the specified dtype:

>>> df.cast(pl.String).to_dict(as_series=False)
{'foo': ['1', '2', '3'],
 'bar': ['6.0', '7.0', '8.0'],
 'ham': ['2020-01-02', '2021-03-04', '2022-05-06']}
r   r  r  r  )r  r  r  r   r  r  )r   r  r   r  s       r   r   DataFrame.cast}!  s9    h 	= IIKT&T(W=#7#7#9W:	
r   c                Z   US:  a  SU 3n[        U5      eUS:X  a)  U R                  U R                  R                  5       5      $ U R	                  U R
                  R                  5        VVs0 s H*  u  p4U[        R                  " X4S9R                  SU5      _M,     snn5      $ s  snnf )u  
Create an empty (n=0) or `n`-row null-filled (n>0) copy of the DataFrame.

Returns a `n`-row null-filled DataFrame with an identical schema.
`n` can be greater than the current number of rows in the DataFrame.

Parameters
----------
n
    Number of (null-filled) rows to return in the cleared frame.

See Also
--------
clone : Cheap deepcopy/clone.

Examples
--------
>>> df = pl.DataFrame(
...     {
...         "a": [None, 2, 3, 4],
...         "b": [0.5, None, 2.5, 13],
...         "c": [True, True, False, None],
...     }
... )
>>> df.clear()
shape: (0, 3)
┌─────┬─────┬──────┐
│ a   ┆ b   ┆ c    │
│ --- ┆ --- ┆ ---  │
│ i64 ┆ f64 ┆ bool │
╞═════╪═════╪══════╡
└─────┴─────┴──────┘

>>> df.clear(n=2)
shape: (2, 3)
┌──────┬──────┬──────┐
│ a    ┆ b    ┆ c    │
│ ---  ┆ ---  ┆ ---  │
│ i64  ┆ f64  ┆ bool │
╞══════╪══════╪══════╡
│ null ┆ null ┆ null │
│ null ┆ null ┆ null │
└──────┴──────┴──────┘
r   z.`n` should be greater than or equal to 0, got )r  r+  N)
r   r   r   clear	__class__r   ra  r   ru   extend_constant)r   rQ  r   nmrg  s        r   r  DataFrame.clear!  s    Z q5B1#FCS/!6??488>>#344~~ #kk//11FB BII20@@qII1
 	
s   .1B'
c                T    U R                  U R                  R                  5       5      $ )u8  
Create a copy of this DataFrame.

This is a cheap operation that does not copy data.

See Also
--------
clear : Create an empty copy of the current DataFrame, with identical
    schema but no data.

Examples
--------
>>> df = pl.DataFrame(
...     {
...         "a": [1, 2, 3, 4],
...         "b": [0.5, 4, 10, 13],
...         "c": [True, True, False, True],
...     }
... )
>>> df.clone()
shape: (4, 3)
┌─────┬──────┬───────┐
│ a   ┆ b    ┆ c     │
│ --- ┆ ---  ┆ ---   │
│ i64 ┆ f64  ┆ bool  │
╞═════╪══════╪═══════╡
│ 1   ┆ 0.5  ┆ true  │
│ 2   ┆ 4.0  ┆ true  │
│ 3   ┆ 10.0 ┆ false │
│ 4   ┆ 13.0 ┆ true  │
└─────┴──────┴───────┘
)r   r   r  r  s    r   r  DataFrame.clone"  s    B txx~~/00r   c                t    U R                   R                  5        Vs/ s H  n[        U5      PM     sn$ s  snf )a  
Get the DataFrame as a List of Series.

Examples
--------
>>> df = pl.DataFrame({"foo": [1, 2, 3], "bar": [4, 5, 6]})
>>> df.get_columns()
[shape: (3,)
Series: 'foo' [i64]
[
        1
        2
        3
], shape: (3,)
Series: 'bar' [i64]
[
        4
        5
        6
]]

>>> df = pl.DataFrame(
...     {
...         "a": [1, 2, 3, 4],
...         "b": [0.5, 4, 10, 13],
...         "c": [True, True, False, True],
...     }
... )
>>> df.get_columns()
[shape: (4,)
Series: 'a' [i64]
[
    1
    2
    3
    4
], shape: (4,)
Series: 'b' [f64]
[
    0.5
    4.0
    10.0
    13.0
], shape: (4,)
Series: 'c' [bool]
[
    true
    true
    false
    true
]]
r   r  r9   r   rd  s     r   r  DataFrame.get_columns6"  s0    j $(88#7#7#9:#9aq	#9:::s   5r  c                   g r   r  r   r  r  s      r   r  DataFrame.get_columnm"  s    UXr   c                   g r   r  r  s      r   r  r  p"  r  r   c                    [        U R                  R                  U5      5      $ ! [         a    U[        L a  e Us $ f = f)aY  
Get a single column by name.

Parameters
----------
name
    String name of the column to retrieve.
default
    Value to return if the column does not exist; if not explicitly set and
    the column is not present a `ColumnNotFoundError` exception is raised.

Returns
-------
Series (or arbitrary default value, if specified).

See Also
--------
to_series

Examples
--------
>>> df = pl.DataFrame({"foo": [1, 2, 3], "bar": [4, 5, 6]})
>>> df.get_column("foo")
shape: (3,)
Series: 'foo' [i64]
[
    1
    2
    3
]

Missing column handling; can optionally provide an arbitrary default value
to the method (otherwise a `ColumnNotFoundError` exception is raised).

>>> df.get_column("baz", default=pl.Series("baz", ["?", "?", "?"]))
shape: (3,)
Series: 'baz' [str]
[
    "?"
    "?"
    "?"
]
>>> res = df.get_column("baz", default=None)
>>> res is None
True
)r9   r   r  r\   r0   r  s      r   r  r  s"  sA    b	$((--d344" 	*$N	s   #& ??matches_supertypec                   SSK Jn  U R                  5       R                  XX4S9R	                  UR                  5       S9$ )u  
Fill null values using the specified value or strategy.

Parameters
----------
value
    Value used to fill null values.
strategy : {None, 'forward', 'backward', 'min', 'max', 'mean', 'zero', 'one'}
    Strategy used to fill null values.
limit
    Number of consecutive null values to fill when using the 'forward' or
    'backward' strategy.
matches_supertype
    Fill all matching supertype of the fill `value`.

Returns
-------
DataFrame
    DataFrame with None values replaced by the filling strategy.

See Also
--------
fill_nan

Notes
-----
A null value is not the same as a NaN value.
To fill NaN values, use :func:`fill_nan`.

Examples
--------
>>> df = pl.DataFrame(
...     {
...         "a": [1, 2, None, 4],
...         "b": [0.5, 4, None, 13],
...     }
... )
>>> df.fill_null(99)
shape: (4, 2)
┌─────┬──────┐
│ a   ┆ b    │
│ --- ┆ ---  │
│ i64 ┆ f64  │
╞═════╪══════╡
│ 1   ┆ 0.5  │
│ 2   ┆ 4.0  │
│ 99  ┆ 99.0 │
│ 4   ┆ 13.0 │
└─────┴──────┘
>>> df.fill_null(strategy="forward")
shape: (4, 2)
┌─────┬──────┐
│ a   ┆ b    │
│ --- ┆ ---  │
│ i64 ┆ f64  │
╞═════╪══════╡
│ 1   ┆ 0.5  │
│ 2   ┆ 4.0  │
│ 2   ┆ 4.0  │
│ 4   ┆ 13.0 │
└─────┴──────┘

>>> df.fill_null(strategy="max")
shape: (4, 2)
┌─────┬──────┐
│ a   ┆ b    │
│ --- ┆ ---  │
│ i64 ┆ f64  │
╞═════╪══════╡
│ 1   ┆ 0.5  │
│ 2   ┆ 4.0  │
│ 4   ┆ 13.0 │
│ 4   ┆ 13.0 │
└─────┴──────┘

>>> df.fill_null(strategy="zero")
shape: (4, 2)
┌─────┬──────┐
│ a   ┆ b    │
│ --- ┆ ---  │
│ i64 ┆ f64  │
╞═════╪══════╡
│ 1   ┆ 0.5  │
│ 2   ┆ 4.0  │
│ 0   ┆ 0.0  │
│ 4   ┆ 13.0 │
└─────┴──────┘
r   r  r  r  )r  r  r  	fill_nullr  r  )r   r  r  rm  r  r  s         r   r  DataFrame.fill_null"  s;    @ 	= IIKYuYSW=#7#7#9W:	
r   c                    SSK Jn  U R                  5       R                  U5      R	                  UR                  5       S9$ )u9  
Fill floating point NaN values by an Expression evaluation.

Parameters
----------
value
    Value used to fill NaN values.

Returns
-------
DataFrame
    DataFrame with NaN values replaced by the given value.

See Also
--------
fill_null

Notes
-----
A NaN value is not the same as a null value.
To fill null values, use :func:`fill_null`.

Examples
--------
>>> df = pl.DataFrame(
...     {
...         "a": [1.5, 2, float("nan"), 4],
...         "b": [0.5, 4, float("nan"), 13],
...     }
... )
>>> df.fill_nan(99)
shape: (4, 2)
┌──────┬──────┐
│ a    ┆ b    │
│ ---  ┆ ---  │
│ f64  ┆ f64  │
╞══════╪══════╡
│ 1.5  ┆ 0.5  │
│ 2.0  ┆ 4.0  │
│ 99.0 ┆ 99.0 │
│ 4.0  ┆ 13.0 │
└──────┴──────┘
r   r  r  )r  r  r  fill_nanr  r  )r   r  r  s      r   r  DataFrame.fill_nan#  s6    X 	=yy{##E*22AUAUAW2XXr   c                    SSK Jn  U R                  5       R                  " U/UQ76 R	                  UR                  5       S9$ )u  
Explode the dataframe to long format by exploding the given columns.

Parameters
----------
columns
    Column names, expressions, or a selector defining them. The underlying
    columns being exploded must be of the `List` or `Array` data type.
*more_columns
    Additional names of columns to explode, specified as positional arguments.

Returns
-------
DataFrame

Examples
--------
>>> df = pl.DataFrame(
...     {
...         "letters": ["a", "a", "b", "c"],
...         "numbers": [[1], [2, 3], [4, 5], [6, 7, 8]],
...     }
... )
>>> df
shape: (4, 2)
┌─────────┬───────────┐
│ letters ┆ numbers   │
│ ---     ┆ ---       │
│ str     ┆ list[i64] │
╞═════════╪═══════════╡
│ a       ┆ [1]       │
│ a       ┆ [2, 3]    │
│ b       ┆ [4, 5]    │
│ c       ┆ [6, 7, 8] │
└─────────┴───────────┘
>>> df.explode("numbers")
shape: (8, 2)
┌─────────┬─────────┐
│ letters ┆ numbers │
│ ---     ┆ ---     │
│ str     ┆ i64     │
╞═════════╪═════════╡
│ a       ┆ 1       │
│ a       ┆ 2       │
│ a       ┆ 3       │
│ b       ┆ 4       │
│ b       ┆ 5       │
│ c       ┆ 6       │
│ c       ┆ 7       │
│ c       ┆ 8       │
└─────────┴─────────┘
r   r  r  )r  r  r  exploder  r  r   r  more_columnsr  s       r   r  DataFrame.explodeC#  sG    r 	= IIKW-+-W=#7#7#9W:	
r   r  )r  r  aggregate_functionr=  sort_columnsr  c                  [        X5      nUb  [        X5      nUb  [        X5      n[        U[        5      (       Ga  US:X  a/  [        R                  " 5       R                  5       R                  nGOUS:X  a/  [        R                  " 5       R                  5       R                  nGOUS:X  a/  [        R                  " 5       R                  5       R                  nGOJUS:X  a/  [        R                  " 5       R                  5       R                  nGOUS:X  a.  [        R                  " 5       R                  5       R                  nOUS:X  a.  [        R                  " 5       R                  5       R                  nOUS:X  a.  [        R                  " 5       R                  5       R                  nOyUS	:X  a   [        R                  " 5       R                  nOSUS
:X  a*  [        SSS9  [        R                  " 5       R                  nO#SU< 3n	[        U	5      eUc  SnOUR                  nU R!                  U R"                  R%                  UUUUUUU5      5      $ )u  
Create a spreadsheet-style pivot table as a DataFrame.

Only available in eager mode. See "Examples" section below for how to do a
"lazy pivot" if you know the unique column values in advance.

.. versionchanged:: 1.0.0
    The `columns` parameter was renamed `on`.

Parameters
----------
on
    The column(s) whose values will be used as the new columns of the output
    DataFrame.
index
    The column(s) that remain from the input to the output. The output DataFrame will have one row
    for each unique combination of the `index`'s values.
    If None, all remaining columns not specified on `on` and `values` will be used. At least one
    of `index` and `values` must be specified.
values
    The existing column(s) of values which will be moved under the new columns from index. If an
    aggregation is specified, these are the values on which the aggregation will be computed.
    If None, all remaining columns not specified on `on` and `index` will be used.
    At least one of `index` and `values` must be specified.
aggregate_function
    Choose from:

    - None: no aggregation takes place, will raise error if multiple values are in group.
    - A predefined aggregate function string, one of
      {'min', 'max', 'first', 'last', 'sum', 'mean', 'median', 'len'}
    - An expression to do the aggregation. The expression can only access data from the respective
      'values' columns as generated by pivot, through `pl.element()`.
maintain_order
    Ensure the values of `index` are sorted by discovery order.
sort_columns
    Sort the transposed columns by name. Default is by order of discovery.
separator
    Used as separator/delimiter in generated column names in case of multiple
    `values` columns.

Returns
-------
DataFrame

Notes
-----
In some other frameworks, you might know this operation as `pivot_wider`.

Examples
--------
You can use `pivot` to reshape a dataframe from "long" to "wide" format.

For example, suppose we have a dataframe of test scores achieved by some
students, where each row represents a distinct test.

>>> df = pl.DataFrame(
...     {
...         "name": ["Cady", "Cady", "Karen", "Karen"],
...         "subject": ["maths", "physics", "maths", "physics"],
...         "test_1": [98, 99, 61, 58],
...         "test_2": [100, 100, 60, 60],
...     }
... )
>>> df
shape: (4, 4)
┌───────┬─────────┬────────┬────────┐
│ name  ┆ subject ┆ test_1 ┆ test_2 │
│ ---   ┆ ---     ┆ ---    ┆ ---    │
│ str   ┆ str     ┆ i64    ┆ i64    │
╞═══════╪═════════╪════════╪════════╡
│ Cady  ┆ maths   ┆ 98     ┆ 100    │
│ Cady  ┆ physics ┆ 99     ┆ 100    │
│ Karen ┆ maths   ┆ 61     ┆ 60     │
│ Karen ┆ physics ┆ 58     ┆ 60     │
└───────┴─────────┴────────┴────────┘

Using `pivot`, we can reshape so we have one row per student, with different
subjects as columns, and their `test_1` scores as values:

>>> df.pivot("subject", index="name", values="test_1")
shape: (2, 3)
┌───────┬───────┬─────────┐
│ name  ┆ maths ┆ physics │
│ ---   ┆ ---   ┆ ---     │
│ str   ┆ i64   ┆ i64     │
╞═══════╪═══════╪═════════╡
│ Cady  ┆ 98    ┆ 99      │
│ Karen ┆ 61    ┆ 58      │
└───────┴───────┴─────────┘

You can use selectors too - here we include all test scores in the pivoted table:

>>> import polars.selectors as cs
>>> df.pivot("subject", values=cs.starts_with("test"))
shape: (2, 5)
┌───────┬──────────────┬────────────────┬──────────────┬────────────────┐
│ name  ┆ test_1_maths ┆ test_1_physics ┆ test_2_maths ┆ test_2_physics │
│ ---   ┆ ---          ┆ ---            ┆ ---          ┆ ---            │
│ str   ┆ i64          ┆ i64            ┆ i64          ┆ i64            │
╞═══════╪══════════════╪════════════════╪══════════════╪════════════════╡
│ Cady  ┆ 98           ┆ 99             ┆ 100          ┆ 100            │
│ Karen ┆ 61           ┆ 58             ┆ 60           ┆ 60             │
└───────┴──────────────┴────────────────┴──────────────┴────────────────┘

If you end up with multiple values per cell, you can specify how to aggregate
them with `aggregate_function`:

>>> df = pl.DataFrame(
...     {
...         "ix": [1, 1, 2, 2, 1, 2],
...         "col": ["a", "a", "a", "a", "b", "b"],
...         "foo": [0, 1, 2, 2, 7, 1],
...         "bar": [0, 2, 0, 0, 9, 4],
...     }
... )
>>> df.pivot("col", index="ix", aggregate_function="sum")
shape: (2, 5)
┌─────┬───────┬───────┬───────┬───────┐
│ ix  ┆ foo_a ┆ foo_b ┆ bar_a ┆ bar_b │
│ --- ┆ ---   ┆ ---   ┆ ---   ┆ ---   │
│ i64 ┆ i64   ┆ i64   ┆ i64   ┆ i64   │
╞═════╪═══════╪═══════╪═══════╪═══════╡
│ 1   ┆ 1     ┆ 7     ┆ 2     ┆ 9     │
│ 2   ┆ 4     ┆ 1     ┆ 0     ┆ 4     │
└─────┴───────┴───────┴───────┴───────┘

You can also pass a custom aggregation function using
:meth:`polars.element`:

>>> df = pl.DataFrame(
...     {
...         "col1": ["a", "a", "a", "b", "b", "b"],
...         "col2": ["x", "x", "x", "x", "y", "y"],
...         "col3": [6, 7, 3, 2, 5, 7],
...     }
... )
>>> df.pivot(
...     "col2",
...     index="col1",
...     values="col3",
...     aggregate_function=pl.element().tanh().mean(),
... )
shape: (2, 3)
┌──────┬──────────┬──────────┐
│ col1 ┆ x        ┆ y        │
│ ---  ┆ ---      ┆ ---      │
│ str  ┆ f64      ┆ f64      │
╞══════╪══════════╪══════════╡
│ a    ┆ 0.998347 ┆ null     │
│ b    ┆ 0.964028 ┆ 0.999954 │
└──────┴──────────┴──────────┘

Note that `pivot` is only available in eager mode. If you know the unique
column values in advance, you can use :meth:`polars.LazyFrame.group_by` to
get the same result as above in lazy mode:

>>> index = pl.col("col1")
>>> on = pl.col("col2")
>>> values = pl.col("col3")
>>> unique_column_values = ["x", "y"]
>>> aggregate_function = lambda col: col.tanh().mean()
>>> df.lazy().group_by(index).agg(
...     aggregate_function(values.filter(on == value)).alias(value)
...     for value in unique_column_values
... ).collect()  # doctest: +IGNORE_RESULT
shape: (2, 3)
┌──────┬──────────┬──────────┐
│ col1 ┆ x        ┆ y        │
│ ---  ┆ ---      ┆ ---      │
│ str  ┆ f64      ┆ f64      │
╞══════╪══════════╪══════════╡
│ a    ┆ 0.998347 ┆ null     │
│ b    ┆ 0.964028 ┆ 0.999954 │
└──────┴──────────┴──────────┘
Nfirstsumr+  r*  meanmedianlastr  countzd`aggregate_function='count'` input for `pivot` is deprecated. Please use `aggregate_function='len'`.z0.20.5r  z1invalid input for `aggregate_function` argument: )rf   r   r   rK  elementr  _pyexprr  r+  r*  r  r  r  r  r&   r   r   r   
pivot_expr)
r   r  r  r  r  r=  r  r  aggregate_exprr   s
             r   pivotDataFrame.pivot#  s   v t(&t4F%d2E(#..!W,!"!2!2!4!<!<#u,!"!2!:!:#u,!"!2!:!:#u,!"!2!:!:#v-!"!1!1!3!;!;#x/!"!3!3!5!=!=#v-!"!1!1!3!;!;#u,!"#w.)>$
 "#IJ\I_` o%'!N/77NHH

 
	
r   )r  variable_name
value_namec                   Uc  / O
[        X5      nUc  / O
[        X5      nU R                  U R                  R                  XXC5      5      $ )u  
Unpivot a DataFrame from wide to long format.

Optionally leaves identifiers set.

This function is useful to massage a DataFrame into a format where one or more
columns are identifier variables (index) while all other columns, considered
measured variables (on), are "unpivoted" to the row axis leaving just
two non-identifier columns, 'variable' and 'value'.

Parameters
----------
on
    Column(s) or selector(s) to use as values variables; if `on`
    is empty all columns that are not in `index` will be used.
index
    Column(s) or selector(s) to use as identifier variables.
variable_name
    Name to give to the `variable` column. Defaults to "variable"
value_name
    Name to give to the `value` column. Defaults to "value"

Notes
-----
If you're coming from pandas, this is similar to `pandas.DataFrame.melt`,
but with `index` replacing `id_vars` and `on` replacing `value_vars`.
In other frameworks, you might know this operation as `pivot_longer`.

Examples
--------
>>> df = pl.DataFrame(
...     {
...         "a": ["x", "y", "z"],
...         "b": [1, 3, 5],
...         "c": [2, 4, 6],
...     }
... )
>>> import polars.selectors as cs
>>> df.unpivot(cs.numeric(), index="a")
shape: (6, 3)
┌─────┬──────────┬───────┐
│ a   ┆ variable ┆ value │
│ --- ┆ ---      ┆ ---   │
│ str ┆ str      ┆ i64   │
╞═════╪══════════╪═══════╡
│ x   ┆ b        ┆ 1     │
│ y   ┆ b        ┆ 3     │
│ z   ┆ b        ┆ 5     │
│ x   ┆ c        ┆ 2     │
│ y   ┆ c        ┆ 4     │
│ z   ┆ c        ┆ 6     │
└─────┴──────────┴───────┘
)rf   r   r   unpivot)r   r  r  r  r  s        r   r	  DataFrame.unpivotq$  sG    z :R#4T#>m):4)Gtxx//:UVVr   vertical)rB  r  fill_valuesc                 ^ SSK nUb  U R                  U5      OU nUR                  nUS:X  a  UnUR                  Xx-  5      n	OUn	UR                  Xy-  5      nX-  U-
  =m(       a]  [	        U[
        5      (       d$  [        UR                  5       V
s/ s H  oPM     nn
UR                  U4S j[        Xd5       5       5      nUS:X  aW  UR                  [        R                  " SX-  SS9U	-  R                  S5      5      R                  S5      R                  S5      nUR                  UR!                  U	5      5      S	-   nU VVs/ s Ha  n[        U	5        HN  nUR#                  X-  U5      R                  UR$                  S
-   ['        U5      R)                  U5      -   5      PMP     Mc     nnn[+        U5      $ s  sn
f s  snnf )u  
Unstack a long table to a wide form without doing an aggregation.

This can be much faster than a pivot, because it can skip the grouping phase.

Parameters
----------
step
    Number of rows in the unstacked frame.
how : { 'vertical', 'horizontal' }
    Direction of the unstack.
columns
    Column name(s) or selector(s) to include in the operation.
    If set to `None` (default), use all columns.
fill_values
    Fill values that don't fit the new size with this value.

Examples
--------
>>> from string import ascii_uppercase
>>> df = pl.DataFrame(
...     {
...         "x": list(ascii_uppercase[0:8]),
...         "y": pl.int_range(1, 9, eager=True),
...     }
... ).with_columns(
...     z=pl.int_ranges(pl.col("y"), pl.col("y") + 2, dtype=pl.UInt8),
... )
>>> df
shape: (8, 3)
┌─────┬─────┬──────────┐
│ x   ┆ y   ┆ z        │
│ --- ┆ --- ┆ ---      │
│ str ┆ i64 ┆ list[u8] │
╞═════╪═════╪══════════╡
│ A   ┆ 1   ┆ [1, 2]   │
│ B   ┆ 2   ┆ [2, 3]   │
│ C   ┆ 3   ┆ [3, 4]   │
│ D   ┆ 4   ┆ [4, 5]   │
│ E   ┆ 5   ┆ [5, 6]   │
│ F   ┆ 6   ┆ [6, 7]   │
│ G   ┆ 7   ┆ [7, 8]   │
│ H   ┆ 8   ┆ [8, 9]   │
└─────┴─────┴──────────┘
>>> df.unstack(step=4, how="vertical")
shape: (4, 6)
┌─────┬─────┬─────┬─────┬──────────┬──────────┐
│ x_0 ┆ x_1 ┆ y_0 ┆ y_1 ┆ z_0      ┆ z_1      │
│ --- ┆ --- ┆ --- ┆ --- ┆ ---      ┆ ---      │
│ str ┆ str ┆ i64 ┆ i64 ┆ list[u8] ┆ list[u8] │
╞═════╪═════╪═════╪═════╪══════════╪══════════╡
│ A   ┆ E   ┆ 1   ┆ 5   ┆ [1, 2]   ┆ [5, 6]   │
│ B   ┆ F   ┆ 2   ┆ 6   ┆ [2, 3]   ┆ [6, 7]   │
│ C   ┆ G   ┆ 3   ┆ 7   ┆ [3, 4]   ┆ [7, 8]   │
│ D   ┆ H   ┆ 4   ┆ 8   ┆ [4, 5]   ┆ [8, 9]   │
└─────┴─────┴─────┴─────┴──────────┴──────────┘
>>> df.unstack(step=2, how="horizontal")
shape: (4, 6)
┌─────┬─────┬─────┬─────┬──────────┬──────────┐
│ x_0 ┆ x_1 ┆ y_0 ┆ y_1 ┆ z_0      ┆ z_1      │
│ --- ┆ --- ┆ --- ┆ --- ┆ ---      ┆ ---      │
│ str ┆ str ┆ i64 ┆ i64 ┆ list[u8] ┆ list[u8] │
╞═════╪═════╪═════╪═════╪══════════╪══════════╡
│ A   ┆ B   ┆ 1   ┆ 2   ┆ [1, 2]   ┆ [2, 3]   │
│ C   ┆ D   ┆ 3   ┆ 4   ┆ [3, 4]   ┆ [4, 5]   │
│ E   ┆ F   ┆ 5   ┆ 6   ┆ [5, 6]   ┆ [6, 7]   │
│ G   ┆ H   ┆ 7   ┆ 8   ┆ [7, 8]   ┆ [8, 9]   │
└─────┴─────┴─────┴─────┴──────────┴──────────┘
>>> import polars.selectors as cs
>>> df.unstack(step=5, columns=cs.numeric(), fill_values=0)
shape: (5, 2)
┌─────┬─────┐
│ y_0 ┆ y_1 │
│ --- ┆ --- │
│ i64 ┆ i64 │
╞═════╪═════╡
│ 1   ┆ 6   │
│ 2   ┆ 7   │
│ 3   ┆ 8   │
│ 4   ┆ 0   │
│ 5   ┆ 0   │
└─────┴─────┘
r   Nr  c              3  L   >#    U  H  u  pUR                  UT5      v   M     g 7fr   )r  )rq  rd  	next_filln_fills      r   rs  $DataFrame.unstack.<locals>.<genexpr>%  s*      $8LA !!)V44$8s   !$r@  T)rF  __sort_orderr  r  )mathrJ  r  ceilr   r   rY  r   r#  r]  rK  	int_rangerZ  r@  rb  r_  log10ra  r  r   zfillr   )r   steprB  r  r  r  r   r  r  n_colsr  	zfill_valrd  	slice_nbrslicesr  s                  @r   unstackDataFrame.unstack$  s   v 	%,%8T[[!d*FYYv/FFYYv/F_v--6-k40049"((ODOq{OD $'$8 B
 ,[[FO4@6IPP&
 n%n%  JJtzz&12Q6	
 	
 "6]		 GGI&/55s9~33I>> +	 	 	 
   7 E&
s   F?	A(G)r=  include_keyas_dictc                   g r   r  r   r`  r=  r  r   rA  s         r   rS  DataFrame.partition_by9%  s     r   )r=  r  c                   g r   r  r"  s         r   rS  r#  C%  s     ,/r   c                   g r   r  r"  s         r   rS  r#  M%  s     >Ar   c                  [        X/UQ76 nU R                  R                  XbU5       Vs/ s H  nU R                  U5      PM     nnU(       a  U(       a0  U V	s/ s H"  oR	                  U5      R                  S5      PM$     n
n	O@U(       d  Sn[        U5      eU R	                  U5      R                  SS9R                  5       n
[        [        X5      5      $ U$ s  snf s  sn	f )uZ  
Group by the given columns and return the groups as separate dataframes.

Parameters
----------
by
    Column name(s) or selector(s) to group by.
*more_by
    Additional names of columns to group by, specified as positional arguments.
maintain_order
    Ensure that the order of the groups is consistent with the input data.
    This is slower than a default partition by operation.
include_key
    Include the columns used to partition the DataFrame in the output.
as_dict
    Return a dictionary instead of a list. The dictionary keys are tuples of
    the distinct group values that identify each group.

Examples
--------
Pass a single column name to partition by that column.

>>> df = pl.DataFrame(
...     {
...         "a": ["a", "b", "a", "b", "c"],
...         "b": [1, 2, 1, 3, 3],
...         "c": [5, 4, 3, 2, 1],
...     }
... )
>>> df.partition_by("a")  # doctest: +IGNORE_RESULT
[shape: (2, 3)
┌─────┬─────┬─────┐
│ a   ┆ b   ┆ c   │
│ --- ┆ --- ┆ --- │
│ str ┆ i64 ┆ i64 │
╞═════╪═════╪═════╡
│ a   ┆ 1   ┆ 5   │
│ a   ┆ 1   ┆ 3   │
└─────┴─────┴─────┘,
shape: (2, 3)
┌─────┬─────┬─────┐
│ a   ┆ b   ┆ c   │
│ --- ┆ --- ┆ --- │
│ str ┆ i64 ┆ i64 │
╞═════╪═════╪═════╡
│ b   ┆ 2   ┆ 4   │
│ b   ┆ 3   ┆ 2   │
└─────┴─────┴─────┘,
shape: (1, 3)
┌─────┬─────┬─────┐
│ a   ┆ b   ┆ c   │
│ --- ┆ --- ┆ --- │
│ str ┆ i64 ┆ i64 │
╞═════╪═════╪═════╡
│ c   ┆ 3   ┆ 1   │
└─────┴─────┴─────┘]

Partition by multiple columns by either passing a list of column names, or by
specifying each column name as a positional argument.

>>> df.partition_by("a", "b")  # doctest: +IGNORE_RESULT
[shape: (2, 3)
┌─────┬─────┬─────┐
│ a   ┆ b   ┆ c   │
│ --- ┆ --- ┆ --- │
│ str ┆ i64 ┆ i64 │
╞═════╪═════╪═════╡
│ a   ┆ 1   ┆ 5   │
│ a   ┆ 1   ┆ 3   │
└─────┴─────┴─────┘,
shape: (1, 3)
┌─────┬─────┬─────┐
│ a   ┆ b   ┆ c   │
│ --- ┆ --- ┆ --- │
│ str ┆ i64 ┆ i64 │
╞═════╪═════╪═════╡
│ b   ┆ 2   ┆ 4   │
└─────┴─────┴─────┘,
shape: (1, 3)
┌─────┬─────┬─────┐
│ a   ┆ b   ┆ c   │
│ --- ┆ --- ┆ --- │
│ str ┆ i64 ┆ i64 │
╞═════╪═════╪═════╡
│ b   ┆ 3   ┆ 2   │
└─────┴─────┴─────┘,
shape: (1, 3)
┌─────┬─────┬─────┐
│ a   ┆ b   ┆ c   │
│ --- ┆ --- ┆ --- │
│ str ┆ i64 ┆ i64 │
╞═════╪═════╪═════╡
│ c   ┆ 3   ┆ 1   │
└─────┴─────┴─────┘]

Return the partitions as a dictionary by specifying `as_dict=True`.

>>> import polars.selectors as cs
>>> df.partition_by(cs.string(), as_dict=True)  # doctest: +IGNORE_RESULT
{('a',): shape: (2, 3)
┌─────┬─────┬─────┐
│ a   ┆ b   ┆ c   │
│ --- ┆ --- ┆ --- │
│ str ┆ i64 ┆ i64 │
╞═════╪═════╪═════╡
│ a   ┆ 1   ┆ 5   │
│ a   ┆ 1   ┆ 3   │
└─────┴─────┴─────┘,
('b',): shape: (2, 3)
┌─────┬─────┬─────┐
│ a   ┆ b   ┆ c   │
│ --- ┆ --- ┆ --- │
│ str ┆ i64 ┆ i64 │
╞═════╪═════╪═════╡
│ b   ┆ 2   ┆ 4   │
│ b   ┆ 3   ┆ 2   │
└─────┴─────┴─────┘,
('c',): shape: (1, 3)
┌─────┬─────┬─────┐
│ a   ┆ b   ┆ c   │
│ --- ┆ --- ┆ --- │
│ str ┆ i64 ┆ i64 │
╞═════╪═════╪═════╡
│ c   ┆ 3   ┆ 1   │
└─────┴─────┴─────┘}
r   zVcannot use `partition_by` with `maintain_order=False, include_key=False, as_dict=True`Tr  )rf   r   rS  r   rJ  r  r   uniquer-  r   r#  )r   r`  r=  r  r   rA  	by_parsedr   
partitionspr  r   s               r   rS  r#  W%  s    L &d99	 xx,,YT
T OOC T 	 

 =GHZ),003ZH%rC$S/)I.55T5JOOQE.//!
 Is   C)C!
fill_valuec               ~    SSK Jn  U R                  5       R                  XS9R	                  UR                  5       S9$ )u  
Shift values by the given number of indices.

Parameters
----------
n
    Number of indices to shift forward. If a negative value is passed, values
    are shifted in the opposite direction instead.
fill_value
    Fill the resulting null values with this value. Accepts scalar expression
    input. Non-expression inputs are parsed as literals.

Notes
-----
This method is similar to the `LAG` operation in SQL when the value for `n`
is positive. With a negative value for `n`, it is similar to `LEAD`.

Examples
--------
By default, values are shifted forward by one index.

>>> df = pl.DataFrame(
...     {
...         "a": [1, 2, 3, 4],
...         "b": [5, 6, 7, 8],
...     }
... )
>>> df.shift()
shape: (4, 2)
┌──────┬──────┐
│ a    ┆ b    │
│ ---  ┆ ---  │
│ i64  ┆ i64  │
╞══════╪══════╡
│ null ┆ null │
│ 1    ┆ 5    │
│ 2    ┆ 6    │
│ 3    ┆ 7    │
└──────┴──────┘

Pass a negative value to shift in the opposite direction instead.

>>> df.shift(-2)
shape: (4, 2)
┌──────┬──────┐
│ a    ┆ b    │
│ ---  ┆ ---  │
│ i64  ┆ i64  │
╞══════╪══════╡
│ 3    ┆ 7    │
│ 4    ┆ 8    │
│ null ┆ null │
│ null ┆ null │
└──────┴──────┘

Specify `fill_value` to fill the resulting null values.

>>> df.shift(-2, fill_value=100)
shape: (4, 2)
┌─────┬─────┐
│ a   ┆ b   │
│ --- ┆ --- │
│ i64 ┆ i64 │
╞═════╪═════╡
│ 3   ┆ 7   │
│ 4   ┆ 8   │
│ 100 ┆ 100 │
│ 100 ┆ 100 │
└─────┴─────┘
r   r  r+  r  )r  r  r  shiftr  r  )r   rQ  r,  r  s       r   r.  DataFrame.shift%  s9    N 	= IIKU1U,W=#7#7#9W:	
r   c                H    [        U R                  R                  5       5      $ )u  
Get a mask of all duplicated rows in this DataFrame.

Examples
--------
>>> df = pl.DataFrame(
...     {
...         "a": [1, 2, 3, 1],
...         "b": ["x", "y", "z", "x"],
...     }
... )
>>> df.is_duplicated()
shape: (4,)
Series: '' [bool]
[
        true
        false
        false
        true
]

This mask can be used to visualize the duplicated lines like this:

>>> df.filter(df.is_duplicated())
shape: (2, 2)
┌─────┬─────┐
│ a   ┆ b   │
│ --- ┆ --- │
│ i64 ┆ str │
╞═════╪═════╡
│ 1   ┆ x   │
│ 1   ┆ x   │
└─────┴─────┘
)r9   r   is_duplicatedr  s    r   r1  DataFrame.is_duplicated@&  s    F dhh,,.//r   c                H    [        U R                  R                  5       5      $ )up  
Get a mask of all unique rows in this DataFrame.

Examples
--------
>>> df = pl.DataFrame(
...     {
...         "a": [1, 2, 3, 1],
...         "b": ["x", "y", "z", "x"],
...     }
... )
>>> df.is_unique()
shape: (4,)
Series: '' [bool]
[
        false
        true
        true
        false
]

This mask can be used to visualize the unique lines like this:

>>> df.filter(df.is_unique())
shape: (2, 2)
┌─────┬─────┐
│ a   ┆ b   │
│ --- ┆ --- │
│ i64 ┆ str │
╞═════╪═════╡
│ 2   ┆ y   │
│ 3   ┆ z   │
└─────┴─────┘
)r9   r   	is_uniquer  s    r   r4  DataFrame.is_uniquee&  s    F dhh((*++r   c                H    [        U R                  R                  5       5      $ )aS  
Start a lazy query from this point. This returns a `LazyFrame` object.

Operations on a `LazyFrame` are not executed until this is triggered
by calling one of:

* :meth:`.collect() <polars.LazyFrame.collect>`
    (run on all data)
* :meth:`.explain() <polars.LazyFrame.explain>`
    (print the query plan)
* :meth:`.show_graph() <polars.LazyFrame.show_graph>`
    (show the query plan as graphviz graph)
* :meth:`.collect_schema() <polars.LazyFrame.collect_schema>`
    (return the final frame schema)

Lazy operations are recommended because they allow for query optimization and
additional parallelism.

Returns
-------
LazyFrame

Examples
--------
>>> df = pl.DataFrame(
...     {
...         "a": [None, 2, 3, 4],
...         "b": [0.5, None, 2.5, 13],
...         "c": [True, True, False, None],
...     }
... )
>>> df.lazy()
<LazyFrame at ...>
)r8   r   r  r  s    r   r  DataFrame.lazy&  s    F ((r   c                    SSK Jn  U R                  5       R                  " U0 UD6R	                  UR                  5       S9$ )u  
Select columns from this DataFrame.

Parameters
----------
*exprs
    Column(s) to select, specified as positional arguments.
    Accepts expression input. Strings are parsed as column names,
    other non-expression inputs are parsed as literals.
**named_exprs
    Additional columns to select, specified as keyword arguments.
    The columns will be renamed to the keyword used.

Examples
--------
Pass the name of a column to select that column.

>>> df = pl.DataFrame(
...     {
...         "foo": [1, 2, 3],
...         "bar": [6, 7, 8],
...         "ham": ["a", "b", "c"],
...     }
... )
>>> df.select("foo")
shape: (3, 1)
┌─────┐
│ foo │
│ --- │
│ i64 │
╞═════╡
│ 1   │
│ 2   │
│ 3   │
└─────┘

Multiple columns can be selected by passing a list of column names.

>>> df.select(["foo", "bar"])
shape: (3, 2)
┌─────┬─────┐
│ foo ┆ bar │
│ --- ┆ --- │
│ i64 ┆ i64 │
╞═════╪═════╡
│ 1   ┆ 6   │
│ 2   ┆ 7   │
│ 3   ┆ 8   │
└─────┴─────┘

Multiple columns can also be selected using positional arguments instead of a
list. Expressions are also accepted.

>>> df.select(pl.col("foo"), pl.col("bar") + 1)
shape: (3, 2)
┌─────┬─────┐
│ foo ┆ bar │
│ --- ┆ --- │
│ i64 ┆ i64 │
╞═════╪═════╡
│ 1   ┆ 7   │
│ 2   ┆ 8   │
│ 3   ┆ 9   │
└─────┴─────┘

Use keyword arguments to easily name your expression inputs.

>>> df.select(threshold=pl.when(pl.col("foo") > 2).then(10).otherwise(0))
shape: (3, 1)
┌───────────┐
│ threshold │
│ ---       │
│ i32       │
╞═══════════╡
│ 0         │
│ 0         │
│ 10        │
└───────────┘
r   r  r  )r  r  r  rJ  r  r  r   exprsnamed_exprsr  s       r   rJ  DataFrame.select&  sG    d 	= IIKV+)+W=#7#7#9W:	
r   c                    SSK Jn  U R                  5       R                  " U0 UD6R	                  UR                  5       S9$ )a  
Select columns from this DataFrame.

This will run all expression sequentially instead of in parallel.
Use this when the work per expression is cheap.

Parameters
----------
*exprs
    Column(s) to select, specified as positional arguments.
    Accepts expression input. Strings are parsed as column names,
    other non-expression inputs are parsed as literals.
**named_exprs
    Additional columns to select, specified as keyword arguments.
    The columns will be renamed to the keyword used.

See Also
--------
select
r   r  r  )r  r  r  
select_seqr  r  r9  s       r   r>  DataFrame.select_seq	'  sF    . 	= IIKZ/"-/W=#7#7#9W:	
r   c                    SSK Jn  U R                  5       R                  " U0 UD6R	                  UR                  5       S9$ )u?  
Add columns to this DataFrame.

Added columns will replace existing columns with the same name.

Parameters
----------
*exprs
    Column(s) to add, specified as positional arguments.
    Accepts expression input. Strings are parsed as column names, other
    non-expression inputs are parsed as literals.
**named_exprs
    Additional columns to add, specified as keyword arguments.
    The columns will be renamed to the keyword used.

Returns
-------
DataFrame
    A new DataFrame with the columns added.

Notes
-----
Creating a new DataFrame using this method does not create a new copy of
existing data.

Examples
--------
Pass an expression to add it as a new column.

>>> df = pl.DataFrame(
...     {
...         "a": [1, 2, 3, 4],
...         "b": [0.5, 4, 10, 13],
...         "c": [True, True, False, True],
...     }
... )
>>> df.with_columns((pl.col("a") ** 2).alias("a^2"))
shape: (4, 4)
┌─────┬──────┬───────┬─────┐
│ a   ┆ b    ┆ c     ┆ a^2 │
│ --- ┆ ---  ┆ ---   ┆ --- │
│ i64 ┆ f64  ┆ bool  ┆ i64 │
╞═════╪══════╪═══════╪═════╡
│ 1   ┆ 0.5  ┆ true  ┆ 1   │
│ 2   ┆ 4.0  ┆ true  ┆ 4   │
│ 3   ┆ 10.0 ┆ false ┆ 9   │
│ 4   ┆ 13.0 ┆ true  ┆ 16  │
└─────┴──────┴───────┴─────┘

Added columns will replace existing columns with the same name.

>>> df.with_columns(pl.col("a").cast(pl.Float64))
shape: (4, 3)
┌─────┬──────┬───────┐
│ a   ┆ b    ┆ c     │
│ --- ┆ ---  ┆ ---   │
│ f64 ┆ f64  ┆ bool  │
╞═════╪══════╪═══════╡
│ 1.0 ┆ 0.5  ┆ true  │
│ 2.0 ┆ 4.0  ┆ true  │
│ 3.0 ┆ 10.0 ┆ false │
│ 4.0 ┆ 13.0 ┆ true  │
└─────┴──────┴───────┘

Multiple columns can be added using positional arguments.

>>> df.with_columns(
...     (pl.col("a") ** 2).alias("a^2"),
...     (pl.col("b") / 2).alias("b/2"),
...     (pl.col("c").not_()).alias("not c"),
... )
shape: (4, 6)
┌─────┬──────┬───────┬─────┬──────┬───────┐
│ a   ┆ b    ┆ c     ┆ a^2 ┆ b/2  ┆ not c │
│ --- ┆ ---  ┆ ---   ┆ --- ┆ ---  ┆ ---   │
│ i64 ┆ f64  ┆ bool  ┆ i64 ┆ f64  ┆ bool  │
╞═════╪══════╪═══════╪═════╪══════╪═══════╡
│ 1   ┆ 0.5  ┆ true  ┆ 1   ┆ 0.25 ┆ false │
│ 2   ┆ 4.0  ┆ true  ┆ 4   ┆ 2.0  ┆ false │
│ 3   ┆ 10.0 ┆ false ┆ 9   ┆ 5.0  ┆ true  │
│ 4   ┆ 13.0 ┆ true  ┆ 16  ┆ 6.5  ┆ false │
└─────┴──────┴───────┴─────┴──────┴───────┘

Multiple columns can also be added by passing a list of expressions.

>>> df.with_columns(
...     [
...         (pl.col("a") ** 2).alias("a^2"),
...         (pl.col("b") / 2).alias("b/2"),
...         (pl.col("c").not_()).alias("not c"),
...     ]
... )
shape: (4, 6)
┌─────┬──────┬───────┬─────┬──────┬───────┐
│ a   ┆ b    ┆ c     ┆ a^2 ┆ b/2  ┆ not c │
│ --- ┆ ---  ┆ ---   ┆ --- ┆ ---  ┆ ---   │
│ i64 ┆ f64  ┆ bool  ┆ i64 ┆ f64  ┆ bool  │
╞═════╪══════╪═══════╪═════╪══════╪═══════╡
│ 1   ┆ 0.5  ┆ true  ┆ 1   ┆ 0.25 ┆ false │
│ 2   ┆ 4.0  ┆ true  ┆ 4   ┆ 2.0  ┆ false │
│ 3   ┆ 10.0 ┆ false ┆ 9   ┆ 5.0  ┆ true  │
│ 4   ┆ 13.0 ┆ true  ┆ 16  ┆ 6.5  ┆ false │
└─────┴──────┴───────┴─────┴──────┴───────┘

Use keyword arguments to easily name your expression inputs.

>>> df.with_columns(
...     ab=pl.col("a") * pl.col("b"),
...     not_c=pl.col("c").not_(),
... )
shape: (4, 5)
┌─────┬──────┬───────┬──────┬───────┐
│ a   ┆ b    ┆ c     ┆ ab   ┆ not_c │
│ --- ┆ ---  ┆ ---   ┆ ---  ┆ ---   │
│ i64 ┆ f64  ┆ bool  ┆ f64  ┆ bool  │
╞═════╪══════╪═══════╪══════╪═══════╡
│ 1   ┆ 0.5  ┆ true  ┆ 0.5  ┆ false │
│ 2   ┆ 4.0  ┆ true  ┆ 8.0  ┆ false │
│ 3   ┆ 10.0 ┆ false ┆ 30.0 ┆ true  │
│ 4   ┆ 13.0 ┆ true  ┆ 52.0 ┆ false │
└─────┴──────┴───────┴──────┴───────┘
r   r  r  )r  r  r  r]  r  r  r9  s       r   r]  DataFrame.with_columns('  sG    ~ 	= IIK\ 1$/1W=#7#7#9W:	
r   c                    SSK Jn  U R                  5       R                  " U0 UD6R	                  UR                  5       S9$ )a  
Add columns to this DataFrame.

Added columns will replace existing columns with the same name.

This will run all expression sequentially instead of in parallel.
Use this when the work per expression is cheap.

Parameters
----------
*exprs
    Column(s) to add, specified as positional arguments.
    Accepts expression input. Strings are parsed as column names, other
    non-expression inputs are parsed as literals.
**named_exprs
    Additional columns to add, specified as keyword arguments.
    The columns will be renamed to the keyword used.

Returns
-------
DataFrame
    A new DataFrame with the columns added.

See Also
--------
with_columns
r   r  r  )r  r  r  with_columns_seqr  r  r9  s       r   rC  DataFrame.with_columns_seq'  sH    @ 	= IIK$5(35W=#7#7#9W:	
r   c                    g r   r  r   r  s     r   n_chunksDataFrame.n_chunks'  s    ADr   c                    g r   r  rF  s     r   rG  rH  '  s    ?Br   c                    US:X  a  U R                   R                  5       $ US:X  a.  U R                  5        Vs/ s H  o"R                  5       PM     sn$ SU< S3n[        U5      es  snf )a  
Get number of chunks used by the ChunkedArrays of this DataFrame.

Parameters
----------
strategy : {'first', 'all'}
    Return the number of chunks of the 'first' column,
    or 'all' columns in this DataFrame.


Examples
--------
>>> df = pl.DataFrame(
...     {
...         "a": [1, 2, 3, 4],
...         "b": [0.5, 4, 10, 13],
...         "c": [True, True, False, True],
...     }
... )
>>> df.n_chunks()
1
>>> df.n_chunks(strategy="all")
[1, 1, 1]
r  rL  z!unexpected input for `strategy`: z 

Choose one of {'first', 'all'})r   rG  r  r   )r   r  rd  r   s       r   rG  rH  '  sm    2 w88$$&&*.--/:/QJJL/:: 4H<68  S/! ;s   A'c                    SSK Jn  U R                  5       R                  5       R	                  UR                  5       S9$ )u  
Aggregate the columns of this DataFrame to their maximum value.

Examples
--------
>>> df = pl.DataFrame(
...     {
...         "foo": [1, 2, 3],
...         "bar": [6, 7, 8],
...         "ham": ["a", "b", "c"],
...     }
... )
>>> df.max()
shape: (1, 3)
┌─────┬─────┬─────┐
│ foo ┆ bar ┆ ham │
│ --- ┆ --- ┆ --- │
│ i64 ┆ i64 ┆ str │
╞═════╪═════╪═════╡
│ 3   ┆ 8   ┆ c   │
└─────┴─────┴─────┘
r   r  r  )r  r  r  r+  r  r  r   r  s     r   r+  DataFrame.max(  1    . 	=yy{ ((}7K7K7M(NNr   c                    U R                  [        R                  " [        R                  " 5       5      S9R	                  5       $ )aN  
Get the maximum value horizontally across columns.

Returns
-------
Series
    A Series named `"max"`.

Examples
--------
>>> df = pl.DataFrame(
...     {
...         "foo": [1, 2, 3],
...         "bar": [4.0, 5.0, 6.0],
...     }
... )
>>> df.max_horizontal()
shape: (3,)
Series: 'max' [f64]
[
        4.0
        5.0
        6.0
]
)r+  )rJ  rK  max_horizontalrL  r  r  s    r   rP  DataFrame.max_horizontal(  /    4 {{q//8{9CCEEr   c                    SSK Jn  U R                  5       R                  5       R	                  UR                  5       S9$ )u  
Aggregate the columns of this DataFrame to their minimum value.

Examples
--------
>>> df = pl.DataFrame(
...     {
...         "foo": [1, 2, 3],
...         "bar": [6, 7, 8],
...         "ham": ["a", "b", "c"],
...     }
... )
>>> df.min()
shape: (1, 3)
┌─────┬─────┬─────┐
│ foo ┆ bar ┆ ham │
│ --- ┆ --- ┆ --- │
│ i64 ┆ i64 ┆ str │
╞═════╪═════╪═════╡
│ 1   ┆ 6   ┆ a   │
└─────┴─────┴─────┘
r   r  r  )r  r  r  r*  r  r  rL  s     r   r*  DataFrame.min8(  rN  r   c                    U R                  [        R                  " [        R                  " 5       5      S9R	                  5       $ )aN  
Get the minimum value horizontally across columns.

Returns
-------
Series
    A Series named `"min"`.

Examples
--------
>>> df = pl.DataFrame(
...     {
...         "foo": [1, 2, 3],
...         "bar": [4.0, 5.0, 6.0],
...     }
... )
>>> df.min_horizontal()
shape: (3,)
Series: 'min' [f64]
[
        1.0
        2.0
        3.0
]
)r*  )rJ  rK  min_horizontalrL  r  r  s    r   rV  DataFrame.min_horizontalS(  rR  r   c                    SSK Jn  U R                  5       R                  5       R	                  UR                  5       S9$ )u'  
Aggregate the columns of this DataFrame to their sum value.

Examples
--------
>>> df = pl.DataFrame(
...     {
...         "foo": [1, 2, 3],
...         "bar": [6, 7, 8],
...         "ham": ["a", "b", "c"],
...     }
... )
>>> df.sum()
shape: (1, 3)
┌─────┬─────┬──────┐
│ foo ┆ bar ┆ ham  │
│ --- ┆ --- ┆ ---  │
│ i64 ┆ i64 ┆ str  │
╞═════╪═════╪══════╡
│ 6   ┆ 21  ┆ null │
└─────┴─────┴──────┘
r   r  r  )r  r  r  r  r  r  rL  s     r   r  DataFrame.sumo(  rN  r   ignore_nullsc                   U R                  [        R                  " [        R                  " 5       US9S9R	                  5       $ )a  
Sum all values horizontally across columns.

Parameters
----------
ignore_nulls
    Ignore null values (default).
    If set to `False`, any null value in the input will lead to a null output.

Returns
-------
Series
    A Series named `"sum"`.

Examples
--------
>>> df = pl.DataFrame(
...     {
...         "foo": [1, 2, 3],
...         "bar": [4.0, 5.0, 6.0],
...     }
... )
>>> df.sum_horizontal()
shape: (3,)
Series: 'sum' [f64]
[
        5.0
        7.0
        9.0
]
rZ  )r  )rJ  rK  sum_horizontalrL  r  r   r[  s     r   r]  DataFrame.sum_horizontal(  s8    @ {{  |D  

)+	r   c                    SSK Jn  U R                  5       R                  5       R	                  UR                  5       S9$ )u  
Aggregate the columns of this DataFrame to their mean value.

Examples
--------
>>> df = pl.DataFrame(
...     {
...         "foo": [1, 2, 3],
...         "bar": [6, 7, 8],
...         "ham": ["a", "b", "c"],
...         "spam": [True, False, None],
...     }
... )
>>> df.mean()
shape: (1, 4)
┌─────┬─────┬──────┬──────┐
│ foo ┆ bar ┆ ham  ┆ spam │
│ --- ┆ --- ┆ ---  ┆ ---  │
│ f64 ┆ f64 ┆ str  ┆ f64  │
╞═════╪═════╪══════╪══════╡
│ 2.0 ┆ 7.0 ┆ null ┆ 0.5  │
└─────┴─────┴──────┴──────┘
r   r  r  )r  r  r  r  r  r  rL  s     r   r  DataFrame.mean(  s3    0 	=yy{!))8L8L8N)OOr   c                   U R                  [        R                  " [        R                  " 5       US9S9R	                  5       $ )a  
Take the mean of all values horizontally across columns.

Parameters
----------
ignore_nulls
    Ignore null values (default).
    If set to `False`, any null value in the input will lead to a null output.

Returns
-------
Series
    A Series named `"mean"`.

Examples
--------
>>> df = pl.DataFrame(
...     {
...         "foo": [1, 2, 3],
...         "bar": [4.0, 5.0, 6.0],
...     }
... )
>>> df.mean_horizontal()
shape: (3,)
Series: 'mean' [f64]
[
        2.5
        3.5
        4.5
]
rZ  )r  )rJ  rK  mean_horizontalrL  r  r^  s     r   rc  DataFrame.mean_horizontal(  s8    @ {{""1557F  

)+	r   c                    SSK Jn  U R                  5       R                  U5      R	                  UR                  5       S9$ )u  
Aggregate the columns of this DataFrame to their standard deviation value.

Parameters
----------
ddof
    “Delta Degrees of Freedom”: the divisor used in the calculation is N - ddof,
    where N represents the number of elements.
    By default ddof is 1.

Examples
--------
>>> df = pl.DataFrame(
...     {
...         "foo": [1, 2, 3],
...         "bar": [6, 7, 8],
...         "ham": ["a", "b", "c"],
...     }
... )
>>> df.std()
shape: (1, 3)
┌─────┬─────┬──────┐
│ foo ┆ bar ┆ ham  │
│ --- ┆ --- ┆ ---  │
│ f64 ┆ f64 ┆ str  │
╞═════╪═════╪══════╡
│ 1.0 ┆ 1.0 ┆ null │
└─────┴─────┴──────┘
>>> df.std(ddof=0)
shape: (1, 3)
┌──────────┬──────────┬──────┐
│ foo      ┆ bar      ┆ ham  │
│ ---      ┆ ---      ┆ ---  │
│ f64      ┆ f64      ┆ str  │
╞══════════╪══════════╪══════╡
│ 0.816497 ┆ 0.816497 ┆ null │
└──────────┴──────────┴──────┘
r   r  r  )r  r  r  stdr  r  r   ddofr  s      r   rf  DataFrame.std(  4    N 	=yy{t$,,=;O;O;Q,RRr   c                    SSK Jn  U R                  5       R                  U5      R	                  UR                  5       S9$ )u  
Aggregate the columns of this DataFrame to their variance value.

Parameters
----------
ddof
    “Delta Degrees of Freedom”: the divisor used in the calculation is N - ddof,
    where N represents the number of elements.
    By default ddof is 1.

Examples
--------
>>> df = pl.DataFrame(
...     {
...         "foo": [1, 2, 3],
...         "bar": [6, 7, 8],
...         "ham": ["a", "b", "c"],
...     }
... )
>>> df.var()
shape: (1, 3)
┌─────┬─────┬──────┐
│ foo ┆ bar ┆ ham  │
│ --- ┆ --- ┆ ---  │
│ f64 ┆ f64 ┆ str  │
╞═════╪═════╪══════╡
│ 1.0 ┆ 1.0 ┆ null │
└─────┴─────┴──────┘
>>> df.var(ddof=0)
shape: (1, 3)
┌──────────┬──────────┬──────┐
│ foo      ┆ bar      ┆ ham  │
│ ---      ┆ ---      ┆ ---  │
│ f64      ┆ f64      ┆ str  │
╞══════════╪══════════╪══════╡
│ 0.666667 ┆ 0.666667 ┆ null │
└──────────┴──────────┴──────┘
r   r  r  )r  r  r  varr  r  rg  s      r   rl  DataFrame.var)  rj  r   c                    SSK Jn  U R                  5       R                  5       R	                  UR                  5       S9$ )u-  
Aggregate the columns of this DataFrame to their median value.

Examples
--------
>>> df = pl.DataFrame(
...     {
...         "foo": [1, 2, 3],
...         "bar": [6, 7, 8],
...         "ham": ["a", "b", "c"],
...     }
... )
>>> df.median()
shape: (1, 3)
┌─────┬─────┬──────┐
│ foo ┆ bar ┆ ham  │
│ --- ┆ --- ┆ ---  │
│ f64 ┆ f64 ┆ str  │
╞═════╪═════╪══════╡
│ 2.0 ┆ 7.0 ┆ null │
└─────┴─────┴──────┘
r   r  r  )r  r  r  r  r  r  rL  s     r   r  DataFrame.medianD)  s3    . 	=yy{!!#++-:N:N:P+QQr   c                   / nU R                   R                  5        H  u  p#UR                  5       (       d  [        U[        5      (       a5  UR                  [        R                  " U5      R                  5       5        Md  UR                  [        R                  " S5      R                  U5      5        M     U R                  U5      $ )u2  
Aggregate the columns of this DataFrame to their product values.

Examples
--------
>>> df = pl.DataFrame(
...     {
...         "a": [1, 2, 3],
...         "b": [0.5, 4, 10],
...         "c": [True, True, False],
...     }
... )

>>> df.product()
shape: (1, 3)
┌─────┬──────┬─────┐
│ a   ┆ b    ┆ c   │
│ --- ┆ ---  ┆ --- │
│ i64 ┆ f64  ┆ i64 │
╞═════╪══════╪═════╡
│ 6   ┆ 20.0 ┆ 0   │
└─────┴──────┴─────┘
N)r   ra  
is_numericr   r@   r  rK  ra   productrb   rZ  rJ  )r   r:  r  r  s       r   rr  DataFrame.product_)  s    0 ))+HD}}*R"9"9QUU4[0023QUU4[..t45	 , {{5!!r   c                    SSK Jn  U R                  5       R                  X5      R	                  UR                  5       S9$ )u  
Aggregate the columns of this DataFrame to their quantile value.

Parameters
----------
quantile
    Quantile between 0.0 and 1.0.
interpolation : {'nearest', 'higher', 'lower', 'midpoint', 'linear', 'equiprobable'}
    Interpolation method.

Examples
--------
>>> df = pl.DataFrame(
...     {
...         "foo": [1, 2, 3],
...         "bar": [6, 7, 8],
...         "ham": ["a", "b", "c"],
...     }
... )
>>> df.quantile(0.5, "nearest")
shape: (1, 3)
┌─────┬─────┬──────┐
│ foo ┆ bar ┆ ham  │
│ --- ┆ --- ┆ ---  │
│ f64 ┆ f64 ┆ str  │
╞═════╪═════╪══════╡
│ 2.0 ┆ 7.0 ┆ null │
└─────┴─────┴──────┘
r   r  r  )r  r  r  quantiler  r  )r   ru  r0  r  s       r   ru  DataFrame.quantile)  s7    @ 	= IIKXh.W=#7#7#9W:	
r   )r  
drop_firstru  c               t    Ub  [        X5      nU R                  U R                  R                  XX45      5      $ )u  
Convert categorical variables into dummy/indicator variables.

Parameters
----------
columns
    Column name(s) or selector(s) that should be converted to dummy
    variables. If set to `None` (default), convert all columns.
separator
    Separator/delimiter used when generating column names.
drop_first
    Remove the first category from the variables being encoded.
drop_nulls
    If there are `None` values in the series, a `null` column is not generated

Examples
--------
>>> df = pl.DataFrame(
...     {
...         "foo": [1, 2],
...         "bar": [3, 4],
...         "ham": ["a", "b"],
...     }
... )
>>> df.to_dummies()
shape: (2, 6)
┌───────┬───────┬───────┬───────┬───────┬───────┐
│ foo_1 ┆ foo_2 ┆ bar_3 ┆ bar_4 ┆ ham_a ┆ ham_b │
│ ---   ┆ ---   ┆ ---   ┆ ---   ┆ ---   ┆ ---   │
│ u8    ┆ u8    ┆ u8    ┆ u8    ┆ u8    ┆ u8    │
╞═══════╪═══════╪═══════╪═══════╪═══════╪═══════╡
│ 1     ┆ 0     ┆ 1     ┆ 0     ┆ 1     ┆ 0     │
│ 0     ┆ 1     ┆ 0     ┆ 1     ┆ 0     ┆ 1     │
└───────┴───────┴───────┴───────┴───────┴───────┘

>>> df.to_dummies(drop_first=True)
shape: (2, 3)
┌───────┬───────┬───────┐
│ foo_2 ┆ bar_4 ┆ ham_b │
│ ---   ┆ ---   ┆ ---   │
│ u8    ┆ u8    ┆ u8    │
╞═══════╪═══════╪═══════╡
│ 0     ┆ 0     ┆ 0     │
│ 1     ┆ 1     ┆ 1     │
└───────┴───────┴───────┘

>>> import polars.selectors as cs
>>> df.to_dummies(cs.integer(), separator=":")
shape: (2, 5)
┌───────┬───────┬───────┬───────┬─────┐
│ foo:1 ┆ foo:2 ┆ bar:3 ┆ bar:4 ┆ ham │
│ ---   ┆ ---   ┆ ---   ┆ ---   ┆ --- │
│ u8    ┆ u8    ┆ u8    ┆ u8    ┆ str │
╞═══════╪═══════╪═══════╪═══════╪═════╡
│ 1     ┆ 0     ┆ 1     ┆ 0     ┆ a   │
│ 0     ┆ 1     ┆ 0     ┆ 1     ┆ b   │
└───────┴───────┴───────┴───────┴─────┘

>>> df.to_dummies(cs.integer(), drop_first=True, separator=":")
shape: (2, 3)
┌───────┬───────┬─────┐
│ foo:2 ┆ bar:4 ┆ ham │
│ ---   ┆ ---   ┆ --- │
│ u8    ┆ u8    ┆ str │
╞═══════╪═══════╪═════╡
│ 0     ┆ 0     ┆ a   │
│ 1     ┆ 1     ┆ b   │
└───────┴───────┴─────┘
)rf   r   r   
to_dummies)r   r  r  rw  ru  s        r   ry  DataFrame.to_dummies)  s:    Z '6GHHJK
 	
r   any)r  r=  c                   SSK Jn  U R                  5       R                  XUS9R	                  UR                  5       S9$ )u  
Drop duplicate rows from this dataframe.

Parameters
----------
subset
    Column name(s) or selector(s), to consider when identifying
    duplicate rows. If set to `None` (default), use all columns.
keep : {'first', 'last', 'any', 'none'}
    Which of the duplicate rows to keep.

    * 'any': Does not give any guarantee of which row is kept.
             This allows more optimizations.
    * 'none': Don't keep duplicate rows.
    * 'first': Keep first unique row.
    * 'last': Keep last unique row.
maintain_order
    Keep the same order as the original DataFrame. This is more expensive to
    compute.
    Settings this to `True` blocks the possibility
    to run on the streaming engine.

Returns
-------
DataFrame
    DataFrame with unique rows.

Warnings
--------
This method will fail if there is a column of type `List` in the DataFrame or
subset.

Notes
-----
If you're coming from pandas, this is similar to
`pandas.DataFrame.drop_duplicates`.

Examples
--------
>>> df = pl.DataFrame(
...     {
...         "foo": [1, 2, 3, 1],
...         "bar": ["a", "a", "a", "a"],
...         "ham": ["b", "b", "b", "b"],
...     }
... )
>>> df.unique(maintain_order=True)
shape: (3, 3)
┌─────┬─────┬─────┐
│ foo ┆ bar ┆ ham │
│ --- ┆ --- ┆ --- │
│ i64 ┆ str ┆ str │
╞═════╪═════╪═════╡
│ 1   ┆ a   ┆ b   │
│ 2   ┆ a   ┆ b   │
│ 3   ┆ a   ┆ b   │
└─────┴─────┴─────┘
>>> df.unique(subset=["bar", "ham"], maintain_order=True)
shape: (1, 3)
┌─────┬─────┬─────┐
│ foo ┆ bar ┆ ham │
│ --- ┆ --- ┆ --- │
│ i64 ┆ str ┆ str │
╞═════╪═════╪═════╡
│ 1   ┆ a   ┆ b   │
└─────┴─────┴─────┘
>>> df.unique(keep="last", maintain_order=True)
shape: (3, 3)
┌─────┬─────┬─────┐
│ foo ┆ bar ┆ ham │
│ --- ┆ --- ┆ --- │
│ i64 ┆ str ┆ str │
╞═════╪═════╪═════╡
│ 2   ┆ a   ┆ b   │
│ 3   ┆ a   ┆ b   │
│ 1   ┆ a   ┆ b   │
└─────┴─────┴─────┘
r   r  )rr  r  r=  r  )r  r  r  r'  r  r  )r   rr  r  r=  r  s        r   r'  DataFrame.unique)  s;    j 	= IIKV6^VLW=#7#7#9W:	
r   c                h   [        U[        5      (       a  [        R                  " U5      nO[        U[        R
                  5      (       a  UnOl[        U[        5      (       a'  [        U5      S:X  a  [        [        US   5      5      nO0Uc  [        R                  " 5       OUn[        R                  " U5      nSSKJn  U R                  5       R                  UR!                  5       5      R#                  UR%                  5       S9nUR'                  5       (       a  S$ UR)                  S5      S   $ )a2  
Return the number of unique rows, or the number of unique row-subsets.

Parameters
----------
subset
    One or more columns/expressions that define what to count;
    omit to return the count of unique rows.

Notes
-----
This method operates at the `DataFrame` level; to operate on subsets at the
expression level you can make use of struct-packing instead, for example:

>>> expr_unique_subset = pl.struct("a", "b").n_unique()

If instead you want to count the number of unique values per-column, you can
also use expression-level syntax to return a new frame containing that result:

>>> df = pl.DataFrame(
...     [[1, 2, 3], [1, 2, 4]], schema=["a", "b", "c"], orient="row"
... )
>>> df_nunique = df.select(pl.all().n_unique())

In aggregate context there is also an equivalent method for returning the
unique values per-group:

>>> df_agg_nunique = df.group_by("a").n_unique()

Examples
--------
>>> df = pl.DataFrame(
...     {
...         "a": [1, 1, 2, 3, 4, 5],
...         "b": [0.5, 0.5, 1.0, 2.0, 3.0, 3.0],
...         "c": [True, True, True, False, True, True],
...     }
... )
>>> df.n_unique()
5

Simple columns subset.

>>> df.n_unique(subset=["b", "c"])
4

Expression subset.

>>> df.n_unique(
...     subset=[
...         (pl.col("a") // 2),
...         (pl.col("c") | (pl.col("b") >= 2)),
...     ],
... )
3
r  r   r  r  )r   r   rK  ra   r   rs   r   r  r7   r)   rL  r6  r  r  r  rJ  n_uniquer  r  r5  r  )r   rr  rR  struct_fieldsr  r   s         r   r  DataFrame.n_uniqueX*  s    r fc""55=D((D))c&kQ.>26!9=>D(.AEEGVM88M*D< IIKVDMMO$W=#7#7#9W: 	
 KKMMq3rvvay|3r   z\`DataFrame.approx_n_unique` is deprecated; use `select(pl.all().approx_n_unique())` instead.c                    SSK Jn  U R                  5       R                  5       R	                  UR                  5       S9$ )uN  
Approximate count of unique values.

.. deprecated:: 0.20.11
    Use the `select(pl.all().approx_n_unique())` method instead.

This is done using the HyperLogLog++ algorithm for cardinality estimation.

Examples
--------
>>> df = pl.DataFrame(
...     {
...         "a": [1, 2, 3, 4],
...         "b": [1, 2, 1, 1],
...     }
... )
>>> df.approx_n_unique()  # doctest: +SKIP
shape: (1, 2)
┌─────┬─────┐
│ a   ┆ b   │
│ --- ┆ --- │
│ u32 ┆ u32 │
╞═════╪═════╡
│ 4   ┆ 2   │
└─────┴─────┘
r   r  r  )r  r  r  approx_n_uniquer  r  rL  s     r   r  DataFrame.approx_n_unique*  s7    > 	= IIK'')11@T@T@V1W	
r   c                T    U R                  U R                  R                  5       5      $ )z
Rechunk the data in this DataFrame to a contiguous allocation.

This will make sure all subsequent operations have optimal and predictable
performance.
)r   r   r   r  s    r   r   DataFrame.rechunk*  s      txx//122r   c                T    U R                  U R                  R                  5       5      $ )u)  
Create a new DataFrame that shows the null counts per column.

Examples
--------
>>> df = pl.DataFrame(
...     {
...         "foo": [1, None, 3],
...         "bar": [6, 7, None],
...         "ham": ["a", "b", "c"],
...     }
... )
>>> df.null_count()
shape: (1, 3)
┌─────┬─────┬─────┐
│ foo ┆ bar ┆ ham │
│ --- ┆ --- ┆ --- │
│ u32 ┆ u32 ┆ u32 │
╞═════╪═════╪═════╡
│ 1   ┆ 1   ┆ 0   │
└─────┴─────┴─────┘
)r   r   
null_countr  s    r   r  DataFrame.null_count*  s     . txx22455r   )fractionwith_replacementshuffleseedc               &   Ub  Ub  Sn[        U5      eUc  [        R                  " SS5      nUcp  Ubm  [        U[        R
                  5      (       d  [        R
                  " SU/5      nU R                  U R                  R                  UR                  X4U5      5      $ Uc  Sn[        U[        R
                  5      (       d  [        R
                  " SU/5      nU R                  U R                  R                  UR                  X4U5      5      $ )u}  
Sample from this DataFrame.

Parameters
----------
n
    Number of items to return. Cannot be used with `fraction`. Defaults to 1 if
    `fraction` is None.
fraction
    Fraction of items to return. Cannot be used with `n`.
with_replacement
    Allow values to be sampled more than once.
shuffle
    If set to True, the order of the sampled rows will be shuffled. If
    set to False (default), the order of the returned rows will be
    neither stable nor fully random.
seed
    Seed for the random number generator. If set to None (default), a
    random seed is generated for each sample operation.

Examples
--------
>>> df = pl.DataFrame(
...     {
...         "foo": [1, 2, 3],
...         "bar": [6, 7, 8],
...         "ham": ["a", "b", "c"],
...     }
... )
>>> df.sample(n=2, seed=0)  # doctest: +IGNORE_RESULT
shape: (2, 3)
┌─────┬─────┬─────┐
│ foo ┆ bar ┆ ham │
│ --- ┆ --- ┆ --- │
│ i64 ┆ i64 ┆ str │
╞═════╪═════╪═════╡
│ 3   ┆ 8   ┆ c   │
│ 2   ┆ 7   ┆ b   │
└─────┴─────┴─────┘
z&cannot specify both `n` and `fraction`r   '  fracr  r  )r   randomrandintr   r   ru   r   r   sample_fracr   sample_n)r   rQ  r  r  r  r  r   s          r   sampleDataFrame.sample*  s    b =X1:CS/!<>>!U+D9-h		2299VhZ8??$$X[[2BTR  9A!RYY''		"qc"Atxx007GRVWXXr   c                    U R                  S5      n[        SU R                  5       H  nU" X R                  U5      5      nM     U$ )a3  
Apply a horizontal reduction on a DataFrame.

This can be used to effectively determine aggregations on a row level, and can
be applied to any DataType that can be supercast (cast to a similar parent
type).

An example of the supercast rules when applying an arithmetic operation on two
DataTypes are for instance:

- Int8 + String = String
- Float32 + Int64 = Float32
- Float32 + Float64 = Float64

Examples
--------
A horizontal sum operation:

>>> df = pl.DataFrame(
...     {
...         "a": [2, 1, 3],
...         "b": [1, 2, 3],
...         "c": [1.0, 2.0, 3.0],
...     }
... )
>>> df.fold(lambda s1, s2: s1 + s2)
shape: (3,)
Series: 'a' [f64]
[
    4.0
    5.0
    9.0
]

A horizontal minimum operation:

>>> df = pl.DataFrame({"a": [2, 1, 3], "b": [1, 2, 3], "c": [1.0, 2.0, 3.0]})
>>> df.fold(lambda s1, s2: s1.zip_with(s1 < s2, s2))
shape: (3,)
Series: 'a' [f64]
[
    1.0
    1.0
    3.0
]

A horizontal string concatenation:

>>> df = pl.DataFrame(
...     {
...         "a": ["foo", "bar", None],
...         "b": [1, 2, 3],
...         "c": [1.0, 2.0, 3.0],
...     }
... )
>>> df.fold(lambda s1, s2: s1 + s2)
shape: (3,)
Series: 'a' [str]
[
    "foo11.0"
    "bar22.0"
    null
]

A horizontal boolean or, similar to a row-wise .any():

>>> df = pl.DataFrame(
...     {
...         "a": [False, False, True],
...         "b": [False, True, False],
...     }
... )
>>> df.fold(lambda s1, s2: s1 | s2)
shape: (3,)
Series: 'a' [bool]
[
        false
        true
        true
]

Parameters
----------
operation
    function that takes two `Series` and returns a `Series`.
r   r  )r  rY  r   )r   	operationaccre  s       r   foldDataFrame.fold3+  s@    n nnQq$**%AC!23C &
r   )by_predicater,  c                   g r   r  r   r  r  r,  s       r   r  DataFrame.row+  s     r   )r  c                   g r   r  r  s       r   r  r  +  s     r   c                  Ub  Ub  Sn[        U5      e[        U[        R                  5      (       a  Sn[	        U5      eUbC  U R
                  R                  U5      nU(       a  [        [        U R                  U5      5      $ U$ Ub  [        U[        R                  5      (       d  S[        U5      < 3n[	        U5      eU R                  U5      R                  5       n[        U5      nUS:  a  SU< SU S3n[        U5      eUS:X  a  SU< S	3n[        U5      eUS   nU(       a  [        [        U R                  U5      5      $ U$ S
n[        U5      e)a  
Get the values of a single row, either by index or by predicate.

Parameters
----------
index
    Row index.
by_predicate
    Select the row according to a given expression/predicate.
named
    Return a dictionary instead of a tuple. The dictionary is a mapping of
    column name to row value. This is more expensive than returning a regular
    tuple, but allows for accessing values by column name.

Returns
-------
tuple (default) or dictionary of row values

Notes
-----
The `index` and `by_predicate` params are mutually exclusive. Additionally,
to ensure clarity, the `by_predicate` parameter must be supplied by keyword.

When using `by_predicate` it is an error condition if anything other than
one row is returned; more than one row raises `TooManyRowsReturnedError`, and
zero rows will raise `NoRowsReturnedError` (both inherit from `RowsError`).

Warnings
--------
You should NEVER use this method to iterate over a DataFrame; if you require
row-iteration you should strongly prefer use of `iter_rows()` instead.

See Also
--------
iter_rows : Row iterator over frame data (does not materialise all rows).
rows : Materialise all frame data as a list of rows (potentially expensive).
item: Return dataframe element as a scalar.

Examples
--------
Specify an index to return the row at the given index as a tuple.

>>> df = pl.DataFrame(
...     {
...         "foo": [1, 2, 3],
...         "bar": [6, 7, 8],
...         "ham": ["a", "b", "c"],
...     }
... )
>>> df.row(2)
(3, 8, 'c')

Specify `named=True` to get a dictionary instead with a mapping of column
names to row values.

>>> df.row(2, named=True)
{'foo': 3, 'bar': 8, 'ham': 'c'}

Use `by_predicate` to return the row that matches the given predicate.

>>> df.row(by_predicate=(pl.col("ham") == "b"))
(2, 7, 'b')
z>cannot set both 'index' and 'by_predicate'; mutually exclusivez<expressions should be passed to the `by_predicate` parameterz1expected `by_predicate` to be an expression, got r  zpredicate <z> returned z rowsr   z> returned no rowsz,one of `index` or `by_predicate` must be set)r   r   r   rs   r   r   	row_tupler   r#  r  r3   r  r-  r  r`   r_   )r   r  r  r,  r   r  r-  r  s           r   r  r  +  sK   L !9RCS/!rww''PCC. (($$U+CCc233
%lBGG44IJ]^jJkInon$;;|,113DYFz#L#3;vheL.s331#L#33EF)#..q'CCc233
@CS/!r   r+  c                   g r   r  r   r,  s     r   r-  DataFrame.rows,  r&  r   c                   g r   r  r  s     r   r-  r  ,  s    EHr   c          	         U(       aM  [         [        U R                  pCnU R                  R	                  5        Vs/ s H  oR" U" XE5      5      PM     sn$ U R                  R	                  5       $ s  snf )u  
Returns all data in the DataFrame as a list of rows of python-native values.

By default, each row is returned as a tuple of values given in the same order
as the frame columns. Setting `named=True` will return rows of dictionaries
instead.

Parameters
----------
named
    Return dictionaries instead of tuples. The dictionaries are a mapping of
    column name to row value. This is more expensive than returning a regular
    tuple, but allows for accessing values by column name.

Notes
-----
If you have `ns`-precision temporal values you should be aware that Python
natively only supports up to `μs`-precision; `ns`-precision values will be
truncated to microseconds on conversion to Python. If this matters to your
use-case you should export to a different format (such as Arrow or NumPy).

Warnings
--------
Row-iteration is not optimal as the underlying data is stored in columnar form;
where possible, prefer export via one of the dedicated export/output methods.
You should also consider using `iter_rows` instead, to avoid materialising all
the data at once; there is little performance difference between the two, but
peak memory can be reduced if processing rows in batches.

Returns
-------
list of row value tuples (default), or list of dictionaries (if `named=True`).

See Also
--------
iter_rows : Row iterator over frame data (does not materialise all rows).
rows_by_key : Materialises frame data as a key-indexed dictionary.

Examples
--------
>>> df = pl.DataFrame(
...     {
...         "x": ["a", "b", "b", "a"],
...         "y": [1, 2, 3, 4],
...         "z": [0, 3, 6, 9],
...     }
... )
>>> df.rows()
[('a', 1, 0), ('b', 2, 3), ('b', 3, 6), ('a', 4, 9)]
>>> df.rows(named=True)
[{'x': 'a', 'y': 1, 'z': 0},
 {'x': 'b', 'y': 2, 'z': 3},
 {'x': 'b', 'y': 3, 'z': 6},
 {'x': 'a', 'y': 4, 'z': 9}]
)r   r#  r  r   
row_tuples)r   r,  dict_zip_r  r  s         r   r-  r  ,  s^    t #'dllE9=9L9L9NO9N#E$w,-9NOO88&&(( Ps   A/)r,  r  r'  c                   g r   r  r   r  r,  r  r'  s        r   rows_by_keyDataFrame.rows_by_keyS,  s      #r   )r,  r  c                   g r   r  r  s        r   r  r  ],  s     r   )r  r'  c                   g r   r  r  s        r   r  r  g,  s     +.r   )r  c                   g r   r  r  s        r   r  r  q,  s     %(r   c                  [        X5      n[        U5      S:X  a  [        U R                  US   5      5      OU R	                  U5      R                  5       nU(       a  U nO4U R                   Vs/ s H  owU;  d  M
  UPM     nnU R	                  U5      n[        XVR                  US95      n	U(       a  [        U	5      n
U
$ [        [        5      n
U	 H  u  pX   R                  U5        M     U
$ s  snf )u  
Returns all data as a dictionary of python-native values keyed by some column.

This method is like `rows`, but instead of returning rows in a flat list, rows
are grouped by the values in the `key` column(s) and returned as a dictionary.

Note that this method should not be used in place of native operations, due to
the high cost of materializing all frame data out into a dictionary; it should
be used only when you need to move the values out into a Python data structure
or other object that cannot operate directly with Polars/Arrow.

Parameters
----------
key
    The column(s) to use as the key for the returned dictionary. If multiple
    columns are specified, the key will be a tuple of those values, otherwise
    it will be a string.
named
    Return dictionary rows instead of tuples, mapping column name to row value.
include_key
    Include key values inline with the associated data (by default the key
    values are omitted as a memory/performance optimisation, as they can be
    reoconstructed from the key).
unique
    Indicate that the key is unique; this will result in a 1:1 mapping from
    key to a single associated row. Note that if the key is *not* actually
    unique the last row with the given key will be returned.

Notes
-----
If you have `ns`-precision temporal values you should be aware that Python
natively only supports up to `μs`-precision; `ns`-precision values will be
truncated to microseconds on conversion to Python. If this matters to your
use-case you should export to a different format (such as Arrow or NumPy).

See Also
--------
rows : Materialize all frame data as a list of rows (potentially expensive).
iter_rows : Row iterator over frame data (does not materialize all rows).
to_dict : Convert DataFrame to a dictionary mapping column name to values.

Examples
--------
>>> df = pl.DataFrame(
...     {
...         "w": ["a", "b", "b", "a"],
...         "x": ["q", "q", "q", "k"],
...         "y": [1.0, 2.5, 3.0, 4.5],
...         "z": [9, 8, 7, 6],
...     }
... )

Group rows by the given key column(s):

>>> df.rows_by_key(key=["w"])
defaultdict(<class 'list'>,
    {'a': [('q', 1.0, 9), ('k', 4.5, 6)],
     'b': [('q', 2.5, 8), ('q', 3.0, 7)]})

Return the same row groupings as dictionaries:

>>> df.rows_by_key(key=["w"], named=True)
defaultdict(<class 'list'>,
    {'a': [{'x': 'q', 'y': 1.0, 'z': 9},
           {'x': 'k', 'y': 4.5, 'z': 6}],
     'b': [{'x': 'q', 'y': 2.5, 'z': 8},
           {'x': 'q', 'y': 3.0, 'z': 7}]})

Return row groupings, assuming keys are unique:

>>> df.rows_by_key(key=["z"], unique=True)
{9: ('a', 'q', 1.0),
 8: ('b', 'q', 2.5),
 7: ('b', 'q', 3.0),
 6: ('a', 'k', 4.5)}

Return row groupings as dictionaries, assuming keys are unique:

>>> df.rows_by_key(key=["z"], named=True, unique=True)
{9: {'w': 'a', 'x': 'q', 'y': 1.0},
 8: {'w': 'b', 'x': 'q', 'y': 2.5},
 7: {'w': 'b', 'x': 'q', 'y': 3.0},
 6: {'w': 'a', 'x': 'k', 'y': 4.5}}

Return dictionary rows grouped by a compound key, including key values:

>>> df.rows_by_key(key=["w", "x"], named=True, include_key=True)
defaultdict(<class 'list'>,
    {('a', 'q'): [{'w': 'a', 'x': 'q', 'y': 1.0, 'z': 9}],
     ('b', 'q'): [{'w': 'b', 'x': 'q', 'y': 2.5, 'z': 8},
                  {'w': 'b', 'x': 'q', 'y': 3.0, 'z': 7}],
     ('a', 'k'): [{'w': 'a', 'x': 'k', 'y': 4.5, 'z': 6}]})
r  r   r+  )rf   r  iterr  rJ  	iter_rowsr   r#  r   r   r   r  )r   r  r,  r  r'  keysr  rV  	data_colszippedr-  r   s               r   r  r  {,  s    J  * 3x1} Q()S!++- 	 F$(KK@KqC<KI@[[+FT++%+89 <D 	 t$D#		  & $  As   /	C0<C0)r,  buffer_sizec                   g r   r  r   r,  r  s      r   r  DataFrame.iter_rows,  s     %(r   )r  c                   g r   r  r  s      r   r  r   -  s     $'r   i   c          	   #  H  #    U R                   U R                  [        [        4u  p4pV[        U R
                  ;   nU(       a}  U(       dv  [        SU R                  U5       HZ  nU R                  X5      n	U(       a(  U	R                  SS9 H  n
U" U" X:5      5      v   M     MC  U	R                  SS9 Sh  vN   M\     gU(       a3  [        U R                  5       H  nU" U" X4" U5      5      5      v   M     g[        U R                  5       H  nU" U5      v   M     g Nj7f)u  
Returns an iterator over the DataFrame of rows of python-native values.

Parameters
----------
named
    Return dictionaries instead of tuples. The dictionaries are a mapping of
    column name to row value. This is more expensive than returning a regular
    tuple, but allows for accessing values by column name.
buffer_size
    Determines the number of rows that are buffered internally while iterating
    over the data; you should only modify this in very specific cases where the
    default value is determined not to be a good fit to your access pattern, as
    the speedup from using the buffer is significant (~2-4x). Setting this
    value to zero disables row buffering (not recommended).

Notes
-----
If you have `ns`-precision temporal values you should be aware that Python
natively only supports up to `μs`-precision; `ns`-precision values will be
truncated to microseconds on conversion to Python. If this matters to your
use-case you should export to a different format (such as Arrow or NumPy).

Warnings
--------
Row iteration is not optimal as the underlying data is stored in columnar form;
where possible, prefer export via one of the dedicated export/output methods
that deals with columnar data.

Returns
-------
iterator of tuples (default) or dictionaries (if named) of python row values

See Also
--------
rows : Materialises all frame data as a list of rows (potentially expensive).
rows_by_key : Materialises frame data as a key-indexed dictionary.

Examples
--------
>>> df = pl.DataFrame(
...     {
...         "a": [1, 3, 5],
...         "b": [2, 4, 6],
...     }
... )
>>> [row[0] for row in df.iter_rows()]
[1, 3, 5]
>>> [row["b"] for row in df.iter_rows(named=True)]
[2, 4, 6]
r   Fr+  N)
r  r  r   r#  rF   r  rY  r  ra  r-  )r   r,  r  r  get_rowr  r  
has_objectrb  zerocopy_slicer  re  s               r   r  r  -  s     n )-dhhc(I%%t{{*
 z4;;<!%F!@-222?#D$677  @  .222??? = 4;;'D'!*566 ( 4;;'aj  ( @s   B3D"5D 6A+D"c              #  h   #    U R                   R                  5        H  n[        U5      v   M     g7f)uD  
Returns an iterator over the columns of this DataFrame.

Yields
------
Series

Notes
-----
Consider whether you can use :func:`all` instead.
If you can, it will be more efficient.

Examples
--------
>>> df = pl.DataFrame(
...     {
...         "a": [1, 3, 5],
...         "b": [2, 4, 6],
...     }
... )
>>> [s.name for s in df.iter_columns()]
['a', 'b']

If you're using this to modify a dataframe's columns, e.g.

>>> # Do NOT do this
>>> pl.DataFrame(column * 2 for column in df.iter_columns())
shape: (3, 2)
┌─────┬─────┐
│ a   ┆ b   │
│ --- ┆ --- │
│ i64 ┆ i64 │
╞═════╪═════╡
│ 2   ┆ 4   │
│ 6   ┆ 8   │
│ 10  ┆ 12  │
└─────┴─────┘

then consider whether you can use :func:`all` instead:

>>> df.select(pl.all() * 2)
shape: (3, 2)
┌─────┬─────┐
│ a   ┆ b   │
│ --- ┆ --- │
│ i64 ┆ i64 │
╞═════╪═════╡
│ 2   ┆ 4   │
│ 6   ┆ 8   │
│ 10  ┆ 12  │
└─────┴─────┘
Nr  r  s     r   r  DataFrame.iter_columnsP-  s(     j %%'A)O (s   02c              #  n   #    [        SU R                  U5       H  nU R                  X!5      v   M     g7f)a  
Returns a non-copying iterator of slices over the underlying DataFrame.

Parameters
----------
n_rows
    Determines the number of rows contained in each DataFrame slice.

Examples
--------
>>> from datetime import date
>>> df = pl.DataFrame(
...     data={
...         "a": range(17_500),
...         "b": date(2023, 1, 1),
...         "c": "klmnoopqrstuvwxyz",
...     },
...     schema_overrides={"a": pl.Int32},
... )
>>> for idx, frame in enumerate(df.iter_slices()):
...     print(f"{type(frame).__name__}:[{idx}]:{len(frame)}")
DataFrame:[0]:10000
DataFrame:[1]:7500

Using `iter_slices` is an efficient way to chunk-iterate over DataFrames and
any supported frame export/conversion types; for example, as RecordBatches:

>>> for frame in df.iter_slices(n_rows=15_000):
...     record_batch = frame.to_arrow().to_batches()[0]
...     print(f"{record_batch.schema}\n<< {len(record_batch)}")
a: int32
b: date32[day]
c: large_string
<< 15000
a: int32
b: date32[day]
c: large_string
<< 2500

See Also
--------
iter_rows : Row iterator over frame data (does not materialise all rows).
partition_by : Split into multiple DataFrames, partitioned by groups.
r   N)rY  r  ra  )r   r  rb  s      r   iter_slicesDataFrame.iter_slices-  s.     Z At{{F3F**V,, 4s   35c                   U(       a  U R                   R                  5         U $ U R                  5       nUR                   R                  5         U$ )z\
Shrink DataFrame memory usage.

Shrinks to fit the exact capacity needed to hold the data.
)r   shrink_to_fitr  )r   r  r   s      r   r  DataFrame.shrink_to_fit-  s<     HH""$KBFF  "Ir   c                j    U R                  [        R                  " S5      R                  X5      5      $ )u  
Take every nth row in the DataFrame and return as a new DataFrame.

Parameters
----------
n
    Gather every *n*-th row.
offset
    Starting index.

Examples
--------
>>> s = pl.DataFrame({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]})
>>> s.gather_every(2)
shape: (2, 2)
┌─────┬─────┐
│ a   ┆ b   │
│ --- ┆ --- │
│ i64 ┆ i64 │
╞═════╪═════╡
│ 1   ┆ 5   │
│ 3   ┆ 7   │
└─────┴─────┘

>>> s.gather_every(2, offset=1)
shape: (2, 2)
┌─────┬─────┐
│ a   ┆ b   │
│ --- ┆ --- │
│ i64 ┆ i64 │
╞═════╪═════╡
│ 2   ┆ 6   │
│ 4   ┆ 8   │
└─────┴─────┘
r  )rJ  rK  ra   gather_every)r   rQ  rb  s      r   r  DataFrame.gather_every-  s'    H {{155:221=>>r   c                z    UnUb  UOUnUb  UOUnUb  UOUn[        U R                  R                  XVXx5      5      $ )aT  
Hash and combine the rows in this DataFrame.

The hash value is of type `UInt64`.

Parameters
----------
seed
    Random seed parameter. Defaults to 0.
seed_1
    Random seed parameter. Defaults to `seed` if not set.
seed_2
    Random seed parameter. Defaults to `seed` if not set.
seed_3
    Random seed parameter. Defaults to `seed` if not set.

Notes
-----
This implementation of `hash_rows` does not guarantee stable results
across different Polars versions. Its stability is only guaranteed within a
single version.

Examples
--------
>>> df = pl.DataFrame(
...     {
...         "foo": [1, None, 3, 4],
...         "ham": ["a", "b", None, "d"],
...     }
... )
>>> df.hash_rows(seed=42)  # doctest: +IGNORE_RESULT
shape: (4,)
Series: '' [u64]
[
    10783150408545073287
    1438741209321515184
    10047419486152048166
    2047317070637311557
]
)r9   r   	hash_rows)	r   r  seed_1seed_2seed_3k0k1k2k3s	            r   r  DataFrame.hash_rows-  sJ    ^ )Vt)Vt)Vtdhh((899r   c                h    U R                  [        R                  " S5      R                  5       5      $ )u:  
Interpolate intermediate values. The interpolation method is linear.

Nulls at the beginning and end of the series remain null.

Examples
--------
>>> df = pl.DataFrame(
...     {
...         "foo": [1, None, 9, 10],
...         "bar": [6, 7, 9, None],
...         "baz": [1, None, None, 9],
...     }
... )
>>> df.interpolate()
shape: (4, 3)
┌──────┬──────┬──────────┐
│ foo  ┆ bar  ┆ baz      │
│ ---  ┆ ---  ┆ ---      │
│ f64  ┆ f64  ┆ f64      │
╞══════╪══════╪══════════╡
│ 1.0  ┆ 6.0  ┆ 1.0      │
│ 5.0  ┆ 7.0  ┆ 3.666667 │
│ 9.0  ┆ 9.0  ┆ 6.333333 │
│ 10.0 ┆ null ┆ 9.0      │
└──────┴──────┴──────────┘
r  )rJ  rK  ra   interpolater  s    r   r  DataFrame.interpolate!.  s$    8 {{155:11344r   c                6    U R                   R                  5       $ )z
Returns `True` if the DataFrame contains no rows.

Examples
--------
>>> df = pl.DataFrame({"foo": [1, 2, 3], "bar": [4, 5, 6]})
>>> df.is_empty()
False
>>> df.filter(pl.col("foo") > 99).is_empty()
True
)r   r5  r  s    r   r5  DataFrame.is_empty?.  s     xx  ""r   c                L    [        U R                  R                  U/ 5      5      $ )a  
Convert a `DataFrame` to a `Series` of type `Struct`.

Parameters
----------
name
    Name for the struct Series

Examples
--------
>>> df = pl.DataFrame(
...     {
...         "a": [1, 2, 3, 4, 5],
...         "b": ["one", "two", "three", "four", "five"],
...     }
... )
>>> df.to_struct("nums")
shape: (5,)
Series: 'nums' [struct[2]]
[
    {1,"one"}
    {2,"two"}
    {3,"three"}
    {4,"four"}
    {5,"five"}
]
)r9   r   	to_structr  s     r   r  DataFrame.to_structM.  s     8 dhh((r233r   c                    SSK Jn  U R                  5       R                  " U/UQ76 R	                  UR                  5       S9$ )u?  
Decompose struct columns into separate columns for each of their fields.

The new columns will be inserted into the dataframe at the location of the
struct column.

Parameters
----------
columns
    Name of the struct column(s) that should be unnested.
*more_columns
    Additional columns to unnest, specified as positional arguments.

Examples
--------
>>> df = pl.DataFrame(
...     {
...         "before": ["foo", "bar"],
...         "t_a": [1, 2],
...         "t_b": ["a", "b"],
...         "t_c": [True, None],
...         "t_d": [[1, 2], [3]],
...         "after": ["baz", "womp"],
...     }
... ).select("before", pl.struct(pl.col("^t_.$")).alias("t_struct"), "after")
>>> df
shape: (2, 3)
┌────────┬─────────────────────┬───────┐
│ before ┆ t_struct            ┆ after │
│ ---    ┆ ---                 ┆ ---   │
│ str    ┆ struct[4]           ┆ str   │
╞════════╪═════════════════════╪═══════╡
│ foo    ┆ {1,"a",true,[1, 2]} ┆ baz   │
│ bar    ┆ {2,"b",null,[3]}    ┆ womp  │
└────────┴─────────────────────┴───────┘
>>> df.unnest("t_struct")
shape: (2, 6)
┌────────┬─────┬─────┬──────┬───────────┬───────┐
│ before ┆ t_a ┆ t_b ┆ t_c  ┆ t_d       ┆ after │
│ ---    ┆ --- ┆ --- ┆ ---  ┆ ---       ┆ ---   │
│ str    ┆ i64 ┆ str ┆ bool ┆ list[i64] ┆ str   │
╞════════╪═════╪═════╪══════╪═══════════╪═══════╡
│ foo    ┆ 1   ┆ a   ┆ true ┆ [1, 2]    ┆ baz   │
│ bar    ┆ 2   ┆ b   ┆ null ┆ [3]       ┆ womp  │
└────────┴─────┴─────┴──────┴───────────┴───────┘
r   r  r  )r  r  r  r7  r  r  r  s       r   r7  DataFrame.unnestk.  sG    f 	= IIKV,*,W=#7#7#9W:	
r   c                    [         R                  " U R                  5       4SS0UD6nU R                  S:X  a  [         R                  " U/5      n[        X R                  S9$ )uY  
Return pairwise Pearson product-moment correlation coefficients between columns.

See numpy `corrcoef` for more information:
https://numpy.org/doc/stable/reference/generated/numpy.corrcoef.html

Notes
-----
This functionality requires numpy to be installed.

Parameters
----------
**kwargs
    Keyword arguments are passed to numpy `corrcoef`.

Examples
--------
>>> df = pl.DataFrame({"foo": [1, 2, 3], "bar": [3, 2, 1], "ham": [7, 8, 9]})
>>> df.corr()
shape: (3, 3)
┌──────┬──────┬──────┐
│ foo  ┆ bar  ┆ ham  │
│ ---  ┆ ---  ┆ ---  │
│ f64  ┆ f64  ┆ f64  │
╞══════╪══════╪══════╡
│ 1.0  ┆ -1.0 ┆ 1.0  │
│ -1.0 ┆ 1.0  ┆ -1.0 │
│ 1.0  ┆ -1.0 ┆ 1.0  │
└──────┴──────┴──────┘
rowvarFr  r  )r   corrcoefr*  r   r  r   r  )r   r  correlation_matrixs      r   corrDataFrame.corr.  sQ    >  [[QQ&Q::?!#+=*>!?+LLAAr   c                    SSK Jn  [        X5        U R                  5       R	                  UR                  5       U5      R                  UR                  5       S9$ )u4  
Take two sorted DataFrames and merge them by the sorted key.

The output of this operation will also be sorted.
It is the callers responsibility that the frames
are sorted in ascending order by that key otherwise
the output will not make sense.

The schemas of both DataFrames must be equal.

Parameters
----------
other
    Other DataFrame that must be merged
key
    Key that is sorted.

Examples
--------
>>> df0 = pl.DataFrame(
...     {"name": ["steve", "elise", "bob"], "age": [42, 44, 18]}
... ).sort("age")
>>> df0
shape: (3, 2)
┌───────┬─────┐
│ name  ┆ age │
│ ---   ┆ --- │
│ str   ┆ i64 │
╞═══════╪═════╡
│ bob   ┆ 18  │
│ steve ┆ 42  │
│ elise ┆ 44  │
└───────┴─────┘
>>> df1 = pl.DataFrame(
...     {"name": ["anna", "megan", "steve", "thomas"], "age": [21, 33, 42, 20]}
... ).sort("age")
>>> df1
shape: (4, 2)
┌────────┬─────┐
│ name   ┆ age │
│ ---    ┆ --- │
│ str    ┆ i64 │
╞════════╪═════╡
│ thomas ┆ 20  │
│ anna   ┆ 21  │
│ megan  ┆ 33  │
│ steve  ┆ 42  │
└────────┴─────┘
>>> df0.merge_sorted(df1, key="age")
shape: (7, 2)
┌────────┬─────┐
│ name   ┆ age │
│ ---    ┆ --- │
│ str    ┆ i64 │
╞════════╪═════╡
│ bob    ┆ 18  │
│ thomas ┆ 20  │
│ anna   ┆ 21  │
│ megan  ┆ 33  │
│ steve  ┆ 42  │
│ steve  ┆ 42  │
│ elise  ┆ 44  │
└────────┴─────┘

Notes
-----
No guarantee is given over the output row order when the key is equal
between the both dataframes.

The key must be sorted in ascending order.
r   r  r  )r  r  r4   r  merge_sortedr  r  )r   r:  r  r  s       r   r  DataFrame.merge_sorted.  sG    P 	=$& IIK\%**,,W=#7#7#9W:	
r   r:  c               ~    SSK Jn  U R                  5       R                  XS9R	                  UR                  5       S9$ )a  
Flag a column as sorted.

This can speed up future operations.

Parameters
----------
column
    Column that is sorted
descending
    Whether the column is sorted in descending order.

Warnings
--------
This can lead to incorrect results if the data is NOT sorted!!
Use with care!

r   r  r  r  )r  r  r  
set_sortedr  r  )r   r   r:  r  s       r   r  DataFrame.set_sorted/  s8    4 	= IIKZZ6W=#7#7#9W:	
r   r  r  include_nullsr=  c          
         SSK Jn  [        X5        U R                  5       R	                  UR                  5       UUUUUUS9R                  UR                  5       S9$ )u
  
Update the values in this `DataFrame` with the values in `other`.

.. warning::
    This functionality is considered **unstable**. It may be changed
    at any point without it being considered a breaking change.

Parameters
----------
other
    DataFrame that will be used to update the values
on
    Column names that will be joined on. If set to `None` (default),
    the implicit row index of each frame is used as a join key.
how : {'left', 'inner', 'full'}
    * 'left' will keep all rows from the left table; rows may be duplicated
      if multiple rows in the right frame match the left row's key.
    * 'inner' keeps only those rows where the key exists in both frames.
    * 'full' will update existing rows where the key matches while also
      adding any new rows contained in the given frame.
left_on
   Join column(s) of the left DataFrame.
right_on
   Join column(s) of the right DataFrame.
include_nulls
    Overwrite values in the left frame with null values from the right frame.
    If set to `False` (default), null values in the right frame are ignored.
maintain_order : {'none', 'left', 'right', 'left_right', 'right_left'}
    Which order of rows from the inputs to preserve. See :func:`~DataFrame.join`
    for details. Unlike `join` this function preserves the left order by
    default.

Notes
-----
This is syntactic sugar for a left/inner join that preserves the order
of the left `DataFrame` by default, with an optional coalesce when
`include_nulls = False`.

Examples
--------
>>> df = pl.DataFrame(
...     {
...         "A": [1, 2, 3, 4],
...         "B": [400, 500, 600, 700],
...     }
... )
>>> df
shape: (4, 2)
┌─────┬─────┐
│ A   ┆ B   │
│ --- ┆ --- │
│ i64 ┆ i64 │
╞═════╪═════╡
│ 1   ┆ 400 │
│ 2   ┆ 500 │
│ 3   ┆ 600 │
│ 4   ┆ 700 │
└─────┴─────┘
>>> new_df = pl.DataFrame(
...     {
...         "B": [-66, None, -99],
...         "C": [5, 3, 1],
...     }
... )

Update `df` values with the non-null values in `new_df`, by row index:

>>> df.update(new_df)
shape: (4, 2)
┌─────┬─────┐
│ A   ┆ B   │
│ --- ┆ --- │
│ i64 ┆ i64 │
╞═════╪═════╡
│ 1   ┆ -66 │
│ 2   ┆ 500 │
│ 3   ┆ -99 │
│ 4   ┆ 700 │
└─────┴─────┘

Update `df` values with the non-null values in `new_df`, by row index,
but only keeping those rows that are common to both frames:

>>> df.update(new_df, how="inner")
shape: (3, 2)
┌─────┬─────┐
│ A   ┆ B   │
│ --- ┆ --- │
│ i64 ┆ i64 │
╞═════╪═════╡
│ 1   ┆ -66 │
│ 2   ┆ 500 │
│ 3   ┆ -99 │
└─────┴─────┘

Update `df` values with the non-null values in `new_df`, using a full
outer join strategy that defines explicit join columns in each frame:

>>> df.update(new_df, left_on=["A"], right_on=["C"], how="full")
shape: (5, 2)
┌─────┬─────┐
│ A   ┆ B   │
│ --- ┆ --- │
│ i64 ┆ i64 │
╞═════╪═════╡
│ 1   ┆ -99 │
│ 2   ┆ 500 │
│ 3   ┆ 600 │
│ 4   ┆ 700 │
│ 5   ┆ -66 │
└─────┴─────┘

Update `df` values including null values in `new_df`, using a full outer
join strategy that defines explicit join columns in each frame:

>>> df.update(new_df, left_on="A", right_on="C", how="full", include_nulls=True)
shape: (5, 2)
┌─────┬──────┐
│ A   ┆ B    │
│ --- ┆ ---  │
│ i64 ┆ i64  │
╞═════╪══════╡
│ 1   ┆ -99  │
│ 2   ┆ 500  │
│ 3   ┆ null │
│ 4   ┆ 700  │
│ 5   ┆ -66  │
└─────┴──────┘
r   r  r  r  )r  r  r4   r  updater  r  )	r   r:  r  rB  r  r  r  r=  r  s	            r   r  DataFrame.update>/  s_    Z 	=$&IIKV

!+-   W=#7#7#9W:	
r   c                    SSK Jn  U R                  5       R                  5       R	                  UR                  5       S9$ )u  
Return the number of non-null elements for each column.

Examples
--------
>>> df = pl.DataFrame(
...     {"a": [1, 2, 3, 4], "b": [1, 2, 1, None], "c": [None, None, None, None]}
... )
>>> df.count()
shape: (1, 3)
┌─────┬─────┬─────┐
│ a   ┆ b   ┆ c   │
│ --- ┆ --- ┆ --- │
│ u32 ┆ u32 ┆ u32 │
╞═════╪═════╪═════╡
│ 4   ┆ 3   ┆ 0   │
└─────┴─────┴─────┘
r   r  r  )r  r  r  r  r  r  rL  s     r   r  DataFrame.count/  s3    & 	=yy{  "**9M9M9O*PPr   z`DataFrame.melt` is deprecated; use `DataFrame.unpivot` instead, with `index` instead of `id_vars` and `on` instead of `value_vars`c                &    U R                  UUUUS9$ )a@  
Unpivot a DataFrame from wide to long format.

Optionally leaves identifiers set.

This function is useful to massage a DataFrame into a format where one or more
columns are identifier variables (id_vars) while all other columns, considered
measured variables (value_vars), are "unpivoted" to the row axis leaving just
two non-identifier columns, 'variable' and 'value'.

.. deprecated:: 1.0.0
    Use the :meth:`.unpivot` method instead.

Parameters
----------
id_vars
    Column(s) or selector(s) to use as identifier variables.
value_vars
    Column(s) or selector(s) to use as values variables; if `value_vars`
    is empty all columns that are not in `id_vars` will be used.
variable_name
    Name to give to the `variable` column. Defaults to "variable"
value_name
    Name to give to the `value` column. Defaults to "value"
)r  r  r  r  )r	  )r   id_vars
value_varsr  r  s        r   meltDataFrame.melt/  s'    H ||'!	  
 	
r   raiseforbid)missing_columnsmissing_struct_fieldsextra_columnsextra_struct_fieldsinteger_cast
float_castc          
         SSK Jn  U R                  5       R                  UUUUUUUS9R	                  UR                  5       S9$ )u  
Match or evolve the schema of a LazyFrame into a specific schema.

By default, match_to_schema returns an error if the input schema does not
exactly match the target schema. It also allows columns to be freely reordered,
with additional coercion rules available through optional parameters.

.. warning::
    This functionality is considered **unstable**. It may be changed
    at any point without it being considered a breaking change.

Parameters
----------
schema
    Target schema to match or evolve to.
missing_columns
    Raise of insert missing columns from the input with respect to the `schema`.

    This can also be an expression per column with what to insert if it is
    missing.
missing_struct_fields
    Raise of insert missing struct fields from the input with respect to the
    `schema`.
extra_columns
    Raise of ignore extra columns from the input with respect to the `schema`.
extra_struct_fields
    Raise of ignore extra struct fields from the input with respect to the
    `schema`.
integer_cast
    Forbid of upcast for integer columns from the input to the respective column
    in `schema`.
float_cast
    Forbid of upcast for float columns from the input to the respective column
    in `schema`.

Examples
--------
Ensuring the schema matches

>>> df = pl.DataFrame({"a": [1, 2, 3], "b": ["A", "B", "C"]})
>>> df.match_to_schema({"a": pl.Int64, "b": pl.String})
shape: (3, 2)
┌─────┬─────┐
│ a   ┆ b   │
│ --- ┆ --- │
│ i64 ┆ str │
╞═════╪═════╡
│ 1   ┆ A   │
│ 2   ┆ B   │
│ 3   ┆ C   │
└─────┴─────┘
>>> df.match_to_schema({"a": pl.Int64})  # doctest: +SKIP
polars.exceptions.SchemaError: extra columns in `match_to_schema`: "b"

Adding missing columns

>>> (
...     pl.DataFrame({"a": [1, 2, 3]}).match_to_schema(
...         {"a": pl.Int64, "b": pl.String},
...         missing_columns="insert",
...     )
... )
shape: (3, 2)
┌─────┬──────┐
│ a   ┆ b    │
│ --- ┆ ---  │
│ i64 ┆ str  │
╞═════╪══════╡
│ 1   ┆ null │
│ 2   ┆ null │
│ 3   ┆ null │
└─────┴──────┘
>>> (
...     pl.DataFrame({"a": [1, 2, 3]}).match_to_schema(
...         {"a": pl.Int64, "b": pl.String},
...         missing_columns={"b": pl.col.a.cast(pl.String)},
...     )
... )
shape: (3, 2)
┌─────┬─────┐
│ a   ┆ b   │
│ --- ┆ --- │
│ i64 ┆ str │
╞═════╪═════╡
│ 1   ┆ 1   │
│ 2   ┆ 2   │
│ 3   ┆ 3   │
└─────┴─────┘

Removing extra columns

>>> (
...     pl.DataFrame({"a": [1, 2, 3], "b": ["A", "B", "C"]}).match_to_schema(
...         {"a": pl.Int64},
...         extra_columns="ignore",
...     )
... )
shape: (3, 1)
┌─────┐
│ a   │
│ --- │
│ i64 │
╞═════╡
│ 1   │
│ 2   │
│ 3   │
└─────┘

Upcasting integers and floats

>>> (
...     pl.DataFrame(
...         {"a": [1, 2, 3], "b": [1.0, 2.0, 3.0]},
...         schema={"a": pl.Int32, "b": pl.Float32},
...     ).match_to_schema(
...         {"a": pl.Int64, "b": pl.Float64},
...         integer_cast="upcast",
...         float_cast="upcast",
...     )
... )
shape: (3, 2)
┌─────┬─────┐
│ a   ┆ b   │
│ --- ┆ --- │
│ i64 ┆ f64 │
╞═════╪═════╡
│ 1   ┆ 1.0 │
│ 2   ┆ 2.0 │
│ 3   ┆ 3.0 │
└─────┴─────┘
r   r  )r   r  r  r  r  r  r   r  )r  r  r  match_to_schemar  r  )	r   r   r  r  r  r  r  r   r  s	            r   r  DataFrame.match_to_schema0  sS    h 	= IIK_ /&;+$7)%   W=#7#7#9W:	
r   c                $   U nUb)  [        U[        5      (       a  U/nUR                  U5      nU R                  UR                  R                  5       5      nUb5  [        U[        5      (       a  U/nSU;  a  S/U-   nUR                  U5      nU$ )z
Get all runtime metadata for each column.

This is unstable and is meant for debugging purposes.

Parameters
----------
columns
    Column(s) to show the information for
stats
    Statistics to show
column_name)r   r   rJ  r   r   _to_metadata)r   r  statsr   mds        r   r  DataFrame._to_metadata0  s    " '3''")7#B__RVV0023%%%E)&%/5!B	r   	unorderedr:  r;  c          	         U R                  [        R                  " [        R                  " 5       UUUS95      R	                  5       $ )z
Row encode the given DataFrame.

This is an internal function not meant for outside consumption and can
be changed or removed at any point in time.

fields have order:
- descending
- nulls_last
- no_order
r
  )r>  rK  _row_encoderL  r  )r   r  r:  r;  s       r   r  DataFrame._row_encode0  s<    $ MM#%%	
 )+	r   )r   )NN)r   zFrameInitTypes | Noner   SchemaDefinition | Noner   SchemaDict | Noner   r[  r   zOrientation | Noner   
int | Noner   r[  r  None)r   zstr | Path | IOBaser   r   r  r   )r   rg   r  r   r   )
r   zpa.Table | pa.RecordBatchr   r  r   r  r   r[  r  r   )r   pd.DataFramer   r  r   r  r   r[  r   r[  r   r[  r  r   )r   r   r   ru   r  r   )r   r  r   r  r  r   )r  r>   )r  ro   )r  ztuple[int, int])r  r  )r  z	list[str])r  zSequence[str]r  r  )r  zlist[DataType])r  zdict[str, dict[str, bool]])r  rd   )r+  znpt.DTypeLike | Noner.  bool | Noner  np.ndarray[Any, Any]r&  )r4  r[  r)  r[  r  r   )r:  r   r;  r}   r  r   )r:  r   r;  r}   r  r   )r:  r   rc  r[  r  r   )r   r   rl  zfrozenset[PolarsDataType]rm  r   r  r   )r:  z DataFrame | Series | int | floatr  r   )r  r   )r:  objectr  r   )r:  r   r  r   )r  bytes)r  r  r  r  )r:  int | floatr  r   )r:  z-DataFrame | Series | int | float | bool | strr  r   r  )r  r   r  r[  )r  zIterator[Series])r  z-tuple[SingleIndexSelector, SingleColSelector]r  r   )r  z2str | tuple[MultiIndexSelector, SingleColSelector]r  ru   )r  zSingleIndexSelector | MultiIndexSelector | MultiColSelector | tuple[SingleIndexSelector, MultiColSelector] | tuple[MultiIndexSelector, MultiColSelector]r  r   )r  a  SingleIndexSelector | SingleColSelector | MultiColSelector | MultiIndexSelector | tuple[SingleIndexSelector, SingleColSelector] | tuple[SingleIndexSelector, MultiColSelector] | tuple[MultiIndexSelector, SingleColSelector] | tuple[MultiIndexSelector, MultiColSelector]r  zDataFrame | Series | Any)r  z;str | Sequence[int] | Sequence[str] | tuple[Any, str | int]r  r   r  r  )r  r   )r  r  r  r   )r  zobject | Noner  r  )r  r[  r  r   )r  r  r   zint | str | Noner  r   )r  CompatLevel | Noner  zpa.Table)r   Literal[True]r  zdict[str, Series])r   Literal[False]r  zdict[str, list[Any]])r   r[  r  z(dict[str, Series] | dict[str, list[Any]])r  list[dict[str, Any]])r1  r   r(  r[  r)  r[  r2  r[  r3  r  r  r  ).)rF  zLiteral['array']rA  jax.Device | str | NonerB  (str | Expr | Sequence[str | Expr] | NonerC  r  r+  PolarsDataType | Noner1  r   r  z	jax.Array)rF  Literal['dict']rA  r  rB  r  rC  r  r+  r  r1  r   r  zdict[str, jax.Array])r  )rF  r   rA  r  rB  r  rC  r  r+  r  r1  r   r  z jax.Array | dict[str, jax.Array])
rF  zLiteral['tensor']rB  r  rC  r  r+  r  r  ztorch.Tensor)
rF  zLiteral['dataset']rB  r  rC  r  r+  r  r  r   )
rF  r   rB  r  rC  r  r+  r  r  zdict[str, torch.Tensor])rr  )
rF  r   rB  r  rC  r  r+  r  r  z6torch.Tensor | dict[str, torch.Tensor] | PolarsDataset)ry  r[  r  r   r  r  )r   r   ry  r[  r  r   r  r  )r   )r  r  r  ru   )i  )rQ  r  r  r   )r  r  r   zLiteral['binary']r  r  )r  r  r   zLiteral['json']r  r   )r  IOBase | str | Pathr   r   r  r  )r  IOBase | str | Path | Noner   r   r  zbytes | str | None)r  r  r  r   )r  r!  r  r  )r  r"  r  
str | None)r  z str | Path | IO[bytes] | IO[str]r  r  )r  z'str | Path | IO[bytes] | IO[str] | Noner  r#  )&r  r  r  r[  r  r[  r  r   r  r   r  r   r  r  r  r#  r  r#  r  r#  r  r  r  r  r  r[  r  r#  r  CsvQuoteStyle | Noner  dict[str, Any] | Noner  3CredentialProviderFunction | Literal['auto'] | Noner  r  r  r   )&r  z str | Path | IO[str] | IO[bytes]r  r[  r  r[  r  r   r  r   r  r   r  r  r  r#  r  r#  r  r#  r  r  r  r  r  r[  r  r#  r  r$  r  r%  r  r&  r  r  r  r  )&r  z'str | Path | IO[str] | IO[bytes] | Noner  r[  r  r[  r  r   r  r   r  r   r  r  r  r#  r  r#  r  r#  r  r  r  r  r  r[  r  r#  r  r$  r  r%  r  r&  r  r  r  r#  )r  r   r  r   r  r  )r  r  )r  str | Path | IO[bytes]r  rw   r  r   r  r  )0r.  z(str | Workbook | IO[bytes] | Path | Noner/  zstr | Worksheet | Noner  ztuple[int, int] | strr  zstr | dict[str, Any] | Noner  r#  r  zColumnFormatDict | Noner  z$dict[OneOrMoreDataTypes, str] | Noner  zConditionalFormatDict | Noner   r%  r  zColumnTotalsDefinition | Noner  zColumnWidthsDefinition | Noner  zRowTotalsDefinition | Noner  z-dict[int | tuple[int, ...], int] | int | Noner  z0dict[str, Sequence[str] | dict[str, Any]] | Noner  z&dict[str, str | dict[str, str]] | Noner  r  r  r[  r  r[  r  r[  r	  z#Sequence[str] | SelectorType | Noner
  r[  r  r  r  zOstr | tuple[int, int] | tuple[str, int, int] | tuple[int, int, int, int] | Noner  rp   )r  r  r  r   r  r  r  r%  r  r&  r  r  r  r
   )r  r'  r  r   r  r  r  r%  r  r&  r  r  r  r  )r  str | Path | IO[bytes] | Noner  r   r  r  r  r%  r  r&  r  r  r  BytesIO | None)r  r  r  r   r  r  r  r
   )r  r'  r  r   r  r  r  r  )r  r(  r  r   r  r  r  r)  ) r  r'  r  r   rN  r  rO  zbool | str | dict[str, bool]rP  r  rQ  r  r3  r[  rR  r%  rS  str | Sequence[str] | NonerT  r  r  r%  r  r&  r  r  rU  zParquetMetadata | NonerV  r[  r  r  )r  r   r  zConnectionOrCursor | strrl  r   r  zDbWriteEngine | Nonerm  r%  r  r  )rR  zstr | pyiceberg.table.Tabler  zLiteral['append', 'overwrite']r  r  )rR  !str | Path | deltalake.DeltaTabler  z1Literal['error', 'append', 'overwrite', 'ignore']r  r  r  dict[str, str] | Noner  r&  r  r%  r  r  )rR  r+  r  zLiteral['merge']r  r  r  r,  r  r&  r  dict[str, Any]r  zdeltalake.table.TableMerger)rR  r+  r  z:Literal['error', 'append', 'overwrite', 'ignore', 'merge']r  r  r  r,  r  r&  r  r%  r  r%  r  z"deltalake.table.TableMerger | None)b)r  r   r  r  )r  r[  r  r   r  zstr | Iterable[str] | Noner  r   )r  z(Mapping[str, str] | Callable[[str], str]r   r[  r  r   )r  r  r   r   r  r   )r  zTIntoExprColumn | Iterable[IntoExprColumn] | bool | list[bool] | np.ndarray[Any, Any]r  r   r  r   )r
  r  r  r  r  r  r  r  )r
  r  r  r  r  r  r  r   )r
  r  r  r  r  r[  r  r#  ))g      ?g      ?g      ?)r2  zSequence[float] | float | Noner0  r   r  r   )r  r   r  r  )r  r  r   ru   r  r   )r`  IntoExpr | Iterable[IntoExpr]rA  r   r:  bool | Sequence[bool]r;  r0  r<  r[  r=  r[  r  r   )rI  r   r  r   r  r   )rV  r  r`  r/  r  r0  r  r   )r:  r   r\  r[  r  r[  )rb  r  rW  r  r  r   )r  )rQ  r  r  r   )rr  >ColumnNameOrSelector | Collection[ColumnNameOrSelector] | Noner  r   )rx  z&Callable[Concatenate[DataFrame, P], T]ry  P.argsr  P.kwargsr  r   )
r  z!str | Sequence[str] | pl.Selectorrx  zCallable[[Series], Series]ry  r2  r  r3  r  r   )r  r   )r  r   rb  r  r  r   )row_nrr   )r`  r/  r=  r[  r  r   r  r<   )r  r   r  str | timedeltarb  str | timedelta | Noner  rx   r  $IntoExpr | Iterable[IntoExpr] | Noner  r=   )r  r   r  r5  r  r6  rb  r6  r  r[  r  rx   rB  r   r  r7  r  r   r  r;   )
r  r   r  r5  r  r*  r=  r[  r  r   ) r:  r   r  str | None | Exprr  r8  r  r8  r  r*  r  r*  r`  r*  r  rv   rM  r   r  z$str | int | float | timedelta | Noner  r[  r  r[  r  r[  r  r[  r  r[  r  r   )Ninner)r:  r   r  r  rB  r   r  r  r  r  rM  r   r  r   r  r[  r  r  r=  MaintainOrderJoin | Noner  r   )r:  r   r  zExpr | Iterable[Expr]rM  r   r  r   )rx  z Callable[[tuple[Any, ...]], Any]r  r  r  r  r  r   )r  zlist[Series] | DataFramer  r[  r  r   )r:  r   r  r[  r  r   )r:  r   r  r   )r  5ColumnNameOrSelector | Iterable[ColumnNameOrSelector]r   r[  r  r   )r  r   r  ru   )r  z`Mapping[ColumnNameOrSelector | PolarsDataType, PolarsDataType | PythonDataType] | PolarsDataTyper   r[  r  r   )r  zlist[Series])r  r   r  zSeries | NoDefaultr  ru   )r  r   r  r   r  r   )r  r   r  zAny | NoDefaultr  zSeries | Any)NNN)
r  zAny | Expr | Noner  zFillNullStrategy | Nonerm  r  r  r[  r  r   )r  zExpr | int | float | Noner  r   )r  r;  r  rz   r  r   )r  5ColumnNameOrSelector | Sequence[ColumnNameOrSelector]r  <ColumnNameOrSelector | Sequence[ColumnNameOrSelector] | Noner  r=  r  zPivotAgg | Expr | Noner=  r[  r  r[  r  r   r  r   )
r  r=  r  r=  r  r#  r  r#  r  r   )
r  r  rB  r   r  r=  r  zlist[Any] | Noner  r   )r`  r<  rA  rz   r=  r[  r  r[  r   r  r  zlist[DataFrame])r`  r<  rA  rz   r=  r[  r  r[  r   r  r  z dict[tuple[Any, ...], DataFrame])r`  r<  rA  rz   r=  r[  r  r[  r   r[  r  z2list[DataFrame] | dict[tuple[Any, ...], DataFrame])r  )rQ  r  r,  zIntoExpr | Noner  r   )r  ru   )r  rt   )r:  r/  r;  r   r  r   )r  zLiteral['first']r  r  )r  zLiteral['all']r  z	list[int])r  )r  zLiteral['first', 'all']r  zint | list[int])r[  r[  r  ru   )rh  r  r  r   )r/  )ru  floatr0  r   r  r   )
r  r=  r  r   rw  r[  ru  r[  r  r   )rr  r1  r  r   r=  r[  r  r   )rr  r  r  r  )rQ  zint | Series | Noner  zfloat | Series | Noner  r[  r  r[  r  r  r  r   )r  z"Callable[[Series, Series], Series]r  ru   )r  r  r  Expr | Noner,  r  r  ztuple[Any, ...])r  r  r  r?  r,  r  r  r-  )r  r  r  r?  r,  r[  r  z tuple[Any, ...] | dict[str, Any])r,  r  r  zlist[tuple[Any, ...]])r,  r  r  r  )r,  r[  r  z,list[tuple[Any, ...]] | list[dict[str, Any]])
r  r<  r,  r  r  r[  r'  r  r  zdict[Any, list[Any]])
r  r<  r,  r  r  r[  r'  r  r  dict[Any, Any])
r  r<  r,  r  r  r[  r'  r  r  zdict[Any, list[dict[str, Any]]])
r  r<  r,  r  r  r[  r'  r  r  zdict[Any, dict[str, Any]])
r  r<  r,  r[  r  r[  r'  r[  r  r@  )r,  r  r  r  r  zIterator[tuple[Any, ...]])r,  r  r  r  r  zIterator[dict[str, Any]])r,  r[  r  r  r  z4Iterator[tuple[Any, ...]] | Iterator[dict[str, Any]])r  )r  r  r  zIterator[DataFrame])r  r[  r  r   )rQ  r  rb  r  r  r   )r   NNN)
r  r  r  r  r  r  r  r  r  ru   )r  r[  )r  )r  z7ColumnNameOrSelector | Collection[ColumnNameOrSelector]r  rz   r  r   )r  r   r  r   )r:  r   r  r   r  r   )r   r   r:  r[  r  r   )Nr  )r:  r   r  r*  rB  z Literal['left', 'inner', 'full']r  r*  r  r*  r  r[  r=  r:  r  r   )NNNN)
r  r=  r  r=  r  r#  r  r#  r  r   )r   zSchemaDict | Schemar  zLLiteral['insert', 'raise'] | Mapping[str, Literal['insert', 'raise'] | Expr]r  zELiteral['insert', 'raise'] | Mapping[str, Literal['insert', 'raise']]r  zLiteral['ignore', 'raise']r  zELiteral['ignore', 'raise'] | Mapping[str, Literal['ignore', 'raise']]r  GLiteral['upcast', 'forbid'] | Mapping[str, Literal['upcast', 'forbid']]r   rA  r  r   )r  None | str | list[str]r  rB  r  r   )r  r[  r:  list[bool] | Noner;  rC  r  ru   )r   r  __qualname____firstlineno____doc____annotations__r   r?   r   classmethodr   r   r   r   r   r   propertyr.   r   r   r  r  r   r  setterr  r  r   r-  r5  r<  r8  r9  ri  r[  rt  rw  rz  r~  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r   r  r  r  r  r  r  r   r  r	  r  r$   r  r#  r.  r*  rG  rl  r  r~  r  r  r  r  r  r  r  r  r  r<  r@  rH  ri  r  r  r  r  r  r  r  r  r  r  r  r3  r(  r  r@  rK  rT  rY  r^  ra  re  rj  rm  rp  ru  rz  r  r  r%   r  r  r  r  r  r  r  r  r  r  r  r  rb  r  r   r  r  r  r  r0   r  r  r  r  r	  r  rS  r.  r1  r4  r  rJ  r>  r]  rC  rG  r+  rP  r*  rV  r  r]  r  rc  rf  rl  r  rr  ru  ry  r'  r  r  r   r  r  r  r  r-  r  r  r  r  r  r  r  r  r5  r  r7  r  r  r  r  r  r  r  r  r  __static_attributes__r  r   r   r   r      s(   Xt 
&,g%6J"6 '+*.e!
 /3%)*9!e!#e! (e!
 ,e! e! #e! (e! e! 
e!N KS94(945H94	94 94v    +/)

 /3)
')
 ()

 ,)
 )
 
)
 )
V  +/.

 /3 #.
.
 (.

 ,.
 .
 .
 .
 
.
 .
`
 K K Z]#  ]#~ Z3%  3%j 
  
  ! !     * $" $"L ^^
) 
) &! &!P 	A 	A J J& GK ) 8C 	 H "0<0< 0< 
	0<d6!%!% !% 
	!%F"" " 
	".>77$=7CQ7	7/0'(''** 877B7	7B	77!##,
 @	  E	  	:	 
	 	I-:I- 
"I-Va!Ha! a! 
	a!F = = 38 
 
6)V1' 1'f !>5I=A )5 J)5V 47R RP P7 7	17 7
 $(]7 ]7	1]7~%, & #'WR WR 	WR
 WR WR !WR 
WRr  ),	 +.:==@'*	%	 (		
 8	 ;	 %	 	 
	 	 
 +.:==@'*	#$	# (		#
 8	# ;	# %	# 	# 
	# 	# Z &-u& +/:>=A'+%u&"u& (	u&
 8u& ;u& %u& u& 
*u& u&n  *- ;>=@'*& 8	
 ; % 
  
 ;>=@'*' 8	
 ; % 
  
 ;>=@'*&$& 8	&
 ;& %& 
!& & Z (0K" ;?=A'+K"$K" 8	K"
 ;K" %K" 
@K" K"` -2`
 &*`
 	`

 
`
D# &*# 	#
 
#JFF &*	F
 F 
F.1 1B7! 7!r ?B+<	  Q QJM'4G	  ,0>A '/	>A(>A $	>A
 
>A@ 6 6@ @, ,\ 9 9O O ?C.;.	.`    ""&)"%"%(+&)!!$,/14SV) 	
      $     & $   !" *#$ /%& Q'( )* 
+ . 
  ""&)"%"%(+&)!!$,/14SV). 	
      $     & $   !" *#$ /%& Q'( )* 
+ 2 9=l "##&*"&"&(,&*#!%,015 -l5l 	l
 l l l l l $l  l  l &l $l l  !l" *#l$ /%l( @)l, -l. 
/l\ 37 ( (0 (6	'5$'5 %'5 	'5
 
'5V >B,0n
 +/37!%26>B<@/37;7;15EIGK;? #>B$!% ?n:n *n
 (n 1n n 0n <n :n -n 5n 5n /n Cn  E!n" 9#n$ %n& 'n( )n* +n, <-n. /n0 1n45n@ 
An` 
 '5+/15  $	
 ) / @  
  
 '5+/15 $ $	
 ) / @  
  !>5I
 '5+/15 X0+X0 $	X0
 )X0 /X0 @X0 X0 
X0 JX0t 
 '5+/ $	
 ) 
  
 '5+/$ $	
 ) 
  !>5I
 '5+/8.+8. $	8.
 )8. 
8. J8.| +1(,37%)%)!1537*715 +/'l
$l
 (	l

 &l
 1l
 #l
 #l
 l
 /l
 1l
 %(l
 /l
 @l
" #l
$ )%l
& 'l
( 
)l
f (.'+04K!K! -K!
 %K! %K! .K! 
K!Z Z$"+$" -$" 
	$" $"L 
 CF(+14SV58	1	 @		
 &	 /	 Q	 3	 
	 	  ),14SV	*1	* 		*
 &	* /	* Q	* ,	* 
%	* 	* LS(,15 5959A1A I	A
 &A /AA 3A 3A 
,AF+% +%`  %#37xQ xQ 	xQ
 1xQ 
xQt16 TX2
?2
LP2
	2
hFPl
#l
 l
 
l
\O
#O
 O
 
O
b  %("%+. "  	
 ) 
   %("%	 "  	
 ( 
   %("%	 "  	
  
  %'"$!&U "U  	U
 U 
Ur 7Ic
 )2	c
3c
 &	c

 
c
J/*&X -2,1"$l
)l
 l
 *	l

 *l
 l
 l
 
l
\ 4: Z& Z&x !y'J */V
V
 *	V

 'V
 
V
 KV
p !y'J */V
V
 *	V

 'V
 
V
 KV
p >B %A %AN"? "?H21 21h21 21h' 'V RVX
NX
 
X
x RVw
Nw
 
w
rA/8A/ A/ 	A/
 
A/F[
7[
 -[
 	[

 [
 
[
zF, F,P 	S#1	#1P  %SM*SM SM 	SM
 
SMj !z9E *.!(9=Z
Z
  	Z

 'Z
 Z
 7Z
 
Z
 FZ
x !z9E *.)-#(!'9=$}
}
 	}

 '}
 '}
 !}
 }
 }
 7}
 }
 
}
 F}
~	 !z9E 04$c
c
 	c

 -c
 c
 
c
 Fc
R &*&* $.2/3)-%/:>#$$(!%#_
_
 #	_

 $_
 _
 ,_
 -_
 '_
 #_
 _
 8_
 _
 _
 _
  "!_
" #_
$ 
%_
B !}fM 8<#	t
 =A=A#(! $37t
t
 5t
 	t
 :t
 ;t
 t
 !t
 t
 t
 1t
 
t
 Nt
l Z
 	e
e
 +e
 	e

 
e
 e
T /3d*
 "d*2d* ,d*
 d* 
d*N FK)M/)M>B)M	)MV <A 1; 1;f7x Y
GY
 Y
 
	Y
v 4V Z
Z
 Z
 
Z
x8
 8
t!1F5;n EHX X@ @ 8B66%46	6t $(,0 	f
 #'f
 f
 *f
 	f
  f
 
f
P.Y`?
F?
 ,?
 
	?
B !D'B
 OSOS59#"j
Aj
 L	j

 Mj
 3j
 j
 j
 j
 
j
 Cj
\ LP@W OS$(!%@WH@W L	@W
 "@W @W 
@W @WL !+PT(,D! D! 	D!
 ND! &D! 
D!L 
  #"%A ' 	
    
  
  #/A/ '/ 	/
 / / 
*/ / 
  #AAA 'A 	A
 A A 
<A A  $ XAX 'X 	X
 X X 
<XtM
 M
 M
^#0J#,J#)JX
3X
DLX
	X
t
3
DL
	
>E
-E
  E
 
	E
N&
-&
  &
 
	&
P DD DB B"" ""HO6F8O6F8O6 6: " "HP8 7; " "H)S )SV)S )SVR6"D @I&
&
.<&
	&
T QUQ
   Q
MQ
 	Q

 Q
 Q
 
Q
 Q
j RV[
 $)$[
N[
 !	[

 [
 
[
 [
zJ4 J4X 	<
	
B366 "&FY +/!&FYFY (	FY
 FY FY FY 
FY FYP[z    %( # "	
  
     %(	 "	
  
  !h" %)h"h" "	h"
 h" 
*h" h"T .1PP PH H  %?)?)	5?)B 
 !$!$#B# 	#
 # # 
# # 
 !$B 	
   
    !$.B. 	.
 . . 
). .   (B( 	(
 ( ( 
#( ( !~B~ 	~
 ~ ~ 
~@ ),(&(;>(	"( ( :='%'47'	!' '
  %I!I!36I!	=I!V6p.- .-` 16  $? $?P !!!3:3: 3: 	3:
 3: 
3:j5<#4 4<9
H9
 ,9
 
	9
v"BHP
l !	 
 
 	 

 
 
D Z *.06	[
 /3/3#39[
[
 '[
 .	[
 ,[
 -[
 [
 1[
 
[
 [
zQ. 	H QUSW$(!%%
M%
 Q%
 "	%

 %
 
%
	%
N Z =D5<4;5<6>6>a
#a
:	a
 3a
 2a
3a
4a
4a
 
a
 a
J +/(,$'$ &$ 
	$R  (,(,  &	
 & 
 r   r   c                R   U n[        U [        R                  5      (       dP  [        U [        5      (       a  O"[        U [        5      (       a  Sn[        U5      e[        R                  " SU /5      n Ub2  US:  a  U R                  X!S-
  S9n U $ US:X  a  U R                  SS5      n U $ )Nzoperation not supportedr  r  )r  rQ  r   )r   r   ru   r   r   r   r  ra  )r:  rW  r  r   s       r   rX  rX  1  s    EeRYY''eS!!x((+CC. 		"ug&A:))!)DE L q[KK1%ELr   r   )r:  r   rW  r  r  ru   )rF  
__future__r   r]  r   r  collectionsr   collections.abcr   r   r   r   r	   ior
   r   pathlibr   typingr   r   r   r   r   r   r   r   r   r   polars._reexport	_reexportr   polarsr   rK  polars._typingr   r   r   polars._utils.constructionr   r   r   r   r   r    r!   r"   polars._utils.convertr#   polars._utils.deprecationr$   r%   r&   polars._utils.getitemr(   polars._utils.parser)   polars._utils.pycapsuler*   r+   polars._utils.serder,   polars._utils.unstabler-   r.   polars._utils.variousr/   r0   r1   r2   r3   r4   r5   r6   polars._utils.wrapr7   r8   r9   polars.dataframe._htmlr:   polars.dataframe.group_byr;   r<   r=   polars.dataframe.plottingr>   polars.datatypesr?   r@   rA   rB   rC   rD   rE   rF   rG   rH   rI   rJ   rK   polars.datatypes.grouprL   polars.dependenciesrM   rN   rO   rP   rQ   rR   rS   rT   rU   rV   rW   rX   rY   r   rZ   r   r[   r   polars.exceptionsr\   r]   r^   r_   r`   polars.functionsra   rb   polars.interchange.protocolrc   polars.schemard   r  re   rf   suppressImportErrorpolars._plrrg   rh   r  ri   r  sysrj   rk   datetimerl   rm   rn   r  rL  numpy.typingnpt	pyicebergro   r  rp   xlsxwriter.worksheetrq   rr   rs   rt   ru   rv   rw   rx   ry   rz   r{   r|   r}   r~   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r3  r   polars.io.cloudr   rt  r   version_infor   r   typing_extensionswarningsr   r   r   rX  r  r   r   <module>rx     s   : "  	  #  !     ! F F	 	 	 ; 
 5 5 D 7 C	 	 	 ; : 4 M M 3    2    , , -  & 3   F%'=M &
 =="#.88. . . . . . . . . . . .^ 0<:-
7"11<
7"'0A#Ax@ x@vAeD &%s   .J99
K