
    chM                       S SK Jr  S SKrS SKJrJrJrJrJr  S SK	r	\	R                  S:  a  S SKJr  OS SKJr  S SKrS SKJr  S SKJr   " S S\R(                  5      r\\\\\4   r\R2                  " 5       \R4                  S	4\R6                  " 5       \R4                  S
4\R8                  " 5       \R4                  S4\R:                  " 5       \R4                  S4\R<                  " 5       \R>                  S4\R@                  " 5       \R>                  S4\RB                  " 5       \R>                  S4\RD                  " 5       \R>                  S4\RF                  " 5       \RH                  S4\RJ                  " 5       \RH                  S4\RL                  " 5       \RH                  S4\RN                  " 5       \RP                  S4\RR                  " 5       \RT                  S4\RV                  " 5       \RT                  S40r, " S S\R(                  5      r- " S S\5      r. " S S\5      r/ " S S5      r0 " S S \15      r2 " S! S"5      r3g)#    )annotationsN)AnyDictIterableOptionalTuple)      )	TypedDict)_PyArrowBufferc                  4    \ rS rSrSrSrSrSrSrSr	Sr
S	rS
rg)	DtypeKind(   a  
Integer enum for data types.

Attributes
----------
INT : int
    Matches to signed integer data type.
UINT : int
    Matches to unsigned integer data type.
FLOAT : int
    Matches to floating point data type.
BOOL : int
    Matches to boolean data type.
STRING : int
    Matches to string data type (UTF-8 encoded).
DATETIME : int
    Matches to datetime data type.
CATEGORICAL : int
    Matches to categorical data type.
r                      N)__name__
__module____qualname____firstlineno____doc__INTUINTFLOATBOOLSTRINGDATETIMECATEGORICAL__static_attributes__r       mC:\Users\julio\OneDrive\Documentos\Trabajo\Ideas Frescas\venv\Lib\site-packages\pyarrow/interchange/column.pyr   r   (   s,    * CDEDFHKr$   r   csilCSILefgbuUc                  ,    \ rS rSrSrSrSrSrSrSr	Sr
g	)
ColumnNullType\   a  
Integer enum for null type representation.

Attributes
----------
NON_NULLABLE : int
    Non-nullable column.
USE_NAN : int
    Use explicit float NaN value.
USE_SENTINEL : int
    Sentinel value besides NaN.
USE_BITMASK : int
    The bit is set/unset representing a null on a certain position.
USE_BYTEMASK : int
    The byte is set/unset representing a null on a certain position.
r   r   r   r	      r   N)r   r   r   r   r   NON_NULLABLEUSE_NANUSE_SENTINELUSE_BITMASKUSE_BYTEMASKr#   r   r$   r%   r5   r5   \   s"    " LGLKLr$   r5   c                  4    \ rS rSr% S\S'   S\S'   S\S'   Srg)	ColumnBuffersu   zTuple[_PyArrowBuffer, Dtype]dataz&Optional[Tuple[_PyArrowBuffer, Dtype]]validityoffsetsr   Nr   r   r   r   __annotations__r#   r   r$   r%   r>   r>   u   s     '&
 54 43r$   r>   c                  4    \ rS rSr% S\S'   S\S'   S\S'   Srg)	CategoricalDescription   bool
is_orderedis_dictionaryzOptional[_PyArrowColumn]
categoriesr   NrC   r   r$   r%   rF   rF      s      )(r$   rF   c                  (    \ rS rSrSrSrSrSrSrSr	g)	
Endianness   z.Enum indicating the byte-order of a data-type.<>=|r   N)
r   r   r   r   r   LITTLEBIGNATIVENAr#   r   r$   r%   rM   rM      s    8F
CF	Br$   rM   c                      \ rS rSrSrSrg)NoBufferPresent   z6Exception to signal that there is no requested buffer.r   N)r   r   r   r   r   r#   r   r$   r%   rX   rX      s    @r$   rX   c                     \ rS rSrSr S     SS jjrSS jr\SS j5       r\SS j5       r	      SS jr
\SS j5       r\SS	 j5       r\SS
 j5       r\SS j5       rSS jr S   SS jjrSS jr  SS jrSS jrSS jrSrg) _PyArrowColumn   a  
A column object, with only the methods and properties required by the
interchange protocol defined.

A column can contain one or more chunks. Each chunk can contain up to three
buffers - a data buffer, a mask buffer (depending on null representation),
and an offsets buffer (if variable-size binary; e.g., variable-length
strings).

TBD: Arrow has a separate "null" dtype, and has no separate mask concept.
     Instead, it seems to use "children" for both columns with a bit mask,
     and for nested dtypes. Unclear whether this is elegant or confusing.
     This design requires checking the null representation explicitly.

     The Arrow design requires checking:
     1. the ARROW_FLAG_NULLABLE (for sentinel values)
     2. if a column has two children, combined with one of those children
        having a null dtype.

     Making the mask concept explicit seems useful. One null dtype would
     not be enough to cover both bit and byte masks, so that would mean
     even more checking if we did it the Arrow way.

TBD: there's also the "chunk" concept here, which is implicit in Arrow as
     multiple buffers per array (= column here). Semantically it may make
     sense to have both: chunks were meant for example for lazy evaluation
     of data which doesn't fit in memory, while multiple buffers per column
     could also come from doing a selection operation on a single
     contiguous buffer.

     Given these concepts, one would expect chunks to be all of the same
     size (say a 10,000 row dataframe could have 10 chunks of 1,000 rows),
     while multiple buffers could have data-dependent lengths. Not an issue
     in pandas if one column is backed by a single NumPy array, but in
     Arrow it seems possible.
     Are multiple chunks *and* multiple buffers per column necessary for
     the purposes of this interchange protocol, or must producers either
     reuse the chunk concept for this or copy the data?

Note: this Column object can only be produced by ``__dataframe__``, so
      doesn't need its own version or ``__column__`` protocol.
c                   [        U[        R                  5      (       aD  UR                  S:X  a  UR	                  S5      nO"U(       d  [        S5      eUR                  5       nX l        [        R                  R                  UR                  5      (       ac  U(       d  [        S5      eU R                  UR                  S5      U l        [        R                  " U[        R                  " 5       5      U l        gXl        U R                   R                  n UR"                  nU R                  X45      U l        g! [$         a    Sn N%f = f)z+
Handles PyArrow Arrays and ChunkedArrays.
r   r   zUChunks will be combined and a copy is required which is forbidden by allow_copy=FalsezdBoolean column will be casted to uint8 and a copy is required which is forbidden by allow_copy=Falser
   N)
isinstancepaChunkedArray
num_chunkschunkRuntimeErrorcombine_chunks_allow_copytypes
is_booleantype_dtype_from_arrowdtype_dtypepccastuint8_col	bit_width
ValueError)selfcolumn
allow_copydtypero   s        r%   __init___PyArrowColumn.__init__   s    fboo..  A%a!&;   ..0%88v{{++"I  55fkk1EDK
3DIIIINNE!OO	
 55eGDK	   	s   D; ;E
	E
c                ,    [        U R                  5      $ )a  
Size of the column, in elements.

Corresponds to DataFrame.num_rows() if column is a single chunk;
equal to size of this current chunk otherwise.

Is a method rather than a property because it may cause a (potentially
expensive) computation for some dataframe implementations.
)lenrn   rq   s    r%   size_PyArrowColumn.size   s     499~r$   c                .    U R                   R                  $ )z
Offset of first element.

May be > 0 if using chunks; for example for a column with N chunks of
equal size M (only the last chunk may be shorter),
``offset = n * M``, ``n = 0 .. N-1``.
)rn   offsetry   s    r%   r}   _PyArrowColumn.offset   s     yyr$   c                    U R                   $ )a:  
Dtype description as a tuple ``(kind, bit-width, format string,
endianness)``.

Bit-width : the number of bits as an integer
Format string : data type description format string in Apache Arrow C
                Data Interface format.
Endianness : current only native endianness (``=``) is supported

Notes:
    - Kind specifiers are aligned with DLPack where possible (hence the
      jump to 20, leave enough room for future extension)
    - Masks must be specified as boolean with either bit width 1 (for
      bit masks) or 8 (for byte masks).
    - Dtype width in bits was preferred over bytes
    - Endianness isn't too useful, but included now in case in the
      future we need to support non-native endianness
    - Went with Apache Arrow format strings over NumPy format strings
      because they're more complete from a dataframe perspective
    - Format strings are mostly useful for datetime specification, and
      for categoricals.
    - For categoricals, the format string describes the type of the
      categorical in the data buffer. In case of a separate encoding of
      the categorical (e.g. an integer to string mapping), this can
      be derived from ``self.describe_categorical``.
    - Data types not included: complex, Arrow-style null, binary,
      decimal, and nested (list, struct, map, union) dtypes.
)rj   ry   s    r%   rt   _PyArrowColumn.dtype  s    < {{r$   c                v   [         R                  R                  U5      (       aY  [        R                  nUR
                  S   nUR                  (       a  UR                  OSnSU SU 3nX2U[        R                  4$ [         R                  R                  U5      (       a\  [        R                  nU R                  nUR                  R                  n[        R                  U5      u  pX2U[        R                  4$ [        R                  US5      u  p6Uc  [!        SU S35      eX2U[        R                  4$ )z
See `self.dtype` for details.
r    ts:)NNz
Data type z& not supported by interchange protocol)r_   rf   is_timestampr   r!   unittzrM   rU   rJ   r"   rn   indicesrh   _PYARROW_KINDSgetrp   )
rq   rt   ro   kindr   r   f_stringarrindices_dtype_s
             r%   ri   %_PyArrowColumn._dtype_from_arrowdtype(  s    88  ''%%DAB"XX2BB4q~HHj.?.???XX##E**((D))CKK,,M(,,];KAHj.?.???+//|DND|  'MNP P Hj.?.???r$   c                    U R                   n[        R                  R                  UR                  5      (       d  [        S5      eU R                   R                  R                  S[        UR                  5      S.$ )aL  
If the dtype is categorical, there are two options:
- There are only values in the data buffer.
- There is a separate non-categorical Column encoding categorical
  values.

Raises TypeError if the dtype is not categorical

Returns the dictionary with description on how to interpret the
data buffer:
    - "is_ordered" : bool, whether the ordering of dictionary indices
                     is semantically meaningful.
    - "is_dictionary" : bool, whether a mapping of
                        categorical values to other objects exists
    - "categories" : Column representing the (implicit) mapping of
                     indices to category values (e.g. an array of
                     cat1, cat2, ...). None if not a dictionary-style
                     categorical.

TBD: are there any other in-memory representations that are needed?
zCdescribe_categorical only works on a column with categorical dtype!T)rI   rJ   rK   )	rn   r_   rf   rJ   rh   	TypeErrororderedr[   
dictionary)rq   r   s     r%   describe_categorical#_PyArrowColumn.describe_categoricalG  sd    . iixx%%chh//%  ))..00!(8
 	
r$   c                j    U R                   S:X  a  [        R                  S4$ [        R                  S4$ )a  
Return the missing value (or "null") representation the column dtype
uses, as a tuple ``(kind, value)``.

Value : if kind is "sentinel value", the actual value. If kind is a bit
mask or a byte mask, the value (0 or 1) indicating a missing value.
None otherwise.
r   N)
null_countr5   r8   r;   ry   s    r%   describe_null_PyArrowColumn.describe_nullk  s2     ??a!..44!--q00r$   c                J    U R                   R                  nUS:w  a  UnU$ SnU$ )zh
Number of null elements, if known.

Note: Arrow uses -1 to indicate "unknown", but None seems cleaner.
N)rn   r   )rq   arrow_null_countns      r%   r   _PyArrowColumn.null_count}  s3      99// 0B 6 =Ar$   c                    g)zI
The metadata for the column. See `DataFrame.metadata` for more details.
Nr   ry   s    r%   metadata_PyArrowColumn.metadata  s    
 	r$   c                    g)z5
Return the number of chunks the column consists of.
r   r   ry   s    r%   ra   _PyArrowColumn.num_chunks  s     r$   Nc              #  .  #    U(       a  US:  a  U R                  5       U-  nU R                  5       U-  S:w  a  US-  nU R                  nSn[        SX!-  U5       H/  n[        UR	                  XR5      U R
                  5      v   US-  nM1     gU v   g7f)zb
Return an iterator yielding the chunks.

See `DataFrame.get_chunks` for details on ``n_chunks``.
r   r   N)rz   rn   ranger[   slicere   )rq   n_chunks
chunk_sizearrayr(   starts         r%   
get_chunks_PyArrowColumn.get_chunks  s      10Jyy{X%*a
IIEAq*"7D$KK2D4D4D  Q	 E Js   BBc                    U R                  5       SSS.n U R                  5       US'    U R                  5       US'   U$ ! [         a     N"f = f! [         a     U$ f = f)a  
Return a dictionary containing the underlying buffers.

The returned dictionary has the following contents:

    - "data": a two-element tuple whose first element is a buffer
              containing the data and whose second element is the data
              buffer's associated dtype.
    - "validity": a two-element tuple whose first element is a buffer
                  containing mask values indicating missing data and
                  whose second element is the mask value buffer's
                  associated dtype. None if the null representation is
                  not a bit or byte mask.
    - "offsets": a two-element tuple whose first element is a buffer
                 containing the offset values for variable-size binary
                 data (e.g., variable-length strings) and whose second
                 element is the offsets buffer's associated dtype. None
                 if the data buffer does not have an associated offsets
                 buffer.
N)r@   rA   rB   rA   rB   )_get_data_buffer_get_validity_bufferrX   _get_offsets_buffer)rq   bufferss     r%   get_buffers_PyArrowColumn.get_buffers  s    , ))+"
	"&";";"=GJ	!%!9!9!;GI   		
  		s    ? A 
AA
AAc                   U R                   nU R                  n[        R                  R	                  UR
                  5      (       a!  UR                  n[        U5      R                  n[        UR                  5       5      nUS:X  a  [        UR                  5       S   5      U4$ US:X  a  [        UR                  5       S   5      U4$ g)zJ
Return the buffer containing the data and the buffer's
associated dtype.
r   r   r	   N)rn   rt   r_   rf   rJ   rh   r   r[   rx   r   r   )rq   r   rt   r   s       r%   r   _PyArrowColumn._get_data_buffer  s     		


 88!!%**--MME"5)//E 6!%--/!"45u<<!V!%--/!"45u<< r$   c                    [         R                  SS[        R                  4nU R                  nUR                  5       S   nU(       a  [        U5      U4$ [        S5      e)z
Return the buffer containing the mask values indicating missing data
and the buffer's associated dtype.
Raises NoBufferPresent if null representation is not a bit or byte
mask.
r   r1   r   z<There are no missing values so does not have a separate mask)r   r   rM   rU   rn   r   r   rX   )rq   rt   r   buffs       r%   r   #_PyArrowColumn._get_validity_buffer  sZ     C):):;		}}q!!$'..!01 1r$   c                   U R                   n[        UR                  5       5      nUS:X  a  [        S5      eUS:X  a  U R                   R                  n[
        R                  R                  U5      (       a#  [        R                  SS[        R                  4nO"[        R                  SS[        R                  4n[        UR                  5       S   5      U4$ g	)
z
Return the buffer containing the offset values for variable-size binary
data (e.g., variable-length strings) and the buffer's associated dtype.
Raises NoBufferPresent if the data buffer does not have an associated
offsets buffer.
r   zJThis column has a fixed-length dtype so it does not have an offsets bufferr	   @   r)       r(   r   N)rn   rx   r   rX   rh   r_   rf   is_large_stringr   r   rM   rU   r   )rq   r   r   rt   s       r%   r   "_PyArrowColumn._get_offsets_buffer  s     		 6!5  !VIINNExx''.."C1B1BC"C1B1BC!%--/!"45u<< r$   )re   rn   rj   )T)rr   zpa.Array | pa.ChunkedArrayrs   rH   returnNone)r   int)r   Tuple[DtypeKind, int, str, str])rt   zpa.DataTypero   r   r   r   )r   rF   )r   zTuple[ColumnNullType, Any])r   zDict[str, Any])N)r   zOptional[int]r   zIterable[_PyArrowColumn])r   r>   )r   zTuple[_PyArrowBuffer, Any])r   r   r   r   r   ru   rz   propertyr}   rt   ri   r   r   r   r   ra   r   r   r   r   r   r#   r   r$   r%   r[   r[      s   )X FJ%H0%H>B%H	%HN
      >@ @-0@	(@> !
 !
F 1 1"     )-%	!.%N=	#=.1$=r$   r[   )4
__future__r   enumtypingr   r   r   r   r   sysversion_infor   typing_extensionspyarrowr_   pyarrow.computecomputerk   pyarrow.interchange.bufferr   IntEnumr   r   strDtypeint8r   int16int32int64rm   r   uint16uint32uint64float16r   float32float64bool_r   stringr    large_stringr   r5   r>   rF   rM   	ExceptionrX   r[   r   r$   r%   <module>r      s  $ #   v +   5 > 	ic3&' GGI	s#HHJ$HHJ$HHJ$HHJ%IIK)..#&IIK)..#&IIK)..#&JJL9??C(JJL9??C(JJL9??C(HHJ%IIK)""C(OO	((#.$T\\ 24I 4")Y ) Ai As= s=r$   