
    ch2                         S SK Jr  S SKrS SKJr  S SKJr  S SKJr   " S S5      r	Sr
 " S S	5      rSS
 jrS\l        SSSSSSSSSSSS.S jrS\
 S3\l        g)    )IntegralN)Table)_resolve_filesystem_and_pathc                   L   \ rS rSrSrS r\S 5       r\S 5       r\S 5       r	\S 5       r
\S 5       r\S	 5       r\S
 5       r\S 5       r\S 5       r\S 5       r\S 5       r\S 5       r\S 5       r\S 5       r\S 5       r\S 5       r\S 5       rSS jrSS jrSS jrSrg)ORCFile   z
Reader interface for a single ORC file

Parameters
----------
source : str or pyarrow.NativeFile
    Readable source. For passing Python file objects or byte buffers,
    see pyarrow.io.PythonFileInterface or pyarrow.io.BufferReader.
c                 n    [         R                  " 5       U l        U R                  R                  U5        g N)_orc	ORCReaderreaderopen)selfsources     ^C:\Users\julio\OneDrive\Documentos\Trabajo\Ideas Frescas\venv\Lib\site-packages\pyarrow/orc.py__init__ORCFile.__init__&   s"    nn&     c                 6    U R                   R                  5       $ )z/The file metadata, as an arrow KeyValueMetadata)r   metadatar   s    r   r   ORCFile.metadata*        {{##%%r   c                 6    U R                   R                  5       $ )z#The file schema, as an arrow schema)r   schemar   s    r   r   ORCFile.schema/   s     {{!!##r   c                 6    U R                   R                  5       $ )zThe number of rows in the file)r   nrowsr   s    r   r   ORCFile.nrows4   s     {{  ""r   c                 6    U R                   R                  5       $ )z!The number of stripes in the file)r   nstripesr   s    r   r!   ORCFile.nstripes9   r   r   c                 6    U R                   R                  5       $ )z4Format version of the ORC file, must be 0.11 or 0.12)r   file_versionr   s    r   r$   ORCFile.file_version>   s     {{''))r   c                 6    U R                   R                  5       $ )z2Software instance and version that wrote this file)r   software_versionr   s    r   r'   ORCFile.software_versionC        {{++--r   c                 6    U R                   R                  5       $ )zCompression codec of the file)r   compressionr   s    r   r+   ORCFile.compressionH        {{&&((r   c                 6    U R                   R                  5       $ )z?Number of bytes to buffer for the compression codec in the file)r   compression_sizer   s    r   r/   ORCFile.compression_sizeM   r)   r   c                 6    U R                   R                  5       $ )zkName of the writer that wrote this file.
If the writer is unknown then its Writer ID
(a number) is returned)r   writerr   s    r   r2   ORCFile.writerR   s    
 {{!!##r   c                 6    U R                   R                  5       $ )zVersion of the writer)r   writer_versionr   s    r   r5   ORCFile.writer_versionY        {{))++r   c                 6    U R                   R                  5       $ )zJNumber of rows per an entry in the row index or 0
if there is no row index)r   row_index_strider   s    r   r9   ORCFile.row_index_stride^   s     {{++--r   c                 6    U R                   R                  5       $ )zNumber of stripe statistics)r   nstripe_statisticsr   s    r   r<   ORCFile.nstripe_statisticsd        {{--//r   c                 6    U R                   R                  5       $ )z/Length of the data stripes in the file in bytes)r   content_lengthr   s    r   r@   ORCFile.content_lengthi   r7   r   c                 6    U R                   R                  5       $ )z<The number of compressed bytes in the file stripe statistics)r   stripe_statistics_lengthr   s    r   rC    ORCFile.stripe_statistics_lengthn   s     {{3355r   c                 6    U R                   R                  5       $ )z1The number of compressed bytes in the file footer)r   file_footer_lengthr   s    r   rF   ORCFile.file_footer_lengths   r>   r   c                 6    U R                   R                  5       $ )z*The number of bytes in the file postscript)r   file_postscript_lengthr   s    r   rI   ORCFile.file_postscript_lengthx   s     {{1133r   c                 6    U R                   R                  5       $ )zThe number of bytes in the file)r   file_lengthr   s    r   rL   ORCFile.file_length}   r-   r   Nc                 &   Uc  g U R                   n/ nU Hx  n[        U[        5      (       a^  [        U5      nSUs=::  a  [	        U5      :  a$  O  O!X$   R
                  nUR                  U5        M]  [        S[	        U5      U4-  5      eUs  $    U$ )Nr   z/Column indices must be in 0 <= ind < %d, got %d)r   
isinstancer   intlennameappend
ValueError)r   columnsr   namescols        r   _select_namesORCFile._select_names   s    ?C#x((#h)c&k) +**CLL%$ &/25f+s1C&D E E   r   c                 V    U R                  U5      nU R                  R                  XS9$ )ab  Read a single stripe from the file.

Parameters
----------
n : int
    The stripe index
columns : list
    If not None, only these columns will be read from the stripe. A
    column name may be a prefix of a nested field, e.g. 'a' will select
    'a.b', 'a.c', and 'a.d.e'

Returns
-------
pyarrow.RecordBatch
    Content of the stripe as a RecordBatch.
rU   )rX   r   read_stripe)r   nrU   s      r   r\   ORCFile.read_stripe   s,    " $$W-{{&&q&::r   c                 V    U R                  U5      nU R                  R                  US9$ )av  Read the whole file.

Parameters
----------
columns : list
    If not None, only these columns will be read from the file. A
    column name may be a prefix of a nested field, e.g. 'a' will select
    'a.b', 'a.c', and 'a.d.e'. Output always follows the
    ordering of the file and not the `columns` list.

Returns
-------
pyarrow.Table
    Content of the file as a Table.
r[   )rX   r   read)r   rU   s     r   r`   ORCFile.read   s,      $$W-{{00r   )r   r
   )__name__
__module____qualname____firstlineno____doc__r   propertyr   r   r   r!   r$   r'   r+   r/   r2   r5   r9   r<   r@   rC   rF   rI   rL   rX   r\   r`   __static_attributes__ r   r   r   r      so   ! & & $ $ # # & & * * . . ) ) . . $ $ , , . .
 0 0 , , 6 6 0 0 4 4 ) )(;(1r   r   a  file_version : {"0.11", "0.12"}, default "0.12"
    Determine which ORC file version to use.
    `Hive 0.11 / ORC v0 <https://orc.apache.org/specification/ORCv0/>`_
    is the older version
    while `Hive 0.12 / ORC v1 <https://orc.apache.org/specification/ORCv1/>`_
    is the newer one.
batch_size : int, default 1024
    Number of rows the ORC writer writes at a time.
stripe_size : int, default 64 * 1024 * 1024
    Size of each ORC stripe in bytes.
compression : string, default 'uncompressed'
    The compression codec.
    Valid values: {'UNCOMPRESSED', 'SNAPPY', 'ZLIB', 'LZ4', 'ZSTD'}
    Note that LZ0 is currently not supported.
compression_block_size : int, default 64 * 1024
    Size of each compression block in bytes.
compression_strategy : string, default 'speed'
    The compression strategy i.e. speed vs size reduction.
    Valid values: {'SPEED', 'COMPRESSION'}
row_index_stride : int, default 10000
    The row index stride i.e. the number of rows per
    an entry in the row index.
padding_tolerance : double, default 0.0
    The padding tolerance.
dictionary_key_size_threshold : double, default 0.0
    The dictionary key size threshold. 0 to disable dictionary encoding.
    1 to always enable dictionary encoding.
bloom_filter_columns : None, set-like or list-like, default None
    Columns that use the bloom filter.
bloom_filter_fpp : double, default 0.05
    Upper limit of the false-positive rate of the bloom filter.
c                   d    \ rS rSrS\ S3rSrSSSSS	S
SSSSSS.S jrS rS r	S r
S rS rSrg)	ORCWriter   a  
Writer interface for a single ORC file

Parameters
----------
where : str or pyarrow.io.NativeFile
    Writable target. For passing Python file objects or byte buffers,
    see pyarrow.io.PythonFileInterface, pyarrow.io.BufferOutputStream
    or pyarrow.io.FixedSizeBufferWriter.

F0.12      uncompressed   speed'          N皙?r$   
batch_sizestripe_sizer+   compression_block_sizecompression_strategyr9   padding_tolerancedictionary_key_size_thresholdbloom_filter_columnsbloom_filter_fppc                    [         R                  " 5       U l        U R                  R                  UUUUUUUUU	U
UUS9  SU l        g )Nrw   T)r   rk   r2   r   is_open)r   wherer$   rx   ry   r+   rz   r{   r9   r|   r}   r~   r   s                r   r   ORCWriter.__init__   sV     nn&%!###9!5-/*G!5- 	 	
 r   c                 $    U R                  5         g r
   closer   s    r   __del__ORCWriter.__del__      

r   c                     U $ r
   ri   r   s    r   	__enter__ORCWriter.__enter__  s    r   c                 $    U R                  5         g r
   r   )r   argskwargss      r   __exit__ORCWriter.__exit__  r   r   c                 `    U R                   (       d   eU R                  R                  U5        g)z
Write the table into an ORC file. The schema of the table must
be equal to the schema used when opening the ORC file.

Parameters
----------
table : pyarrow.Table
    The table to be written into the ORC file
N)r   r2   write)r   tables     r   r   ORCWriter.write  s"     |||% r   c                 j    U R                   (       a"  U R                  R                  5         SU l         gg)z
Close the ORC file
FN)r   r2   r   r   s    r   r   ORCWriter.close#  s'     <<KK DL r   )r   r2   )rb   rc   rd   re   _orc_writer_args_docsrf   r   r   r   r   r   r   r   rh   ri   r   r   rk   rk      sc    	   
G G % -+(-&-"'#&/2&*"&<!!r   rk   c                     [        X5      u  p#Ub  UR                  U5      n Ub9  [        U5      S:X  a*  [        U 5      R	                  5       R                  U5      nU$ [        U 5      R	                  US9nU$ )Nr   r[   )r   open_input_filerQ   r   r`   select)r   rU   
filesystempathresults        r   
read_tabler   ,  sx    3FGJ++D1s7|q0%%'..w7 M %%g%6Mr   a  
Read a Table from an ORC file.

Parameters
----------
source : str, pyarrow.NativeFile, or file-like object
    If a string passed, can be a single file name. For file-like objects,
    only read a single file. Use pyarrow.BufferReader to read a file
    contained in a bytes or buffer-like object.
columns : list
    If not None, only these columns will be read from the file. A column
    name may be a prefix of a nested field, e.g. 'a' will select 'a.b',
    'a.c', and 'a.d.e'. Output always follows the ordering of the file and
    not the `columns` list. If empty, no columns will be read. Note
    that the table will still have the correct num_rows set despite having
    no columns.
filesystem : FileSystem, default None
    If nothing passed, will be inferred based on path.
    Path will try to be found in the local on-disk filesystem otherwise
    it will be parsed as an URI to determine the filesystem.
rn   ro   rp   rq   rr   rs   rt   ru   rv   rw   c                    [        U[        5      (       a  [        R                  " S[        SS9  Xp[        UUUUUUUUU	U
UUS9 nUR                  U 5        S S S 5        g ! , (       d  f       g = f)NzThe order of the arguments has changed. Pass as 'write_table(table, where)' instead. The old order will raise an error in the future.   )
stacklevelrw   )rO   r   warningswarnFutureWarningrk   r   )r   r   r$   rx   ry   r+   rz   r{   r9   r|   r}   r~   r   r2   s                 r   write_tabler   P  s{     %&'4	

 u	!51)+&C1)
 
U
 
 
s   A!!
A/aZ  
Write a table into an ORC file.

Parameters
----------
table : pyarrow.lib.Table
    The table to be written into the ORC file
where : str or pyarrow.io.NativeFile
    Writable target. For passing Python file objects or byte buffers,
    see pyarrow.io.PythonFileInterface, pyarrow.io.BufferOutputStream
    or pyarrow.io.FixedSizeBufferWriter.
rm   )NN)numbersr   r   pyarrow.libr   pyarrow._orcr   
pyarrow.fsr   r   r   rk   r   rf   r   ri   r   r   <module>r      s   &     3`1 `1F DI! I!X

 0 $,*',%,!&"%.1%)!%!H    r   