
    HI                     f   S r SSKJr  SSKJr  SSKJr  SSKJr  SSKrSSKrSSKrSSK	r	SSK
r
SSKJr  SSKrSSKJr  SS	KJr  SS
KJr  SSKJr  SSKJr  SSKJr  SrSrSrSrSrSrSrSrSr/ SQr Sr!Sr"S r#S r$S r%S r&S*S jr'S r(S  r)S! r*S" r+S# r,S$ r-S% r.S+S& jr/S,S' jr0 " S( S)\15      r2g)-z+Helper functions for hashing functionality.    )absolute_import)print_function)division)unicode_literalsN)config)CommandException)UsingCrcmodExtension)DEFAULT_FILE_BUFFER_SIZE)MIN_SIZE_COMPUTE_LOGGING)TRANSFER_BUFFER_SIZE)UTF8z
WARNING: You have requested checksumming but your crcmod installation isn't
using the module's C extension, so checksumming will run very slowly. For help
installing the extension, please see "gsutil help crcmod".
a  
WARNING: gsutil rsync uses hashes when modification time is not available at
both the source and destination. Your crcmod installation isn't using the
module's C extension, so checksumming will run very slowly. If this is your
first rsync since updating gsutil, this rsync can take significantly longer than
usual. For help installing the extension, please see "gsutil help crcmod".
a|  
WARNING: Downloading this composite object requires integrity checking with
CRC32c, but your crcmod installation isn't using the module's C extension,
so the hash computation will likely throttle download performance. For help
installing the extension, please see "gsutil help crcmod".

To disable slow integrity checking, see the "check_hashes" option in your
boto config file.
a:  
Downloading this composite object requires integrity checking with CRC32c,
but your crcmod installation isn't using the module's C extension, so the
hash computation will likely throttle download performance. For help
installing the extension, please see "gsutil help crcmod".

To download regardless of crcmod performance or to skip slow integrity
checks, see the "check_hashes" option in your boto config file.

NOTE: It is strongly recommended that you not disable integrity checks. Doing so
could allow data corruption to go undetected during uploading/downloading.aT  
WARNING: This download will not be validated since your crcmod installation
doesn't use the module's C extension, so the hash computation would likely
throttle download performance. For help in installing the extension, please
see "gsutil help crcmod".

To force integrity checking, see the "check_hashes" option in your boto config
file.
if_fast_else_failif_fast_else_skipalwaysnever)            i   iAoivE:iWiPYl   T/U l   ?O iNi.%i'hinTi\Fib&l   ", iGWL;igl   b! l   <c l   ! l   N,Q l   2! l   =X iD3iKS}i2=l   my ixBl   Ao=     c                 8    U(       d  U $ [        U SU-  5      U-  $ )am  Computes CRC32C for concat(A, B) given crc(A), crc(B) and len(B).

An explanation of the algorithm can be found at
crcutil.googlecode.com/files/crc-doc.1.0.pdf.

Args:
  crc_a: A 32-bit integer representing crc(A) with least-significant
         coefficient first.
  crc_b: Same as crc_a.
  num_bytes_in_b: Length of B in bytes.

Returns:
  CRC32C for concat(A, B)
   )_ExtendByZeros)crc_acrc_bnum_bytes_in_bs      -platform/gsutil/gslib/utils/hashing_helper.pyConcatCrc32cr   j   s#     
L	q>1	2U	::    c                     SnS[         -  n[        [         5       H-  nU S-  (       a  X!-  nUS-  nX-  (       a	  U[        -  nU S-  n M/     U$ )zMultiplies two polynomials together modulo CASTAGNOLI_POLY.

Args:
  p: The first polynomial.
  q: The second polynomial.

Returns:
  Result of the multiplication.
r      )DEGREErangeCASTAGNOLI_POLY)pqresulttop_bit_s        r   _CrcMultiplyr*      sV     &K'=a1ukf!GA{?a!GA  
-r   c                     S nU" U 5      n SnUS:w  a?  US-  (       a#  [        U [        U[        [        5      -     5      n US-  nUS-  nUS:w  a  M?  U" U 5      n U $ )zGiven crc representing polynomial P(x), compute P(x)*x^num_bits.

Args:
  crc: crc respresenting polynomial P(x).
  num_bits: number of bits in crc.

Returns:
  P(x)*x^num_bits
c                 B    [        SR                  U SS9S S S2   S5      $ )Nz{0:032b}r   )widthr   )intformat)crcs    r   _ReverseBits32&_ExtendByZeros.<locals>._ReverseBits32   s(    z  B /"5q99r   r   r!   )r*   X_POW_2K_TABLElen)r1   num_bitsr2   is       r   r   r      sm    : 	s#!A!|nQ^1D-DEFcFANH	 	A
 	s#	*r   c                     SU0nU R                  S5        [        X5        U R                  S5        US   R                  5       $ )a  Calculates a base64 digest of the contents of a seekable stream.

This function resets the file pointer to position 0.

Args:
  fp: An already-open file object.
  hash_alg: Instance of hashing class initialized to start state.

Returns:
  Hash of the stream in hex string format.
placeholderr   )seekCalculateHashesFromContents	hexdigest)fphash_alg	hash_dicts      r   _CalculateHashFromContentsr@      s@     h')''!*b,''!*	=	!	+	+	--r   c                 ^    U R                  [        5      nU(       d  g[        R                  (       a*  [	        U[
        5      (       a  UR                  [        5      n[        R                  " U5       H  nUR                  U5        M     U(       a  UR                  [        U5      5        M  )ao  Calculates hashes of the contents of a file.

Args:
  fp: An already-open file object (stream will be consumed).
  hash_dict: Dict of (string alg_name: initialized hashing class)
      Hashing class will be populated with digests upon return.
  callback_processor: Optional callback processing class that implements
      Progress(integer amount of bytes processed).
N)readr
   sixPY3
isinstancestrencoder   
itervaluesupdateProgressr5   )r=   r?   callback_processordatar>   s        r   r;   r;      sw     	77+,D
ww	D#		{{4 NN9-ood .!!#d), 	r   c                 T    [        U [        R                  R                  S5      5      $ )zCalculates a base64 CRC32c checksum of the contents of a seekable stream.

This function sets the stream position 0 before and after calculation.

Args:
  fp: An already-open file object.

Returns:
  CRC32c checksum of the file in base64 format.
crc-32c)$_CalculateB64EncodedHashFromContentscrcmod
predefinedCrcr=   s    r   %CalculateB64EncodedCrc32cFromContentsrT      s)     
.b.4.?.?.C.CI.N
P Pr   c                 *    [        U [        5       5      $ )zCalculates a base64 MD5 digest of the contents of a seekable stream.

This function sets the stream position 0 before and after calculation.

Args:
  fp: An already-open file object.

Returns:
  MD5 digest of the file in base64 format.
)rO   GetMd5rS   s    r   "CalculateB64EncodedMd5FromContentsrW      s     
.b&(	;;r   c                 *    [        U [        5       5      $ )zCalculates a base64 MD5 digest of the contents of a seekable stream.

This function sets the stream position 0 before and after calculation.

Args:
  fp: An already-open file object.

Returns:
  MD5 digest of the file in hex format.
)r@   rV   rS   s    r   CalculateMd5FromContentsrY      s     
$B	11r   c                     [         R                  " [        R                  " U 5      5      nUR	                  S5      R                  [        5      $ )zAReturns the base64-encoded version of the input hex digest value.   
)base64	b64encodebinascii	unhexlifyrstripdecoder   )digest_valueencoded_bytess     r   Base64EncodeHashrd     s9    ""8#5#5l#CD-			e	$	+	+D	11r   c                     [         R                  " U R                  S5      R                  [        5      5      n[
        R                  " U5      $ )zReturns the hex digest value of the input base64-encoded hash.

Args:
  base64_hash: Base64-encoded hash, which may contain newlines and single or
      double quotes.

Returns:
  Hex digest of the input argument.
z
"')r\   	b64decodestriprG   r   r^   hexlify)base64_hashdecoded_bytess     r   Base64ToHexHashrk     s;     "";#4#4W#=#D#DT#JK-			-	((r   c                 *    [        [        X5      5      $ )a&  Calculates a base64 digest of the contents of a seekable stream.

This function sets the stream position 0 before and after calculation.

Args:
  fp: An already-open file object.
  hash_alg: Instance of hashing class initialized to start state.

Returns:
  Hash of the stream in base64 format.
)rd   r@   )r=   r>   s     r   rO   rO     s     
4RB	CCr   c                  Z    [         R                  " SS[        5      n U S:X  a  0 $ S[        0$ )a3  Returns a dict of hash algorithms for validating an uploaded object.

This is for use only with single object uploads, not compose operations
such as those used by parallel composite uploads (though it can be used to
validate the individual components).

Returns:
  dict of (algorithm_name: hash_algorithm)
GSUtilcheck_hashesr   md5)r   getCHECK_HASH_IF_FAST_ELSE_FAILrV   )check_hashes_configs    r   GetUploadHashAlgsrt   (  s2     

8^#?AG#I
r   c                    [         R                  " SS[        5      nU[        :X  a  0 $ 0 nU(       a  [        US'   U$ U(       a  [        5       (       a  S US'   U$ U(       dl  U[        :X  a  [        [        5      eU[        :X  a  U R                  [        5        U$ U[        :X  a  U R                  [        5        S US'   U$ [        S5      eU$ )az  Returns a dict of hash algorithms for validating an object.

Args:
  logger: logging.Logger for outputting log messages.
  consider_md5: If True, consider using a md5 hash.
  consider_crc32c: If True, consider using a crc32c hash.

Returns:
  Dict of (string, hash algorithm).

Raises:
  CommandException if hash algorithms satisfying the boto config file
  cannot be returned.
rn   ro   rp   c                  @    [         R                  R                  S5      $ NrN   rP   rQ   rR    r   r   <lambda>%GetDownloadHashAlgs.<locals>.<lambda>U  s    F$5$5$9$9)$Dr   crc32cc                  @    [         R                  R                  S5      $ rw   rx   ry   r   r   rz   r{   ]  s    f&7&7&;&;I&Fr   z8Your boto config 'check_hashes' option is misconfigured.)r   rq   rr   CHECK_HASH_NEVERrV   r	   r   _SLOW_CRC_EXCEPTION_TEXTCHECK_HASH_IF_FAST_ELSE_SKIPwarn_NO_HASH_CHECK_WARNINGCHECK_HASH_ALWAYS_SLOW_CRCMOD_DOWNLOAD_WARNING)loggerconsider_md5consider_crc32crs   	hash_algss        r   GetDownloadHashAlgsr   9  s     

8^#?A,,I)Ie& 
%  Di 
 	 <	<788">>*+ 
 "3312F	(
 
 HJ 	J 
r   c                 z     [         R                  " U 5      $ ! [         a    [         R                  " U SS9s $ f = f)a>  Returns md5 object, avoiding incorrect FIPS error on Red Hat systems.

Examples: GetMd5(b'abc')
          GetMd5(bytes('abc', encoding='utf-8'))

Args:
  byte_string (bytes): String in bytes form to hash. Don't include for empty
    hash object, since md5(b'').digest() == md5().digest().

Returns:
  md5 hash object.
F)usedforsecurity)hashlibrp   
ValueError)byte_strings    r   rV   rV   e  s8    ;;;{##	 ; ;;{E::	;s    ::c                   j    \ rS rSrSrS r\S 5       rSS jrS r	S r
\R                  4S jrS	 rS
rg)HashingFileUploadWrapperi|  a  Wraps an input stream in a hash digester and exposes a stream interface.

This class provides integrity checking during file uploads via the
following properties:

Calls to read will appropriately update digesters with all bytes read.
Calls to seek (assuming it is supported by the wrapped stream) using
    os.SEEK_SET will catch up / reset the digesters to the specified
    position. If seek is called with a different os.SEEK mode, the caller
    must return to the original position using os.SEEK_SET before further
    reads.
Calls to seek are fast if the desired position is equal to the position at
    the beginning of the last read call (we only need to re-hash bytes
    from that point on).
c                 :   U(       d  [        S5      eU(       d  [        S5      eXl        X l        X@l        XPl        SU l        0 U l        U R                   H-  nU R                  U   R                  5       U R                  U'   M/     SU l        SU l	        X0l
        g)a  Initializes the wrapper.

Args:
  stream: Input stream.
  digesters: dict of {string: hash digester} containing digesters, where
      string is the name of the hash algorithm.
  hash_algs: dict of {string: hash algorithm} for resetting and
      recalculating digesters. String is the name of the hash algorithm.
  src_url: Source FileUrl that is being copied.
  logger: For outputting log messages.
z0HashingFileUploadWrapper used with no digesters.z0HashingFileUploadWrapper used with no hash_algs.Nr   )r   _orig_fp
_digesters_src_url_logger
_seek_away_digesters_previouscopy_digesters_previous_mark_digesters_current_mark
_hash_algs)selfstream	digestersr   src_urlr   algs          r   __init__!HashingFileUploadWrapper.__init__  s     OPPOPPMOMLDO!D&*ooc&:&?&?&Ads# $%D!#$D Or   c                 0    [        U R                  SS5      $ )z<Returns the mode of the underlying file descriptor, or None.modeN)getattrr   r   s    r   r   HashingFileUploadWrapper.mode  s     4==&$//r   c                    U R                   b  [        S5      eU R                  R                  U5      n[	        U[
        R                  5      (       a  UR                  [        5      nU R                  U l
        U R                   HK  nU R                  U   R                  5       U R                  U'   U R                  U   R                  U5        MM     U =R                  [        U5      -  sl	        U$ )aI  "Reads from the wrapped file pointer and calculates hash digests.

Args:
  size: The amount of bytes to read. If ommited or negative, the entire
      contents of the file will be read, hashed, and returned.

Returns:
  Bytes from the wrapped stream.

Raises:
  CommandException if the position of the wrapped stream is unknown.
z\Read called on hashing file pointer in an unknown position; cannot correctly compute digest.)r   r   r   rB   rE   rC   	text_typerG   r   r   r   r   r   r   rI   r5   )r   sizerL   r   s       r   rB   HashingFileUploadWrapper.read  s     " ' ( ( ==d#D$&&[[d$($@$@D!&*ooc&:&?&?&Ads#
ooc!!$'  	  CI- Kr   c                 6    U R                   R                  5       $ )z$Returns the current stream position.)r   tellr   s    r   r   HashingFileUploadWrapper.tell  s    ==r   c                 6    U R                   R                  5       $ )z'Returns true if the stream is seekable.)r   seekabler   s    r   r   !HashingFileUploadWrapper.seekable  s    ==!!##r   c                    U[         R                  :w  a!  U R                  R                  5       U l        GOSU l        XR
                  :  ai  U R                   H$  nU R                  U   " 5       U R                  U'   M&     SU l        U R                  R                  S5        U R                  U5        GO$XR
                  :X  aA  U R
                  U l        U R                   H  nU R                  U   U R                  U'   M!     OXR                  :  a  U R
                  U l        U R                   H  nU R                  U   U R                  U'   M!     U R                  R                  U R
                  5        U R                  XR
                  -
  5        OBU R                  R                  U R                  5        U R                  XR                  -
  5        U R                  R                  X5      $ )zSeeks in the wrapped file pointer and catches up hash digests.

Args:
  offset: The offset to seek to.
  whence: os.SEEK_CUR, or SEEK_END, SEEK_SET.

Returns:
  Return value from the wrapped stream's seek call.
Nr   )osSEEK_SETr   r   r   r   r   r   r   r:   _CatchUpr   )r   offsetwhencer   s       r   r:   HashingFileUploadWrapper.seek  s     **,do do	//	/ ??C!%!5!7$//#
 #'($1f222'+'D'D$??C!%!9!9#!>$//#
 # 000'+'D'D$??C!%!9!9#!>$//#
 #4889f<<<= 	4778f;;;<==f--r   c                 f   U R                   R                  5       U R                  :w  a5  [        SU R                   R                  5       < SU R                  < 35      eU R                   Hh  nU[
        :  a1  U R                  R                  SUU R                  R                  5        U R                  U   R                  5       U R                  U'   Mj     U R                  U l        Un[        U[        5      nU(       a  U R                   R                  U5      n[!        U["        R$                  5      (       a  UR'                  [(        5      nX4-  nU R                   H!  nU R                  U   R+                  U5        M#     [        U[        5      nU(       a  M  U =R                  U-  sl        g)aS  Catches up hashes, but does not return data and uses little memory.

Before calling this function, digesters_current_mark should be updated
to the current location of the original stream and the self._digesters
should be current to that point (but no further).

Args:
  bytes_to_read: Number of bytes to catch up from the original stream.
z6Invalid mark when catching up hashes. Stream position z, hash position zCatching up %s for %s...N)r   r   r   r   r   r   r   debugr   
url_stringr   r   r   minr   rB   rE   rC   r   rG   r   rI   )r   bytes_to_readr   bytes_remainingbytes_this_roundrL   s         r   r   !HashingFileUploadWrapper._CatchUp  s[    }}t;;;==--/1M1MOP P 	2	25s==33	5&*ooc&:&?&?&Ads#	  %)$@$@D!#O?,@A
]] 01d	D#--	(	({{4 )o###D) !_.BC 
 	  M1 r   )	r   r   r   r   r   r   r   r   r   N)r.   )__name__
__module____qualname____firstlineno____doc__r   propertyr   rB   r   r   r   r   r:   r   __static_attributes__ry   r   r   r   r   |  sC      < 0 08 $ !# ..` 2r   r   )N)FF)r   )3r   
__future__r   r   r   r   r\   r^   r   r   rC   botor   rP   gslib.exceptionr   gslib.utils.boto_utilr	   gslib.utils.constantsr
   r   r   r   SLOW_CRCMOD_WARNINGSLOW_CRCMOD_RSYNC_WARNINGr   r   r   rr   r   r   r~   r4   r$   r"   r   r*   r   r@   r;   rT   rW   rY   rd   rk   rO   rt   r   rV   objectr   ry   r   r   <module>r      s    2 & %  '    	 
   , 6 : : 6 &  ! 
N    3 2   
 	;*.4.&-.P<22)D")X;.h2v h2r   