
    3                        S r SSKJr  SSKJr  SSKJr  SSKJr  SSKrSSKrSSKrSSK	r	SSK
r
SSKrSSKrSSKrSSKrSSKrSSKrSSKJr  SSKJr  SS	KJr  SS
KJr  SSKJr  SSKJr  SSKJr  \R:                  (       a  \rSSSSSSS.r \" S 5      r!S r"S r#S r$S r%S r&S r'S r(S r)S r*S r+S r,S  r-S! r.S" r/S# r0S$ r1S&S% jr2g)'z<Shared utility structures and methods for manipulating text.    )absolute_import)print_function)division)unicode_literalsN)urllib)rangeCommandException)LazyWrapper)UTF8)WINDOWS_1252)	IS_CP1252COLDLINEDURABLE_REDUCED_AVAILABILITYNEARLINESTANDARDARCHIVE)CLDRANLSSTDAc                  .    [         R                  " S5      $ )Nz,^(?P<maj>\d+)(\.(?P<min>\d+)(?P<suffix>.*))?)recompile     (platform/gsutil/gslib/utils/text_util.py<lambda>r    8   s    BJJFGr   c           	      V   [         R                  R                  U 5      u  p4pVn[         R                  R                  USS9nUR	                  X45        SR                  U V	V
s/ s H  u  pU	< SU
< 3PM     sn
n	5      n[         R                  R                  X4X[U45      nU$ s  sn
n	f )ai  Adds a query parameter to a URL string.

Appends a query parameter to the query string portion of a url. If a parameter
with the given name was already present, it is not removed; the new name/value
pair will be appended to the end of the query string. It is assumed that all
arguments will be of type `str` (either ASCII or UTF-8 encoded) or `unicode`.

Note that this method performs no URL-encoding. It is the caller's
responsibility to ensure proper URL encoding of the entire URL; i.e. if the
URL is already URL-encoded, you should pass in URL-encoded values for
param_name and param_value. If the URL is not URL-encoded, you should not pass
in URL-encoded parameters; instead, you could perform URL-encoding using the
URL string returned from this function.

Args:
  url_str: (str or unicode) String representing the URL.
  param_name: (str or unicode) String key of the query parameter.
  param_value: (str or unicode) String value of the query parameter.

Returns:
  (str or unicode) A string representing the modified url, of type `unicode`
  if the url_str argument was a `unicode`, otherwise a `str` encoded in UTF-8.
T)keep_blank_values&=)r   parseurlsplit	parse_qslappendjoin
urlunsplit)url_str
param_nameparam_valueschemenetlocpath	query_strfragmentquery_paramskvnew_query_strnew_urls                r   AddQueryParamToUrlr8   ;   s    0 /5ll.C.CG.L+&$8''	T'J,z/0((<H<!q!,<HI-LL##tH57'	.	 Is   $B%
c                    [        5       R                  [        U 5      5      n[        5       R                  [        U5      5      nU(       a  U(       d  g[        UR	                  S5      5      nUR	                  S5      (       a  [        UR	                  S5      5      OSnUR	                  S5      n[        UR	                  S5      5      nUR	                  S5      (       a  [        UR	                  S5      5      OSnUR	                  S5      n	XG:  a  gXG:X  a&  XX:  a  gXX:X  a  [        U	5      =(       a    U(       + S4$ g)	aR  Compares the first and second gsutil version strings.

For example, 3.33 > 3.7, and 4.1 is a greater major version than 3.33.
Does not handle multiple periods (e.g. 3.3.4) or complicated suffixes
(e.g., 3.3RC4 vs. 3.3RC5). A version string with a suffix is treated as
less than its non-suffix counterpart (e.g. 3.32 > 3.32pre).

Args:
  first: First gsutil version string.
  second: Second gsutil version string.

Returns:
  (g, m):
     g is True if first known to be greater than second, else False.
     m is True if first known to be greater by at least 1 major version,
       else False.
)FFmajminr   suffix)TT)TFF)VERSION_MATCHERmatchstrintgroupbool)
firstsecondm1m2
major_ver1
minor_ver1suffix_ver1
major_ver2
minor_ver2suffix_ver2s
             r   CompareVersionsrM   ^   s    $ s5z*"s6{+" 
2288E?#*')xxs288E?#A*"+288E?#*')xxs288E?#A*"+		!;3OU;;	r   c              #   ,   #    U  H
  nSU-  v   M     g7f)z:A generator that adds '**' to each url string in url_strs.z%s**Nr   )url_strsr+   s     r   ConvertRecursiveToFlatWildcardrP      s     g
7
 s   c                     [         R                  " [        U 5      SS R                  S5      5      n[        R
                  " U5      $ )a*  Decodes an encoded python long into an ASCII string.

This is used for modeling S3 version_id's as apitools generation.

Args:
  long_to_convert: long to convert to ASCII string. If this is already a
                   string, it is simply returned.

Returns:
  String decoded from the input long.
   NL)binascii	unhexlifyhexrstripsix
ensure_str)long_to_convertunhexeds     r   DecodeLongAsStringr\      s:     s?3AB7>>sCD'		  r   c                 p    [         R                  " [        R                  " U 5      S5      n[	        US5      $ )a  Encodes an ASCII string as a python long.

This is used for modeling S3 version_id's as apitools generation.  Because
python longs can be arbitrarily large, this works.

Args:
  string_to_convert: ASCII string to convert to a long.

Returns:
  Long that represents the input string.
	hex_codec   )codecsencoderX   ensure_binarylong)string_to_converthex_bytestrs     r   EncodeStringAsLongrf      s/     c//0ABKP+ 
k2	r   c                     [         (       a  [        R                  " U [        5      $ [        R                  " U [        5      $ )ap  Attempts to detect Windows CP1252 encoding and convert to UTF8.

Windows doesn't provide a way to set UTF-8 for string encodings; you can set
the system locale (see
http://windows.microsoft.com/en-us/windows/change-system-locale#1TC=windows-7)
but that takes you to a "Change system locale" dropdown that just lists
languages (e.g., "English (United States)". Instead, we're forced to check if
a encoding as UTF8 raises an exception and if so, try converting from CP1252
to Unicode.

Args:
  input_str: (str or bytes) The input string.
Returns:
  (unicode) The converted string or the original, if conversion wasn't needed.
)r   rX   ensure_textr   r   	input_strs    r   FixWindowsEncodingIfNeededrk      s+      Y??9l33??9d++r   c                     [         R                  " U 5      R                  [        5      =(       d     [         R                  " U R                  5      $ )z8Returns a short Unicode string describing the exception.)rX   	text_typera   r   	__class__)excs    r   GetPrintableExceptionStringrp      s.    	s		"	"4	(	HCMM#--,HHr   c                 H    [        S U  5       5      (       d  [        U5      eg)aQ  Ensures that the string passed in consists of only ASCII values.

Args:
  string: Union[str, unicode, bytes] Text that will be checked for
      ASCII values.
  message: Union[str, unicode, bytes] Error message, passed into the
      exception, in the event that the check on `string` fails.

Returns:
  None

Raises:
  CommandException
c              3   >   #    U  H  n[        U5      S :  v   M     g7f)   N)ord).0cs     r   	<genexpr>InsistAscii.<locals>.<genexpr>   s     *6aSVc\6s   N)allr
   )stringmessages     r   InsistAsciir|      s%     
*6*	*	*
7
## 
+r   c                 "    [        U SU -  5        g)zChecks for ASCII-only characters in `header`.

Also constructs an error message using `header` if the check fails.

Args:
  header: Union[str, binary, unicode] Text being checked for ASCII values.

Returns:
  None
zInvalid non-ASCII header (%s).N)r|   )headers    r   InsistAsciiHeaderr      s     f6?@r   c                 @    [        US[        U5      < SU < S35        g)a+  Checks for ASCII-only characters in `value`.

Also constructs an error message using `header` and `value` if the check
fails.

Args:
  header: Header name, only used in error message in case of an exception.
  value: Union[str, binary, unicode] Text being checked for ASCII values.

Returns:
  None
zInvalid non-ASCII value (z) was provided for header z^.
Only ASCII characters are allowed in headers other than x-goog-meta- and x-amz-meta- headersN)r|   repr)r~   values     r   InsistAsciiHeaderValuer      s       $E{F45r   c                 4    U S:w  a  U S:w  a  [        U5      egg)a=  Ensures that the value passed in consists of only "on" or "off"

Args:
  value: (unicode) Unicode string that will be checked for correct text.
  message: Union[str, unicode, bytes] Error message passed into the exception
      in the event that the check on value fails.

Returns:
  None

Raises:
  CommandException
onoffNr	   )r   r{   s     r   InsistOnOrOffr      s#     d]u~
7
## &]r   c                 L    U R                  5       n U [        ;   a	  [        U    n U $ )a  Returns a normalized form of the given storage class name.

Converts the given string to uppercase and expands valid abbreviations to
full storage class names (e.g 'std' would return 'STANDARD'). Note that this
method does not check if the given storage class is valid.

Args:
  sc: (str) String representing the storage class's full name or abbreviation.

Returns:
  (str) A string representing the full name of the given storage class.
)upper$STORAGE_CLASS_SHORTHAND_TO_FULL_NAME)scs    r   NormalizeStorageClassr     s'     
xxz"//	-b	1B	)r   c                     U $ )a  Return an UTF8-encoded string type, or None if `input_val` is None.

Args:
  input_val: (unicode, str, or None) A string-like object or None. This method
      simply calls encode() on `input_val` if it is not None; if `input_val`
      is not of type "unicode", this will implicitly call decode() with the
      default encoding for strings (for Python 2, this is ASCII), then call
      encode().

Returns:
  (str) A UTF-8 encoded string, or None.
r   )	input_vals    r   PrintableStrr   '  s
     
r   c                      S nS nU" S0 UD6u  pEn[         R                  " U5      n[         R                  " U5      nU" U 6 nUR                  U5      nXu-  n[        Xg5        g)zA Python 2/3 compatible analogue to the print function.

This function writes text to a file descriptor as the
builtin print function would, favoring unicode encoding.

Aguments and return values are the same as documented in
the Python 2 print function.
c            
      ,   [         R                  " SSS[        R                  4/5      nU R	                  5        HH  u  p#X!;  a:  Sn[        UR                  USR                  UR                  5       5      5      5      eX1U'   MJ     UR                  5       $ )a   Validates keyword arguments that would be used in Print

Valid keyword arguments, mirroring print(), are 'sep',
'end', and 'file'. These must be of types string, string,
and file / file interface respectively.

Returns the above kwargs of the above types.
)sep )end
filez9{} is not a valid keyword argument. Please use one of: {}r   )
collectionsOrderedDictsysstdoutitemsKeyErrorformatr)   keysvalues)kwargsexpected_keywordskeyr   	error_msgs        r   	_get_argsprint_to_fd.<locals>._get_argsA  s     $//}28#**1E1G H lln
		%-	y''(+1B1G1G1I(JL M 	M "'# % ##%%r   c                  >   / nU  H  n[        U[        R                  [        R                  45      (       d  [	        U5      n[        U[        R                  5      (       a  UR                  U5        Mo  UR                  [        R                  " U5      5        M     U$ )zCGets a `bytes` string for each item in a list of printable objects.)
isinstancerX   binary_typerm   r?   r(   rb   )objectsbyte_objectsitems      r   _get_byte_strings&print_to_fd.<locals>._get_byte_stringsX  sx    Ls>?? 4y	D#//	*	*D! 	C--d34  r   Nr   )rX   rb   r)   write_to_fd)r   r   r   r   r   r   r   datas           r   print_to_fdr   7  se    &." &v&.#D####	G	$$	$$+$dr   c                 T   [         R                  (       a  U R                  U5        g[        U[        5      (       a  [        U S5      (       a  SU R                  ;   d  [        U [        R                  5      (       a  U R                  U5        g[        U S5      (       a  U R                  R                  U5        gU R                  [         R                  " U5      5        gSU R                  ;   a&  U R                  [         R                  " U5      5        gU R                  U5        g)zGWrite given data to given file descriptor, doing any conversions neededNmodebbuffer)rX   PY2writer   byteshasattrr   ioBytesIOr   rh   rb   )fdr   s     r   r   r   r  s    WWHHTN
eFrww:b"**3M3Mhhtn	X		iioodhhst$%
bgg~HHSt$%HHTNr   c                 2    [         R                  " SSU 5      $ )z4Returns the input string with all \n and \r removed.z[\r\n] )r   subri   s    r   RemoveCRLFFromStringr     s    		2y	))r   c                     [         R                  " U5        [        [        U 5       Vs/ s H'  n[         R                  " [
        R                  5      PM)     sn5      n[        R                  " U5      n[         R                  " 5         U$ s  snf )aV  Generates binary string representation of a list of ASCII characters.

Args:
  size: Integer quantity of characters to generate.
  seed: A seed may be specified for deterministic behavior.
        Int 0 is used as the default value.

Returns:
  Binary encoded string representation of a list of characters of length
  equal to size argument.
)	randomseedr?   r   choicerz   ascii_lettersrX   rb   )sizer   _contentss       r   get_random_ascii_charsr     s`     	++duT{K{!&-- 4 45{KL(x((++-	/ Ls   .B)r   )3__doc__
__future__r   r   r   r   rT   r`   osr   r   r   localer   r   rX   rz   	six.movesr   r   gslib.exceptionr
   gslib.lazy_wrapperr   gslib.utils.constantsr   r   gslib.utils.system_utilr   PY3r@   rc   r   r=   r8   rM   rP   r\   rf   rk   rp   r|   r   r   r   r   r   r   r   r   r   r   r   r   <module>r      s    C & %  '   	 
 	 	    
    , * & . -77	$ )
		( $ GI F'T! $,,I
$&A5($$* 8v&*
r   