
    w                        S r SSKJr  SSKJr  SSKJr  SSKJr  SSKrSSKrSSKrSSK	r	SSK
r
SSKJr  SSKrSSKJr  SS	KJr  SS
KJr  SSKJr  SSKJr  SSKJr  SSKrSSKJr   " S S\5      r " S S\5      r " S S\5      r      S&S jr " S S\5      r  " S S\5      r! " S S\5      r" " S S\5      r#\RH                  " SSS /5      r%\RH                  " S!S"S#/5      r& " S$ S%\
RN                  5      r(g)'aB  Name expansion iterator and result classes.

Name expansion support for the various ways gsutil lets users refer to
collections of data (via explicit wildcarding as well as directory,
bucket, and bucket subdir implicit wildcarding). This class encapsulates
the various rules for determining how these expansions are done.
    )absolute_import)print_function)division)unicode_literalsN)encoding)CommandException)NO_URLS_MATCHED_GENERIC)NO_URLS_MATCHED_TARGET)PluralityCheckableIterator)SeekAheadResult)storage_v1_messages)StorageUrlFromStringc                   $    \ rS rSrSrS rS rSrg)NameExpansionResult/   a  Holds one fully expanded result from iterating over NameExpansionIterator.

The member data in this class need to be pickleable because
NameExpansionResult instances are passed through Multiprocessing.Queue. In
particular, don't include any boto state like StorageUri, since that pulls
in a big tree of objects, some of which aren't pickleable (and even if
they were, pickling/unpickling such a large object tree would result in
significant overhead).

The state held in this object is needed for handling the various naming cases
(e.g., copying from a single source URL to a directory generates different
dest URL names than copying multiple URLs to a directory, to be consistent
with naming rules used by the Unix cp command). For more details see comments
in _NameExpansionIterator.
c                     Xl         X l        X0l        X@l        XPl        U(       a  [
        R                  " U5      U l        gSU l        g)a  Instantiates a result from name expansion.

Args:
  source_storage_url: StorageUrl that was being expanded.
  is_multi_source_request: bool indicator whether multiple input URLs or
      src_url_str expanded to more than one BucketListingRef.
  is_multi_top_level_source_request: same as is_multi_source_request but
      measured before recursion.
  names_container: Bool indicator whether src_url names a container.
  expanded_storage_url: StorageUrl that was expanded.
  expanded_result: cloud object metadata in MessageToJson form (for
      pickleability), if any was iterated; None otherwise.
      Consumers must call JsonToMessage to get an apitools Object.
N)source_storage_urlis_multi_source_request!is_multi_top_level_source_requestnames_containerexpanded_storage_urlr   MessageToJsonexpanded_result)selfr   r   r   r   r   r   s          'platform/gsutil/gslib/name_expansion.py__init__NameExpansionResult.__init__@   sH    " 1#: -N** 4+ $11D15 	    c                      SU R                   -  $ )Nz%s)r   r   s    r   __repr__NameExpansionResult.__repr__Y   s    $++++r   )r   r   r   r   r   r   N)__name__
__module____qualname____firstlineno____doc__r   r!   __static_attributes__ r   r   r   r   /   s     62,r   r   c                   :    \ rS rSrSr      SS jrS rS rSrg)	_NameExpansionIterator]   zhClass that iterates over all source URLs passed to the iterator.

See details in __iter__ function doc.
Nc                 "   Xl         X l        X0l        X@l        XPl        X`l        Xpl        U R                  R                  5       U R                  l        Xl	        Xl
        Xl        Xl        U(       d  [        S/5      OUU l        SSS.U l        g)a	  Creates a NameExpansionIterator.

Args:
  command_name: name of command being run.
  debug: Debug level to pass to underlying iterators (range 0..3).
  logger: logging.Logger object.
  gsutil_api: Cloud storage interface.  Settable for testing/mocking.
  url_strs: PluralityCheckableIterator of URL strings needing expansion.
  recursion_requested: True if -r specified on command-line.  If so,
      listings will be flattened so mapped-to results contain objects
      spanning subdirectories.
  all_versions: Bool indicating whether to iterate over all object versions.
  cmd_supports_recursion: Bool indicating whether this command supports a
      '-r' flag. Useful for printing helpful error messages.
  project_id: Project id to use for bucket retrieval.
  ignore_symlinks: If True, ignore symlinks during iteration.
  continue_on_error: If true, yield no-match exceptions encountered during
                     iteration instead of raising them.
  bucket_listing_fields: Iterable fields to include in expanded results.
      Ex. ['name', 'acl']. Underyling iterator is responsible for converting
      these to list-style format ['items/name', 'items/acl']. If this is
      None, only the object name is included in the result.

Examples of _NameExpansionIterator with recursion_requested=True:
  - Calling with one of the url_strs being 'gs://bucket' will enumerate all
    top-level objects, as will 'gs://bucket/' and 'gs://bucket/*'.
  - 'gs://bucket/**' will enumerate all objects in the bucket.
  - 'gs://bucket/abc' will enumerate either the single object abc or, if
     abc is a subdirectory, all objects under abc and any of its
     subdirectories.
  - 'gs://bucket/abc/**' will enumerate all objects under abc or any of its
    subdirectories.
  - 'file:///tmp' will enumerate all files under /tmp, as will
    'file:///tmp/*'
  - 'file:///tmp/**' will enumerate all files under /tmp or any of its
    subdirectories.

Example if recursion_requested=False:
  calling with gs://bucket/abc/* lists matching objects
  or subdirs, but not sub-subdirs or objects beneath subdirs.

Note: In step-by-step comments below we give examples assuming there's a
gs://bucket with object paths:
  abcd/o1.txt
  abcd/o2.txt
  xyz/o1.txt
  xyz/o2.txt
and a directory file://dir with file paths:
  dir/a.txt
  dir/b.txt
  dir/c/
namez***)TFN)command_namedebuglogger
gsutil_apiurl_strsrecursion_requestedall_versionsHasPluralityhas_pluralitycmd_supports_recursion
project_idignore_symlinkscontinue_on_errorsetbucket_listing_fields_flatness_wildcard)r   r0   r1   r2   r3   r4   r5   r6   r9   r:   r;   r<   r>   s                r   r   _NameExpansionIterator.__init__c   s    B %JK OM2$ #'--"<"<">DMM"8 O*.7L#vh-'< 	
 &*#6Dr   c              #   "  #    U R                    GH  n[        U5      nUR                  5       (       ab  UR                  5       (       d  UR	                  5       (       a8  U R                   R
                  (       a  [        S5      e[        USSSUSS9v   M  SnUR                  5       (       aO  UR                  5       (       a:  U R                  (       d)  [        U R                  U5      R                  S/S95      nO^[        U R                  U5      R                  U R                  SS95      nUR                  5       (       a  UR                  5       (       a  SnUR!                  5       nU R                   R
                  =(       d    UnU R"                  U R                     nU R                  (       a  [%        XUU R                  5      nO['        U5      n[        U5      nUR)                  5       (       a6  U R*                  (       a   [        [,        U-  5      e[        [,        U-  5      e[        [3        XR                  U R4                  U R6                  U R8                  5      5      n
U
R!                  5       nU R                   R
                  =(       d    UnU
 GH5  u  pU=(       d    UnUR;                  5       (       a&  [        UUUUUR<                  UR>                  S9v   ML  [        UR@                  5      nUR                  5       (       a  U< [B        RD                  < U< 3nOURG                  US
9n[        U R                  U5      RI                  U R                  S95      nU=(       d    UR!                  5       nU R                   R
                  =(       d    UnU H'  n[        UUUSUR<                  UR>                  S9v   M)     GM8     GM     g! [         a'  n	U	[.        R0                  " 5       S	   4v    Sn	A	GNSn	A	ff = f7f)a  Iterates over all source URLs passed to the iterator.

For each src url, expands wildcards, object-less bucket names,
subdir bucket names, and directory names, and generates a flat listing of
all the matching objects/files.

You should instantiate this object using the static factory function
NameExpansionIterator, because consumers of this iterator need the
PluralityCheckableIterator wrapper built by that function.

Yields:
  gslib.name_expansion.NameExpansionResult.

Raises:
  CommandException: if errors encountered.
zPMultiple URL strings are not supported with streaming ("-") URLs or named pipes.FN)r   r   r   r   r   r   id)bucket_fieldsT)r>   expand_top_level_buckets   wildcard_suffixr>   )%r4   r   	IsFileUrlIsStreamIsFifor8   r   r   
IsCloudUrlIsBucketr5   r   WildcardIteratorIterBucketsIterAllr>   r7   r?   _ImplicitBucketSubdirIterator_NonContainerTuplifyIteratorIsEmptyr<   r
   sysexc_info_OmitNonRecursiveIteratorr0   r9   r2   IsObjectstorage_urlroot_object
url_stringossepCreatePrefixUrlIterObjects)r   url_strrX   src_names_bucketpost_step1_itersrc_url_expands_to_multir   subdir_exp_wildcardpost_step2_iterepost_step3_iterr   r   blrsrc_names_containerexpanded_urlurl_to_iteratewc_iters                     r   __iter___NameExpansionIterator.__iter__   s    " ==(1k



!
!!![%7%7%9%9==&&  "M N N![:?DI277B268 	8 	 

 
 
"
"{';';'='=&&
 5!!'*66dV6LN 5!!'*22&*&@&@)- 3 /0 !!##(<(<(>(>!
!0!=!=!?+/==+F+F ,D+C ( !33D4L4LM		!	!7#6&&( 7G2?Co 
	 	 	"	"!!)"#9G#CDD !!7'!AB
B 3
#O5M5M$($5$5$($?$?NOo
 "1!=!=!?!%!<!< ":!9  %4
 ?.A/<<>>#!,&=/1#&??!oo/ / .cnn=,##%%),bff6IJN *99 3 : 5N /##N3??(,(B(B @ DE' '? '=&-&:&:&< #%)]]%@%@ &>%= " c%#.(?5 $%(__ #1 1 I %4s !D " ) cllnQ'((()s1   HPOGP
P%PPPPc           	          [         R                  R                  UU R                  U R                  U R
                  U R                  U R                  S9$ )a  Helper to instantiate gslib.WildcardIterator.

Args are same as gslib.WildcardIterator interface, but this method fills
in most of the values from instance state.

Args:
  url_string: URL string naming wildcard objects to iterate.

Returns:
  Wildcard iterator over URL string.
)r6   r:   r;   r2   )gslibwildcard_iteratorCreateWildcardIteratorr3   r6   r:   r;   r2   )r   rZ   s     r   rN   '_NameExpansionIterator.WildcardIteratorQ  sL     ""99&&??,,{{ :  r   )r?   r6   r>   r9   r0   r<   r1   r3   r;   r2   r:   r5   r4   FTNFFN)	r#   r$   r%   r&   r'   r   rl   rN   r(   r)   r   r   r+   r+   ]   s.     "&*$!&%)T7lV1pr   r+   c                   2    \ rS rSrSr     SS jrS rSrg)SeekAheadNameExpansionIteratorif  zCreates and wraps a _NameExpansionIterator and yields SeekAheadResults.

Unlike the NameExpansionIterator, which can make API calls upon __init__
to check for plurality, this iterator does no work until the first iteration
occurs.
Nc                     US;   =(       a    U
(       + U l         U R                   (       a  S/OSn[        UU[        R                  " S5      U[	        U5      UUUUU	SUS9U l        g)z5Initializes a _NameExpansionIterator with the inputs.)cpmvrewritesizeNdummyTr6   r9   r:   r;   r<   r>   )count_data_bytesr+   logging	getLoggerr   name_expansion_iterator)r   r0   r1   r3   r4   r5   r6   r9   r:   r;   file_size_will_changer>   s               r   r   'SeekAheadNameExpansionIterator.__init__n  su      *-DD 7!66 	
 )-(=(=VH4#9'""8,!5'3$5D r   c              #   0  #    U R                    H  nU R                  (       ab  UR                  (       aQ  [        R                  " [
        R                  UR                  5      nUR                  =(       d    Sn[        US9v   Mv  [        5       v   M     g 7f)Nr   )
data_bytes)	r   r}   r   r   JsonToMessageapitools_messagesObjectrz   r   )r   name_expansion_resultiterated_metadataiterated_sizes       r   rl   'SeekAheadNameExpansionIterator.__iter__  sp     !%!=!=			#8#H#H$22$$&;&K&KM)..3!77 ">s   BB)r}   r   )FTNFFr#   r$   r%   r&   r'   r   rl   r(   r)   r   r   ru   ru   f  s#     "&*$%*#5J r   ru   c                     [        U5      n[        U UUUUUUUUU	U
US9n[        U5      nUR                  5       (       a  [        [        5      eU$ )a8  Static factory function for instantiating _NameExpansionIterator.

This wraps the resulting iterator in a PluralityCheckableIterator and checks
that it is non-empty. Also, allows url_strs to be either an array or an
iterator.

Args:
  command_name: name of command being run.
  debug: Debug level to pass to underlying iterators (range 0..3).
  logger: logging.Logger object.
  gsutil_api: Cloud storage interface.  Settable for testing/mocking.
  url_strs: Iterable URL strings needing expansion.
  recursion_requested: True if -r specified on command-line.  If so,
      listings will be flattened so mapped-to results contain objects
      spanning subdirectories.
  all_versions: Bool indicating whether to iterate over all object versions.
  cmd_supports_recursion: Bool indicating whether this command supports a '-r'
      flag. Useful for printing helpful error messages.
  project_id: Project id to use for the current command.
  ignore_symlinks: If True, ignore symlinks during iteration.
  continue_on_error: If true, yield no-match exceptions encountered during
                     iteration instead of raising them.
  bucket_listing_fields: Iterable fields to include in expanded results.
      Ex. ['name', 'acl']. Underyling iterator is responsible for converting
      these to list-style format ['items/name', 'items/acl']. If this is
      None, only the object name is included in the result.

Raises:
  CommandException if underlying iterator is empty.

Returns:
  Name expansion iterator instance.

For example semantics, see comments in NameExpansionIterator.__init__.
r|   )r   r+   rS   r   r	   )r0   r1   r2   r3   r4   r5   r6   r9   r:   r;   r<   r>   r   s                r   NameExpansionIteratorr     sm    ^ (1(23%)13 77NO$$&&
2
33	  r   c                   $    \ rS rSrSrS rS rSrg)rR   i  zIterator that produces the tuple (False, blr) for each iterated value.

Used for cases where blr_iter iterates over a set of
BucketListingRefs known not to name containers.
c                     Xl         g)zHInstantiates iterator.

Args:
  blr_iter: iterator of BucketListingRef.
Nblr_iter)r   r   s     r   r   %_NonContainerTuplifyIterator.__init__  s	     Mr   c              #   >   #    U R                    H	  nSU4v   M     g 7f)NFr   )r   rg   s     r   rl   %_NonContainerTuplifyIterator.__iter__  s     }}CL s   r   Nr   r)   r   r   rR   rR     s    r   rR   c                   $    \ rS rSrSrS rS rSrg)rV   i  a  Iterator wrapper for that omits certain values for non-recursive requests.

This iterates over tuples of (names_container, BucketListingReference) and
omits directories, prefixes, and buckets from non-recurisve requests
so that we can properly calculate whether the source URL expands to multiple
URLs.

For example, if we have a bucket containing two objects: bucket/foo and
bucket/foo/bar and we do a non-recursive iteration, only bucket/foo will be
yielded.
c                 @    Xl         X l        X0l        X@l        XPl        g)ax  Instanties the iterator.

Args:
  tuple_iter: Iterator over names_container, BucketListingReference
              from step 2 in the NameExpansionIterator
  recursion_requested: If false, omit buckets, dirs, and subdirs
  command_name: Command name for user messages
  cmd_supports_recursion: Command recursion support for user messages
  logger: Log object for user messages
N)
tuple_iterr5   r0   r9   r2   )r   r   r5   r0   r9   r2   s         r   r   "_OmitNonRecursiveIterator.__init__  s      !O2$"8Kr   c              #     #    U R                    H  u  pU R                  (       d  UR                  5       (       d  [        UR                  5      nUR                  5       (       a  SnOUR                  nU R                  (       a3  U R                  R                  SXBR                  U R                  5        M  U R                  R                  SXBR                  5        M  X4v   M     g 7f)N	directoryz-Omitting %s "%s". (Did you mean to do %s -r?)zOmitting %s "%s".)r   r5   rW   r   rZ   rI   	type_namer9   r2   infor0   )r   r   rg   ri   descs        r   rl   "_OmitNonRecursiveIterator.__iter__  s     "&//%%cllnn ,CNN;!!##$$&&
++

J1B1BD ++

.nn
E$$ #2s   C'C))r9   r0   r2   r5   r   Nr   r)   r   r   rV   rV     s    
$%r   rV   c                   $    \ rS rSrSrS rS rSrg)rQ   i'  a  Iterator wrapper that performs implicit bucket subdir expansion.

Each iteration yields tuple (names_container, expanded BucketListingRefs)
  where names_container is true if URL names a directory, bucket,
  or bucket subdir.

For example, iterating over [BucketListingRef("gs://abc")] would expand to:
  [BucketListingRef("gs://abc/o1"), BucketListingRef("gs://abc/o2")]
if those subdir objects exist, and [BucketListingRef("gs://abc") otherwise.
c                 4    X l         Xl        X0l        X@l        g)a  Instantiates the iterator.

Args:
  name_exp_instance: calling instance of NameExpansion class.
  blr_iter: iterator over BucketListingRef prefixes and objects.
  subdir_exp_wildcard: wildcard for expanding subdirectories;
      expected values are ** if the mapped-to results should contain
      objects spanning subdirectories, or * if only one level should
      be listed.
  bucket_listing_fields: Fields requested in enumerated results.
N)r   name_exp_instancerc   r>   )r   r   r   rc   r>   s        r   r   &_ImplicitBucketSubdirIterator.__init__3  s     M.2!6r   c              #     #    U R                    H  nUR                  5       (       a  [        UR                  5      R	                  U R
                  S9n[        U R                  R                  U5      R                  U R                  S95      nUR                  5       (       d  U H	  nSU4v   M     M  SU4v   M  UR                  5       (       a  SU4v   M  [        SU-  5      e   g 7f)NrF   rH   TFz7_ImplicitBucketSubdirIterator got a bucket reference %s)r   IsPrefixr   rZ   r]   rc   r   r   rN   rP   r>   rS   rW   r   )r   rg   
prefix_urlimplicit_subdir_iteratorexp_blrs        r   rl   &_ImplicitBucketSubdirIterator.__iter__E  s     }}	)#..9II 44 J 6
#=""33J?GG&*&@&@ H B$C  (//111g/! 2
 
<<>>clEKM 	M% s   C(C*)r   r>   r   rc   Nr   r)   r   r   rQ   rQ   '  s    	7$Mr   rQ   c                       \ rS rSrSrS rSrg)CopyObjectInfoi\  zARepresents the information needed for copying a single object.
  c                     UR                   U l         UR                  U l        UR                  U l        UR                  U l        UR                  U l        UR
                  U l        X l        X0l        g)a-  Instantiates the object info from name expansion result and destination.

Args:
  name_expansion_result: StorageUrl that was being expanded.
  exp_dst_url: StorageUrl of the destination.
  have_existing_dst_container: Whether exp_url names an existing directory,
      bucket, or bucket subdirectory.
N)r   r   r   r   r   r   exp_dst_urlhave_existing_dst_container)r   r   r   r   s       r   r   CopyObjectInfo.__init__`  sg     4FFD#8#P#PD ?? 	*0@@D 5 J JD0@@D"'B$r   )r   r   r   r   r   r   r   r   N)r#   r$   r%   r&   r'   r   r(   r)   r   r   r   r   \  s    Cr   r   DestinationInfor   r   %NameExpansionIteratorDestinationTuplename_expansion_iterdestinationc                   *    \ rS rSrSrS rS rS rSrg)CopyObjectsIteratori  a  Iterator wrapper for copying objects and keeping track of source URL types.

This is used in the cp command for copying from multiple source to multiple
destinations. It takes a list of NameExpansionIteratorDestinationTuple. It
wraps them and return CopyObjectInfo objects that wraps NameExpansionResult
with the destination. It's used also for collecting analytics
PerformanceSummary info, because there may be multiple source URLs and we want
to know if any of them are file URLs, if any of them are cloud URLs, if any of
them require daisy chain operations, and if any use different providers. The
source URL type information will be aggregated at the end of _SequentialApply
or _ParallelApply.
c                     X l         SU l        SU l        / U l        Xl        [        U R                  5      nUR                  U l        UR                  U l	        g)a  Instantiates the iterator.

Args:
  name_expansion_dest_iter: NameExpansionIteratorDestinationTuple iterator.
  is_daisy_chain: The -D option in cp might have already been specified, in
      which case we do not need to check again for daisy chain operations.
FN)
is_daisy_chainhas_file_srchas_cloud_srcprovider_typesname_expansion_dest_iternextr   current_expansion_iterr   current_destination)r   r   r   name_expansion_dest_tuples       r   r   CopyObjectsIterator.__init__  sU     )DDD$<! $T%B%B C";"O"OD8DDDr   c                     U $ )Nr)   r    s    r   rl   CopyObjectsIterator.__iter__  s    Kr   c                     [        U R                  5      n[        UU R                  R                  U R                  R                  5      nU R                  (       d&  UR                  R                  5       (       a  SU l        U R                  (       d&  UR                  R                  5       (       a  SU l        U R                  R                  R                  5       (       a!  U R                  R                  R                   nOSnU R"                  (       dC  Ub@  UR                  R                  5       (       a!  UR                  R                   U:w  a  SU l        UR                  R                   U R$                  ;  a/  U R$                  R'                  UR                  R                   5        U$ ! [         aJ    [        U R                  5      nUR                  U l        UR
                  U l        U R                  5       s $ f = f)z@Keeps track of URL types as the command iterates over arguments.TN)r   r   StopIterationr   r   r   r   __next__r   r   r   r   r   rI   r   rL   schemer   r   append)r   r   r   eltdst_url_schemes        r   r   CopyObjectsIterator.__next__  s   "4#>#>? .11==11MMOC
 !7!7!A!A!C!Cd#"8"8"C"C"E"Ed ++6688//;;BBnnN$>))++%%7 d
$$D,?,??
  !7!7!>!>?JA  "&t'D'D"E
#
7
7 !!:!F!Fd]]_s   F/ /AHH)r   r   r   r   r   r   r   N)	r#   r$   r%   r&   r'   r   rl   r   r(   r)   r   r   r   r     s    E$$r   r   rs   ))r'   
__future__r   r   r   r   collectionsr~   r[   rT   sixapitools.base.pyr   ro   gslib.exceptionr   r	   r
   "gslib.plurality_checkable_iteratorr   gslib.seek_ahead_threadr   "gslib.third_party.storage_apitoolsr   r   gslib.wildcard_iteratorr   objectr   r+   ru   r   rR   rV   rQ   r   
namedtupler   r   Iteratorr   r)   r   r   <module>r      s   ' %  '   	 
 
 %  , 3 2 I 3 W  8+,& +,\FV FR5 V 5 | (-15%)*/,104@!F6 (/% /%d2MF 2MjCV C6 (( 	 	& )4(>(>+.) %G#,, Gr   