19
19
from readthedocs .builds import tasks as build_tasks
20
20
from readthedocs .builds .constants import (
21
21
ARTIFACT_TYPES ,
22
+ ARTIFACT_TYPES_WITHOUT_MULTIPLE_FILES_SUPPORT ,
22
23
BUILD_FINAL_STATES ,
23
24
BUILD_STATE_BUILDING ,
24
25
BUILD_STATE_CLONING ,
@@ -494,7 +495,6 @@ def on_failure(self, exc, task_id, args, kwargs, einfo):
494
495
if self .data .version :
495
496
version_type = self .data .version .type
496
497
497
- # NOTE: autoflake gets confused here. We need the NOQA for now.
498
498
status = BUILD_STATUS_FAILURE
499
499
if isinstance (exc , BuildUserSkip ):
500
500
# The build was skipped by returning the magic exit code,
@@ -513,55 +513,91 @@ def on_failure(self, exc, task_id, args, kwargs, einfo):
513
513
# Update build object
514
514
self .data .build ['success' ] = False
515
515
516
- def on_success (self , retval , task_id , args , kwargs ):
517
- time_before_store_build_artifacts = timezone .now ()
518
- # Store build artifacts to storage (local or cloud storage)
519
- #
520
- # TODO: move ``store_build_artifacts`` inside ``execute`` so we can
521
- # handle exceptions properly on ``on_failure``
522
- self .store_build_artifacts ()
523
- log .info (
524
- "Store build artifacts finished." ,
525
- time = (timezone .now () - time_before_store_build_artifacts ).seconds ,
526
- )
516
+ def get_valid_artifact_types (self ):
517
+ """
518
+ Return a list of all the valid artifact types for this build.
527
519
528
- # HTML are built successfully.
529
- html = os .path .exists (
530
- self .data .project .artifact_path (
531
- version = self .data .version .slug , type_ = "html"
532
- )
533
- )
534
- localmedia = os .path .exists (
535
- self .data .project .artifact_path (
536
- version = self .data .version .slug , type_ = "htmlzip"
537
- )
538
- )
539
- pdf = os .path .exists (
540
- self .data .project .artifact_path (version = self .data .version .slug , type_ = "pdf" )
541
- )
542
- epub = os .path .exists (
543
- self .data .project .artifact_path (
544
- version = self .data .version .slug , type_ = "epub"
520
+ It performs the following checks on each output format type path:
521
+ - it exists
522
+ - it is a directory
523
+ - does not contains more than 1 files (only PDF, HTMLZip, ePUB)
524
+
525
+ TODO: remove the limitation of only 1 file.
526
+ Add support for multiple PDF files in the output directory and
527
+ grab them by using glob syntaxt between other files that could be garbage.
528
+ """
529
+ valid_artifacts = []
530
+ for artifact_type in ARTIFACT_TYPES :
531
+ artifact_directory = self .data .project .artifact_path (
532
+ version = self .data .version .slug ,
533
+ type_ = artifact_type ,
545
534
)
546
- )
535
+ if not os .path .exists (artifact_directory ):
536
+ # There is no output directory.
537
+ # Skip this format.
538
+ continue
539
+
540
+ if not os .path .isdir (artifact_directory ):
541
+ log .error (
542
+ "The output path is not a directory." ,
543
+ output_format = artifact_type ,
544
+ )
545
+ raise BuildUserError (
546
+ BuildUserError .BUILD_OUTPUT_IS_NOT_A_DIRECTORY .format (
547
+ artifact_type = artifact_type
548
+ )
549
+ )
550
+
551
+ # Check if there are multiple files on artifact directories.
552
+ # These output format does not support multiple files yet.
553
+ # In case multiple files are found, the upload for this format is not performed.
554
+ if artifact_type in ARTIFACT_TYPES_WITHOUT_MULTIPLE_FILES_SUPPORT :
555
+ artifact_format_files = len (os .listdir (artifact_directory ))
556
+ if artifact_format_files > 1 :
557
+ log .error (
558
+ "Multiple files are not supported for this format. "
559
+ "Skipping this output format." ,
560
+ output_format = artifact_type ,
561
+ )
562
+ raise BuildUserError (
563
+ BuildUserError .BUILD_OUTPUT_HAS_MULTIPLE_FILES .format (
564
+ artifact_type = artifact_type
565
+ )
566
+ )
567
+ elif artifact_format_files == 0 :
568
+ raise BuildUserError (
569
+ BuildUserError .BUILD_OUTPUT_HAS_0_FILES .format (
570
+ artifact_type = artifact_type
571
+ )
572
+ )
573
+
574
+ # If all the conditions were met, the artifact is valid
575
+ valid_artifacts .append (artifact_type )
576
+
577
+ return valid_artifacts
578
+
579
+ def on_success (self , retval , task_id , args , kwargs ):
580
+ valid_artifacts = self .get_valid_artifact_types ()
547
581
548
582
# NOTE: we are updating the db version instance *only* when
549
- if html :
583
+ # TODO: remove this condition and *always* update the DB Version instance
584
+ if "html" in valid_artifacts :
550
585
try :
551
586
api_v2 .version (self .data .version .pk ).patch (
552
587
{
553
588
"built" : True ,
554
589
"documentation_type" : self .data .version .documentation_type ,
555
- "has_pdf" : pdf ,
556
- "has_epub" : epub ,
557
- "has_htmlzip" : localmedia ,
590
+ "has_pdf" : " pdf" in valid_artifacts ,
591
+ "has_epub" : " epub" in valid_artifacts ,
592
+ "has_htmlzip" : "htmlzip" in valid_artifacts ,
558
593
}
559
594
)
560
595
except HttpClientError :
561
596
# NOTE: I think we should fail the build if we cannot update
562
597
# the version at this point. Otherwise, we will have inconsistent data
563
598
log .exception (
564
- 'Updating version failed, skipping file sync.' ,
599
+ "Updating version db object failed. "
600
+ 'Files are synced in the storage, but "Version" object is not updated' ,
565
601
)
566
602
567
603
# Index search data
@@ -658,10 +694,13 @@ def update_build(self, state=None):
658
694
try :
659
695
api_v2 .build (self .data .build ['id' ]).patch (self .data .build )
660
696
except Exception :
661
- # NOTE: I think we should fail the build if we cannot update it
662
- # at this point otherwise, the data will be inconsistent and we
663
- # may be serving "new docs" but saying the "build have failed"
664
- log .exception ('Unable to update build' )
697
+ # NOTE: we are updating the "Build" object on each `state`.
698
+ # Only if the last update fails, there may be some inconsistency
699
+ # between the "Build" object in our db and the reality.
700
+ #
701
+ # The `state` argument will help us to track this more and understand
702
+ # at what state our updates are failing and decide what to do.
703
+ log .exception ("Error while updating the build object." , state = state )
665
704
666
705
def execute (self ):
667
706
self .data .build_director = BuildDirector (
@@ -709,6 +748,12 @@ def execute(self):
709
748
finally :
710
749
self .data .build_data = self .collect_build_data ()
711
750
751
+ # At this point, the user's build already succeeded.
752
+ # However, we cannot use `.on_success()` because we still have to upload the artifacts;
753
+ # which could fail, and we want to detect that and handle it properly at `.on_failure()`
754
+ # Store build artifacts to storage (local or cloud storage)
755
+ self .store_build_artifacts ()
756
+
712
757
def collect_build_data (self ):
713
758
"""
714
759
Collect data from the current build.
@@ -786,53 +831,30 @@ def store_build_artifacts(self):
786
831
787
832
Remove build artifacts of types not included in this build (PDF, ePub, zip only).
788
833
"""
834
+ time_before_store_build_artifacts = timezone .now ()
789
835
log .info ('Writing build artifacts to media storage' )
790
- # NOTE: I don't remember why we removed this state from the Build
791
- # object. I'm re-adding it because I think it's useful, but we can
792
- # remove it if we want
793
836
self .update_build (state = BUILD_STATE_UPLOADING )
794
837
838
+ valid_artifacts = self .get_valid_artifact_types ()
839
+ log .bind (artifacts = valid_artifacts )
840
+
795
841
types_to_copy = []
796
842
types_to_delete = []
797
843
798
844
for artifact_type in ARTIFACT_TYPES :
799
- if os .path .exists (
800
- self .data .project .artifact_path (
801
- version = self .data .version .slug ,
802
- type_ = artifact_type ,
803
- )
804
- ):
845
+ if artifact_type in valid_artifacts :
805
846
types_to_copy .append (artifact_type )
806
847
# Never delete HTML nor JSON (search index)
807
848
elif artifact_type not in UNDELETABLE_ARTIFACT_TYPES :
808
849
types_to_delete .append (artifact_type )
809
850
851
+ # Upload formats
810
852
for media_type in types_to_copy :
811
- # NOTE: here is where we get the correct FROM path to upload
812
- from_path = self .data .version .project .artifact_path (
853
+ from_path = self .data .project .artifact_path (
813
854
version = self .data .version .slug ,
814
855
type_ = media_type ,
815
856
)
816
-
817
- # Check if there are multiple files on source directories.
818
- # These output format does not support multiple files yet.
819
- # In case multiple files are found, the upload for this format is not performed.
820
- #
821
- # TODO: we should fail the build for these cases and clearly communicate this.
822
- # to the user. To do this, we need to call this method (``store_build_artifacts``)
823
- # since the ``execute`` method.
824
- # It will allow us to just `raise BuildUserError` and handle it at
825
- # Celery `on_failure` handler.
826
- if media_type in ("htmlzip" , "epub" , "pdf" ):
827
- if len (os .listdir (from_path )) > 1 :
828
- log .exception (
829
- "Multiple files are not supported for this format. "
830
- "Skipping this output format." ,
831
- output_format = media_type ,
832
- )
833
- continue
834
-
835
- to_path = self .data .version .project .get_storage_path (
857
+ to_path = self .data .project .get_storage_path (
836
858
type_ = media_type ,
837
859
version_slug = self .data .version .slug ,
838
860
include_file = False ,
@@ -844,15 +866,22 @@ def store_build_artifacts(self):
844
866
else :
845
867
build_media_storage .sync_directory (from_path , to_path )
846
868
except Exception :
847
- # Ideally this should just be an IOError
848
- # but some storage backends unfortunately throw other errors
869
+ # NOTE: the exceptions reported so far are:
870
+ # - botocore.exceptions:HTTPClientError
871
+ # - botocore.exceptions:ClientError
872
+ # - readthedocs.doc_builder.exceptions:BuildCancelled
849
873
log .exception (
850
- ' Error copying to storage (not failing build)' ,
874
+ " Error copying to storage" ,
851
875
media_type = media_type ,
852
876
from_path = from_path ,
853
877
to_path = to_path ,
854
878
)
879
+ # Re-raise the exception to fail the build and handle it
880
+ # automatically at `on_failure`.
881
+ # It will clearly communicate the error to the user.
882
+ raise BuildAppError ("Error uploading files to the storage." )
855
883
884
+ # Delete formats
856
885
for media_type in types_to_delete :
857
886
media_path = self .data .version .project .get_storage_path (
858
887
type_ = media_type ,
@@ -863,13 +892,21 @@ def store_build_artifacts(self):
863
892
try :
864
893
build_media_storage .delete_directory (media_path )
865
894
except Exception :
866
- # Ideally this should just be an IOError
867
- # but some storage backends unfortunately throw other errors
895
+ # NOTE: I didn't find any log line for this case yet
868
896
log .exception (
869
- ' Error deleting from storage (not failing build)' ,
897
+ " Error deleting files from storage" ,
870
898
media_type = media_type ,
871
899
media_path = media_path ,
872
900
)
901
+ # Re-raise the exception to fail the build and handle it
902
+ # automatically at `on_failure`.
903
+ # It will clearly communicate the error to the user.
904
+ raise BuildAppError ("Error deleting files from storage." )
905
+
906
+ log .info (
907
+ "Store build artifacts finished." ,
908
+ time = (timezone .now () - time_before_store_build_artifacts ).seconds ,
909
+ )
873
910
874
911
def send_notifications (self , version_pk , build_pk , event ):
875
912
"""Send notifications to all subscribers of `event`."""
0 commit comments