diff --git a/common/workflow-core/src/main/scala/org/apache/texera/amber/core/storage/util/LakeFSStorageClient.scala b/common/workflow-core/src/main/scala/org/apache/texera/amber/core/storage/util/LakeFSStorageClient.scala
index d01e820259d..09fa6f3eb30 100644
--- a/common/workflow-core/src/main/scala/org/apache/texera/amber/core/storage/util/LakeFSStorageClient.scala
+++ b/common/workflow-core/src/main/scala/org/apache/texera/amber/core/storage/util/LakeFSStorageClient.scala
@@ -402,4 +402,23 @@ object LakeFSStorageClient {
     (bucket, key)
   }
 
+  /**
+    * Get file size.
+    *
+    * @param repoName Repository name.
+    * @param commitHash Commit hash of the version.
+    * @param filePath Path to the file in the repository.
+    * @return File size in bytes
+    */
+  def getFileSize(
+      repoName: String,
+      commitHash: String,
+      filePath: String
+  ): Long = {
+    objectsApi
+      .statObject(repoName, commitHash, filePath)
+      .execute()
+      .getSizeBytes
+      .longValue()
+  }
 }
diff --git a/file-service/src/main/scala/org/apache/texera/service/resource/DatasetResource.scala b/file-service/src/main/scala/org/apache/texera/service/resource/DatasetResource.scala
index 44ce22dfb1d..59437f23a46 100644
--- a/file-service/src/main/scala/org/apache/texera/service/resource/DatasetResource.scala
+++ b/file-service/src/main/scala/org/apache/texera/service/resource/DatasetResource.scala
@@ -57,7 +57,7 @@ import org.jooq.{DSLContext, EnumType}
 import org.jooq.impl.DSL
 import org.jooq.impl.DSL.{inline => inl}
 import java.io.{InputStream, OutputStream}
-import java.net.{HttpURLConnection, URL, URLDecoder}
+import java.net.{HttpURLConnection, URI, URL, URLDecoder}
 import java.nio.charset.StandardCharsets
 import java.nio.file.{Files, Paths}
 import java.util
@@ -70,6 +70,7 @@ import org.apache.texera.dao.jooq.generated.tables.DatasetUploadSession.DATASET_
 import org.apache.texera.dao.jooq.generated.tables.DatasetUploadSessionPart.DATASET_UPLOAD_SESSION_PART
 import org.jooq.exception.DataAccessException
 import software.amazon.awssdk.services.s3.model.UploadPartResponse
+import org.apache.commons.io.FilenameUtils
 import java.sql.SQLException
 import scala.util.Try
 
@@ -144,6 +145,40 @@ object DatasetResource {
       .toScala
   }
 
+  /**
+    * Validates a user-supplied relative file path using Apache Commons IO.
+    *
+    * The path is normalized with Unix separators ("." and ".." segments are resolved) and
+    * rejected when it is blank, escapes above its root, carries an absolute/drive/home
+    * prefix, or collapses to nothing (e.g. "." or "v1/..").
+    *
+    * @param path raw path supplied by the caller; may be null
+    * @return the normalized path
+    * @throws BadRequestException if the path is not a safe, non-empty relative path
+    */
+  def validateSafePath(path: String): String = {
+    if (path == null || path.trim.isEmpty) {
+      throw new BadRequestException("Path cannot be empty")
+    }
+
+    // normalize returns null when ".." segments would climb above the path's root
+    val normalized = FilenameUtils.normalize(path, true)
+    if (normalized == null) {
+      throw new BadRequestException("Invalid path")
+    }
+
+    // a non-zero prefix means a leading separator, drive letter, UNC host or "~" home marker
+    if (FilenameUtils.getPrefixLength(normalized) > 0) {
+      throw new BadRequestException("Absolute paths not allowed")
+    }
+
+    // inputs such as "." or "v1/.." normalize to "", which no longer names anything
+    if (normalized.isEmpty) {
+      throw new BadRequestException("Path cannot be empty")
+    }
+    normalized
+  }
+
   case class DashboardDataset(
       dataset: Dataset,
       ownerEmail: String,
@@ -177,6 +212,9 @@ object DatasetResource {
       fileNodes: List[DatasetFileNode],
       size: Long
   )
+
+  /** JSON body of the cover-image update request: a file path relative to the dataset root. */
+  case class CoverImageRequest(coverImage: String)
 }
 
 @Produces(Array(MediaType.APPLICATION_JSON, "image/jpeg", "application/pdf"))
@@ -186,6 +224,9 @@ class DatasetResource {
   private val ERR_DATASET_VERSION_NOT_FOUND_MESSAGE = "The version of the dataset not found"
   private val EXPIRATION_MINUTES = 5
 
+  private val COVER_IMAGE_SIZE_LIMIT_BYTES: Long = 10 * 1024 * 1024 // 10 MB
+  private val ALLOWED_IMAGE_EXTENSIONS: Set[String] = Set(".jpg", ".jpeg", ".png", ".gif", ".webp")
+
   /**
     * Helper function to get the dataset from DB with additional information including user access privilege and owner email
     */
@@ -1742,4 +1783,123 @@ class DatasetResource {
       Response.ok(Map("message" -> "Multipart upload aborted successfully")).build()
     }
   }
+
+  /**
+    * Updates the cover image for a dataset.
+    *
+    * The caller needs write access to the dataset. The path must be a safe relative path
+    * with an allowed image extension, and the file it names must not be larger than
+    * COVER_IMAGE_SIZE_LIMIT_BYTES.
+    *
+    * @param did Dataset ID
+    * @param request Cover image request containing the relative file path
+    * @param sessionUser Authenticated user session
+    * @return Response with updated cover image path
+    *
+    * Expected coverImage format: "version/folder/image.jpg" (relative to dataset root)
+    */
+  @POST
+  @RolesAllowed(Array("REGULAR", "ADMIN"))
+  @Path("/{did}/update/cover")
+  @Consumes(Array(MediaType.APPLICATION_JSON))
+  def updateDatasetCoverImage(
+      @PathParam("did") did: Integer,
+      request: CoverImageRequest,
+      @Auth sessionUser: SessionUser
+  ): Response = {
+    withTransaction(context) { ctx =>
+      val uid = sessionUser.getUid
+      val dataset = getDatasetByID(ctx, did)
+      if (!userHasWriteAccess(ctx, did, uid)) {
+        throw new ForbiddenException(ERR_USER_HAS_NO_ACCESS_TO_DATASET_MESSAGE)
+      }
+
+      // `request` itself may be null (e.g. no body sent), so guard it together with the path.
+      val requestedPath = Option(request)
+        .flatMap(r => Option(r.coverImage))
+        .filter(_.trim.nonEmpty)
+        .getOrElse(throw new BadRequestException("Cover image path is required"))
+
+      val normalized = DatasetResource.validateSafePath(requestedPath)
+
+      // Fixed locale: lower-casing must not depend on the JVM default locale.
+      val extension = FilenameUtils.getExtension(normalized)
+      val dottedExtension = s".$extension".toLowerCase(java.util.Locale.ROOT)
+      if (extension == null || !ALLOWED_IMAGE_EXTENSIONS.contains(dottedExtension)) {
+        throw new BadRequestException("Invalid file type")
+      }
+
+      val owner = getOwner(ctx, did)
+      val document = DocumentFactory
+        .openReadonlyDocument(
+          FileResolver.resolve(s"${owner.getEmail}/${dataset.getName}/$normalized")
+        )
+        .asInstanceOf[OnDataset]
+
+      val fileSize = LakeFSStorageClient.getFileSize(
+        document.getRepositoryName(),
+        document.getVersionHash(),
+        document.getFileRelativePath()
+      )
+
+      if (fileSize > COVER_IMAGE_SIZE_LIMIT_BYTES) {
+        throw new BadRequestException(
+          s"Cover image must be less than ${COVER_IMAGE_SIZE_LIMIT_BYTES / (1024 * 1024)} MB"
+        )
+      }
+
+      dataset.setCoverImage(normalized)
+      new DatasetDao(ctx.configuration()).update(dataset)
+      Response.ok(Map("coverImage" -> normalized)).build()
+    }
+  }
+
+  /**
+    * Get the cover image for a dataset.
+    * Returns a 307 redirect to the presigned S3 URL.
+    *
+    * Anonymous callers may only fetch covers of public datasets; signed-in callers need
+    * read access to the dataset.
+    *
+    * @param did Dataset ID
+    * @param sessionUser Authenticated user session, empty for anonymous callers
+    * @return 307 Temporary Redirect to cover image
+    */
+  @GET
+  @Path("/{did}/cover")
+  def getDatasetCover(
+      @PathParam("did") did: Integer,
+      @Auth sessionUser: Optional[SessionUser]
+  ): Response = {
+    withTransaction(context) { ctx =>
+      val dataset = getDatasetByID(ctx, did)
+
+      val requesterUid = if (sessionUser.isPresent) Some(sessionUser.get().getUid) else None
+
+      if (requesterUid.isEmpty && !dataset.getIsPublic) {
+        throw new ForbiddenException(ERR_USER_HAS_NO_ACCESS_TO_DATASET_MESSAGE)
+      } else if (requesterUid.exists(uid => !userHasReadAccess(ctx, did, uid))) {
+        throw new ForbiddenException(ERR_USER_HAS_NO_ACCESS_TO_DATASET_MESSAGE)
+      }
+
+      val coverImage = Option(dataset.getCoverImage).getOrElse(
+        throw new NotFoundException("No cover image")
+      )
+
+      val owner = getOwner(ctx, did)
+      val fullPath = s"${owner.getEmail}/${dataset.getName}/$coverImage"
+
+      val document = DocumentFactory
+        .openReadonlyDocument(FileResolver.resolve(fullPath))
+        .asInstanceOf[OnDataset]
+
+      val presignedUrl = LakeFSStorageClient.getFilePresignedUrl(
+        document.getRepositoryName(),
+        document.getVersionHash(),
+        document.getFileRelativePath()
+      )
+
+      Response.temporaryRedirect(new URI(presignedUrl)).build()
+    }
+  }
 }
diff --git a/file-service/src/test/scala/org/apache/texera/service/resource/DatasetResourceSpec.scala b/file-service/src/test/scala/org/apache/texera/service/resource/DatasetResourceSpec.scala
index 3f72c574861..43ddbee1cb6 100644
--- a/file-service/src/test/scala/org/apache/texera/service/resource/DatasetResourceSpec.scala
+++ b/file-service/src/test/scala/org/apache/texera/service/resource/DatasetResourceSpec.scala
@@ -1328,4 +1328,200 @@ class DatasetResourceSpec
     val part1 = fetchPartRows(uploadId).find(_.getPartNumber == 1).get
     part1.getEtag.trim should not be ""
   }
+
+  // ===========================================================================
+  // Cover Image Tests
+  // ===========================================================================
+
+  "updateDatasetCoverImage" should "reject path traversal attempts" in {
+    val maliciousPaths = Seq(
+      "../../../etc/passwd",
+      "v1/../../secret.txt",
+      "../escape.jpg"
+    )
+
+    maliciousPaths.foreach { path =>
+      val request = DatasetResource.CoverImageRequest(path)
+
+      assertThrows[BadRequestException] {
+        datasetResource.updateDatasetCoverImage(
+          baseDataset.getDid,
+          request,
+          sessionUser
+        )
+      }
+    }
+  }
+
+  it should "reject absolute paths" in {
+    val absolutePaths = Seq(
+      "/etc/passwd",
+      "/var/log/system.log"
+    )
+
+    absolutePaths.foreach { path =>
+      val request = DatasetResource.CoverImageRequest(path)
+
+      assertThrows[BadRequestException] {
+        datasetResource.updateDatasetCoverImage(
+          baseDataset.getDid,
+          request,
+          sessionUser
+        )
+      }
+    }
+  }
+
+  it should "reject invalid file types" in {
+    val invalidPaths = Seq(
+      "v1/script.js",
+      "v1/document.pdf",
+      "v1/data.csv"
+    )
+
+    invalidPaths.foreach { path =>
+      val request = DatasetResource.CoverImageRequest(path)
+
+      assertThrows[BadRequestException] {
+        datasetResource.updateDatasetCoverImage(
+          baseDataset.getDid,
+          request,
+          sessionUser
+        )
+      }
+    }
+  }
+
+  it should "reject empty or null cover image path" in {
+    assertThrows[BadRequestException] {
+      datasetResource.updateDatasetCoverImage(
+        baseDataset.getDid,
+        DatasetResource.CoverImageRequest(""),
+        sessionUser
+      )
+    }
+
+    assertThrows[BadRequestException] {
+      datasetResource.updateDatasetCoverImage(
+        baseDataset.getDid,
+        DatasetResource.CoverImageRequest(null),
+        sessionUser
+      )
+    }
+  }
+
+  it should "reject a missing request body" in {
+    assertThrows[BadRequestException] {
+      datasetResource.updateDatasetCoverImage(
+        baseDataset.getDid,
+        null,
+        sessionUser
+      )
+    }
+  }
+
+  it should "reject when user lacks WRITE access" in {
+    val request = DatasetResource.CoverImageRequest("v1/cover.jpg")
+
+    assertThrows[ForbiddenException] {
+      datasetResource.updateDatasetCoverImage(
+        baseDataset.getDid,
+        request,
+        sessionUser2
+      )
+    }
+  }
+
+  "getDatasetCover" should "reject private dataset cover for anonymous users" in {
+    val dataset = datasetDao.fetchOneByDid(baseDataset.getDid)
+    dataset.setIsPublic(false)
+    dataset.setCoverImage("v1/cover.jpg")
+    datasetDao.update(dataset)
+
+    assertThrows[ForbiddenException] {
+      datasetResource.getDatasetCover(baseDataset.getDid, Optional.empty())
+    }
+  }
+
+  it should "reject private dataset cover for users without access" in {
+    val dataset = datasetDao.fetchOneByDid(baseDataset.getDid)
+    dataset.setOwnerUid(ownerUser.getUid)
+    dataset.setIsPublic(false)
+    dataset.setCoverImage("v1/cover.jpg")
+    datasetDao.update(dataset)
+
+    assertThrows[ForbiddenException] {
+      datasetResource.getDatasetCover(baseDataset.getDid, Optional.of(sessionUser2))
+    }
+  }
+
+  it should "return 404 when no cover image is set" in {
+    val dataset = datasetDao.fetchOneByDid(baseDataset.getDid)
+    dataset.setCoverImage(null)
+    dataset.setIsPublic(true)
+    datasetDao.update(dataset)
+
+    assertThrows[NotFoundException] {
+      datasetResource.getDatasetCover(baseDataset.getDid, Optional.of(sessionUser))
+    }
+  }
+
+  "validateSafePath" should "accept valid relative paths" in {
+    DatasetResource.validateSafePath("v1/image.jpg") shouldEqual "v1/image.jpg"
+    DatasetResource.validateSafePath("v1/folder/photo.png") shouldEqual "v1/folder/photo.png"
+  }
+
+  it should "normalize safe internal navigation" in {
+    DatasetResource.validateSafePath("v1/../v2/img.jpg") shouldEqual "v2/img.jpg"
+    DatasetResource.validateSafePath("./v1/image.jpg") shouldEqual "v1/image.jpg"
+    DatasetResource.validateSafePath("v1/./image.jpg") shouldEqual "v1/image.jpg"
+  }
+
+  it should "reject path traversal" in {
+    assertThrows[BadRequestException] {
+      DatasetResource.validateSafePath("../escape.txt")
+    }
+
+    assertThrows[BadRequestException] {
+      DatasetResource.validateSafePath("../../etc/passwd")
+    }
+
+    assertThrows[BadRequestException] {
+      DatasetResource.validateSafePath("v1/../../escape.txt")
+    }
+  }
+
+  it should "reject absolute paths" in {
+    assertThrows[BadRequestException] {
+      DatasetResource.validateSafePath("/etc/passwd")
+    }
+
+    assertThrows[BadRequestException] {
+      DatasetResource.validateSafePath("C:\\windows\\system32")
+    }
+  }
+
+  it should "reject empty or null paths" in {
+    assertThrows[BadRequestException] {
+      DatasetResource.validateSafePath(null)
+    }
+
+    assertThrows[BadRequestException] {
+      DatasetResource.validateSafePath("")
+    }
+
+    assertThrows[BadRequestException] {
+      DatasetResource.validateSafePath(" ")
+    }
+  }
+
+  it should "reject paths that normalize to an empty string" in {
+    assertThrows[BadRequestException] {
+      DatasetResource.validateSafePath("v1/..")
+    }
+
+    assertThrows[BadRequestException] {
+      DatasetResource.validateSafePath(".")
+    }
+  }
 }
diff --git a/frontend/src/app/common/type/dataset.ts b/frontend/src/app/common/type/dataset.ts
index 7825ca27976..97ff370302c 100644
--- a/frontend/src/app/common/type/dataset.ts
+++ b/frontend/src/app/common/type/dataset.ts
@@ -38,4 +38,5 @@ export interface Dataset {
   storagePath: string | undefined;
   description: string;
   creationTime: number | undefined;
+  coverImage: string | undefined;
 }
diff --git a/frontend/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/dataset-detail.component.html b/frontend/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/dataset-detail.component.html
index d4dddf94f6d..79ced02f864 100644
--- a/frontend/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/dataset-detail.component.html
+++ b/frontend/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/dataset-detail.component.html
@@ -263,7 +263,8 @@