|
| 1 | +# Copyright (C) 2024 Intel Corporation |
| 2 | +# SPDX-License-Identifier: Apache-2.0 |
| 3 | + |
| 4 | +import logging |
| 5 | +import os |
| 6 | +import re |
| 7 | + |
| 8 | +from fastapi import HTTPException |
| 9 | + |
| 10 | +from comps import CustomLogger, LVMVideoDoc, OpeaComponentRegistry, SearchedMultimodalDoc, ServiceType |
| 11 | +from comps.cores.common.component import OpeaComponent |
| 12 | + |
# Component-scoped logger for the video reranking service.
logger = CustomLogger("video_reranking")
# Raw LOGFLAG environment value: a string when the variable is set, False when it is not.
logflag = os.environ.get("LOGFLAG", False)
| 15 | + |
# Chunk duration passed to the LVM request, read from CHUNK_DURATION.
# The value is parsed with float() so fractional settings such as "2.5" are honoured;
# a plain str.isdigit() gate would silently replace them with the default.
# Unset, empty, unparsable, negative or non-finite values fall back to 10.0.
_raw_chunk_duration = os.getenv("CHUNK_DURATION", "10") or "10"
try:
    chunk_duration = float(_raw_chunk_duration)
except ValueError:
    chunk_duration = 10.0
# The chained comparison is False for NaN and for +/-inf, so those are rejected too.
if not 0 <= chunk_duration < float("inf"):
    chunk_duration = 10.0
| 18 | + |
# Base URL used to build video links; an unset or empty variable selects the default.
file_server_endpoint = os.environ.get("FILE_SERVER_ENDPOINT") or "http://0.0.0.0:6005"

# Configure the root logger, which the helpers in this module log through.
logging.basicConfig(
    format="%(levelname)s: [%(asctime)s] %(message)s",
    datefmt="%d/%m/%Y %I:%M:%S",
    level=logging.INFO,
)
| 24 | + |
| 25 | + |
def get_top_doc(top_n, videos) -> list:
    """Return the names of the videos that occur most often in ``videos``.

    Args:
        top_n: Upper bound on the number of names returned; applied as the
            slice bound on the ranked list of names.
        videos: Iterable of video names, one entry per hit, or None.

    Returns:
        Distinct video names ordered by descending number of occurrences and
        truncated to ``top_n``. Names with equal counts keep first-seen order.
        Returns None when ``videos`` is None.
    """
    if videos is None:
        return None

    # Count occurrences per name; dict.get supplies the initial 0, so no
    # KeyError can occur and no exception handler is needed.
    hit_score = {}
    for video_name in videos:
        hit_score[video_name] = hit_score.get(video_name, 0) + 1

    # sorted() is stable, so ties stay in insertion (first-seen) order.
    ranked = dict(sorted(hit_score.items(), key=lambda item: -item[1]))
    top_n_names = list(ranked)[:top_n]
    logging.info(f"top docs = {ranked}")
    logging.info(f"top n docs names = {top_n_names}")

    return top_n_names
| 44 | + |
| 45 | + |
def find_timestamp_from_video(metadata_list, video):
    """Return the "timestamp" of the first metadata entry whose "video" equals ``video``.

    Entries are scanned in order. Returns None when no entry matches.
    """
    for entry in metadata_list:
        if entry["video"] == video:
            return entry["timestamp"]
    return None
| 51 | + |
| 52 | + |
def format_video_name(video_name):
    """Normalize a video name to ``<base>.mp4``.

    A trailing ``_interval_<digits>`` suffix and/or ``.mp4`` extension is
    stripped from the name, then ``.mp4`` is appended.

    Raises:
        ValueError: If the name ends in a file extension other than ``mp4``.
    """
    # Reject any trailing ".<word>" extension that is not exactly "mp4".
    ext_match = re.search(r"\.(\w+)$", video_name)
    if ext_match is not None and ext_match.group(1) != "mp4":
        extension = ext_match.group(1)
        raise ValueError(f"Invalid file extension: .{extension}. Only '.mp4' is allowed.")

    # Drop the optional interval suffix and optional extension at the end of the name.
    stem = re.sub(r"(_interval_\d+)?(\.mp4)?$", "", video_name)
    return stem + ".mp4"
| 70 | + |
| 71 | + |
@OpeaComponentRegistry.register("OPEA_VIDEO_RERANKING")
class OpeaVideoReranking(OpeaComponent):
    """Reranking component, registered as OPEA_VIDEO_RERANKING, that selects one video from search hits."""

    def __init__(self, name: str, description: str, config: dict = None):
        # The component type string is the lower-cased name of the RERANK service type.
        service_type = ServiceType.RERANK.name.lower()
        super().__init__(name, service_type, description, config)

    async def invoke(self, input: SearchedMultimodalDoc) -> LVMVideoDoc:
        """Build an LVM video request for the most frequently retrieved video.

        Args:
            input (SearchedMultimodalDoc): Search result; ``metadata``, ``top_n``
                and ``initial_query`` are read from it.

        Returns:
            LVMVideoDoc: Request holding the video URL, the initial query as the
                prompt, the chunk start timestamp and the configured chunk duration.

        Raises:
            HTTPException: Status 400 when a ValueError is raised while building the
                request (for example an unsupported file extension), status 500 for
                any other exception.
        """
        try:
            # Rank videos by how many metadata entries reference them.
            hit_names = [entry["video"] for entry in input.metadata]
            ranked_names = get_top_doc(input.top_n, hit_names)

            # Only the highest-ranked video is used.
            best_video = ranked_names[0]
            chunk_start = find_timestamp_from_video(input.metadata, best_video)
            file_name = format_video_name(best_video)
            base_url = file_server_endpoint.rstrip("/")

            return LVMVideoDoc(
                video_url=f"{base_url}/{file_name}",
                prompt=input.initial_query,
                chunk_start=chunk_start,
                chunk_duration=float(chunk_duration),
                max_new_tokens=512,
            )
        except ValueError as e:
            raise HTTPException(status_code=400, detail=str(e))
        except Exception as e:
            logging.error(f"Unexpected error in reranking: {str(e)}")
            # Any other failure is reported as a generic server error.
            raise HTTPException(status_code=500, detail="An unexpected error occurred.")

    def check_health(self) -> bool:
        """Report the health of the reranking component.

        Returns:
            bool: Always True; no check is performed.
        """
        return True
0 commit comments