|
1 | 1 | ---
|
2 |
| -# Source: codegen/charts/llm-uservice/charts/tgi/templates/service.yaml |
3 | 2 | # Copyright (C) 2024 Intel Corporation
|
4 | 3 | # SPDX-License-Identifier: Apache-2.0
|
5 | 4 |
|
6 |
| -apiVersion: v1 |
7 |
| -kind: Service |
8 |
| -metadata: |
9 |
| - name: faqgen-tgi |
10 |
| - labels: |
11 |
| - helm.sh/chart: tgi-0.1.0 |
12 |
| - app.kubernetes.io/name: tgi |
13 |
| - app.kubernetes.io/instance: faqgen |
14 |
| - app.kubernetes.io/version: "1.4" |
15 |
| - app.kubernetes.io/managed-by: Helm |
16 |
| -spec: |
17 |
| - type: ClusterIP |
18 |
| - ports: |
19 |
| - - port: 80 |
20 |
| - targetPort: 80 |
21 |
| - protocol: TCP |
22 |
| - name: tgi |
23 |
| - selector: |
24 |
| - app.kubernetes.io/name: tgi |
25 |
| - app.kubernetes.io/instance: faqgen |
26 |
| ---- |
27 |
| -apiVersion: v1 |
28 |
| -kind: Service |
29 |
| -metadata: |
30 |
| - name: faqgen-llm-uservice |
31 |
| - labels: |
32 |
| - helm.sh/chart: llm-uservice-0.1.0 |
33 |
| - app.kubernetes.io/name: llm-uservice |
34 |
| - app.kubernetes.io/instance: faqgen |
35 |
| - app.kubernetes.io/version: "1.0.0" |
36 |
| - app.kubernetes.io/managed-by: Helm |
37 |
| -spec: |
38 |
| - type: ClusterIP |
39 |
| - ports: |
40 |
| - - port: 9000 |
41 |
| - targetPort: 9000 |
42 |
| - protocol: TCP |
43 |
| - name: llm-uservice |
44 |
| - selector: |
45 |
| - app.kubernetes.io/name: llm-uservice |
46 |
| - app.kubernetes.io/instance: faqgen |
47 |
| ---- |
48 |
| -apiVersion: v1 |
49 |
| -kind: Service |
50 |
| -metadata: |
51 |
| - name: faqgen |
52 |
| - labels: |
53 |
| - helm.sh/chart: faqgen-0.1.0 |
54 |
| - app.kubernetes.io/name: faqgen |
55 |
| - app.kubernetes.io/instance: faqgen |
56 |
| - app.kubernetes.io/version: "1.0.0" |
57 |
| - app.kubernetes.io/managed-by: Helm |
58 |
| -spec: |
59 |
| - type: ClusterIP |
60 |
| - ports: |
61 |
| - - port: 8888 |
62 |
| - targetPort: 8888 |
63 |
| - protocol: TCP |
64 |
| - name: faqgen |
65 |
| - selector: |
66 |
| - app.kubernetes.io/name: faqgen |
67 |
| - app.kubernetes.io/instance: faqgen |
68 |
| ---- |
69 | 5 | apiVersion: apps/v1
|
70 | 6 | kind: Deployment
|
71 | 7 | metadata:
|
72 |
| - name: faqgen-tgi |
73 |
| - labels: |
74 |
| - helm.sh/chart: tgi-0.1.0 |
75 |
| - app.kubernetes.io/name: tgi |
76 |
| - app.kubernetes.io/instance: faqgen |
77 |
| - app.kubernetes.io/version: "1.4" |
78 |
| - app.kubernetes.io/managed-by: Helm |
| 8 | + name: faq-tgi-cpu-deploy |
| 9 | + namespace: default |
79 | 10 | spec:
|
80 | 11 | replicas: 1
|
81 | 12 | selector:
|
82 | 13 | matchLabels:
|
83 |
| - app.kubernetes.io/name: tgi |
84 |
| - app.kubernetes.io/instance: faqgen |
| 14 | + app: faq-tgi-cpu-deploy |
85 | 15 | template:
|
86 | 16 | metadata:
|
| 17 | + annotations: |
| 18 | + sidecar.istio.io/rewriteAppHTTPProbers: 'true' |
87 | 19 | labels:
|
88 |
| - app.kubernetes.io/name: tgi |
89 |
| - app.kubernetes.io/instance: faqgen |
| 20 | + app: faq-tgi-cpu-deploy |
90 | 21 | spec:
|
| 22 | + hostIPC: true |
91 | 23 | securityContext: {}
|
92 | 24 | containers:
|
93 |
| - - name: tgi |
94 |
| - env: |
95 |
| - - name: MODEL_ID |
96 |
| - value: Intel/neural-chat-7b-v3-3 |
97 |
| - - name: PORT |
98 |
| - value: "80" |
99 |
| - - name: http_proxy |
100 |
| - value: |
101 |
| - - name: https_proxy |
102 |
| - value: |
103 |
| - - name: no_proxy |
104 |
| - value: |
105 |
| - securityContext: {} |
106 |
| - image: "ghcr.io/huggingface/text-generation-inference:1.4" |
107 |
| - imagePullPolicy: IfNotPresent |
108 |
| - volumeMounts: |
109 |
| - - mountPath: /data |
110 |
| - name: model-volume |
111 |
| - ports: |
112 |
| - - name: http |
113 |
| - containerPort: 80 |
114 |
| - protocol: TCP |
115 |
| - resources: {} |
| 25 | + - name: faq-tgi-cpu-deploy-demo |
| 26 | + env: |
| 27 | + - name: HUGGING_FACE_HUB_TOKEN |
| 28 | + value: "insert-your-huggingface-token-here" |
| 29 | + - name: PORT |
| 30 | + value: "80" |
| 31 | + image: ghcr.io/huggingface/text-generation-inference:1.4 |
| 32 | + imagePullPolicy: IfNotPresent |
| 33 | + securityContext: {} |
| 34 | + args: |
| 35 | + - --model-id |
| 36 | + - 'meta-llama/Meta-Llama-3-8B-Instruct' |
| 37 | + - --max-input-length |
| 38 | + - '3096' |
| 39 | + - --max-total-tokens |
| 40 | + - '4096' |
| 41 | + volumeMounts: |
| 42 | + - mountPath: /data |
| 43 | + name: model-volume |
| 44 | + - mountPath: /dev/shm |
| 45 | + name: shm |
| 46 | + ports: |
| 47 | + - containerPort: 80 |
| 48 | + serviceAccountName: default |
116 | 49 | volumes:
|
117 |
| - - name: model-volume |
118 |
| - hostPath: |
119 |
| - path: /mnt |
120 |
| - type: Directory |
| 50 | + - name: model-volume |
| 51 | + hostPath: |
| 52 | + path: /home/sdp/cesg |
| 53 | + type: Directory |
| 54 | + - name: shm |
| 55 | + emptyDir: |
| 56 | + medium: Memory |
| 57 | + sizeLimit: 1Gi |
| 58 | +--- |
| 59 | +kind: Service |
| 60 | +apiVersion: v1 |
| 61 | +metadata: |
| 62 | + name: faq-tgi-cpu-svc |
| 63 | +spec: |
| 64 | + type: ClusterIP |
| 65 | + selector: |
| 66 | + app: faq-tgi-cpu-deploy |
| 67 | + ports: |
| 68 | + - name: service |
| 69 | + port: 8011 |
| 70 | + targetPort: 80 |
121 | 71 | ---
|
122 | 72 | apiVersion: apps/v1
|
123 | 73 | kind: Deployment
|
124 | 74 | metadata:
|
125 |
| - name: faqgen-llm-uservice |
126 |
| - labels: |
127 |
| - helm.sh/chart: llm-uservice-0.1.0 |
128 |
| - app.kubernetes.io/name: llm-uservice |
129 |
| - app.kubernetes.io/instance: faqgen |
130 |
| - app.kubernetes.io/version: "1.0.0" |
131 |
| - app.kubernetes.io/managed-by: Helm |
| 75 | + name: faq-micro-cpu-deploy |
| 76 | + namespace: default |
132 | 77 | spec:
|
133 | 78 | replicas: 1
|
134 | 79 | selector:
|
135 | 80 | matchLabels:
|
136 |
| - app.kubernetes.io/name: llm-uservice |
137 |
| - app.kubernetes.io/instance: faqgen |
| 81 | + app: faq-micro-cpu-deploy |
138 | 82 | template:
|
139 | 83 | metadata:
|
| 84 | + annotations: |
| 85 | + sidecar.istio.io/rewriteAppHTTPProbers: 'true' |
140 | 86 | labels:
|
141 |
| - app.kubernetes.io/name: llm-uservice |
142 |
| - app.kubernetes.io/instance: faqgen |
| 87 | + app: faq-micro-cpu-deploy |
143 | 88 | spec:
|
144 |
| - securityContext: {} |
| 89 | + hostIPC: true |
145 | 90 | containers:
|
146 |
| - - name: faqgen |
| 91 | + - name: faq-micro-cpu-deploy |
147 | 92 | env:
|
148 | 93 | - name: TGI_LLM_ENDPOINT
|
149 |
| - value: "http://faqgen-tgi:80" |
| 94 | + value: "http://faq-tgi-cpu-svc.default.svc.cluster.local:8011" |
150 | 95 | - name: HUGGINGFACEHUB_API_TOKEN
|
151 | 96 | value: "insert-your-huggingface-token-here"
|
152 |
| - - name: http_proxy |
153 |
| - value: |
154 |
| - - name: https_proxy |
155 |
| - value: |
156 |
| - - name: no_proxy |
157 |
| - value: |
158 |
| - securityContext: {} |
159 |
| - image: "opea/llm-faqgen-tgi:latest" |
| 97 | + image: opea/llm-faqgen-tgi:latest |
160 | 98 | imagePullPolicy: IfNotPresent
|
| 99 | + args: null |
161 | 100 | ports:
|
162 |
| - - name: llm-uservice |
163 |
| - containerPort: 9000 |
164 |
| - protocol: TCP |
165 |
| - startupProbe: |
166 |
| - exec: |
167 |
| - command: |
168 |
| - - curl |
169 |
| - - http://faqgen-tgi:80 |
170 |
| - initialDelaySeconds: 5 |
171 |
| - periodSeconds: 5 |
172 |
| - failureThreshold: 120 |
173 |
| - resources: {} |
| 101 | + - containerPort: 9000 |
| 102 | + serviceAccountName: default |
| 103 | +--- |
| 104 | +kind: Service |
| 105 | +apiVersion: v1 |
| 106 | +metadata: |
| 107 | + name: faq-micro-cpu-svc |
| 108 | +spec: |
| 109 | + type: ClusterIP |
| 110 | + selector: |
| 111 | + app: faq-micro-cpu-deploy |
| 112 | + ports: |
| 113 | + - name: service |
| 114 | + port: 9004 |
| 115 | + targetPort: 9000 |
174 | 116 | ---
|
175 | 117 | apiVersion: apps/v1
|
176 | 118 | kind: Deployment
|
177 | 119 | metadata:
|
178 |
| - name: faqgen |
179 |
| - labels: |
180 |
| - helm.sh/chart: faqgen-0.1.0 |
181 |
| - app.kubernetes.io/name: faqgen |
182 |
| - app.kubernetes.io/instance: faqgen |
183 |
| - app.kubernetes.io/version: "1.0.0" |
184 |
| - app.kubernetes.io/managed-by: Helm |
| 120 | + name: faq-mega-server-cpu-deploy |
| 121 | + namespace: default |
185 | 122 | spec:
|
186 | 123 | replicas: 1
|
187 | 124 | selector:
|
188 | 125 | matchLabels:
|
189 |
| - app.kubernetes.io/name: faqgen |
190 |
| - app.kubernetes.io/instance: faqgen |
| 126 | + app: faq-mega-server-cpu-deploy |
191 | 127 | template:
|
192 | 128 | metadata:
|
| 129 | + annotations: |
| 130 | + sidecar.istio.io/rewriteAppHTTPProbers: 'true' |
193 | 131 | labels:
|
194 |
| - app.kubernetes.io/name: faqgen |
195 |
| - app.kubernetes.io/instance: faqgen |
| 132 | + app: faq-mega-server-cpu-deploy |
196 | 133 | spec:
|
197 |
| - securityContext: null |
| 134 | + hostIPC: true |
198 | 135 | containers:
|
199 |
| - - name: faqgen |
| 136 | + - name: faq-mega-server-cpu-deploy |
200 | 137 | env:
|
201 | 138 | - name: LLM_SERVICE_HOST_IP
|
202 |
| - value: faqgen-llm-uservice |
203 |
| - - name: http_proxy |
204 |
| - value: |
205 |
| - - name: https_proxy |
206 |
| - value: |
207 |
| - - name: no_proxy |
208 |
| - value: |
209 |
| - securityContext: null |
210 |
| - image: "opea/faqgen:latest" |
| 139 | + value: faq-micro-cpu-svc |
| 140 | + - name: LLM_SERVICE_PORT |
| 141 | + value: "9004" |
| 142 | + - name: MEGA_SERVICE_HOST_IP |
| 143 | + value: faq-mega-server-cpu-svc |
| 144 | + - name: MEGA_SERVICE_PORT |
| 145 | + value: "7777" |
| 146 | + image: opea/faqgen:latest |
211 | 147 | imagePullPolicy: IfNotPresent
|
| 148 | + args: null |
212 | 149 | ports:
|
213 |
| - - name: faqgen |
214 |
| - containerPort: 8888 |
215 |
| - protocol: TCP |
216 |
| - resources: null |
| 150 | + - containerPort: 7777 |
| 151 | + serviceAccountName: default |
| 152 | +--- |
| 153 | +kind: Service |
| 154 | +apiVersion: v1 |
| 155 | +metadata: |
| 156 | + name: faq-mega-server-cpu-svc |
| 157 | +spec: |
| 158 | + type: NodePort |
| 159 | + selector: |
| 160 | + app: faq-mega-server-cpu-deploy |
| 161 | + ports: |
| 162 | + - name: service |
| 163 | + port: 7778 |
| 164 | + targetPort: 7777 |
| 165 | + nodePort: 30778 |
0 commit comments