Selaa lähdekoodia

feat: 录音浏览器端转16k单声道WAV,适配百度ASR

百度VOP只认pcm/wav/amr/m4a,浏览器录的webm/mp4不支持。
用Web Audio API在前端解码→混单声道→线性插值重采样16k→编码16bit PCM WAV,
零服务器依赖(不需ffmpeg)。

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
simonlll 1 viikko sitten
vanhempi
commit
5a095574e5
1 muutettua tiedostoa jossa 87 lisäystä ja 9 poistoa
  1. 87 9
      src/pages/ai-chat/index.vue

+ 87 - 9
src/pages/ai-chat/index.vue

@@ -117,12 +117,15 @@
117 117
 
118 118
     <!-- 输入区 -->
119 119
     <view class="input-bar">
120
-      <!-- 麦克风按钮 -->
120
+      <!-- 麦克风按钮(同时支持触摸和鼠标,方便桌面浏览器测试) -->
121 121
       <view
122 122
         :class="['mic-btn', recording ? 'recording' : '']"
123 123
         @touchstart.prevent="startRecord"
124 124
         @touchend.prevent="stopRecord"
125 125
         @touchcancel.prevent="cancelRecord"
126
+        @mousedown.prevent="startRecord"
127
+        @mouseup.prevent="stopRecord"
128
+        @mouseleave.prevent="onMicLeave"
126 129
       >
127 130
         <text class="mic-icon">{{ recording ? '●' : '🎤' }}</text>
128 131
       </view>
@@ -199,7 +202,15 @@ export default {
199 202
         this.mediaRecorder.start()
200 203
         this.recording = true
201 204
       } catch (err) {
202
-        uni.showToast({ title: '无法访问麦克风', icon: 'none' })
205
+        let msg = '无法访问麦克风'
206
+        if (!navigator.mediaDevices || !navigator.mediaDevices.getUserMedia) {
207
+          msg = '当前环境不支持录音(需HTTPS或localhost)'
208
+        } else if (err && err.name === 'NotAllowedError') {
209
+          msg = '麦克风权限被拒绝,请在浏览器允许'
210
+        } else if (err && err.name === 'NotFoundError') {
211
+          msg = '未检测到麦克风设备'
212
+        }
213
+        uni.showToast({ title: msg, icon: 'none', duration: 2500 })
203 214
       }
204 215
     },
205 216
 
@@ -216,30 +227,97 @@ export default {
216 227
       this.mediaRecorder.stop()
217 228
     },
218 229
 
230
+    // 鼠标移出按钮:录音中则发送(等同松手),未录音则忽略
231
+    onMicLeave(e) {
232
+      if (this.recording && this.mediaRecorder) {
233
+        this.recording = false
234
+        this.mediaRecorder.stop()
235
+      }
236
+    },
237
+
219 238
     async submitAudio() {
220 239
       const mimeType = (this.audioChunks[0] && this.audioChunks[0].type) || 'audio/webm'
221
-      const blob = new Blob(this.audioChunks, { type: mimeType })
222
-      const ext = mimeType.includes('mp4') ? 'm4a' : mimeType.includes('ogg') ? 'ogg' : 'webm'
223
-
224
-      const formData = new FormData()
225
-      formData.append('file', blob, 'voice.' + ext)
240
+      const srcBlob = new Blob(this.audioChunks, { type: mimeType })
226 241
 
227 242
       uni.showLoading({ title: '识别中...' })
228 243
       try {
244
+        // 浏览器端转成百度要求的 16k 单声道 16bit WAV
245
+        const wavBlob = await this.encodeWav16k(srcBlob)
246
+
247
+        const formData = new FormData()
248
+        formData.append('file', wavBlob, 'voice.wav')
249
+
229 250
         const resp = await fetch(ASR_URL, { method: 'POST', body: formData })
230 251
         const data = await resp.json()
231 252
         uni.hideLoading()
232 253
         if (data.success && data.text) {
233 254
           this.inputText = data.text
234
-          // 自动发送
235 255
           this.$nextTick(() => this.sendMessage())
236 256
         } else {
237 257
           uni.showToast({ title: data.message || '未识别到内容', icon: 'none' })
238 258
         }
239 259
       } catch (err) {
240 260
         uni.hideLoading()
241
-        uni.showToast({ title: '网络异常,请重试', icon: 'none' })
261
+        uni.showToast({ title: '识别失败:' + (err.message || '请重试'), icon: 'none' })
262
+      }
263
+    },
264
+
265
+    // 把任意录音 Blob 解码 → 混单声道 → 重采样16k → 编码16bit PCM WAV
266
+    async encodeWav16k(blob) {
267
+      const arrayBuffer = await blob.arrayBuffer()
268
+      const AudioCtx = window.AudioContext || window.webkitAudioContext
269
+      const ctx = new AudioCtx()
270
+      const decoded = await ctx.decodeAudioData(arrayBuffer)
271
+      ctx.close()
272
+
273
+      // 混成单声道
274
+      const numCh = decoded.numberOfChannels
275
+      const srcLen = decoded.length
276
+      const mono = new Float32Array(srcLen)
277
+      for (let ch = 0; ch < numCh; ch++) {
278
+        const ch_data = decoded.getChannelData(ch)
279
+        for (let i = 0; i < srcLen; i++) mono[i] += ch_data[i] / numCh
280
+      }
281
+
282
+      // 线性插值重采样到 16000
283
+      const dstRate = 16000
284
+      const srcRate = decoded.sampleRate
285
+      const dstLen = Math.round(srcLen * dstRate / srcRate)
286
+      const out = new Float32Array(dstLen)
287
+      const ratio = srcRate / dstRate
288
+      for (let i = 0; i < dstLen; i++) {
289
+        const pos = i * ratio
290
+        const idx = Math.floor(pos)
291
+        const frac = pos - idx
292
+        const s0 = mono[idx] || 0
293
+        const s1 = mono[idx + 1] || s0
294
+        out[i] = s0 + (s1 - s0) * frac
295
+      }
296
+
297
+      // 编码 WAV (44字节头 + 16bit PCM)
298
+      const buffer = new ArrayBuffer(44 + dstLen * 2)
299
+      const view = new DataView(buffer)
300
+      const writeStr = (off, str) => { for (let i = 0; i < str.length; i++) view.setUint8(off + i, str.charCodeAt(i)) }
301
+      writeStr(0, 'RIFF')
302
+      view.setUint32(4, 36 + dstLen * 2, true)
303
+      writeStr(8, 'WAVE')
304
+      writeStr(12, 'fmt ')
305
+      view.setUint32(16, 16, true)
306
+      view.setUint16(20, 1, true)        // PCM
307
+      view.setUint16(22, 1, true)        // 单声道
308
+      view.setUint32(24, dstRate, true)
309
+      view.setUint32(28, dstRate * 2, true)
310
+      view.setUint16(32, 2, true)
311
+      view.setUint16(34, 16, true)
312
+      writeStr(36, 'data')
313
+      view.setUint32(40, dstLen * 2, true)
314
+      let off = 44
315
+      for (let i = 0; i < dstLen; i++) {
316
+        let s = Math.max(-1, Math.min(1, out[i]))
317
+        view.setInt16(off, s < 0 ? s * 0x8000 : s * 0x7FFF, true)
318
+        off += 2
242 319
       }
320
+      return new Blob([view], { type: 'audio/wav' })
243 321
     },
244 322
 
245 323
     // ============ 导航 ============