Quellcode durchsuchen

fix: 改用Web Audio直接采集PCM,解决decodeAudioData失败

MediaRecorder的webm被decodeAudioData解码常失败(尤其短录音)。
改为录音时用ScriptProcessor实时采集原始Float32 PCM,
停止后直接重采样16k编码WAV,完全绕开容器解码。
新增:录音过短(<0.3s)提示、资源清理。

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
simonlll vor 1 Woche
Ursprung
Commit
fd21a2bf64
1 geänderte Datei mit 70 neuen und 58 gelöschten Zeilen
  1. 70 58
      src/pages/ai-chat/index.vue

+ 70 - 58
src/pages/ai-chat/index.vue

@@ -173,33 +173,46 @@ export default {
173 173
       sessionId: 'app_' + Date.now(),
174 174
       history: [],
175 175
       exampleTags: ['各班组查获排名', '打火机查获数量', '旅检一部人员资质', '全站党员人数'],
176
-      // 录音相关
176
+      // 录音相关(Web Audio 直接采集 PCM)
177 177
       recording: false,
178
-      mediaRecorder: null,
179
-      audioChunks: [],
180 178
       recordCancelled: false,
179
+      audioCtx: null,
180
+      mediaStream: null,
181
+      sourceNode: null,
182
+      scriptNode: null,
183
+      pcmChunks: [],
184
+      pcmSampleRate: 16000,
181 185
     }
182 186
   },
183 187
   methods: {
184
-    // ============ 录音相关 ============
188
+    // ============ 录音相关(Web Audio 直接采集原始 PCM,避免 decodeAudioData 失败)============
185 189
     async startRecord(e) {
186 190
       if (this.loading) return
187 191
       this.recordCancelled = false
188
-      this.audioChunks = []
192
+      this.pcmChunks = []
189 193
 
190 194
       try {
191 195
         const stream = await navigator.mediaDevices.getUserMedia({ audio: true })
192
-        this.mediaRecorder = new MediaRecorder(stream)
193
-        this.mediaRecorder.ondataavailable = (ev) => {
194
-          if (ev.data.size > 0) this.audioChunks.push(ev.data)
196
+        this.mediaStream = stream
197
+
198
+        const AudioCtx = window.AudioContext || window.webkitAudioContext
199
+        const ctx = new AudioCtx()
200
+        if (ctx.state === 'suspended') await ctx.resume()
201
+        this.audioCtx = ctx
202
+        this.pcmSampleRate = ctx.sampleRate
203
+
204
+        const source = ctx.createMediaStreamSource(stream)
205
+        const node = ctx.createScriptProcessor(4096, 1, 1)
206
+        node.onaudioprocess = (ev) => {
207
+          if (!this.recording) return
208
+          // 复制一份当前帧的 Float32 PCM
209
+          this.pcmChunks.push(new Float32Array(ev.inputBuffer.getChannelData(0)))
195 210
         }
196
-        this.mediaRecorder.onstop = () => {
197
-          stream.getTracks().forEach(t => t.stop())
198
-          if (!this.recordCancelled && this.audioChunks.length) {
199
-            this.submitAudio()
200
-          }
201
-        }
202
-        this.mediaRecorder.start()
211
+        source.connect(node)
212
+        node.connect(ctx.destination)
213
+
214
+        this.sourceNode = source
215
+        this.scriptNode = node
203 216
         this.recording = true
204 217
       } catch (err) {
205 218
         let msg = '无法访问麦克风'
@@ -214,36 +227,53 @@ export default {
214 227
       }
215 228
     },
216 229
 
230
+    _teardownAudio() {
231
+      try { if (this.scriptNode) { this.scriptNode.disconnect(); this.scriptNode.onaudioprocess = null } } catch (e) {}
232
+      try { if (this.sourceNode) this.sourceNode.disconnect() } catch (e) {}
233
+      try { if (this.mediaStream) this.mediaStream.getTracks().forEach(t => t.stop()) } catch (e) {}
234
+      try { if (this.audioCtx) this.audioCtx.close() } catch (e) {}
235
+      this.scriptNode = null
236
+      this.sourceNode = null
237
+      this.mediaStream = null
238
+      this.audioCtx = null
239
+    },
240
+
217 241
     stopRecord(e) {
218
-      if (!this.recording || !this.mediaRecorder) return
242
+      if (!this.recording) return
219 243
       this.recording = false
220
-      this.mediaRecorder.stop()
244
+      const rate = this.pcmSampleRate
245
+      const chunks = this.pcmChunks
246
+      this._teardownAudio()
247
+      if (!this.recordCancelled) this.submitAudio(chunks, rate)
221 248
     },
222 249
 
223 250
     cancelRecord(e) {
224
-      if (!this.recording || !this.mediaRecorder) return
251
+      if (!this.recording) return
225 252
       this.recordCancelled = true
226 253
       this.recording = false
227
-      this.mediaRecorder.stop()
254
+      this._teardownAudio()
228 255
     },
229 256
 
230
-    // 鼠标移出按钮:录音中则发送(等同松手),未录音则忽略
257
+    // 鼠标移出按钮:录音中则发送(等同松手)
231 258
     onMicLeave(e) {
232
-      if (this.recording && this.mediaRecorder) {
233
-        this.recording = false
234
-        this.mediaRecorder.stop()
235
-      }
259
+      if (this.recording) this.stopRecord(e)
236 260
     },
237 261
 
238
-    async submitAudio() {
239
-      const mimeType = (this.audioChunks[0] && this.audioChunks[0].type) || 'audio/webm'
240
-      const srcBlob = new Blob(this.audioChunks, { type: mimeType })
262
+    async submitAudio(chunks, srcRate) {
263
+      // 合并所有 PCM 帧
264
+      let total = 0
265
+      for (const c of chunks) total += c.length
266
+      if (total < srcRate * 0.3) {  // 少于0.3秒视为无效
267
+        uni.showToast({ title: '录音太短,请长按说话', icon: 'none' })
268
+        return
269
+      }
270
+      const merged = new Float32Array(total)
271
+      let pos = 0
272
+      for (const c of chunks) { merged.set(c, pos); pos += c.length }
241 273
 
242 274
       uni.showLoading({ title: '识别中...' })
243 275
       try {
244
-        // 浏览器端转成百度要求的 16k 单声道 16bit WAV
245
-        const wavBlob = await this.encodeWav16k(srcBlob)
246
-
276
+        const wavBlob = this.encodeWav16k(merged, srcRate)
247 277
         const formData = new FormData()
248 278
         formData.append('file', wavBlob, 'voice.wav')
249 279
 
@@ -262,39 +292,21 @@ export default {
262 292
       }
263 293
     },
264 294
 
265
-    // 把任意录音 Blob 解码 → 混单声道 → 重采样16k → 编码16bit PCM WAV
266
-    async encodeWav16k(blob) {
267
-      const arrayBuffer = await blob.arrayBuffer()
268
-      const AudioCtx = window.AudioContext || window.webkitAudioContext
269
-      const ctx = new AudioCtx()
270
-      const decoded = await ctx.decodeAudioData(arrayBuffer)
271
-      ctx.close()
272
-
273
-      // 混成单声道
274
-      const numCh = decoded.numberOfChannels
275
-      const srcLen = decoded.length
276
-      const mono = new Float32Array(srcLen)
277
-      for (let ch = 0; ch < numCh; ch++) {
278
-        const ch_data = decoded.getChannelData(ch)
279
-        for (let i = 0; i < srcLen; i++) mono[i] += ch_data[i] / numCh
280
-      }
281
-
282
-      // 线性插值重采样到 16000
295
+    // Float32 PCM → 混(已单声道)→ 重采样16k → 16bit PCM WAV
296
+    encodeWav16k(mono, srcRate) {
283 297
       const dstRate = 16000
284
-      const srcRate = decoded.sampleRate
285
-      const dstLen = Math.round(srcLen * dstRate / srcRate)
298
+      const dstLen = Math.round(mono.length * dstRate / srcRate)
286 299
       const out = new Float32Array(dstLen)
287 300
       const ratio = srcRate / dstRate
288 301
       for (let i = 0; i < dstLen; i++) {
289
-        const pos = i * ratio
290
-        const idx = Math.floor(pos)
291
-        const frac = pos - idx
302
+        const p = i * ratio
303
+        const idx = Math.floor(p)
304
+        const frac = p - idx
292 305
         const s0 = mono[idx] || 0
293
-        const s1 = mono[idx + 1] || s0
306
+        const s1 = mono[idx + 1] !== undefined ? mono[idx + 1] : s0
294 307
         out[i] = s0 + (s1 - s0) * frac
295 308
       }
296 309
 
297
-      // 编码 WAV (44字节头 + 16bit PCM)
298 310
       const buffer = new ArrayBuffer(44 + dstLen * 2)
299 311
       const view = new DataView(buffer)
300 312
       const writeStr = (off, str) => { for (let i = 0; i < str.length; i++) view.setUint8(off + i, str.charCodeAt(i)) }
@@ -303,8 +315,8 @@ export default {
303 315
       writeStr(8, 'WAVE')
304 316
       writeStr(12, 'fmt ')
305 317
       view.setUint32(16, 16, true)
306
-      view.setUint16(20, 1, true)        // PCM
307
-      view.setUint16(22, 1, true)        // 单声道
318
+      view.setUint16(20, 1, true)
319
+      view.setUint16(22, 1, true)
308 320
       view.setUint32(24, dstRate, true)
309 321
       view.setUint32(28, dstRate * 2, true)
310 322
       view.setUint16(32, 2, true)
@@ -313,7 +325,7 @@ export default {
313 325
       view.setUint32(40, dstLen * 2, true)
314 326
       let off = 44
315 327
       for (let i = 0; i < dstLen; i++) {
316
-        let s = Math.max(-1, Math.min(1, out[i]))
328
+        const s = Math.max(-1, Math.min(1, out[i]))
317 329
         view.setInt16(off, s < 0 ? s * 0x8000 : s * 0x7FFF, true)
318 330
         off += 2
319 331
       }