import base64
import concurrent.futures
import json
import os
import statistics
import sys
import threading
import time
import urllib.error
import urllib.request
# Endpoint that every chunk is POSTed to.
URL = "http://yunwo-ppocr-vl-gpu.tailb014e0.ts.net:5061/v1/ocr"

# Candidate inputs: page_001.pdf .. page_004.pdf, numbered from chunk 0 and
# each tagged with pages=1.
ALL = [
    {
        "chunk": idx,
        "pages": 1,
        "path": f"/tmp/ppocr_vl_bench_pages/page_{idx + 1:03d}.pdf",
    }
    for idx in range(4)
]

# Positional CLI arguments: <round label> <worker count> <chunk count>.
ROUND = sys.argv[1]
WORKERS = int(sys.argv[2])
N = int(sys.argv[3])

# Slicing means N larger than len(ALL) silently selects every entry.
CHUNKS = ALL[:N]
# emit() is called from several worker threads at once; text streams are not
# thread-safe, so writes are serialized to keep each JSON record on its own line.
_EMIT_LOCK = threading.Lock()


def emit(x):
    """Write *x* to stdout as a single JSON line and flush.

    A ``"ts"`` key holding the local time (``%Y-%m-%dT%H:%M:%S%z``) is added
    to *x* in place unless the caller already supplied one, so the caller's
    dict carries the timestamp afterwards.

    Args:
        x: JSON-serializable dict describing one event.
    """
    x.setdefault("ts", time.strftime("%Y-%m-%dT%H:%M:%S%z"))
    # Build record + newline up front and hand it over in one write: print()
    # issues the payload and the line terminator as two separate writes, which
    # lets records from concurrent threads run together.
    line = json.dumps(x, ensure_ascii=False) + "\n"
    with _EMIT_LOCK:
        sys.stdout.write(line)
        sys.stdout.flush()
def run(item):
    """Upload one file to the OCR endpoint and report the outcome.

    Emits a ``chunk_start`` event, POSTs the base64-encoded file to ``URL`` as
    a JSON body, then emits and returns either a ``chunk_done`` event (request
    succeeded and the response parsed) or a ``chunk_error`` event.

    File-access, network, HTTP and response-parsing failures are all reported
    as ``chunk_error`` rather than raised, so one bad chunk cannot propagate
    out of the executor and abort the remaining work.

    Args:
        item: Mapping with ``"chunk"`` (identifier echoed in every event) and
            ``"path"`` (file to upload).

    Returns:
        The emitted result dict. ``ok`` is True for ``chunk_done`` and False
        for ``chunk_error``; ``total_sec`` is measured from entry to the end of
        the response read (or to the point of failure).
    """
    path = item["path"]
    chunk = item["chunk"]
    started = time.perf_counter()

    # The size is informational only. If the file cannot be stat'ed, still
    # announce the chunk; the read below then fails and is reported once as
    # chunk_error.
    try:
        size_bytes = os.path.getsize(path)
    except OSError:
        size_bytes = None
    emit({"event": "chunk_start", "round": ROUND, "chunk": chunk, "size_bytes": size_bytes})

    try:
        with open(path, "rb") as handle:
            encoded = base64.b64encode(handle.read()).decode("ascii")
        body = json.dumps({
            "filename": os.path.basename(path),
            "file_base64": encoded,
            "restructure": True,
            "timeout_sec": 3600,
        }).encode()
        prepared = time.perf_counter()

        request = urllib.request.Request(
            URL,
            data=body,
            headers={"Content-Type": "application/json"},
            method="POST",
        )
        # NOTE(review): the client gives up after 1200 s although the request
        # body sends timeout_sec=3600 - confirm the mismatch is intentional.
        with urllib.request.urlopen(request, timeout=1200) as response:
            raw = response.read()
            status = response.status
        finished = time.perf_counter()

        data = json.loads(raw.decode())
        pages = data.get("pages") or []
        result = {
            "event": "chunk_done",
            "round": ROUND,
            "chunk": chunk,
            "ok": True,
            "status": status,
            "prep_sec": round(prepared - started, 3),
            "total_sec": round(finished - started, 3),
            "server_elapsed_sec": data.get("elapsed_sec"),
            "worker_index": data.get("worker_index"),
            "pages_returned": len(pages),
            # Non-dict page entries are skipped; a missing/empty markdown counts as 0.
            "chars": sum(len(pg.get("markdown") or "") for pg in pages if isinstance(pg, dict)),
        }
    except Exception as exc:  # worker boundary: report the failure, never raise
        finished = time.perf_counter()
        result = {
            "event": "chunk_error",
            "round": ROUND,
            "chunk": chunk,
            "ok": False,
            "total_sec": round(finished - started, 3),
            "type": type(exc).__name__,
            "error": str(exc)[:1000],
        }
        # urlopen raises HTTPError for error responses; keep the status code
        # so server-side failures can be told apart from transport failures.
        if isinstance(exc, urllib.error.HTTPError):
            result["status"] = exc.code

    emit(result)
    return result
# Fan the selected chunks out over a thread pool, then summarize the round.
start = time.perf_counter()
# Report the number of chunks actually scheduled: CHUNKS is a slice, so its
# length differs from the requested N when N exceeds the available inputs or
# is negative.
emit({"event": "bench_start", "round": ROUND, "workers": WORKERS, "chunks": len(CHUNKS)})

with concurrent.futures.ThreadPoolExecutor(max_workers=WORKERS) as ex:
    # map() yields results in submission order, not completion order.
    results = list(ex.map(run, CHUNKS))

wall = time.perf_counter() - start
ok = [r for r in results if r.get("ok")]
times = [r["total_sec"] for r in ok]

emit({
    "event": "bench_done",
    "round": ROUND,
    "wall_sec": round(wall, 3),
    "success_count": len(ok),
    "failure_count": len(results) - len(ok),
    # Throughput counts successful chunks per wall-clock minute.
    "pages_per_min": round(len(ok) / wall * 60, 3) if wall else None,
    # Latency statistics cover successful chunks only.
    "avg_total_sec": round(statistics.mean(times), 3) if times else None,
    "max_total_sec": round(max(times), 3) if times else None,
})
