import base64
import concurrent.futures
import json
import os
import statistics
import sys
import threading
import time
import urllib.error
import urllib.request
# Endpoint that every benchmark request is POSTed to.
URL = "http://yunwo-ppocr-vl-gpu.tailb014e0.ts.net:5061/v1/ocr"

# Every chunk the benchmark knows about; a run uses a prefix of this list.
ALL = [
    dict(chunk=0, pages=5, path="/tmp/ppocr_vl_bench_chunks/large_p001_005.pdf"),
    dict(chunk=1, pages=5, path="/tmp/ppocr_vl_bench_chunks/large_p006_010.pdf"),
    dict(chunk=2, pages=5, path="/tmp/ppocr_vl_bench_chunks/large_p011_015.pdf"),
    dict(chunk=3, pages=5, path="/tmp/ppocr_vl_bench_chunks/large_p016_020.pdf"),
]

# Positional command-line arguments: <round> <client_workers> <chunk_count>.
ROUND = sys.argv[1]                # label copied verbatim into emitted events
CLIENT_WORKERS = int(sys.argv[2])  # thread-pool size used by the driver
_chunk_limit = int(sys.argv[3])    # how many leading entries of ALL to run
CHUNKS = ALL[:_chunk_limit]
# Serializes stdout writes: emit() is called from several worker threads, and
# print() writes the text and the trailing newline separately, so without a
# lock two records could end up interleaved on the same output line.
_EMIT_LOCK = threading.Lock()


def emit(x):
    """Print ``x`` to stdout as one JSON line, timestamping it if needed.

    Args:
        x: Dict describing an event. It is modified in place: a ``"ts"`` key
            holding the current local time (``%Y-%m-%dT%H:%M:%S%z``) is added
            unless the caller already supplied one.

    Returns:
        None. The record is written to stdout and flushed immediately.
    """
    x.setdefault("ts", time.strftime("%Y-%m-%dT%H:%M:%S%z"))
    # Serialize outside the lock so only the actual write is mutually exclusive.
    line = json.dumps(x, ensure_ascii=False)
    with _EMIT_LOCK:
        print(line, flush=True)
def run(item):
    """Send one PDF chunk to the OCR endpoint and report the outcome.

    The file at ``item["path"]`` is read, base64-encoded and POSTed to ``URL``
    as JSON. A ``chunk_start`` event is emitted before the file is read and a
    ``chunk_done`` or ``chunk_error`` event once the attempt is over.

    Args:
        item: Dict with the keys ``"chunk"``, ``"pages"`` and ``"path"``.

    Returns:
        The final event dict (the same object that was emitted). ``"ok"`` is
        True for ``chunk_done`` and False for ``chunk_error``. Error records
        carry ``"type"`` and ``"error"`` (truncated to 1000 characters), plus
        ``"http_status"`` when the failure was an HTTP error response.
    """
    path = item["path"]
    # Fields repeated in every event emitted for this chunk.
    ident = {"round": ROUND, "chunk": item["chunk"], "pages": item["pages"]}
    t_start = time.perf_counter()
    try:
        # File access lives inside the try so a missing or unreadable chunk is
        # recorded as a chunk_error instead of raising out of the worker and
        # aborting the whole benchmark.
        emit({"event": "chunk_start", **ident, "size_bytes": os.path.getsize(path)})
        with open(path, "rb") as fh:
            encoded = base64.b64encode(fh.read()).decode("ascii")
        # NOTE(review): the payload asks for timeout_sec=3600 while the client
        # below gives up after 1800 s -- confirm which limit is intended.
        body = json.dumps(
            {
                "filename": os.path.basename(path),
                "file_base64": encoded,
                "restructure": True,
                "timeout_sec": 3600,
            }
        ).encode()
        t_ready = time.perf_counter()
        req = urllib.request.Request(
            URL,
            data=body,
            headers={"Content-Type": "application/json"},
            method="POST",
        )
        with urllib.request.urlopen(req, timeout=1800) as resp:
            raw = resp.read()
            status = resp.status
        t_end = time.perf_counter()
        data = json.loads(raw.decode())
        returned = data.get("pages") or []
        res = {
            "event": "chunk_done",
            **ident,
            "ok": True,
            "http_status": status,
            # prep covers the start event, file read, base64 and JSON encoding.
            "prep_sec": round(t_ready - t_start, 3),
            "request_sec": round(t_end - t_ready, 3),
            "total_sec": round(t_end - t_start, 3),
            "server_elapsed_sec": data.get("elapsed_sec"),
            "pages_returned": len(returned),
            # Non-dict page entries and missing/empty markdown count as zero.
            "chars": sum(
                len(pg.get("markdown") or "")
                for pg in returned
                if isinstance(pg, dict)
            ),
        }
    except Exception as exc:
        # Deliberately broad: this is the per-chunk boundary, and any failure
        # (I/O, network, malformed response) must become a result record so
        # the remaining chunks and the final summary still run.
        t_end = time.perf_counter()
        res = {
            "event": "chunk_error",
            **ident,
            "ok": False,
            "total_sec": round(t_end - t_start, 3),
            "type": type(exc).__name__,
            "error": str(exc)[:1000],
        }
        if isinstance(exc, urllib.error.HTTPError):
            # Keep the status code so HTTP failures can be told apart.
            res["http_status"] = exc.code
    emit(res)
    return res
# --- Benchmark driver: announce the run, fan out the chunks, summarize. ---
start = time.perf_counter()
emit(
    {
        "event": "bench_start",
        "round": ROUND,
        "client_workers": CLIENT_WORKERS,
        "chunks": len(CHUNKS),
        "total_pages": sum(entry["pages"] for entry in CHUNKS),
    }
)

# ex.map yields results in CHUNKS order; list() waits for all of them.
with concurrent.futures.ThreadPoolExecutor(max_workers=CLIENT_WORKERS) as ex:
    results = list(ex.map(run, CHUNKS))

wall = time.perf_counter() - start

# Only successful chunks count towards latency and throughput figures.
ok = [res for res in results if res.get("ok")]
times = [res["total_sec"] for res in ok]
pages = sum(res.get("pages", 0) for res in ok)

if wall:
    pages_per_min = round(pages / wall * 60, 3)
else:
    pages_per_min = None

if times:
    avg_total_sec = round(statistics.mean(times), 3)
    max_total_sec = round(max(times), 3)
else:
    # No successful chunk: there is nothing to average.
    avg_total_sec = None
    max_total_sec = None

emit(
    {
        "event": "bench_done",
        "round": ROUND,
        "wall_sec": round(wall, 3),
        "success_count": len(ok),
        "failure_count": len(results) - len(ok),
        "total_pages": pages,
        "pages_per_min": pages_per_min,
        "avg_total_sec": avg_total_sec,
        "max_total_sec": max_total_sec,
    }
)
