|
import argparse
import io
import os
import posixpath
from collections import defaultdict
from typing import Optional

import fsspec
import requests
from huggingface_hub import HfApi, hf_hub_url
from huggingface_hub.hf_api import RepoFolder
from jinja2 import Environment, BaseLoader
from qiniu import Auth, put_file, put_stream_v2, put_data
5 | 13 |
|
6 | 14 |
|
7 | 15 | # 直接在脚本中定义模板 |
@@ -61,15 +69,102 @@ def upload_file(client, bucket_name, local_file, remote_file): |
61 | 69 | print(f"Uploaded {local_file} to {remote_file}") |
62 | 70 |
|
63 | 71 |
|
def upload_data(client, bucket_name, data, remote_file):
    """Upload an in-memory bytes payload to the bucket as *remote_file*.

    Args:
        client: qiniu ``Auth`` instance used to mint the upload token.
        bucket_name: Destination Qiniu bucket.
        data: Raw bytes (or file-like/str accepted by ``put_data``) to store.
        remote_file: Object key inside the bucket.

    Raises:
        RuntimeError: if Qiniu does not acknowledge the upload.
    """
    # Token is scoped to this single key and valid for one hour.
    token = client.upload_token(bucket_name, remote_file, 3600)
    # put_data returns (ret, info); ret is None on failure. The original code
    # discarded this and printed "Uploaded" even when the upload failed.
    ret, info = put_data(token, remote_file, data)
    if ret is None:
        raise RuntimeError(f"Failed to upload {remote_file}: {info}")
    print(f"Uploaded bytes data to {remote_file}")
| 77 | + |
def upload_stream(client, bucket_name, stream, remote_file, file_name, file_size):
    """Stream a readable file-like object into the bucket as *remote_file*.

    ``file_size`` must match the number of bytes the stream will yield;
    the result dict/response from Qiniu is printed for operator visibility.
    """
    # One-hour upload token scoped to the destination key.
    upload_token = client.upload_token(bucket_name, remote_file, 3600)
    print(f"Uploading stream to {remote_file}")
    result = put_stream_v2(upload_token, remote_file, stream, file_name, file_size)
    print(result)
| 83 | + |
def upload_hf_repo(
    ak,
    sk,
    bucket_name,
    repo_id,
    target,
    repo_type="model",
    revision="main",
    token: Optional[str] = None,
):
    """Mirror a HuggingFace repo into a Qiniu bucket with per-directory index pages.

    Streams every file of ``repo_id`` (at ``revision``) into the bucket under the
    ``target`` key prefix, then renders one ``index.html`` per directory from the
    module-level ``template_string`` Jinja2 template.

    Args:
        ak: Qiniu access key.
        sk: Qiniu secret key.
        bucket_name: Destination Qiniu bucket.
        repo_id: HuggingFace repo id, e.g. ``"org/name"``.
        target: Key prefix inside the bucket.
        repo_type: HuggingFace repo type (``"model"``, ``"dataset"``, ...).
        revision: Git revision of the repo to mirror.
        token: Optional HuggingFace token for private/gated repos.
    """
    q = Auth(ak, sk)
    env = Environment(loader=BaseLoader())
    template = env.from_string(template_string)

    api = HfApi(token=token)
    tree = api.list_repo_tree(
        repo_id=repo_id,
        repo_type=repo_type,
        revision=revision,
        recursive=True,
    )

    # dir path -> names of its immediate sub-dirs and files, used to render
    # one index.html per directory after all files are uploaded.
    structure: dict[str, dict[str, set[str]]] = defaultdict(
        lambda: {"dirs": set(), "files": set()}
    )

    # The token must also authenticate the raw-file downloads: without it,
    # private/gated repos are listable (HfApi carries the token) but every
    # fsspec.open() below would fail with 401/403. The original built these
    # headers and never used them.
    storage_opts = {"headers": {"Authorization": f"Bearer {token}"}} if token else {}

    for item in tree:
        if isinstance(item, RepoFolder):
            continue  # directory structure is reconstructed from file paths below

        # HF repo paths always use "/", and so do object-storage keys —
        # use posixpath (not os.path) so this is correct on Windows too.
        dir_path = posixpath.dirname(item.path)
        file_name = posixpath.basename(item.path)
        # Register every ancestor directory so nested index pages link correctly.
        parts = dir_path.split("/") if dir_path else []
        for i, part in enumerate(parts):
            parent_dir = "/".join(parts[:i])
            structure[parent_dir]["dirs"].add(part)
        structure[dir_path]["files"].add(file_name)

        url = hf_hub_url(repo_id, item.path, repo_type=repo_type, revision=revision)
        remote_path = posixpath.join(target, item.path)
        with fsspec.open(url, block_size=1_000_000, **storage_opts) as f:
            upload_stream(q, bucket_name, f, remote_path, file_name, item.size)

    for dir_path, content in structure.items():
        index_content = template.render(
            directory=posixpath.basename(dir_path) or "/",
            directories=sorted(content["dirs"]),
            files=sorted(content["files"]),
        )
        remote_index = posixpath.join(target, dir_path, "index.html")
        upload_data(q, bucket_name, index_content.encode("utf-8"), remote_index)
| 141 | + |
if __name__ == '__main__':
    # CLI: mirror either a local directory or an entire HuggingFace repo to Qiniu.
    parser = argparse.ArgumentParser()
    parser.add_argument('--ak', type=str, help='qiniu access key')
    parser.add_argument('--sk', type=str, help='qiniu secret key')
    parser.add_argument('--bucket', type=str, required=False, help='qiniu bucket name')
    parser.add_argument('--hf', action='store_true', help='treat src as HuggingFace repo id')
    parser.add_argument('--repo-type', type=str, default='model', help='HuggingFace repo type')
    parser.add_argument('--revision', type=str, default='main', help='HuggingFace repo revision')
    parser.add_argument('--hf-token', type=str, help='HuggingFace access token')
    parser.add_argument('src', type=str, help='Local dir or HF repo id to be upload')
    parser.add_argument('dst', type=str, help='Qiniu dir prefix')
    args = parser.parse_args()

    # Credentials: explicit flags win, then environment variables, then defaults.
    # NOTE(review): the env var names are spelled "QUNIU" (sic) — presumably a
    # typo for "QINIU", but renaming would break existing deployments; confirm
    # before changing.
    access_key = args.ak or os.getenv('QUNIU_ACCESS_TOKEN', '')
    secret_key = args.sk or os.getenv('QUNIU_SECRET_KEY', '')
    bucket_name = args.bucket or os.getenv('QUNIU_BUCKET_NAME', '') or 'baize-ai'

    if not args.hf:
        # Default mode: src is a local directory to upload recursively.
        upload_directory(access_key, secret_key, bucket_name, args.src, args.dst)
    else:
        # HF mode: src is a repo id; token falls back to the standard HF_TOKEN var.
        hf_access_token = args.hf_token or os.getenv('HF_TOKEN', None)
        upload_hf_repo(
            access_key,
            secret_key,
            bucket_name,
            args.src,
            args.dst,
            repo_type=args.repo_type,
            revision=args.revision,
            token=hf_access_token,
        )
0 commit comments