WebM VP8 Codec SDK
|
00001 /* 00002 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 00003 * 00004 * Use of this source code is governed by a BSD-style license 00005 * that can be found in the LICENSE file in the root of the source 00006 * tree. An additional intellectual property rights grant can be found 00007 * in the file PATENTS. All contributing project authors may 00008 * be found in the AUTHORS file in the root of the source tree. 00009 */ 00010 00011 00012 /* 00013 * This is an example demonstrating how to implement a multi-layer VP8 00014 * encoding scheme based on temporal scalability for video applications 00015 * that benefit from a scalable bitstream. 00016 */ 00017 #include <stdio.h> 00018 #include <stdlib.h> 00019 #include <stdarg.h> 00020 #include <string.h> 00021 #define VPX_CODEC_DISABLE_COMPAT 1 00022 #include "vpx/vpx_encoder.h" 00023 #include "vpx/vp8cx.h" 00024 #define interface (vpx_codec_vp8_cx()) 00025 #define fourcc 0x30385056 00026 00027 #define IVF_FILE_HDR_SZ (32) 00028 #define IVF_FRAME_HDR_SZ (12) 00029 00030 static void mem_put_le16(char *mem, unsigned int val) { 00031 mem[0] = val; 00032 mem[1] = val>>8; 00033 } 00034 00035 static void mem_put_le32(char *mem, unsigned int val) { 00036 mem[0] = val; 00037 mem[1] = val>>8; 00038 mem[2] = val>>16; 00039 mem[3] = val>>24; 00040 } 00041 00042 static void die(const char *fmt, ...) { 00043 va_list ap; 00044 00045 va_start(ap, fmt); 00046 vprintf(fmt, ap); 00047 if(fmt[strlen(fmt)-1] != '\n') 00048 printf("\n"); 00049 exit(EXIT_FAILURE); 00050 } 00051 00052 static void die_codec(vpx_codec_ctx_t *ctx, const char *s) { 00053 const char *detail = vpx_codec_error_detail(ctx); 00054 00055 printf("%s: %s\n", s, vpx_codec_error(ctx)); 00056 if(detail) 00057 printf(" %s\n",detail); 00058 exit(EXIT_FAILURE); 00059 } 00060 00061 static int read_frame(FILE *f, vpx_image_t *img) { 00062 size_t nbytes, to_read; 00063 int res = 1; 00064 00065 to_read = img->w*img->h*3/2; 00066 nbytes = fread(img->planes[0], 1, to_read, f); 00067 if(nbytes != to_read) { 00068 res = 0; 00069 if(nbytes > 0) 00070 printf("Warning: Read partial frame. Check your width & height!\n"); 00071 } 00072 return res; 00073 } 00074 00075 static void write_ivf_file_header(FILE *outfile, 00076 const vpx_codec_enc_cfg_t *cfg, 00077 int frame_cnt) { 00078 char header[32]; 00079 00080 if(cfg->g_pass != VPX_RC_ONE_PASS && cfg->g_pass != VPX_RC_LAST_PASS) 00081 return; 00082 header[0] = 'D'; 00083 header[1] = 'K'; 00084 header[2] = 'I'; 00085 header[3] = 'F'; 00086 mem_put_le16(header+4, 0); /* version */ 00087 mem_put_le16(header+6, 32); /* headersize */ 00088 mem_put_le32(header+8, fourcc); /* headersize */ 00089 mem_put_le16(header+12, cfg->g_w); /* width */ 00090 mem_put_le16(header+14, cfg->g_h); /* height */ 00091 mem_put_le32(header+16, cfg->g_timebase.den); /* rate */ 00092 mem_put_le32(header+20, cfg->g_timebase.num); /* scale */ 00093 mem_put_le32(header+24, frame_cnt); /* length */ 00094 mem_put_le32(header+28, 0); /* unused */ 00095 00096 if(fwrite(header, 1, 32, outfile)); 00097 } 00098 00099 00100 static void write_ivf_frame_header(FILE *outfile, 00101 const vpx_codec_cx_pkt_t *pkt) 00102 { 00103 char header[12]; 00104 vpx_codec_pts_t pts; 00105 00106 if(pkt->kind != VPX_CODEC_CX_FRAME_PKT) 00107 return; 00108 00109 pts = pkt->data.frame.pts; 00110 mem_put_le32(header, pkt->data.frame.sz); 00111 mem_put_le32(header+4, pts&0xFFFFFFFF); 00112 mem_put_le32(header+8, pts >> 32); 00113 00114 if(fwrite(header, 1, 12, outfile)); 00115 } 00116 00117 static int mode_to_num_layers[7] = {2, 2, 3, 3, 3, 3, 5}; 00118 00119 int main(int argc, char **argv) { 00120 FILE *infile, *outfile[MAX_LAYERS]; 00121 vpx_codec_ctx_t codec; 00122 vpx_codec_enc_cfg_t cfg; 00123 int frame_cnt = 0; 00124 vpx_image_t raw; 00125 vpx_codec_err_t res; 00126 unsigned int width; 00127 unsigned int height; 00128 int frame_avail; 00129 int got_data; 00130 int flags = 0; 00131 int i; 00132 int pts = 0; // PTS starts at 0 00133 int frame_duration = 1; // 1 timebase tick per frame 00134 00135 int layering_mode = 0; 00136 int frames_in_layer[MAX_LAYERS] = {0}; 00137 int layer_flags[MAX_PERIODICITY] = {0}; 00138 00139 // Check usage and arguments 00140 if (argc < 9) 00141 die("Usage: %s <infile> <outfile> <width> <height> <rate_num> " 00142 " <rate_den> <mode> <Rate_0> ... <Rate_nlayers-1>\n", argv[0]); 00143 00144 width = strtol (argv[3], NULL, 0); 00145 height = strtol (argv[4], NULL, 0); 00146 if (width < 16 || width%2 || height <16 || height%2) 00147 die ("Invalid resolution: %d x %d", width, height); 00148 00149 if (!sscanf(argv[7], "%d", &layering_mode)) 00150 die ("Invalid mode %s", argv[7]); 00151 if (layering_mode<0 || layering_mode>6) 00152 die ("Invalid mode (0..6) %s", argv[7]); 00153 00154 if (argc != 8+mode_to_num_layers[layering_mode]) 00155 die ("Invalid number of arguments"); 00156 00157 if (!vpx_img_alloc (&raw, VPX_IMG_FMT_I420, width, height, 1)) 00158 die ("Failed to allocate image", width, height); 00159 00160 printf("Using %s\n",vpx_codec_iface_name(interface)); 00161 00162 // Populate encoder configuration 00163 res = vpx_codec_enc_config_default(interface, &cfg, 0); 00164 if(res) { 00165 printf("Failed to get config: %s\n", vpx_codec_err_to_string(res)); 00166 return EXIT_FAILURE; 00167 } 00168 00169 // Update the default configuration with our settings 00170 cfg.g_w = width; 00171 cfg.g_h = height; 00172 00173 // Timebase format e.g. 30fps: numerator=1, demoninator=30 00174 if (!sscanf (argv[5], "%d", &cfg.g_timebase.num )) 00175 die ("Invalid timebase numerator %s", argv[5]); 00176 if (!sscanf (argv[6], "%d", &cfg.g_timebase.den )) 00177 die ("Invalid timebase denominator %s", argv[6]); 00178 00179 for (i=8; i<8+mode_to_num_layers[layering_mode]; i++) 00180 if (!sscanf(argv[i], "%d", &cfg.ts_target_bitrate[i-8])) 00181 die ("Invalid data rate %s", argv[i]); 00182 00183 // Real time parameters 00184 cfg.rc_dropframe_thresh = 0; 00185 cfg.rc_end_usage = VPX_CBR; 00186 cfg.rc_resize_allowed = 0; 00187 cfg.rc_min_quantizer = 4; 00188 cfg.rc_max_quantizer = 63; 00189 cfg.rc_undershoot_pct = 98; 00190 cfg.rc_overshoot_pct = 100; 00191 cfg.rc_buf_initial_sz = 500; 00192 cfg.rc_buf_optimal_sz = 600; 00193 cfg.rc_buf_sz = 1000; 00194 00195 // Enable error resilient mode 00196 cfg.g_error_resilient = 1; 00197 cfg.g_lag_in_frames = 0; 00198 cfg.kf_mode = VPX_KF_DISABLED; 00199 00200 // Disable automatic keyframe placement 00201 cfg.kf_min_dist = cfg.kf_max_dist = 1000; 00202 00203 // Temporal scaling parameters: 00204 // NOTE: The 3 prediction frames cannot be used interchangeably due to 00205 // differences in the way they are handled throughout the code. The 00206 // frames should be allocated to layers in the order LAST, GF, ARF. 00207 // Other combinations work, but may produce slightly inferior results. 00208 switch (layering_mode) 00209 { 00210 00211 case 0: 00212 { 00213 // 2-layers, 2-frame period 00214 int ids[2] = {0,1}; 00215 cfg.ts_number_layers = 2; 00216 cfg.ts_periodicity = 2; 00217 cfg.ts_rate_decimator[0] = 2; 00218 cfg.ts_rate_decimator[1] = 1; 00219 memcpy(cfg.ts_layer_id, ids, sizeof(ids)); 00220 00221 #if 1 00222 // 0=L, 1=GF, Intra-layer prediction enabled 00223 layer_flags[0] = VPX_EFLAG_FORCE_KF | 00224 VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF | 00225 VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF; 00226 layer_flags[1] = VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST | 00227 VP8_EFLAG_NO_REF_ARF; 00228 #else 00229 // 0=L, 1=GF, Intra-layer prediction disabled 00230 layer_flags[0] = VPX_EFLAG_FORCE_KF | 00231 VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF | 00232 VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF; 00233 layer_flags[1] = VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST | 00234 VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_REF_LAST; 00235 #endif 00236 break; 00237 } 00238 00239 case 1: 00240 { 00241 // 2-layers, 3-frame period 00242 int ids[3] = {0,1,1}; 00243 cfg.ts_number_layers = 2; 00244 cfg.ts_periodicity = 3; 00245 cfg.ts_rate_decimator[0] = 3; 00246 cfg.ts_rate_decimator[1] = 1; 00247 memcpy(cfg.ts_layer_id, ids, sizeof(ids)); 00248 00249 // 0=L, 1=GF, Intra-layer prediction enabled 00250 layer_flags[0] = VPX_EFLAG_FORCE_KF | 00251 VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF | 00252 VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF; 00253 layer_flags[1] = 00254 layer_flags[2] = VP8_EFLAG_NO_REF_GF | 00255 VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_ARF | 00256 VP8_EFLAG_NO_UPD_LAST; 00257 break; 00258 } 00259 00260 case 2: 00261 { 00262 // 3-layers, 6-frame period 00263 int ids[6] = {0,2,2,1,2,2}; 00264 cfg.ts_number_layers = 3; 00265 cfg.ts_periodicity = 6; 00266 cfg.ts_rate_decimator[0] = 6; 00267 cfg.ts_rate_decimator[1] = 3; 00268 cfg.ts_rate_decimator[2] = 1; 00269 memcpy(cfg.ts_layer_id, ids, sizeof(ids)); 00270 00271 // 0=L, 1=GF, 2=ARF, Intra-layer prediction enabled 00272 layer_flags[0] = VPX_EFLAG_FORCE_KF | 00273 VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF | 00274 VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF; 00275 layer_flags[3] = VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_ARF | 00276 VP8_EFLAG_NO_UPD_LAST; 00277 layer_flags[1] = 00278 layer_flags[2] = 00279 layer_flags[4] = 00280 layer_flags[5] = VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_LAST; 00281 break; 00282 } 00283 00284 case 3: 00285 { 00286 // 3-layers, 4-frame period 00287 int ids[4] = {0,2,1,2}; 00288 cfg.ts_number_layers = 3; 00289 cfg.ts_periodicity = 4; 00290 cfg.ts_rate_decimator[0] = 4; 00291 cfg.ts_rate_decimator[1] = 2; 00292 cfg.ts_rate_decimator[2] = 1; 00293 memcpy(cfg.ts_layer_id, ids, sizeof(ids)); 00294 00295 // 0=L, 1=GF, 2=ARF, Intra-layer prediction disabled 00296 layer_flags[0] = VPX_EFLAG_FORCE_KF | 00297 VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF | 00298 VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF; 00299 layer_flags[2] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF | 00300 VP8_EFLAG_NO_UPD_ARF | 00301 VP8_EFLAG_NO_UPD_LAST; 00302 layer_flags[1] = 00303 layer_flags[3] = VP8_EFLAG_NO_REF_ARF | 00304 VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF | 00305 VP8_EFLAG_NO_UPD_ARF; 00306 break; 00307 } 00308 00309 case 4: 00310 { 00311 // 3-layers, 4-frame period 00312 int ids[4] = {0,2,1,2}; 00313 cfg.ts_number_layers = 3; 00314 cfg.ts_periodicity = 4; 00315 cfg.ts_rate_decimator[0] = 4; 00316 cfg.ts_rate_decimator[1] = 2; 00317 cfg.ts_rate_decimator[2] = 1; 00318 memcpy(cfg.ts_layer_id, ids, sizeof(ids)); 00319 00320 // 0=L, 1=GF, 2=ARF, Intra-layer prediction enabled in layer 1, 00321 // disabled in layer 2 00322 layer_flags[0] = VPX_EFLAG_FORCE_KF | 00323 VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF | 00324 VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF; 00325 layer_flags[2] = VP8_EFLAG_NO_REF_ARF | 00326 VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_ARF; 00327 layer_flags[1] = 00328 layer_flags[3] = VP8_EFLAG_NO_REF_ARF | 00329 VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF | 00330 VP8_EFLAG_NO_UPD_ARF; 00331 break; 00332 } 00333 00334 case 5: 00335 { 00336 // 3-layers, 4-frame period 00337 int ids[4] = {0,2,1,2}; 00338 cfg.ts_number_layers = 3; 00339 cfg.ts_periodicity = 4; 00340 cfg.ts_rate_decimator[0] = 4; 00341 cfg.ts_rate_decimator[1] = 2; 00342 cfg.ts_rate_decimator[2] = 1; 00343 memcpy(cfg.ts_layer_id, ids, sizeof(ids)); 00344 00345 // 0=L, 1=GF, 2=ARF, Intra-layer prediction enabled 00346 layer_flags[0] = VPX_EFLAG_FORCE_KF | 00347 VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF | 00348 VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF; 00349 layer_flags[2] = VP8_EFLAG_NO_REF_ARF | 00350 VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_ARF; 00351 layer_flags[1] = 00352 layer_flags[3] = VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF; 00353 break; 00354 } 00355 00356 case 6: 00357 { 00358 // NOTE: Probably of academic interest only 00359 00360 // 5-layers, 16-frame period 00361 int ids[16] = {0,4,3,4,2,4,3,4,1,4,3,4,2,4,3,4}; 00362 cfg.ts_number_layers = 5; 00363 cfg.ts_periodicity = 16; 00364 cfg.ts_rate_decimator[0] = 16; 00365 cfg.ts_rate_decimator[1] = 8; 00366 cfg.ts_rate_decimator[2] = 4; 00367 cfg.ts_rate_decimator[3] = 2; 00368 cfg.ts_rate_decimator[4] = 1; 00369 memcpy(cfg.ts_layer_id, ids, sizeof(ids)); 00370 00371 layer_flags[0] = VPX_EFLAG_FORCE_KF; 00372 layer_flags[1] = 00373 layer_flags[3] = 00374 layer_flags[5] = 00375 layer_flags[7] = 00376 layer_flags[9] = 00377 layer_flags[11] = 00378 layer_flags[13] = 00379 layer_flags[15] = VP8_EFLAG_NO_UPD_LAST | 00380 VP8_EFLAG_NO_UPD_GF | 00381 VP8_EFLAG_NO_UPD_ARF | 00382 VP8_EFLAG_NO_UPD_ENTROPY; 00383 layer_flags[2] = 00384 layer_flags[6] = 00385 layer_flags[10] = 00386 layer_flags[14] = 0; 00387 layer_flags[4] = 00388 layer_flags[12] = VP8_EFLAG_NO_REF_LAST; 00389 layer_flags[8] = VP8_EFLAG_NO_REF_LAST | VP8_EFLAG_NO_REF_GF | 00390 VP8_EFLAG_NO_UPD_ENTROPY; 00391 break; 00392 } 00393 00394 default: 00395 break; 00396 } 00397 00398 // Open input file 00399 if(!(infile = fopen(argv[1], "rb"))) 00400 die("Failed to open %s for reading", argv[1]); 00401 00402 // Open an output file for each stream 00403 for (i=0; i<cfg.ts_number_layers; i++) 00404 { 00405 char file_name[512]; 00406 sprintf (file_name, "%s_%d.ivf", argv[2], i); 00407 if (!(outfile[i] = fopen(file_name, "wb"))) 00408 die("Failed to open %s for writing", file_name); 00409 write_ivf_file_header(outfile[i], &cfg, 0); 00410 } 00411 00412 // Initialize codec 00413 if (vpx_codec_enc_init (&codec, interface, &cfg, 0)) 00414 die_codec (&codec, "Failed to initialize encoder"); 00415 00416 // Cap CPU & first I-frame size 00417 vpx_codec_control (&codec, VP8E_SET_CPUUSED, -6); 00418 vpx_codec_control (&codec, VP8E_SET_MAX_INTRA_BITRATE_PCT, 600); 00419 00420 frame_avail = 1; 00421 while (frame_avail || got_data) { 00422 vpx_codec_iter_t iter = NULL; 00423 const vpx_codec_cx_pkt_t *pkt; 00424 00425 flags = layer_flags[frame_cnt % cfg.ts_periodicity]; 00426 00427 frame_avail = read_frame(infile, &raw); 00428 if (vpx_codec_encode(&codec, frame_avail? &raw : NULL, pts, 00429 1, flags, VPX_DL_REALTIME)) 00430 die_codec(&codec, "Failed to encode frame"); 00431 00432 // Reset KF flag 00433 layer_flags[0] &= ~VPX_EFLAG_FORCE_KF; 00434 00435 got_data = 0; 00436 while ( (pkt = vpx_codec_get_cx_data(&codec, &iter)) ) { 00437 got_data = 1; 00438 switch (pkt->kind) { 00439 case VPX_CODEC_CX_FRAME_PKT: 00440 for (i=cfg.ts_layer_id[frame_cnt % cfg.ts_periodicity]; 00441 i<cfg.ts_number_layers; i++) 00442 { 00443 write_ivf_frame_header(outfile[i], pkt); 00444 if (fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz, 00445 outfile[i])); 00446 frames_in_layer[i]++; 00447 } 00448 break; 00449 default: 00450 break; 00451 } 00452 printf (pkt->kind == VPX_CODEC_CX_FRAME_PKT 00453 && (pkt->data.frame.flags & VPX_FRAME_IS_KEY)? "K":"."); 00454 fflush (stdout); 00455 } 00456 frame_cnt++; 00457 pts += frame_duration; 00458 } 00459 printf ("\n"); 00460 fclose (infile); 00461 00462 printf ("Processed %d frames.\n",frame_cnt-1); 00463 if (vpx_codec_destroy(&codec)) 00464 die_codec (&codec, "Failed to destroy codec"); 00465 00466 // Try to rewrite the output file headers with the actual frame count 00467 for (i=0; i<cfg.ts_number_layers; i++) 00468 { 00469 if (!fseek(outfile[i], 0, SEEK_SET)) 00470 write_ivf_file_header (outfile[i], &cfg, frames_in_layer[i]); 00471 fclose (outfile[i]); 00472 } 00473 00474 return EXIT_SUCCESS; 00475 } 00476