Files
pages/2019/05/27/wikipic.html
2025-12-31 16:00:29 +00:00

324 lines
18 KiB
HTML
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8" />
<meta http-equiv="X-UA-Compatible" content="IE=edge" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<!-- Begin Jekyll SEO tag v2.8.0 -->
<title>使用PHP批量下载Mediawiki站点的图片 | Mayx的博客</title>
<meta name="generator" content="Jekyll v3.9.5" />
<meta property="og:title" content="使用PHP批量下载Mediawiki站点的图片" />
<meta name="author" content="mayx" />
<meta property="og:locale" content="zh_CN" />
<meta name="description" content="又是万能的PHP!不过还是Mediawiki API的功劳" />
<meta property="og:description" content="又是万能的PHP!不过还是Mediawiki API的功劳" />
<meta property="og:site_name" content="Mayx的博客" />
<meta property="og:type" content="article" />
<meta property="article:published_time" content="2019-05-27T00:00:00+08:00" />
<meta name="twitter:card" content="summary" />
<meta property="twitter:title" content="使用PHP批量下载Mediawiki站点的图片" />
<meta name="google-site-verification" content="huTYdEesm8NaFymixMNqflyCp6Jfvd615j5Wq1i2PHc" />
<meta name="msvalidate.01" content="0ADFCE64B3557DC4DC5F2DC224C5FDDD" />
<meta name="yandex-verification" content="fc0e535abed800be" />
<script type="application/ld+json">
{"@context":"https://schema.org","@type":"BlogPosting","author":{"@type":"Person","name":"mayx"},"dateModified":"2019-05-27T00:00:00+08:00","datePublished":"2019-05-27T00:00:00+08:00","description":"又是万能的PHP!不过还是Mediawiki API的功劳","headline":"使用PHP批量下载Mediawiki站点的图片","mainEntityOfPage":{"@type":"WebPage","@id":"/2019/05/27/wikipic.html"},"publisher":{"@type":"Organization","logo":{"@type":"ImageObject","url":"https://avatars0.githubusercontent.com/u/17966333"},"name":"mayx"},"url":"/2019/05/27/wikipic.html"}</script>
<!-- End Jekyll SEO tag -->
<link rel="canonical" href="https://mabbs.github.io/2019/05/27/wikipic.html" />
<link type="application/atom+xml" rel="alternate" href="/atom.xml" title="Mayx的博客" />
<link rel="alternate" type="application/rss+xml" title="Mayx的博客(RSS)" href="/rss.xml" />
<link rel="alternate" type="application/json" title="Mayx的博客(JSON Feed)" href="/feed.json" />
<link rel="stylesheet" href="/assets/css/style.css?v=1767196818" />
<!--[if !IE]> -->
<link rel="stylesheet" href="/Live2dHistoire/live2d/css/live2d.css" />
<!-- <![endif]-->
<link rel="search" type="application/opensearchdescription+xml" href="/opensearch.xml" title="Mayx的博客" />
<link rel="webmention" href="https://webmention.io/mabbs.github.io/webmention" />
<link rel="pingback" href="https://webmention.io/mabbs.github.io/xmlrpc" />
<link rel="preconnect" href="https://summary.mayx.eu.org" crossorigin="anonymous" />
<link rel="prefetch" href="https://www.blogsclub.org/badge/mabbs.github.io" as="image" />
<link rel="blogroll" type="text/xml" href="/blogroll.opml" />
<link rel="me" href="https://github.com/Mabbs" />
<script src="/assets/js/jquery.min.js"></script>
<!--[if lt IE 9]>
<script src="//cdnjs.cloudflare.com/ajax/libs/html5shiv/3.7.3/html5shiv.min.js"></script>
<script src="//cdnjs.cloudflare.com/ajax/libs/jquery-ajaxtransport-xdomainrequest/1.0.3/jquery.xdomainrequest.min.js"></script>
<script src="//cdnjs.cloudflare.com/ajax/libs/respond.js/1.4.2/respond.min.js"></script>
<![endif]-->
<script>
var lastUpdated = new Date("Thu, 01 Jan 2026 00:00:18 +0800");
var BlogAPI = "https://summary.mayx.eu.org";
</script>
<script src="/assets/js/main.js"></script>
<!--[if !IE]> -->
<!-- Global site tag (gtag.js) - Google Analytics -->
<script async="async" src="https://www.googletagmanager.com/gtag/js?id=UA-137710294-1"></script>
<script>
window.dataLayer = window.dataLayer || [];
function gtag(){dataLayer.push(arguments);}
gtag('js', new Date());
gtag('config', 'UA-137710294-1');
</script>
<script src="/assets/js/instant.page.js" type="module"></script>
<!-- <![endif]-->
</head>
<body>
<!--[if !IE]> --><noscript><marquee style="top: -15px; position: relative;"><small>发现当前浏览器没有启用JavaScript这不影响你的浏览但可能会有一些功能无法使用……</small></marquee></noscript><!-- <![endif]-->
<!--[if IE]><marquee style="top: -15px; position: relative;"><small>发现当前浏览器为Internet Explorer这不影响你的浏览但可能会有一些功能无法使用……</small></marquee><![endif]-->
<div class="wrapper">
<header class="h-card">
<h1><a class="u-url u-uid p-name" rel="me" href="/">Mayx的博客</a></h1>
<img src="https://avatars0.githubusercontent.com/u/17966333" fetchpriority="high" class="u-photo" alt="Logo" style="width: 90%; max-width: 300px; max-height: 300px;" />
<p class="p-note">Mayx's Home Page</p>
<form action="/search.html">
<input type="text" name="keyword" id="search-input-all" placeholder="Search blog posts.." />&#160;<input type="submit" value="搜索" />
</form>
<br />
<p class="view"><a class="u-url" href="/Mabbs/">About me</a></p>
<ul class="downloads">
<li style="width: 270px; border-right: none;"><a href="/MayxBlog.tgz">Download <strong>TGZ File</strong></a></li>
</ul>
</header>
<section class="h-entry">
<small><time class="date dt-published" datetime="2019-05-27T00:00:00+08:00">27 May 2019</time> - 字数统计828 - 阅读大约需要3分钟 - Hits: <span id="/2019/05/27/wikipic.html" class="visitors">Loading...</span></small>
<h1 class="p-name">使用PHP批量下载Mediawiki站点的图片</h1>
<p class="view">by <a class="p-author h-card" href="//github.com/Mabbs">mayx</a></p>
<div id="outdate" style="display:none;">
<hr /><p>
这是一篇创建于 <span id="outime"></span> 天前的文章,其中的信息可能已经有所发展或是发生改变。
</p>
</div>
<script>
daysold = Math.floor((new Date().getTime() - new Date("Mon, 27 May 2019 00:00:00 +0800").getTime()) / (24 * 60 * 60 * 1000));
if (daysold > 90) {
document.getElementById("outdate").style.display = "block";
document.getElementById("outime").innerHTML = daysold;
}
</script>
<hr />
<b>AI摘要</b>
<p id="ai-output">这篇文章讲述了作者使用PHP处理 Mediawiki 站点图片下载问题的方法。通过解析页面内容,利用 `action=parse` 的 API 接口获取图片标签中的 URL作者创建了一个循环来遍历图片数组使用正则表达式解析出图片地址并将它们写入到 "List.txt" 文件中。作为备份策略,作者还提到可以从 `action=query&list=allimages` 接口获取整个Wiki站的所有图片。</p>
<hr />
<ul><li><a href="#解决方案">解决方案</a></li><li><a href="#code">Code</a></li><li><a href="#ps">P.S.</a></li></ul>
<hr />
<main class="post-content e-content" role="main"><p>又是万能的PHP!不过还是Mediawiki API的功劳<!--more--> </p><p>
最近我为了备份一下<a href="http://zh.moegirl.org/">某个Wiki站</a>Ta们把R18名字空间的东西删的一干二净 <del>后来才知道原来转移到了<a href="https://www.hmoegirl.com/">一个Wiki上</a>,真的是好久没关注了</del> 然后学习了一下Mediawiki API来下载整个WikiTa们把站点导出也给弄没了QAQ </p><p>
文本很好下载但是Mediawiki的图片我不知道存在哪里API文档翻烂了也没找到把图片解析成地址的API那怎么办呢</p>
<h1 id="解决方案">
<a href="#解决方案"><svg class='octicon' viewBox='0 0 16 16' version='1.1' width='16' height='32' aria-hidden='true'><path fill-rule='evenodd' d='M7.775 3.275a.75.75 0 001.06 1.06l1.25-1.25a2 2 0 112.83 2.83l-2.5 2.5a2 2 0 01-2.83 0 .75.75 0 00-1.06 1.06 3.5 3.5 0 004.95 0l2.5-2.5a3.5 3.5 0 00-4.95-4.95l-1.25 1.25zm-4.69 9.64a2 2 0 010-2.83l2.5-2.5a2 2 0 012.83 0 .75.75 0 001.06-1.06 3.5 3.5 0 00-4.95 0l-2.5 2.5a3.5 3.5 0 004.95 4.95l1.25-1.25a.75.75 0 00-1.06-1.06l-1.25 1.25a2 2 0 01-2.83 0z'></path></svg></a> 解决方案
</h1>
<p>“解析”emmmm……parse不错正好有这么一个action好的那就这样搞吧</p>
<h1 id="code">
<a href="#code"><svg class='octicon' viewBox='0 0 16 16' version='1.1' width='16' height='32' aria-hidden='true'><path fill-rule='evenodd' d='M7.775 3.275a.75.75 0 001.06 1.06l1.25-1.25a2 2 0 112.83 2.83l-2.5 2.5a2 2 0 01-2.83 0 .75.75 0 00-1.06 1.06 3.5 3.5 0 004.95 0l2.5-2.5a3.5 3.5 0 00-4.95-4.95l-1.25 1.25zm-4.69 9.64a2 2 0 010-2.83l2.5-2.5a2 2 0 012.83 0 .75.75 0 001.06-1.06 3.5 3.5 0 00-4.95 0l-2.5 2.5a3.5 3.5 0 004.95 4.95l1.25-1.25a.75.75 0 00-1.06-1.06l-1.25 1.25a2 2 0 01-2.83 0z'></path></svg></a> Code
</h1>
<div class="language-php highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="cp">&lt;?php</span>
<span class="nb">set_time_limit</span><span class="p">(</span><span class="mi">0</span><span class="p">);</span>
<span class="nb">ignore_user_abort</span><span class="p">();</span>
<span class="nv">$list</span> <span class="o">=</span> <span class="k">array</span><span class="p">(</span><span class="s2">"图片数组"</span><span class="p">);</span>
<span class="nv">$arrlength</span><span class="o">=</span><span class="nb">count</span><span class="p">(</span><span class="nv">$list</span><span class="p">);</span>
<span class="k">for</span><span class="p">(</span><span class="nv">$x</span><span class="o">=</span><span class="mi">0</span><span class="p">;</span><span class="nv">$x</span><span class="o">&lt;</span><span class="nv">$arrlength</span><span class="p">;</span><span class="nv">$x</span><span class="o">++</span><span class="p">)</span> <span class="p">{</span>
<span class="nv">$tmp</span> <span class="o">=</span> <span class="nb">json_decode</span><span class="p">(</span><span class="nb">file_get_contents</span><span class="p">(</span><span class="s2">"https://MediaWiki的地址/api.php?action=parse&amp;text=[[File:"</span><span class="mf">.</span><span class="nv">$list</span><span class="p">[</span><span class="nv">$x</span><span class="p">]</span><span class="mf">.</span><span class="s2">"]]&amp;contentmodel=wikitext&amp;formatversion=2&amp;format=json"</span><span class="p">),</span><span class="kc">true</span><span class="p">);</span>
<span class="nv">$preg</span><span class="o">=</span><span class="s1">'/src="(.*?)"/is'</span><span class="p">;</span>
<span class="nb">preg_match</span><span class="p">(</span><span class="nv">$preg</span><span class="p">,</span><span class="nv">$tmp</span><span class="p">[</span><span class="n">parse</span><span class="p">][</span><span class="n">text</span><span class="p">],</span><span class="nv">$match</span><span class="p">);</span>
<span class="nv">$tt</span><span class="o">=</span><span class="nv">$tt</span><span class="mf">.</span><span class="s2">"
"</span><span class="mf">.</span><span class="nv">$match</span><span class="p">[</span><span class="mi">1</span><span class="p">];</span>
<span class="p">}</span>
<span class="nv">$markout</span> <span class="o">=</span> <span class="nb">fopen</span><span class="p">(</span><span class="s2">"List.txt"</span><span class="p">,</span> <span class="s2">"w"</span><span class="p">)</span> <span class="k">or</span> <span class="k">die</span><span class="p">(</span><span class="s2">"Unable to open file!"</span><span class="p">);</span>
<span class="nb">fwrite</span><span class="p">(</span><span class="nv">$markout</span><span class="p">,</span> <span class="nv">$tt</span><span class="p">);</span>
<span class="nb">fclose</span><span class="p">(</span><span class="nv">$markout</span><span class="p">);</span>
<span class="k">die</span><span class="p">(</span><span class="s2">"Finish"</span><span class="p">);</span>
<span class="cp">?&gt;</span>
</code></pre></div></div>
<h1 id="ps">
<a href="#ps"><svg class='octicon' viewBox='0 0 16 16' version='1.1' width='16' height='32' aria-hidden='true'><path fill-rule='evenodd' d='M7.775 3.275a.75.75 0 001.06 1.06l1.25-1.25a2 2 0 112.83 2.83l-2.5 2.5a2 2 0 01-2.83 0 .75.75 0 00-1.06 1.06 3.5 3.5 0 004.95 0l2.5-2.5a3.5 3.5 0 00-4.95-4.95l-1.25 1.25zm-4.69 9.64a2 2 0 010-2.83l2.5-2.5a2 2 0 012.83 0 .75.75 0 001.06-1.06 3.5 3.5 0 00-4.95 0l-2.5 2.5a3.5 3.5 0 004.95 4.95l1.25-1.25a.75.75 0 00-1.06-1.06l-1.25 1.25a2 2 0 01-2.83 0z'></path></svg></a> P.S.
</h1>
<p>如果需要获取该Wiki的所有图片可以从<code class="language-plaintext highlighter-rouge">api.php?action=query&amp;list=allimages</code>这里获取。</p></main>
<small style="display: block">tags: <a rel="category tag" class="p-category" href="/search.html?keyword=PHP"><em>PHP</em></a> - <a rel="category tag" class="p-category" href="/search.html?keyword=Mediawiki"><em>Mediawiki</em></a> - <a rel="category tag" class="p-category" href="/search.html?keyword=%E5%9B%BE%E7%89%87"><em>图片</em></a> <span style="float: right;"><a href="https://gitlab.com/mayx/mayx.gitlab.io/tree/master/_posts/2019-05-27-wikipic.md">查看原始文件</a></span></small>
<h4 style="border-bottom: 1px solid #e5e5e5;margin: 2em 0 5px;">推荐文章</h4>
<p id="suggest-container">Loading...</p>
<script>
var suggest = $("#suggest-container");
$.get(BlogAPI + "/suggest?id=/2019/05/27/wikipic.html&update=" + lastUpdated.valueOf(), function (data) {
if (data.length) {
getSearchJSON(function (search) {
suggest.empty();
var searchMap = {};
for (var i = 0; i < search.length; i++) {
searchMap[search[i].url] = search[i];
}
var tooltip = $('<div class="content-tooltip"></div>').appendTo('body').hide();
for (var j = 0; j < data.length; j++) {
var item = searchMap[data[j].id];
if (item) {
var link = $('<a href="' + item.url + '">' + item.title + '</a>');
var contentPreview = item.content.substring(0, 100);
if (item.content.length > 100) {
contentPreview += "……";
}
link.hover(
function(e) {
tooltip.text($(this).data('content'))
.css({
top: e.pageY + 10,
left: e.pageX + 10
})
.show();
},
function() {
tooltip.hide();
}
).mousemove(function(e) {
tooltip.css({
top: e.pageY + 10,
left: e.pageX + 10
});
}).data('content', contentPreview);
suggest.append(link);
suggest.append(' - ' + item.date + '<br />');
}
}
});
} else {
suggest.html("暂无推荐文章……");
}
});
</script>
<br />
<div class="pagination">
<span class="prev">
<a href="/2019/05/12/baidu.html">
上一篇如何不使用百度App打开搜索结果
</a>
</span>
<br />
<span class="next">
<a href="/2019/05/30/exam.html">
下一篇:高考即将来临
</a>
</span>
</div>
<!--[if !IE]> -->
<link rel="stylesheet" href="/assets/css/gitalk.css">
<script src="/assets/js/gitalk.min.js"></script>
<div id="gitalk-container"></div>
<script>
var gitalk = new Gitalk({
clientID: '36557aec4c3cb04f7ac6',
clientSecret: 'ac32993299751cb5a9ba81cf2b171cca65879cdb',
repo: 'mabbs.github.io',
owner: 'Mabbs',
admin: ['Mabbs'],
id: '/2019/05/27/wikipic', // Ensure uniqueness and length less than 50
distractionFreeMode: false, // Facebook-like distraction free mode
proxy: "https://cors-anywhere.mayx.eu.org/?https://github.com/login/oauth/access_token"
})
gitalk.render('gitalk-container')
</script>
<!-- <![endif]-->
</section>
<!--[if !IE]> -->
<div id="landlord" style="left:5px;bottom:0px;">
<div class="message" style="opacity:0"></div>
<canvas id="live2d" width="500" height="560" class="live2d"></canvas>
<div class="live_talk_input_body">
<form id="live_talk_input_form">
<div class="live_talk_input_name_body" >
<input type="checkbox" id="load_this" />
<input type="hidden" id="post_id" value="/2019/05/27/wikipic.html" />
<label for="load_this">
<span style="font-size: 11px; color: #fff;">&#160;想问这篇文章</span>
</label>
</div>
<div class="live_talk_input_text_body">
<input name="talk" type="text" class="live_talk_talk white_input" id="AIuserText" autocomplete="off" placeholder="要和我聊什么呀?" />
<button type="submit" class="live_talk_send_btn" id="talk_send">发送</button>
</div>
</form>
</div>
<input name="live_talk" id="live_talk" value="1" type="hidden" />
<div class="live_ico_box" style="display:none;">
<div class="live_ico_item type_info" id="showInfoBtn"></div>
<div class="live_ico_item type_talk" id="showTalkBtn"></div>
<div class="live_ico_item type_music" id="musicButton"></div>
<div class="live_ico_item type_youdu" id="youduButton"></div>
<div class="live_ico_item type_quit" id="hideButton"></div>
<input name="live_statu_val" id="live_statu_val" value="0" type="hidden" />
<audio src="" style="display:none;" id="live2d_bgm" data-bgm="0" preload="none"></audio>
<input id="duType" value="douqilai" type="hidden" />
</div>
</div>
<div id="open_live2d">召唤伊斯特瓦尔</div>
<!-- <![endif]-->
<footer>
<p>
<small>Made with ❤ by Mayx<br />Last updated at 2026-01-01 00:00:18<br /> 总字数614622 - 文章数178 - <a href="/atom.xml" >Atom</a> - <a href="/README.html" >About</a></small>
</p>
</footer>
</div>
<script src="/assets/js/scale.fix.js"></script>
<!--[if !IE]> -->
<script src="/assets/js/main_new.js"></script>
<script src="/Live2dHistoire/live2d/js/live2d.js"></script>
<script src="/Live2dHistoire/live2d/js/message.js"></script>
<!-- <![endif]-->
</body>
</html>