<!-- Mirror of https://codeberg.org/mayx/pages · synced 2026-01-01 13:13:41 +08:00 · 324 lines · 18 KiB · HTML -->
<!DOCTYPE html>
|
||
<html lang="zh-CN">
|
||
<head>
|
||
<meta charset="UTF-8" />
|
||
<meta http-equiv="X-UA-Compatible" content="IE=edge" />
|
||
<meta name="viewport" content="width=device-width, initial-scale=1" />
|
||
|
||
<!-- Begin Jekyll SEO tag v2.8.0 -->
|
||
<title>使用PHP批量下载Mediawiki站点的图片 | Mayx的博客</title>
|
||
<meta name="generator" content="Jekyll v3.9.5" />
|
||
<meta property="og:title" content="使用PHP批量下载Mediawiki站点的图片" />
|
||
<meta name="author" content="mayx" />
|
||
<meta property="og:locale" content="zh_CN" />
|
||
<meta name="description" content="又是万能的PHP!不过还是Mediawiki API的功劳" />
|
||
<meta property="og:description" content="又是万能的PHP!不过还是Mediawiki API的功劳" />
|
||
<meta property="og:site_name" content="Mayx的博客" />
|
||
<meta property="og:type" content="article" />
|
||
<meta property="article:published_time" content="2019-05-27T00:00:00+08:00" />
|
||
<meta name="twitter:card" content="summary" />
|
||
<meta property="twitter:title" content="使用PHP批量下载Mediawiki站点的图片" />
|
||
<meta name="google-site-verification" content="huTYdEesm8NaFymixMNqflyCp6Jfvd615j5Wq1i2PHc" />
|
||
<meta name="msvalidate.01" content="0ADFCE64B3557DC4DC5F2DC224C5FDDD" />
|
||
<meta name="yandex-verification" content="fc0e535abed800be" />
|
||
<script type="application/ld+json">
|
||
{"@context":"https://schema.org","@type":"BlogPosting","author":{"@type":"Person","name":"mayx"},"dateModified":"2019-05-27T00:00:00+08:00","datePublished":"2019-05-27T00:00:00+08:00","description":"又是万能的PHP!不过还是Mediawiki API的功劳","headline":"使用PHP批量下载Mediawiki站点的图片","mainEntityOfPage":{"@type":"WebPage","@id":"/2019/05/27/wikipic.html"},"publisher":{"@type":"Organization","logo":{"@type":"ImageObject","url":"https://avatars0.githubusercontent.com/u/17966333"},"name":"mayx"},"url":"/2019/05/27/wikipic.html"}</script>
|
||
<!-- End Jekyll SEO tag -->
|
||
|
||
<link rel="canonical" href="https://mabbs.github.io/2019/05/27/wikipic.html" />
|
||
<link type="application/atom+xml" rel="alternate" href="/atom.xml" title="Mayx的博客" />
|
||
<link rel="alternate" type="application/rss+xml" title="Mayx的博客(RSS)" href="/rss.xml" />
|
||
<link rel="alternate" type="application/json" title="Mayx的博客(JSON Feed)" href="/feed.json" />
|
||
<link rel="stylesheet" href="/assets/css/style.css?v=1767196818" />
|
||
<!--[if !IE]> -->
|
||
<link rel="stylesheet" href="/Live2dHistoire/live2d/css/live2d.css" />
|
||
<!-- <![endif]-->
|
||
<link rel="search" type="application/opensearchdescription+xml" href="/opensearch.xml" title="Mayx的博客" />
|
||
<link rel="webmention" href="https://webmention.io/mabbs.github.io/webmention" />
|
||
<link rel="pingback" href="https://webmention.io/mabbs.github.io/xmlrpc" />
|
||
<link rel="preconnect" href="https://summary.mayx.eu.org" crossorigin="anonymous" />
|
||
<link rel="prefetch" href="https://www.blogsclub.org/badge/mabbs.github.io" as="image" />
|
||
<link rel="blogroll" type="text/xml" href="/blogroll.opml" />
|
||
<link rel="me" href="https://github.com/Mabbs" />
|
||
<script src="/assets/js/jquery.min.js"></script>
|
||
<!--[if lt IE 9]>
|
||
<script src="//cdnjs.cloudflare.com/ajax/libs/html5shiv/3.7.3/html5shiv.min.js"></script>
|
||
<script src="//cdnjs.cloudflare.com/ajax/libs/jquery-ajaxtransport-xdomainrequest/1.0.3/jquery.xdomainrequest.min.js"></script>
|
||
<script src="//cdnjs.cloudflare.com/ajax/libs/respond.js/1.4.2/respond.min.js"></script>
|
||
<![endif]-->
|
||
<script>
  // Page-wide settings, declared at the top level of a classic script so they
  // are globals. Both are read by the inline recommended-posts script further
  // down this page when it builds its request URL.
  var lastUpdated = new Date("Thu, 01 Jan 2026 00:00:18 +0800"),
      BlogAPI = "https://summary.mayx.eu.org";
</script>
|
||
<script src="/assets/js/main.js"></script>
|
||
<!--[if !IE]> -->
|
||
|
||
<!-- Global site tag (gtag.js) - Google Analytics -->
|
||
<script async="async" src="https://www.googletagmanager.com/gtag/js?id=UA-137710294-1"></script>
|
||
<script>
  // Google Analytics bootstrap: keep an existing window.dataLayer queue if
  // one is already defined, otherwise start with an empty array.
  window.dataLayer = window.dataLayer || [];

  // Pushes the call's `arguments` object itself onto the dataLayer queue.
  function gtag() {
    dataLayer.push(arguments);
  }

  gtag('js', new Date());
  gtag('config', 'UA-137710294-1');
</script>
|
||
|
||
<script src="/assets/js/instant.page.js" type="module"></script>
|
||
<!-- <![endif]-->
|
||
</head>
|
||
|
||
<body>
|
||
<!--[if !IE]> --><noscript><marquee style="top: -15px; position: relative;"><small>发现当前浏览器没有启用JavaScript,这不影响你的浏览,但可能会有一些功能无法使用……</small></marquee></noscript><!-- <![endif]-->
|
||
<!--[if IE]><marquee style="top: -15px; position: relative;"><small>发现当前浏览器为Internet Explorer,这不影响你的浏览,但可能会有一些功能无法使用……</small></marquee><![endif]-->
|
||
<div class="wrapper">
|
||
<header class="h-card">
|
||
<h1><a class="u-url u-uid p-name" rel="me" href="/">Mayx的博客</a></h1>
|
||
|
||
|
||
<img src="https://avatars0.githubusercontent.com/u/17966333" fetchpriority="high" class="u-photo" alt="Logo" style="width: 90%; max-width: 300px; max-height: 300px;" />
|
||
|
||
|
||
<p class="p-note">Mayx's Home Page</p>
|
||
|
||
<!-- Site search: submits the keyword to /search.html as a GET query string.
     The text field has no visible <label>, so aria-label supplies its
     accessible name (a placeholder alone is not a reliable label). -->
<form action="/search.html" method="get" role="search">
  <input type="text" name="keyword" id="search-input-all" placeholder="Search blog posts.." aria-label="Search blog posts" />
  <input type="submit" value="搜索" />
</form>
|
||
<br />
|
||
|
||
|
||
|
||
|
||
|
||
<p class="view"><a class="u-url" href="/Mabbs/">About me</a></p>
|
||
|
||
<ul class="downloads">
|
||
|
||
<li style="width: 270px; border-right: none;"><a href="/MayxBlog.tgz">Download <strong>TGZ File</strong></a></li>
|
||
|
||
</ul>
|
||
</header>
|
||
<section class="h-entry">
|
||
|
||
<small><time class="date dt-published" datetime="2019-05-27T00:00:00+08:00">27 May 2019</time> - 字数统计:828 - 阅读大约需要3分钟 - Hits: <span id="/2019/05/27/wikipic.html" class="visitors">Loading...</span></small>
|
||
<h1 class="p-name">使用PHP批量下载Mediawiki站点的图片</h1>
|
||
|
||
<!-- Author byline; the profile link uses an explicit https: scheme instead of a protocol-relative URL. -->
<p class="view">by <a class="p-author h-card" href="https://github.com/Mabbs">mayx</a></p>
|
||
<div id="outdate" style="display:none;">
|
||
<hr /><p>
|
||
这是一篇创建于 <span id="outime"></span> 天前的文章,其中的信息可能已经有所发展或是发生改变。
|
||
</p>
|
||
</div>
|
||
<script>
  // Reveal the "this post may be outdated" notice (#outdate) and fill in the
  // post's age in whole days (#outime) when the post is more than 90 days old.
  // `daysold` is declared with `var` so it is no longer created as an implicit
  // global by plain assignment (which throws in strict mode); at the top level
  // of a classic script it remains reachable as a global.
  var daysold = Math.floor((new Date().getTime() - new Date("Mon, 27 May 2019 00:00:00 +0800").getTime()) / (24 * 60 * 60 * 1000));
  if (daysold > 90) {
    document.getElementById("outdate").style.display = "block";
    document.getElementById("outime").innerHTML = daysold;
  }
</script>
|
||
|
||
<hr />
|
||
|
||
<b>AI摘要</b>
|
||
<p id="ai-output">这篇文章讲述了作者使用PHP处理 Mediawiki 站点图片下载问题的方法。通过解析页面内容,利用 `action=parse` 的 API 接口获取图片标签中的 URL,作者创建了一个循环来遍历图片数组,使用正则表达式解析出图片地址,并将它们写入到 "List.txt" 文件中。作为备份策略,作者还提到可以从 `action=query&list=allimages` 接口获取整个Wiki站的所有图片。</p>
|
||
|
||
<hr />
|
||
|
||
|
||
|
||
<ul><li><a href="#解决方案">解决方案</a></li><li><a href="#code">Code</a></li><li><a href="#ps">P.S.</a></li></ul>
|
||
<hr />
|
||
|
||
|
||
<main class="post-content e-content" role="main"><p>又是万能的PHP!不过还是Mediawiki API的功劳<!--more--> </p><p>
|
||
最近我为了备份一下<a href="http://zh.moegirl.org/">某个Wiki站</a>(Ta们把R18名字空间的东西删的一干二净 <del>后来才知道原来转移到了<a href="https://www.hmoegirl.com/">一个Wiki上</a>,真的是好久没关注了</del> ),然后学习了一下Mediawiki API来下载整个Wiki(Ta们把站点导出也给弄没了QAQ) </p><p>
|
||
文本很好下载,但是Mediawiki的图片我不知道存在哪里,API文档翻烂了也没找到把图片解析成地址的API,那怎么办呢?</p>
|
||
<h1 id="解决方案">
|
||
|
||
|
||
<a href="#解决方案"><svg class='octicon' viewBox='0 0 16 16' version='1.1' width='16' height='32' aria-hidden='true'><path fill-rule='evenodd' d='M7.775 3.275a.75.75 0 001.06 1.06l1.25-1.25a2 2 0 112.83 2.83l-2.5 2.5a2 2 0 01-2.83 0 .75.75 0 00-1.06 1.06 3.5 3.5 0 004.95 0l2.5-2.5a3.5 3.5 0 00-4.95-4.95l-1.25 1.25zm-4.69 9.64a2 2 0 010-2.83l2.5-2.5a2 2 0 012.83 0 .75.75 0 001.06-1.06 3.5 3.5 0 00-4.95 0l-2.5 2.5a3.5 3.5 0 004.95 4.95l1.25-1.25a.75.75 0 00-1.06-1.06l-1.25 1.25a2 2 0 01-2.83 0z'></path></svg></a> 解决方案
|
||
|
||
|
||
</h1>
|
||
|
||
<p>“解析”?emmmm……parse?不错,正好有这么一个action,好的,那就这样搞吧!</p>
|
||
<h1 id="code">
|
||
|
||
|
||
<a href="#code"><svg class='octicon' viewBox='0 0 16 16' version='1.1' width='16' height='32' aria-hidden='true'><path fill-rule='evenodd' d='M7.775 3.275a.75.75 0 001.06 1.06l1.25-1.25a2 2 0 112.83 2.83l-2.5 2.5a2 2 0 01-2.83 0 .75.75 0 00-1.06 1.06 3.5 3.5 0 004.95 0l2.5-2.5a3.5 3.5 0 00-4.95-4.95l-1.25 1.25zm-4.69 9.64a2 2 0 010-2.83l2.5-2.5a2 2 0 012.83 0 .75.75 0 001.06-1.06 3.5 3.5 0 00-4.95 0l-2.5 2.5a3.5 3.5 0 004.95 4.95l1.25-1.25a.75.75 0 00-1.06-1.06l-1.25 1.25a2 2 0 01-2.83 0z'></path></svg></a> Code
|
||
|
||
|
||
</h1>
|
||
|
||
<!-- Highlighted PHP sample from the article. Markup-significant characters in
     the sample (the PHP open/close tags, the less-than operator, and the
     ampersands in the API URL) are written as character references so the
     browser displays them as text instead of parsing them as markup. -->
<div class="language-php highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="cp">&lt;?php</span>
<span class="nb">set_time_limit</span><span class="p">(</span><span class="mi">0</span><span class="p">);</span>
<span class="nb">ignore_user_abort</span><span class="p">();</span>
<span class="nv">$list</span> <span class="o">=</span> <span class="k">array</span><span class="p">(</span><span class="s2">"图片数组"</span><span class="p">);</span>

<span class="nv">$arrlength</span><span class="o">=</span><span class="nb">count</span><span class="p">(</span><span class="nv">$list</span><span class="p">);</span>
<span class="k">for</span><span class="p">(</span><span class="nv">$x</span><span class="o">=</span><span class="mi">0</span><span class="p">;</span><span class="nv">$x</span><span class="o">&lt;</span><span class="nv">$arrlength</span><span class="p">;</span><span class="nv">$x</span><span class="o">++</span><span class="p">)</span> <span class="p">{</span>
<span class="nv">$tmp</span> <span class="o">=</span> <span class="nb">json_decode</span><span class="p">(</span><span class="nb">file_get_contents</span><span class="p">(</span><span class="s2">"https://MediaWiki的地址/api.php?action=parse&amp;text=[[File:"</span><span class="mf">.</span><span class="nv">$list</span><span class="p">[</span><span class="nv">$x</span><span class="p">]</span><span class="mf">.</span><span class="s2">"]]&amp;contentmodel=wikitext&amp;formatversion=2&amp;format=json"</span><span class="p">),</span><span class="kc">true</span><span class="p">);</span>
<span class="nv">$preg</span><span class="o">=</span><span class="s1">'/src="(.*?)"/is'</span><span class="p">;</span>
<span class="nb">preg_match</span><span class="p">(</span><span class="nv">$preg</span><span class="p">,</span><span class="nv">$tmp</span><span class="p">[</span><span class="n">parse</span><span class="p">][</span><span class="n">text</span><span class="p">],</span><span class="nv">$match</span><span class="p">);</span>
<span class="nv">$tt</span><span class="o">=</span><span class="nv">$tt</span><span class="mf">.</span><span class="s2">"
"</span><span class="mf">.</span><span class="nv">$match</span><span class="p">[</span><span class="mi">1</span><span class="p">];</span>
<span class="p">}</span>
<span class="nv">$markout</span> <span class="o">=</span> <span class="nb">fopen</span><span class="p">(</span><span class="s2">"List.txt"</span><span class="p">,</span> <span class="s2">"w"</span><span class="p">)</span> <span class="k">or</span> <span class="k">die</span><span class="p">(</span><span class="s2">"Unable to open file!"</span><span class="p">);</span>
<span class="nb">fwrite</span><span class="p">(</span><span class="nv">$markout</span><span class="p">,</span> <span class="nv">$tt</span><span class="p">);</span>
<span class="nb">fclose</span><span class="p">(</span><span class="nv">$markout</span><span class="p">);</span>
<span class="k">die</span><span class="p">(</span><span class="s2">"Finish"</span><span class="p">);</span>
<span class="cp">?&gt;</span>
</code></pre></div></div>
|
||
<h1 id="ps">
|
||
|
||
|
||
<a href="#ps"><svg class='octicon' viewBox='0 0 16 16' version='1.1' width='16' height='32' aria-hidden='true'><path fill-rule='evenodd' d='M7.775 3.275a.75.75 0 001.06 1.06l1.25-1.25a2 2 0 112.83 2.83l-2.5 2.5a2 2 0 01-2.83 0 .75.75 0 00-1.06 1.06 3.5 3.5 0 004.95 0l2.5-2.5a3.5 3.5 0 00-4.95-4.95l-1.25 1.25zm-4.69 9.64a2 2 0 010-2.83l2.5-2.5a2 2 0 012.83 0 .75.75 0 001.06-1.06 3.5 3.5 0 00-4.95 0l-2.5 2.5a3.5 3.5 0 004.95 4.95l1.25-1.25a.75.75 0 00-1.06-1.06l-1.25 1.25a2 2 0 01-2.83 0z'></path></svg></a> P.S.
|
||
|
||
|
||
</h1>
|
||
|
||
<p>如果需要获取该Wiki的所有图片,可以从<code class="language-plaintext highlighter-rouge">api.php?action=query&list=allimages</code>这里获取。</p></main>
|
||
|
||
|
||
<small style="display: block">tags: <a rel="category tag" class="p-category" href="/search.html?keyword=PHP"><em>PHP</em></a> - <a rel="category tag" class="p-category" href="/search.html?keyword=Mediawiki"><em>Mediawiki</em></a> - <a rel="category tag" class="p-category" href="/search.html?keyword=%E5%9B%BE%E7%89%87"><em>图片</em></a> <span style="float: right;"><a href="https://gitlab.com/mayx/mayx.gitlab.io/tree/master/_posts/2019-05-27-wikipic.md">查看原始文件</a></span></small>
|
||
|
||
|
||
<h4 style="border-bottom: 1px solid #e5e5e5;margin: 2em 0 5px;">推荐文章</h4>
|
||
<p id="suggest-container">Loading...</p>
|
||
<script>
  // Recommended posts: request suggestions for this post from the BlogAPI
  // "/suggest" endpoint, then render every suggestion that is also present in
  // the site's search index as a link followed by its date. Hovering a link
  // shows a shared tooltip with a short preview of that post's content.
  var suggest = $("#suggest-container");
  $.get(BlogAPI + "/suggest?id=/2019/05/27/wikipic.html&update=" + lastUpdated.valueOf(), function (data) {
    // Nothing suggested: replace the placeholder text and stop.
    if (!data.length) {
      suggest.html("暂无推荐文章……");
      return;
    }

    getSearchJSON(function (search) {
      suggest.empty();

      // Index the search entries by URL so each suggestion id is one lookup.
      var postsByUrl = {};
      for (var s = 0; s < search.length; s++) {
        var entry = search[s];
        postsByUrl[entry.url] = entry;
      }

      // A single tooltip element shared by all suggestion links.
      var tooltip = $('<div class="content-tooltip"></div>').appendTo('body').hide();

      // Moves the tooltip to the pointer's page position, offset by 10.
      function placeAt(evt) {
        return tooltip.css({
          top: evt.pageY + 10,
          left: evt.pageX + 10
        });
      }

      // Hover-in: load this link's stored preview, position, then show.
      function showPreview(evt) {
        tooltip.text($(this).data('content'));
        placeAt(evt).show();
      }

      // Hover-out: hide the shared tooltip.
      function hidePreview() {
        tooltip.hide();
      }

      for (var k = 0; k < data.length; k++) {
        var post = postsByUrl[data[k].id];
        if (!post) {
          continue; // suggestion has no matching entry in the search index
        }

        // Preview is the first 100 characters, with an ellipsis when truncated.
        var preview = post.content.substring(0, 100);
        if (post.content.length > 100) {
          preview += "……";
        }

        var link = $('<a href="' + post.url + '">' + post.title + '</a>');
        link.data('content', preview);
        link.hover(showPreview, hidePreview).mousemove(function (evt) {
          placeAt(evt);
        });

        suggest.append(link);
        suggest.append(' - ' + post.date + '<br />');
      }
    });
  });
</script>
|
||
|
||
<br />
|
||
<div class="pagination">
|
||
|
||
<span class="prev">
|
||
<a href="/2019/05/12/baidu.html">
|
||
上一篇:如何不使用百度App打开搜索结果?
|
||
</a>
|
||
</span>
|
||
|
||
<br />
|
||
|
||
<span class="next">
|
||
<a href="/2019/05/30/exam.html">
|
||
下一篇:高考即将来临
|
||
</a>
|
||
</span>
|
||
|
||
</div>
|
||
|
||
<!--[if !IE]> -->
|
||
<link rel="stylesheet" href="/assets/css/gitalk.css">
|
||
<script src="/assets/js/gitalk.min.js"></script>
|
||
|
||
<div id="gitalk-container"></div>
|
||
|
||
<script>
  // Create the Gitalk comment widget for this post and mount it into the
  // #gitalk-container element.
  // NOTE(review): clientSecret is delivered to every visitor in the page
  // source — confirm this is the intended Gitalk setup and that the OAuth
  // app's permissions are acceptable for a publicly visible secret.
  var gitalk = new Gitalk({
    clientID: '36557aec4c3cb04f7ac6',
    clientSecret: 'ac32993299751cb5a9ba81cf2b171cca65879cdb',
    repo: 'mabbs.github.io',
    owner: 'Mabbs',
    admin: ['Mabbs'],
    id: '/2019/05/27/wikipic', // Ensure uniqueness and length less than 50
    distractionFreeMode: false, // Facebook-like distraction free mode
    proxy: "https://cors-anywhere.mayx.eu.org/?https://github.com/login/oauth/access_token"
  });
  gitalk.render('gitalk-container');
</script>
|
||
<!-- <![endif]-->
|
||
|
||
</section>
|
||
<!--[if !IE]> -->
|
||
<div id="landlord" style="left:5px;bottom:0px;">
|
||
<div class="message" style="opacity:0"></div>
|
||
<canvas id="live2d" width="500" height="560" class="live2d"></canvas>
|
||
<div class="live_talk_input_body">
|
||
<form id="live_talk_input_form">
|
||
<div class="live_talk_input_name_body" >
|
||
<input type="checkbox" id="load_this" />
|
||
<input type="hidden" id="post_id" value="/2019/05/27/wikipic.html" />
|
||
<label for="load_this">
|
||
<span style="font-size: 11px; color: #fff;"> 想问这篇文章</span>
|
||
</label>
|
||
</div>
|
||
<!-- Chat input row of the Live2D widget. The text field has no <label>, so
     aria-label gives it an accessible name; the placeholder is kept as the
     visual hint. -->
<div class="live_talk_input_text_body">
  <input name="talk" type="text" class="live_talk_talk white_input" id="AIuserText" autocomplete="off" placeholder="要和我聊什么呀?" aria-label="要和我聊什么呀?" />
  <button type="submit" class="live_talk_send_btn" id="talk_send">发送</button>
</div>
|
||
</form>
|
||
</div>
|
||
<input name="live_talk" id="live_talk" value="1" type="hidden" />
|
||
<div class="live_ico_box" style="display:none;">
|
||
<div class="live_ico_item type_info" id="showInfoBtn"></div>
|
||
<div class="live_ico_item type_talk" id="showTalkBtn"></div>
|
||
<div class="live_ico_item type_music" id="musicButton"></div>
|
||
<div class="live_ico_item type_youdu" id="youduButton"></div>
|
||
<div class="live_ico_item type_quit" id="hideButton"></div>
|
||
<input name="live_statu_val" id="live_statu_val" value="0" type="hidden" />
|
||
<audio src="" style="display:none;" id="live2d_bgm" data-bgm="0" preload="none"></audio>
|
||
<input id="duType" value="douqilai" type="hidden" />
|
||
</div>
|
||
</div>
|
||
<div id="open_live2d">召唤伊斯特瓦尔</div>
|
||
<!-- <![endif]-->
|
||
<footer>
|
||
<p>
|
||
<small>Made with ❤ by Mayx<br />Last updated at 2026-01-01 00:00:18<br /> 总字数:614622 - 文章数:178 - <a href="/atom.xml" >Atom</a> - <a href="/README.html" >About</a></small>
|
||
</p>
|
||
</footer>
|
||
</div>
|
||
<script src="/assets/js/scale.fix.js"></script>
|
||
<!--[if !IE]> -->
|
||
<script src="/assets/js/main_new.js"></script>
|
||
<script src="/Live2dHistoire/live2d/js/live2d.js"></script>
|
||
<script src="/Live2dHistoire/live2d/js/message.js"></script>
|
||
<!-- <![endif]-->
|
||
</body>
|
||
</html>
|