EsIndex.php 9.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297
  1. <?php
  2. namespace app\controller;
  3. use app\model\DocImportRecord;
  4. use think\facade\Log;
  5. use think\facade\Request;
  6. use think\response\Json;
  7. /**
  8. * Es索引操作类
  9. */
  10. class EsIndex extends CommonEsController
  11. {
  12. /**
  13. * 创建索引
  14. */
  15. public function createIndex()
  16. {
  17. $index = Request::param('index') ?? '';
  18. $type = Request::param('type') ?? '';
  19. if( ! $index || ! $type) {
  20. return $this->_json_error();
  21. }
  22. $params = [
  23. 'index' => $index,
  24. 'type' => $type,
  25. 'body' => []
  26. ];
  27. $client = $this->getEsClient();
  28. return $this->_json_succ(
  29. $client->index($params)
  30. );
  31. }
  32. /**
  33. * 删除索引
  34. */
  35. public function delete():Json
  36. {
  37. $index = Request::param('index') ?? 'document';
  38. if( ! $index) {
  39. return $this->_json_error('参数错误!');
  40. }
  41. $params = ['index' => $index];
  42. $client = $this->getEsClient();
  43. return $this->_json_succ(
  44. $client->indices()->delete($params)
  45. );
  46. }
  47. /**
  48. * 批量添加
  49. */
  50. public function addAlbum()
  51. {
  52. $data = file_get_contents('/Users/qdy0517/Downloads/es-document/es-qikan/zhiwang_list.json');
  53. $contents = explode("\n", $data);
  54. $newArray = [];
  55. foreach($contents as $key => $content)
  56. {
  57. $content = json_decode($contents[$key]);
  58. $newContent = json_decode(json_encode($content) , true);
  59. $newArray[$key]['album'] = $newContent['专辑名称']?? '';
  60. $newArray[$key]['publication_place'] = $newContent['出版地']?? '';
  61. $newArray[$key]['comprehensive_impact_factors'] = $newContent['(2021)综合影响因子']?? '';
  62. $newArray[$key]['composite_impact_factor'] = $newContent['(2021)复合影响因子']?? '';
  63. $newArray[$key]['bj_city_source_journals'] = $newContent['北京大学《中文核心期刊要目总览》来源期刊']?? '';
  64. $newArray[$key]['ISSN'] = $newContent['ISSN']?? '';
  65. $newArray[$key]['format'] = $newContent['开本'] ?? '';
  66. $newArray[$key]['host_unit'] = $newContent['主办单位']?? '';
  67. $newArray[$key]['CN'] = $newContent['CN']?? '';
  68. $newArray[$key]['published_literature_volume'] = $newContent['出版文献量']?? '';
  69. $newArray[$key]['type'] = $newContent['type']?? '';
  70. $newArray[$key]['special_name'] = $newContent['专题名称']?? '';
  71. $newArray[$key]['total_download_times'] = $newContent['总下载次数']?? '';
  72. $newArray[$key]['total_citations_number'] = $newContent['总被引次数']?? '';
  73. $newArray[$key]['url'] = $newContent['url']?? '';
  74. $newArray[$key]['cn_name'] = $newContent['cn_name']?? '';
  75. $newArray[$key]['en_name'] = $newContent['en_name']?? '';
  76. $newArray[$key]['language'] = $newContent['语种']?? '';
  77. $newArray[$key]['publication_cycle'] = $newContent['出版周期']?? '';
  78. $newArray[$key]['postal_distribution_code'] = $newContent['邮发代号']?? '';
  79. $newArray[$key]['first_time'] = $newContent['创刊时间']?? '';
  80. $newArray[$key]['md5'] = $newContent['md5']?? '';
  81. $newArray[$key]['included_in'] = $newContent['该刊被以下数据库收录']?? '';
  82. }
  83. $count = count($newArray);
  84. for ($i = 0; $i < $count; $i++) {
  85. $params['body'][] = [
  86. 'index' => [ #创建
  87. '_index' => 'album_v20221009',
  88. '_type' => 'album',
  89. // '_id' => $i,
  90. ]
  91. ];
  92. $params['body'][]=$newArray[$i];
  93. }
  94. $client = $this->getEsClient();
  95. $result = $client->bulk($params);
  96. return $this->_json_succ($result);
  97. }
  98. /**
  99. * 批量添加
  100. */
  101. public function addDoc()
  102. {
  103. Log::info('info='.__FUNCTION__);
  104. $docRecord = DocImportRecord::where('type' , 'zw')->order('id' , 'desc')->limit(1)->find();
  105. if(empty($docRecord)) {
  106. $fileNumVal = 0;
  107. } else {
  108. $value = $docRecord->value;
  109. $fileNumVal = $value + 1;
  110. }
  111. Log::info('$fileNumVal='.$fileNumVal);
  112. //$fileDir = '/Users/qdy0517/Downloads/es-document/zhiwang'.$fileNumVal.'.json';
  113. $fileDir = 'https://jm.jiankangche.cn/zhiwang02/zhiwang_'.$fileNumVal.'.json';
  114. Log::info('$fileDir1='.$fileDir);
  115. $data = file_get_contents($fileDir);
  116. $contents = explode("\n" , $data);
  117. //$contents = array_slice($contents, 0,50000);
  118. $count = count($contents);
  119. $params['body'] = [];
  120. for ($i = 0; $i < $count; $i++) {
  121. $params['body'][] = [
  122. 'index' => [
  123. '_index' => 'document_v1',
  124. '_type' => 'zhiwang',
  125. ]
  126. ];
  127. $singleDoc = json_decode($contents[$i] , true);
  128. if(isset($singleDoc['references']) && $singleDoc['references']) {
  129. $singleDoc['references'] = array_reduce($singleDoc['references'], 'array_merge', array());
  130. }
  131. if(isset($singleDoc['citationDocument']) && $singleDoc['citationDocument']) {
  132. $singleDoc['citationDocument'] = array_reduce($singleDoc['citationDocument'], 'array_merge', array());
  133. }
  134. json_encode($singleDoc);
  135. $params['body'][]=$singleDoc;
  136. }
  137. $client = $this->getEsClient();
  138. $result = $client->bulk($params);
  139. if($result) {
  140. DocImportRecord::create(
  141. [
  142. 'type' => 'zw' ,
  143. 'value' => $fileNumVal
  144. ]
  145. );
  146. }
  147. return $this->_json_succ($result);
  148. }
  149. /**
  150. * 单条添加
  151. */
  152. public function add($data)
  153. {
  154. $params = [
  155. 'index' => $data['index'] ?? 'document',
  156. 'id' => $data['id'],
  157. 'body' => $data,
  158. 'type' => $data['type'] ?? 'document',
  159. ];
  160. $client = $this->getEsClient();
  161. $result = $client->index($params);
  162. return $result;
  163. }
  164. /**
  165. * 批量更新
  166. */
  167. public function batchUpdate()
  168. {
  169. $fileDir = '/Users/qdy0517/Downloads/zhiwang.json';
  170. $data = file_get_contents($fileDir);
  171. $contents = explode("\n" , $data);
  172. //return count($contents);die; 3249403
  173. //$contents = array_slice($contents, 0,2000000);
  174. foreach($contents as $key => $content) {
  175. $newContent = $contents[$key];
  176. $newContent = str_replace("'", '"', $newContent);
  177. $newData = json_decode($newContent, true);
  178. $newArray[$key]['title'] = $newData['标题'] ?? '';
  179. $newArray[$key]['author'] = $newData['作者'] ?? '';
  180. $newArray[$key]['organization'] = $newData['院校'] ?? '';
  181. $newArray[$key]['journal'] = $newData['杂志'] ?? '';
  182. }
  183. $count = count($newArray);
  184. $params['body'] = [];
  185. for ($i = 0; $i < $count; $i++) {
  186. $params['body'][] = [
  187. 'index' => [
  188. '_index' => 'author_v1',
  189. '_type' => 'zhiwang',
  190. ]
  191. ];
  192. $params['body'][] = $newArray[$i];
  193. }
  194. $client = $this->getEsClient();
  195. $result = $client->bulk($params);
  196. return $this->_json_succ(
  197. $result
  198. );
  199. }
  200. /**
  201. * author_v2
  202. */
  203. public function newAuthorVTwoMultiAdd()
  204. {
  205. $fileDir = '/Users/qdy0517/Downloads/zhiwang.json';
  206. $data = file_get_contents($fileDir);
  207. $contents = explode("\n" , $data);
  208. foreach($contents as $key => $content) {
  209. $newContent = $contents[$key];
  210. $newContent = str_replace("'", '"', $newContent);
  211. $newData = json_decode($newContent, true);
  212. $newArray[$key]['title'] = $newData['标题'] ?? '';
  213. $newArray[$key]['author'] = $newData['作者'] ?? '';
  214. $newArray[$key]['organization'] = $newData['院校'] ?? '';
  215. $newArray[$key]['journal'] = $newData['杂志'] ?? '';
  216. }
  217. /**
  218. * 增加机构数量
  219. */
  220. // foreach ($newArray as $k => $v) {
  221. // $organization = $v['organization'];
  222. // if(stripos($organization , ';') !== false) {
  223. // $organization = str_replace(';' , ',' , $organization);
  224. // }
  225. // $newArray[$k]['org_count'] = count(array_unique(explode(',' , $organization)));
  226. // }
  227. $count = count($newArray);
  228. $params['body'] = [];
  229. for ($i = 0; $i < $count; $i++) {
  230. $params['body'][] = [
  231. 'index' => [
  232. '_index' => 'author_v2',
  233. '_type' => 'zhiwang',
  234. ]
  235. ];
  236. $params['body'][] = $newArray[$i];
  237. }
  238. $client = $this->getEsClient();
  239. $result = $client->bulk($params);
  240. return $this->_json_succ(
  241. $result
  242. );
  243. }
  244. }