diff --git a/Tagger/Tagger.jar b/Tagger/Tagger.jar index 02e660c..b7f6f04 100644 Binary files a/Tagger/Tagger.jar and b/Tagger/Tagger.jar differ diff --git a/Tagger/src/PixivManager.java b/Tagger/src/PixivManager.java index a0ac3ce..e8824c8 100644 --- a/Tagger/src/PixivManager.java +++ b/Tagger/src/PixivManager.java @@ -58,131 +58,27 @@ public class PixivManager { } for (String s : imageTag.pixiv_image_list) { - //String url = "https://api.proxycrawl.com/?token=ahDRaxo3KT2OOX2nQZQV9A&url=https://www.pixiv.net/en/artworks/"+s; - String url = "http://45.33.13.215/crawler/crawler/"+s+".html"; - try { - if (!new File("downloadedData/temp"+s+".html").exists()) { - System.out.println("Starting download of "+url+" ..."); - utils.downloadFileFromUrl(url, "downloadedData/temp"+s+".html"); - if (new File("downloadedData/temp"+s+".html").exists()) { - String[] data = utils.readFromFile("downloadedData/temp"+s+".html"); - int scriptEndLine = 0; - while (scriptEndLine")+"///"+cutpos); - if (cutpos")+3)); - System.out.println(data[scriptEndLine].substring(cutpos,data[scriptEndLine].indexOf("}}}'>")+3)); - } - bw.close(); - fw.close(); - } catch (IOException e) { - e.printStackTrace(); - } - JSONObject jsonData = utils.readJsonFromFile("finaltemp"); - //System.out.println(Arrays.deepToString(JSONObject.getNames(jsonData.getJSONObject("preload")))); - //System.out.println(Arrays.deepToString(JSONObject.getNames(jsonData.getJSONObject("preload").getJSONObject("illust")))); - JSONArray tagsArray = jsonData.getJSONObject("illust").getJSONObject(s).getJSONObject("tags").getJSONArray("tags"); - for (int i=0;i0) { - insertedTag = ENTag; - tagSubmitted=true; - } else - if (romaji.length()>0){ - insertedTag = romaji; - tagSubmitted=true; - } - - //insertedTag is the tag that will be used for the image. - insertedTag = ConvertTag(insertedTag.trim().toLowerCase()); - - if (tagSubmitted) { - if (imageTag.tag_whitelist.size()==0 || imageTag.tag_whitelist.containsKey(insertedTag.trim().toLowerCase())) { - if (imageTag.taglist.containsKey(s)) { - List tags = imageTag.taglist.get(s); - tags.add(insertedTag); - imageTag.taglist.put(s, tags); - } else { - List tags = new ArrayList(); - tags.add(insertedTag); - imageTag.taglist.put(s,tags); - } - if (imageTag.tagCounter.containsKey(insertedTag)) { - imageTag.tagCounter.put(insertedTag,imageTag.tagCounter.get(insertedTag)+1); - } else { - imageTag.tagCounter.put(insertedTag,1); - } - } - } - } - String taglist = s+": <"+imageTag.taglist.get(s)+">"; - //System.out.println(taglist); - bwOutput.append(taglist); - bwOutput.newLine(); - //jsonData.getJSONObject("preload").getJSONObject("illust").getJSONObject(s).getJSONObject("tags"); - }/* else { - System.out.println("Skipping image "+s+" because webpage cannot be found."); - utils.logToFile(s+"\n", "skippedItems.txt"); - }*/ - } else { - System.out.println("Skipping image "+s+" because it has already been processed."); - } - } - } catch (IOException e) { - if (e instanceof FileNotFoundException) { - System.out.println("Skipping image "+s+" because webpage cannot be found."); - utils.logToFile(s, "skippedItems.txt"); - } else { + AttemptDownload(bwOutput, s, true); + } + + int retryAttempts=0; + final int MAXATTEMPTS = 3; + while (retryAttempts0) { + List retryList = new ArrayList(); + retryList.addAll(imageTag.pixiv_retry_list); + imageTag.pixiv_retry_list.clear(); + try { + Thread.sleep(10000); + } catch (InterruptedException e) { e.printStackTrace(); } + for (String s : imageTag.pixiv_retry_list) { + System.out.println(" Retry Attempt Number "+(retryAttempts+1)+"..."); + AttemptDownload(bwOutput, s, (retryAttempts")+"///"+cutpos); + if (cutpos")+3)); + System.out.println(data[scriptEndLine].substring(cutpos,data[scriptEndLine].indexOf("}}}'>")+3)); + } + bw.close(); + fw.close(); + } catch (IOException e) { + e.printStackTrace(); + } + JSONObject jsonData = utils.readJsonFromFile("finaltemp"); + //System.out.println(Arrays.deepToString(JSONObject.getNames(jsonData.getJSONObject("preload")))); + //System.out.println(Arrays.deepToString(JSONObject.getNames(jsonData.getJSONObject("preload").getJSONObject("illust")))); + JSONArray tagsArray = jsonData.getJSONObject("illust").getJSONObject(s).getJSONObject("tags").getJSONArray("tags"); + for (int i=0;i0) { + insertedTag = ENTag; + tagSubmitted=true; + } else + if (romaji.length()>0){ + insertedTag = romaji; + tagSubmitted=true; + } + + //insertedTag is the tag that will be used for the image. + insertedTag = ConvertTag(insertedTag.trim().toLowerCase()); + + if (tagSubmitted) { + if (imageTag.tag_whitelist.size()==0 || imageTag.tag_whitelist.containsKey(insertedTag.trim().toLowerCase())) { + if (imageTag.taglist.containsKey(s)) { + List tags = imageTag.taglist.get(s); + tags.add(insertedTag); + imageTag.taglist.put(s, tags); + } else { + List tags = new ArrayList(); + tags.add(insertedTag); + imageTag.taglist.put(s,tags); + } + if (imageTag.tagCounter.containsKey(insertedTag)) { + imageTag.tagCounter.put(insertedTag,imageTag.tagCounter.get(insertedTag)+1); + } else { + imageTag.tagCounter.put(insertedTag,1); + } + } + } + } + String taglist = s+": <"+imageTag.taglist.get(s)+">"; + //System.out.println(taglist); + bwOutput.append(taglist); + bwOutput.newLine(); + //jsonData.getJSONObject("preload").getJSONObject("illust").getJSONObject(s).getJSONObject("tags"); + }/* else { + System.out.println("Skipping image "+s+" because webpage cannot be found."); + utils.logToFile(s+"\n", "skippedItems.txt"); + }*/ + } else { + System.out.println("Skipping image "+s+" because the server couldn't find it. Will retry it later..."); + if (addToRetryListOnFail) { + imageTag.pixiv_retry_list.add(s); + } + //System.out.println("Skipping image "+s+" because it has already been processed."); + } + } + } catch (IOException e) { + if (e instanceof FileNotFoundException) { + System.out.println("Skipping image "+s+" because webpage cannot be found."); + utils.logToFile(s, "skippedItems.txt"); + } else { + e.printStackTrace(); + } + } + /*org.apache.commons.io.FileUtils.copyURLToFile(new URL( + url + ),temp);*/ + } private String ConvertTag(String insertedTag) { diff --git a/Tagger/src/imageTag.java b/Tagger/src/imageTag.java index 0c7ff50..3b7785a 100644 --- a/Tagger/src/imageTag.java +++ b/Tagger/src/imageTag.java @@ -26,6 +26,7 @@ public class imageTag { public static Filters filters; public static HashMap tag_whitelist = new HashMap(); public static List pixiv_image_list = new ArrayList(); + public static List pixiv_retry_list = new ArrayList(); public static List pixiv_rawimage_list = new ArrayList(); public static HashMap> taglist = new HashMap>(); public static HashMap subtaglist = new HashMap();