- ], + "outputs": [], "source": [ "import pandas as pd\n", "# data=pd.read_csv('/content/drive/MyDrive/consult/Louie_disaster_tweets.csv',header=None)\n", @@ -336,8 +310,8 @@ "metadata": {}, "outputs": [], "source": [ - "from dots_feat import get_data\n", - "dd=get_data(10)\n", + "from dots_feat import get_OS_data\n", + "dd=get_OS_data(10)\n", "dd" ] }, @@ -350,7 +324,7 @@ "os_url=([os.environ['OS_TOKEN']])\n", "n=10\n", "bash_command = f\"\"\"\n", - "curl -X GET \"{os_url}/emergency-management-news/_search?scroll=5m\" -H 'Content-Type: application/json' -d '{{\n", + "curl -X GET \"{os_url}/emergency-management-news/_search?scroll=1m\" -H 'Content-Type: application/json' -d '{{\n", "\"_source\": [\"metadata.GDELT_DATE\", \"metadata.page_title\",\"metadata.DocumentIdentifier\", \"metadata.Organizations\",\"metadata.Persons\",\"metadata.Themes\",\"metadata.text\", \"metadata.Locations\"],\n", " \"size\": {n},\n", " \"query\": {{\n", @@ -426,8 +400,8 @@ "metadata": {}, "outputs": [], "source": [ - "from dots_feat import featurize_stories, process_data, get_data, process_hit\n", - "data = get_data(10)\n", + "from dots_feat import featurize_stories, process_data, get_OS_data, process_hit\n", + "data = get_OS_data(10)\n", "data['hits']['hits']" ] }, @@ -448,7 +422,7 @@ "# parser.add_argument('-e',, default=datetime.datetime.strptime(20231231), help='end date')\n", "args, unknown = parser.parse_known_args()\n", "\n", - "from DOTS.dots_feat import featurize_stories, process_data, get_data, process_hit" + "from DOTS.dots_feat import featurize_stories, process_data, get_OS_data, process_hit" ] }, { @@ -492,7 +466,7 @@ "metadata": {}, "outputs": [], "source": [ - "def get_data(n=args.n):\n", + "def get_OS_data(n=args.n):\n", " bash_command = f\"\"\"\n", " curl -X GET \"{os_url}\" -H 'Content-Type: application/json' -d '{{\n", "\"_source\": [\"metadata.GDELT_DATE\", \"metadata.page_title\",\"metadata.DocumentIdentifier\", 
\"metadata.Organizations\",\"metadata.Persons\",\"metadata.Themes\",\"metadata.text\", \"metadata.Locations\"],\n", @@ -518,7 +492,7 @@ "metadata": {}, "outputs": [], "source": [ - "data = get_data(10)\n", + "data = get_OS_data(10)\n", "data" ] }, @@ -528,7 +502,7 @@ "metadata": {}, "outputs": [], "source": [ - "data = get_data(10)" + "data = get_OS_data(10)" ] }, { @@ -537,7 +511,7 @@ "metadata": {}, "outputs": [], "source": [ - "data = get_data(10)\n", + "data = get_OS_data(10)\n", "hits = data['hits']['hits']\n", "hit = hits[1]\n", "hit['_source']\n", @@ -723,7 +697,7 @@ "metadata": {}, "outputs": [], "source": [ - "data = get_data(n=10)\n", + "data = get_OS_data(n=10)\n", "# data = get_big_data()\n", "# articles = process_data(data)\n", "import concurrent.futures\n", @@ -830,20 +804,14 @@ " theme = source['metadata']['Themes'].rsplit('_')[-1]\n", " title = source['metadata']['page_title']\n", " url = source['metadata']['DocumentIdentifier']\n", - " # with open(\"input/report.csv\", \"a\") as f:\n", - " # writer = csv.writer(f)\n", - " # writer.writerow(\n", - " # [\n", - " # date,loc,title,org,per,theme,url,\n", - " # ]\n", - " # )\n", " output.append([date, loc, title, org, per, theme, url])\n", - "\n", - " pagination_id=response['_scroll_id']\n", - " # print(iii)\n", + " # pagination_id=response['_scroll_id']\n", + " \n", " except:\n", + " # pagination_id=None\n", " pass\n", - " return pagination_id, output" + " return output\n", + " " ] }, { @@ -882,20 +850,17 @@ "cell_type": "code", "execution_count": 4, "metadata": {}, - "outputs": [ - { - "ename": "UnboundLocalError", - "evalue": "cannot access local variable 'pagination_id' where it is not associated with a value", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mUnboundLocalError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[4], line 10\u001b[0m\n\u001b[1;32m 5\u001b[0m 
response \u001b[38;5;241m=\u001b[39m client\u001b[38;5;241m.\u001b[39mscroll(\n\u001b[1;32m 6\u001b[0m scroll\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m5m\u001b[39m\u001b[38;5;124m'\u001b[39m,\n\u001b[1;32m 7\u001b[0m scroll_id\u001b[38;5;241m=\u001b[39mpagination_id\n\u001b[1;32m 8\u001b[0m )\n\u001b[1;32m 9\u001b[0m hits \u001b[38;5;241m=\u001b[39m response[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mhits\u001b[39m\u001b[38;5;124m\"\u001b[39m][\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mhits\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[0;32m---> 10\u001b[0m pagination_id, article \u001b[38;5;241m=\u001b[39m \u001b[43mprocess_response\u001b[49m\u001b[43m(\u001b[49m\u001b[43mresponse\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 11\u001b[0m articles\u001b[38;5;241m.\u001b[39mappend(article)\n\u001b[1;32m 12\u001b[0m \u001b[38;5;66;03m# except:\u001b[39;00m\n\u001b[1;32m 13\u001b[0m \u001b[38;5;66;03m# print(\"A ConnectionTimeout error occurred.\")\u001b[39;00m\n\u001b[1;32m 14\u001b[0m \u001b[38;5;66;03m# pass\u001b[39;00m\n", - "Cell \u001b[0;32mIn[2], line 37\u001b[0m, in \u001b[0;36mprocess_response\u001b[0;34m(response)\u001b[0m\n\u001b[1;32m 35\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m:\n\u001b[1;32m 36\u001b[0m \u001b[38;5;28;01mpass\u001b[39;00m\n\u001b[0;32m---> 37\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mpagination_id\u001b[49m, output\n", - "\u001b[0;31mUnboundLocalError\u001b[0m: cannot access local variable 'pagination_id' where it is not associated with a value" - ] - } - ], + "outputs": [], + "source": [ + "# response['hits']['hits']\n", + "# response[\"_scroll_id\"]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "articles=[]\n", "hits = response[\"hits\"][\"hits\"]\n", @@ -906,7 +871,7 @@ " scroll_id=pagination_id\n", " )\n", " hits = response[\"hits\"][\"hits\"]\n", - " pagination_id, article = + "metadata": {}, + "outputs": [], + 
"source": [ + "# articles = [item for sublist in articles for item in sublist]\n", + "(articles)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "response, client = get_massive_OS_data(1000)\n", + "pagination_id = response[\"_scroll_id\"]\n", + "hits = response[\"hits\"][\"hits\"]\n", + "articles=[]\n", + "pbar = tqdm(total=100)\n", + "while len(hits) != 0:\n", + " # try:\n", + " response = client.scroll(\n", + " scroll='5m',\n", + " scroll_id=pagination_id\n", + " )\n", + " hits = response[\"hits\"][\"hits\"]\n", + " pagination_id, article = process_response(response)\n", + " articles.append(article)\n", + " pbar.update(1)\n", + " # except:\n", + " # print(\"A ConnectionTimeout error occurred.\")\n", + " # pass\n", + "articles = [item for sublist in articles for item in sublist]\n", + "pbar.close()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "articles[13]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## part 5 google" + ] + }, + { + "cell_type": "code", + "execution_count": 74, + "metadata": {}, + "outputs": [], + "source": [ + "from gnews import GNews\n", + "from tqdm import tqdm\n", + "from scrape import get_OS_data, get_massive_OS_data,get_google_news\n", + "from pull import process_hit, process_data, process_response\n", + "from feat import featurize_stories\n", + "google_news = GNews()\n", + "\n", + "# google_news.period = '7d' # News from last 7 days\n", + "google_news.max_results = 10000 # number of responses across a keyword\n", + " = 'United States' # News from a specific country \n", + "google_news.language = 'english' # News in a specific language\n", + "google_news.exclude_websites = ['', ''] # Exclude news from specific website i.e and\n", + "google_news.start_date = (2024, 1, 1) # Search from 1st Jan 2020\n", + "google_news.end_date = (2024, 3, 1) # Search until 1st March 2020\n", + 
"\n" + ] + }, + { + "cell_type": "code", + "execution_count": 135, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 24%|██▍ | 22/92 [00:31<01:22, 1.18s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "An error occurred while fetching the article: Article `download()` failed with 403 Client Error: Forbidden for url: on URL\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 67%|██████▋ | 62/92 [01:38<00:36, 1.20s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "An error occurred while fetching the article: Article `download()` failed with 403 Client Error: Forbidden - Reason: Your User-Agent is banned from this site. for url: on URL\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 77%|███████▋ | 71/92 [02:06<00:40, 1.91s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "An error occurred while fetching the article: Article `download()` failed with 401 Client Error: HTTP Forbidden for url: on URL\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 85%|████████▍ | 78/92 [02:24<00:46, 3.32s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "An error occurred while fetching the article: Article `download()` failed with HTTPSConnectionPool(host='', port=443): Read timed out. (read timeout=7) on URL\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 91%|█████████▏| 84/92 [02:39<00:28, 3.53s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "An error occurred while fetching the article: Article `download()` failed with HTTPSConnectionPool(host='', port=443): Read timed out. 
(read timeout=7) on URL\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 92/92 [02:49<00:00, 1.84s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "An error occurred while fetching the article: Article `download()` failed with 403 Client Error: Forbidden for url: on URL\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n" + ] + } + ], + "source": [ + "json_resp = google_news.get_news('fire')\n", + "article=[]\n", + "\n", + "for i in tqdm(range(len(json_resp)), desc=\"grabbing directly from GoogleNews\"):\n", + "    aa=(google_news.get_full_article(json_resp[i]['url']))\n", + "    # Failed fetches (see the 403/timeout messages in the output) must not abort the loop; fall back to None.\n", + "    # Catch Exception rather than everything so a kernel interrupt still stops the run.\n", + "    try:\n", + "        date=aa.publish_date.strftime(\"%d-%m-%Y\")\n", + "    except Exception:\n", + "        date=None\n", + "    try:\n", + "        title=aa.title\n", + "        text=aa.text\n", + "    except Exception:\n", + "        title=None\n", + "        text=None\n", + "    article.append([title,date,text])\n", + "\n", + "# return article" + ] + }, + { + "cell_type": "code", + "execution_count": 120, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "8" + ] + }, + "execution_count": 120, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(article)" + ] + }, + { + "cell_type": "code", + "execution_count": 122, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "featurizing articles: 100%|██████████| 8/8 [00:11<00:00, 1.42s/it]\n" + ] + } + ], + "source": [ + "# farticles = [item for sublist in article for item in sublist]\n", + "rank_articles=[]\n", + "for i in tqdm(article, desc=\"featurizing articles\"):\n", + "    # Each row is [title, date, text]; slice it instead of splitting str(i) on commas,\n", + "    # which cut titles and datetime reprs at their first comma.\n", + "    foreparts=i[:2] # title and date\n", + "    cc=featurize_stories(str(i), top_k = 3, max_len=512)\n", + "    rank_articles.append([foreparts,cc])" + ] + }, + { + "cell_type": "code", + "execution_count": 123, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + 
"[[[\"['Investigators Determine Fatal Sudbury Fire was Electrical'\", ' None'],\n", + " ['floor', 'smoke alarms', 'chief whalen']],\n", + " [[\"['Catching the Fire Bug'\", ' datetime.datetime(2024'],\n", + " ['gusting winds', 'bigger blazes', 'prairie chickens']],\n", + " [[\"['Wisconsin Experiencing High Fire Danger Statewide'\", ' None'],\n", + " ['temperatures', 'breezy conditions', 'warm temperatures']],\n", + " [[\"['West Monument Creek Fire burns on the U.S. Air Force Academy'\",\n", + " ' datetime.datetime(2024'],\n", + " ['academy', 'families', 'trucks']],\n", + " [[\"['Libraries collections largely unscathed after Feb. 9 fire'\",\n", + " ' datetime.datetime(2024'],\n", + " ['morning', 'machines', 'mak morton']],\n", + " [[\"['Homedics Recalls Massagers Due to Fire and Burn Hazards'\", ' None'],\n", + " ['ww', 'burn hazards', 'date codes']],\n", + " [[\"['State of Delaware News'\", ' datetime.datetime(2024'],\n", + " ['delaware news', 'damage', 'morning']],\n", + " [[\"['OSHA Proposes to Replace Its Existing Fire Brigades Standard With New Comprehensive Emergency Response Standard'\",\n", + " ' None'],\n", + " ['preparedness', 'vehicle preparedness', 'nineteen sections']]]" + ] + }, + "execution_count": 123, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rank_articles" + ] + }, + { + "cell_type": "code", + "execution_count": 124, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[['Investigators Determine Fatal Sudbury Fire was Electrical',\n", + " None,\n", + " 'SUDBURY — The fire last week that claimed one person’s life and injured two others who tried to rescue her was accidental, said Sudbury Fire Chief John Whalen, Sudbury Police Chief Scott Nix, State Fire Marshal Jon M. Davine, and Middlesex County District Attorney Marian T. Ryan.\\n\\nThe Middlesex District Attorney’s office identified the victim as Hema O. 
Shahani, 74.\\n\\n“On behalf of the Sudbury Fire Department and our community, I want to express our deepest condolences to Ms. Shahani’s loved ones,” said Chief Whalen. “They lost a cherished family member and all our thoughts are with them right now.”\\n\\nThe three-alarm fire at 30 Goodman’s Hill Road was reported through a 9-1-1 call shortly after 5:00 am on Feb. 21. Ms. Shahani, who had limited mobility, was unable to escape. A family member and a Sudbury Police officer were transported from the scene for injuries sustained while attempting to rescue her.\\n\\nThe origin and cause of the fire were investigated by the Sudbury Fire Department, Sudbury Police Department, State Police Fire & Explosion Investigation Unit assigned to the State Fire Marshal’s office, State Police assigned to the Middlesex DA’s office, and Fire District 14 investigators.\\n\\nThe investigation revealed that the two-story, single-family home had lost heat on the first floor on the night of Feb. 20 into Feb. 21, and that family members used space heaters to provide heat in some areas. At least one of these space heaters ran overnight. Investigators collectively determined that the most likely cause was a circuit overheating while powering the space heater, causing a fire inside the wall. That fire extended to a closet on the opposite side of the wall and continued to spread from there.\\n\\nFire investigators were assisted by members of the Department of Fire Services’ Code Compliance & Enforcement Unit, who found no working smoke alarms in the first- and second-floor living areas.\\n\\n“Our hearts go out to Ms. Shahani’s family and every family that loses a loved one to fire,” said State Fire Marshal Davine. “This was a tragic loss for them and the community. 
Please, if you do one thing today, check to be sure you have working smoke alarms installed on every level of your home.”\\n\\nSudbury firefighters were assisted at the scene by mutual aid companies from Concord, Framingham, Marlborough, Maynard, Wayland, and Weston. Firefighters from Acton, Lincoln, and Hudson provided station coverage.\\n\\n###'],\n", + " ['Catching the Fire Bug',\n", + " datetime.datetime(2024, 2, 29, 11, 0, tzinfo=tzutc()),\n", + " 'Before I left the city for the prairie, I never thought that I might be a pyromaniac. I’d never started a fire outside a hearth, or thrilled at seeing one burning on the landscape. Then I stood in Nebraska, at dusk, with a crew of wildland firefighters, land managers, and biologists charged with setting fires. Our group had been organized by the local chapter of the conservation organization Pheasants Forever; our job was to burn several thousand acres of prairie in a “prescribed fire,” which would support soil, grasses, and wildlife. Our “burn boss,” a man from California named Dan Kelleher, had a white mustache and luminous blue eyes. He liked to call himself an old hippie, but he was a fire savant. “Play fire scientist,” he told us. How powerfully, timidly, unpredictably, eagerly, or gently did a fire behave and why? What matrix of variables created a fire’s character? We would learn how wind, humidity, topography, fuel, and clouds spurred or inhibited combustion. We’d get to know fire intimately.\\n\\nI was there as a journalist. But I was soon absorbed by the work itself. The mornings began in an old barn. I jumped into a four-wheeled U.T.V., my hands red and aching as I gripped the frozen wheel. We traversed the leks of prairie chickens, whose short-short-long calls reverberated through the early light. At our destination, we’d huddle in the idling truck, waiting for the morning sun to melt the frosted grass and for the night’s high relative humidity to drop. 
When a favorable reading came in over the radio, we’d rush to assemble our drip torches, put on our fire packs, start our water pumps, and grab hand tools. Kelleher delighted in catching us off guard as we tried to organize ourselves. Without warning, he’d pour fuel on the ground and light a test fire with his “tweaker torch”—a giant lighter he had bought from the local gas station.\\n\\nWe’d stand and watch, observing the flames and the direction of the smoke. The test fires were almost ritualistic. It was as if we were acknowledging the arrival of fire with our attention, or perhaps the fire, in its vibrancy, was capturing us. Once Kelleher gave us the go-ahead, we would start our burn, often incinerating hundreds of acres each day. To manipulate fire precisely amid gusting winds and uneven terrain, we had to think technically about its behavior. We decided from one moment to the next how aggressively or slowly to apply fire on the ground, and in what pattern. In order to predict fire, I had to mentally inhabit it. All of my senses were awakened.\\n\\nThe intense aesthetic pleasure of fire surprised me. The smoke smelled comforting—like a mixture of sage and resin, citronella and my grandpa. There was a kind of horticultural joy, too. Our flames destroyed the young Eastern red cedars that threatened to turn prairie into woodland. The newly blackened earth absorbed the sun and then warmed, which signalled to the bulbs and the rhizomes that they could emerge and drink light from the big sky. Ash and char, I learned, delivered a nutrient pulse that encouraged root growth and maximized carbon storage. Kelleher called fire the “earth’s cleanser,” and he always reminded the crew of its purpose. “Why are we burning?” he would bark, a Marlboro Light between his fingers. “We apply fire to achieve ecological benefits.”\\n\\nAfter one long day, I walked past the reflective windows of a building next to crew headquarters. I didn’t recognize myself. 
My face and hands were covered in soot. My hair was matted and my leather boots were encrusted with dirt. Fire was changing me, and not just physically. I’d always harbored a sense of unease about my own species; I’d wondered if we were somehow fundamentally ruinous to the earth. Now, with my drip torch in hand, I felt like I was a link in a chain of mutual benefit connecting soil, grass, insects, animals, humans, and sky.\\n\\nBack home in the city, I began looking for other crews to join, so that I could work on more prescribed fires. I woke up in the mornings and e-mailed and called people in New Jersey and Michigan. I bought a plane ticket to Montana to burn five hundred acres in the Blackfoot River watershed, east of Missoula, but the burn got cancelled owing to rain. I seemed to think of nothing but flame. On the phone with a mentor and former wildland firefighter, I tried to put my experience into words. His diagnosis was quick. “You have the fire bug,” he said. “We get bitten by the fire.”\\n\\nThe term “fire bug” is common among wildland firefighters and prescribed-fire practitioners. Some people speak as though they’ve been bit by an actual bug; others seem to imagine a virus that infects. The notion of a sickness or infection seemed to explain better than anything else my unexpected pining for fire. I had the fire bug, but what did that make me? I wondered whether I was becoming some kind of arsonist.\\n\\nKelleher sometimes joked with us before the start of the day’s operations. “What’s that line from ‘Backdraft’?” he’d ask. Then, quoting the pyromaniac Ronald Bartel, from the 1991 movie, he would shout, “Burn it all!” Still, I noticed among my colleagues a general aversion to words like “pyromaniac” or “pyro.” Yes, we exhibited what the DSM-V describes as a “fascination with, interest in, curiosity about, or attraction to fire”—one of the signs of pyromania. 
But when psychiatrists speak about pyromania, they refer to a rare condition defined by a lack of impulse control. In contrast, we took pride in the extreme mental and physical discipline demanded by our jobs; we knew that maintaining control of ourselves was essential in minimizing the dangers of fire.\\n\\nAt the same time, we were practitioners and even disciples of fire. We sometimes called ourselves “students of fire,” a term that implied a posture of humility, and an acknowledgment that fire could never be totally mastered. On fire lines, I saw caution, respect, reverence—and pleasure. Our experiences transcended the binary of “firefighter” or “pyro”—the former heroized, the latter pathologized.\\n\\nMaybe it’s not surprising that we don’t have enough words to describe the complex relationships people can have with fire: for more than a century, our society has sought to remove it from the land even as our daily lives have become dependent on a different kind of combustion. Throughout American history, as the historian Stephen Pyne writes in his book “Between Two Fires,” landscape fires were seen as “beneath the dignity of an aspiring great power and Enlightened society.” During the twentieth century, state and national campaigns to stop fire starters in prairies and forests were often aggressive; in the South, long-held fire traditions supporting hunting, harvesting, gathering, and grazing were described by a government psychologist as ignorant traditions; one academic called them “ancestor worship.” Meanwhile, in the West, Native Americans risked fines and imprisonment for managing lands with fire, and wildfire lookout towers were erected on sacred mountain peaks. These campaigns coincided with often sensational coverage of arsonists in newspapers and tabloids. 
When the term “fire bug” appeared, as early as the mid-nineteenth century, it was used to describe people who’d been accused of setting fires in protest, for revenge, for insurance payouts, for political reasons, or simply because the moon was full.\\n\\nSome foresters saw the foolishness of trying to prevent fire-adapted landscapes from burning. They pointed out that the accumulation of fuels over time would eventually lead to bigger blazes. They were castigated for advocating “Paiute forestry,” and branded as “light burners.” In 1935, the chief of the U.S. Forest Service implemented “an experiment on a continental scale,” and decreed that any wildland fire that broke out had to be extinguished by 10 A.M. the following morning. The logic of suppressing fires was deemed unassailable, and the act of lighting them rendered subversive. Today, it’s easy to wonder how things would have turned out if the light-burners had won the debate. What if fire-lighting had been recognized as a benign and necessary form of stewardship? How many current environmental crises might have been averted if the relationship between people, landscape, and fire had not been so misunderstood?\\n\\nSome wildland firefighters describe the fire bug as something that’s always existed inside them, like a kink in their DNA. I ask them: When did you light your first fire? They were often surprisingly young. The late ecologist and fire advocate Herbert Stoddard set his first fire when he was five years old. 
During the late eighteen-hundreds, in Florida, where he grew up, it was normal for women and children to set fires around their homes, burning away underbrush to prevent future conflagrations and maintain the open longleaf pine forests.\\n\\nIn a book called “Pathological Firesetting,” from 1951, the authors described fire-setters as people who indulge in “the magical power of the child to create and destroy whole worlds.” I think of that line when I watch my six-year-old sit inside his outdoor fort, feeding twigs to the fires he starts in a little stone hearth. His state of trance-like reverie reminds me that our species’ connection to fire is very, very old. Maybe we all have the kink. I’ve set fires in oak woodlands, longleaf pine savannahs, marshes, pocosin, bottomland hardwood swamps, and ponderosa pine forests. Sleeping on the ground or in the back of rental cars, far from my kids, tired and dirty, I’ve sometimes thought I might have gone mad.\\n\\nBut I keep seeking out fire because of what I learn. In thousands of small instances, fire has shown me how death is a necessary ecological condition of life. I have seen that people are not fundamentally ruinous—just currently remiss in their responsibilities to the land. My competence in the work itself, utilizing machines, tools, and my own hands, never fails to quiet some thrumming tension in my core, a need to be an instrument for fulfilling responsibilities. I’ve set fires alongside little kids and people in their seventies. I’m determined to be a student of fire into old age myself. Meanwhile, the fire bug continues to afflict me. Some months ago, I found out I was going to have another child. The first thing I did was count on my fingers. With relief, I calculated that I would have just enough time after giving birth to get into shape and return to the prairie to burn in the spring. ♦'],\n", + " ['Wisconsin Experiencing High Fire Danger Statewide',\n", + " None,\n", + " \"MADISON, Wis. 
– As temperatures rise and more people get outside early this week, the Wisconsin Department of Natural Resources (DNR) asks the public to stay vigilant and avoid burning because of high fire danger statewide.\\n\\nLack of snow cover and accumulated rain statewide are causing an early start to Wisconsin's wildfire season. Over the past weekend alone, the DNR responded to 15 wildfires burning nearly 30 acres.\\n\\nThe forecast for the early part of this week indicates warm temperatures, low humidity and breezy conditions. On windy, dry days, embers from any fire, especially burn piles and campfires, can easily escape control and cause a wildfire if not properly extinguished.\\n\\nTo help keep Wisconsinites safe, the DNR asks you to avoid all outdoor burning, including limiting the use of campfires and bonfires. Be sure to discard hot ashes from woodstoves or fireplaces in a metal container until cold. Outdoor enthusiasts should also use caution with off-road vehicles or equipment that can create a spark and start a fire.\\n\\nThe DNR is suspending annual burn permits in 25 counties where the DNR has protection responsibility. Check with local authorities if your property is outside the DNR's jurisdiction.\\n\\nSo far in 2024, the DNR has already responded to over 50 fires burning 160 acres; the 10-year average is three wildfires burning two acres. The majority of these recent wildfires have been related to debris burning.\\n\\nCheck before you burn. Remember – fire danger and burning restrictions change every day.\\n\\nFIRE SAFETY TIPS\\n\\nAvoid outdoor burning until conditions improve. Burn permits for debris burning are currently suspended in numerous counties.\\n\\nOperate equipment (chainsaws, off-road vehicles, lawnmowers, etc.) 
early in the morning or late in the day to avoid sparks at peak burn hours.\\n\\nSecure dragging trailer chains.\\n\\nReport fires early by calling 911.\\n\\nCheck current fire danger, wildfire reports and burning restrictions on the DNR website.\\n\\nFor a more comprehensive view of current fire activity, visit the DNR's wildfire dashboard.\"],\n", + " ['West Monument Creek Fire burns on the U.S. Air Force Academy',\n", + " datetime.datetime(2024, 2, 27, 0, 14, 12, tzinfo=tzutc()),\n", + " 'West Monument Creek Fire burns on the U.S. Air Force Academy\\n\\nThe West Monument Creek Fire started on the U.S. Air Force Academy Sunday, Feb. 25 and continues to burn Monday. As of 4 p.m. Feb. 26, the fire had burned 168 acres and was not fully contained. (U.S. Air Force photo by Trevor Cokley)\\n\\nU.S. Air Force Academy Strategic Communications\\n\\nU.S. AIR FORCE ACADEMY, Colo. – Crews from the U.S. Air Force Academy, Colorado Springs, U.S. Forest Service and Fort Carson Directorate of Emergency Services fire departments continued Monday, Feb. 26 to fight the West Monument Creek Fire.\\n\\nAs of Monday evening the fire had burned 168 acres and was 50% contained.\\n\\nThe installation remains closed to the general public until further notice. The Child Development Centers and Military Treatment Facility are scheduled to reopen for regular hours Tuesday. If the situation changes due to fire or weather conditions, updates will be released Tuesday morning. The Visitor Center remains closed, and some road closures remain in effect.\\n\\nThe fire was first spotted about 1 p.m. on Sunday, Feb. 25 on the south end of the Academy, in an area west of the Pine Valley housing area. Crews from the Academy and Colorado Springs Fire Department began battling the blaze after first spotting it.\\n\\n“This has truly been a team effort from across the region, and a huge thank you to everyone on the front line that is supporting this response,” said Lt. Gen. Richard Clark, U.S. 
Air Force Academy superintendent. “We are so impressed by the teamwork that has gone into keeping our community and Academy safe.”\\n\\nFire Station 1 responds to the West Monument Creek Fire that started on the U.S. Air Force Academy Sunday, Feb. 25 and continues to burn Monday. (U.S. Air Force photo by Rayna Grace)\\n\\nA UH-60 Black Hawk and two CH-47 Chinook helicopters dropped more than 14,000 gallons of water, targeting hot spots in the affected area.\\n\\nU.S. Forest Service provided a 10-person hand crew and several management resources. The hand crew is working in steep terrain to clear grass and brush to prevent further spread of the fire.\\n\\nCSFD has provided two Type 6 brush trucks, one Type 3 wildland engine, and dozens of personnel to assist. Their Special Operations Unit drone provided up-to-the minute aerial updates to the incident commander when other aircraft were unable to fly due to weather.\\n\\nAn Army CH-47 Chinook from Fort Carson, Colorado drops water on the West Monument Creek Fire at the U.S. Air Force Academy Monday, Feb. 26, 2024. (U.S. Air Force photo by Justin Pacheco)\\n\\nThe Academy’s Emergency Family Assistance Center (EFAC) remained operational Monday evening to provide support to Academy military membesr, families and government civilians seeking assistance.\\n\\nMore photos are available on Flickr.'],\n", + " ['Libraries collections largely unscathed after Feb. 9 fire',\n", + " datetime.datetime(2024, 2, 14, 15, 48, 10, tzinfo=tzoffset(None, -25200)),\n", + " 'Norlin Library’s collections escaped serious damage in the early morning fire on Friday, Feb. 9. 
Even so, library preservationists and fire remediators are working to return all collections to service as soon as possible.\\n\\nAs soon as the fire was extinguished and they were cleared to enter the building, Conservator Hillary Morgan and Head of Preservation & Collection Care Section Megan Lambert got to work assessing the damage to the collection and planning the recovery.\\n\\n“Fortunately there was minimal damage to the collection since the fire was put out quickly,” said Lambert. “Materials closest to the source of the fire had ash and soot which included reference, popular reading, periodicals and some display collections. From our initial assessment, very few, if any, materials will need to be replaced.”\\n\\nThe University Libraries are working with SERVPRO, a restoration service company, to clean up and mitigate most of the damage including removing soot off the surface of books using HEPA filtered vacuums and vulcanized rubber sponges that pick up particulates.\\n\\n“Soot and smoke can be abrasive,” explained Morgan. “While mechanically cleaning materials, especially more fragile materials, we have to go slowly otherwise we can cause more damage and staining.”\\n\\nSERVPRO is working around the clock to clean library spaces and collections. Once they have finished, Preservation will review the insides of the books in the area and conduct spot checks to make sure there isn’t any extra cleaning that needs to be done.\\n\\n“We are hoping that air and surface cleaning will prevent any residual smoke smell on the books,” said Lambert. “Many of the materials in the vicinity have glossy coated paper and use library binding covers which are the most durable covers you can have in a library. 
They are easier to clean and less likely to have particulates ingrained in compared to cloth or leather.”\\n\\nAfter the fire, the library was aired out to help dissipate the smoke smell and SERVPRO is using negative air machines which pull the air through filters to decrease all the particulates.\\n\\nAdditionally, a few paintings were affected with soot damage on the surface and behind the canvas. Because soot is acidic, the Libraries are working with Colorado Art Restoration Services (CARS) to test the paintings for damage and assess the need for cleaning.\\n\\nA CU Boulder alum and former Libraries preservation student worker, Mak Morton, now works for CARS and is helping with the restoration. While a student majoring in Art History, Morton was a recipient of the Laughing Goat Scholarship and helped to restore a collection of historic glacier and ice maps so they are available to researchers.\\n\\nFor the most current information on Norlin access and libraries services, visit the Libraries website.'],\n", + " ['Homedics Recalls Massagers Due to Fire and Burn Hazards',\n", + " None,\n", + " 'Description:\\n\\nThis recall involves the HoMedics Therapist Select Massagers with model number HHP-715. The product has a 120 VAC, 60 HZ power cord. “HoMedics” is printed on the side of the barrel of the product. The products are black with a handle, housing and massage head attached to the end. The product has four interchangeable massage heads.\\n\\nOnly manufacturing dates through the end of 2022 and prior are included in the recall. The manufacturing date is represented by a date code found on a sticker on the underside of the product’s barrel. Date codes are a 4-digit number WWYY where WW is the sequential week of the year and YY is the last two digits of the manufacturing year. 
Only products with a YY of 20, 21 or 22 are subject to this recall.'],\n", + " ['State of Delaware News',\n", + " datetime.datetime(2024, 2, 27, 21, 48, 1, tzinfo=tzutc()),\n", + " 'February 27, 2024\\n\\nThe Delaware State Fire Marshal’s Office is currently investigating a house fire that occurred this morning in Newark, DE.\\n\\nThe Aetna Hose Hook and Ladder Company of Newark was notified of a fire in the 200 block of Emma Way in the Shelley Farms community, shortly after 4:00 AM. Upon arrival they found heavy smoke and fire coming from the residence. Mutual aid companies responded to assist with extinguishment. A mayday was called for a possible missing Firefighter but was quickly cleared after utilizing the Fireground Accountability Protocol used by all Fire Departments in Delaware.\\n\\nDeputy Fire Marshal’s were called to the scene to determine the origin and cause of the fire. The origin of the fire was determined to be in a bedroom and the cause is still under investigation.\\n\\nTwo residents were transported to Christiana Hospital for minor injuries and smoke inhalation. No other injuries were reported. 
The damage to the residence is estimated at approximately $150,000.\\n\\nChief Deputy Robert Fox\\n\\nDelaware State Fire Marshal’s Office\\n\\n2307 MacArthur Dr\\n\\nNew Castle, Delaware 19720\\n\\n(302) 323-5375 Office'],\n", + " ['OSHA Proposes to Replace Its Existing Fire Brigades Standard With New Comprehensive Emergency Response Standard',\n", + " None,\n", + " 'Quick Hits\\n\\nOSHA has proposed a rule that would transform the Fire Brigades standard into an Emergency Response standard.\\n\\nThe proposed rule would cover not only firefighting, but also emergency medical services, tactical rescue, and the equivalent services offered in workplaces by employer-provided services.\\n\\nComments are due no later than May 6, 2024.\\n\\nWith the potential exception of the COVID-19 standards OSHA created, no other proposal for a change in OSHA standards in recent memory has been as comprehensive as these changes. Comments on the proposal are due on or before May 6, 2024.\\n\\nThe 250-page NPRM transforms 29 C.F.R. § 1910.156 into an entirely new standard, but it also makes important changes to:\\n\\n29 C.F.R. § 1910.6 Incorporation by reference\\n\\n29 C.F.R. § 1910.120 Hazardous waste operations and emergency response and Appendix B\\n\\n29 C.F.R. § 1910.134 Respiratory protection\\n\\n29 C.F.R. § 1910.155 Scope, application and definitions applicable to this subpart\\n\\n29 C.F.R. § 1910.157 Portable fire extinguishers\\n\\n29 C.F.R. § 1910.158 Standpipe and hose systems\\n\\n29 C.F.R. § 1910.159 Automatic sprinkler systems\\n\\nMost of the changes to these standards are modest. The last two, 29 C.F.R. 
§§ 1910.158 and 1910.159, essentially do nothing more than require that those systems are compatible with what local fire departments use to couple with them.\\n\\nOverview of Requirements\\n\\nThere is no way this article can cover in detail all of the nuances of the proposed standard, given its length and complexity, but those requirements include:\\n\\nOrganization of the Workplace Emergency Response Team, Workplace Emergency Response Employer, or Emergency Service Organization; Establishing the organization’s emergency response plan; Establishing the organization’s emergency service(s) capability(ies); Team member and responder participation; Creation of a risk management plan; Responder medical and physical requirements; Minimum training requirement; Facility preparedness activities; Equipment and personal protective equipment (PPE) requirements; Vehicle preparedness and operational requirements; Pre-incident planning requirements; Incident Management System (IMS) creation; Creation of emergency incident protocols; Post-incident analysis; and Program analysis.\\n\\nNational Fire Protection Association Standards\\n\\nThe proposed Emergency Response standard incorporates by reference twenty-two National Fire Protection Association (NFPA) standards, most of which are focused on firefighting training, personal protective equipment, health and safety requirements, and apparatus. Though the standard relates to other types of emergency response, much of it does so with a strong firefighting influence.\\n\\nESOs and WEREs\\n\\nThe new version of 29 C.F.R. § 1910.156 is divided into nineteen sections and categorizes subject employers as either Emergency Service Organizations (ESOs) or Workplace Emergency Response Employers (WEREs), sometimes referred to as Workplace Emergency Response Teams (WERTs). 
WEREs and WERTs are often found in chemical plants, refineries, large manufacturing facilities, and power generation facilities where the size of the workforce and the hazards associated with an emergency are such that the owners and operators of those facilities find value in having on-site fire departments and/or emergency medical responders. While there is a differentiation between these types of entities, many of the requirements applicable to ESOs are identical to those for WEREs/WERTs.\\n\\nVehicle Preparedness and Operation\\n\\nThere are some serious questions about OSHA’s authority with regard to portions of the proposed standard, such as those related to vehicle preparedness and operational requirements. On-road vehicle design has never been subject to OSHA oversight and traditionally has fallen under the U.S. Department of Transportation’s and/or the National Highway Traffic Safety Administration’s purview. Regardless, this standard would require compliance with standards created by OSHA, as well as those established by the NFPA. Similarly, on-road vehicle operation has never been subject to OSHA regulation, but this standard would do so as it relates to emergency response vehicles. Depending on how OSHA applies the standard, it even goes as far as to the personal vehicles operated by volunteer fire fighters or emergency medical responders who use their own vehicles to respond to emergencies.\\n\\nVolunteer Services\\n\\nOSHA clearly intends that volunteer fire departments and volunteer emergency medical services be covered by this standard, and the NPRM discusses employment law cases that relate to control and remuneration for services provided. While many of these volunteer services are public or quasi-public entities, in certain parts of the country they are private organizations, and, as a result, these private organizations would likely be subject to coverage under this standard. 
As state plans would be required to adopt the standard or the equivalent, this would require public and quasi-public entities in those states to comply with the standard.\\n\\nEmergency Responders’ Physical and Mental Health\\n\\nNearly 80 percent of paramedics self-reported being overweight or obese, according to a 2015 study cited by OSHA in the proposed rule, and for many years there have been shortages of emergency medical service employees, yet the proposed standard calls for the creation of minimum medical requirements commensurate with the requirements of the position, ongoing medical evaluations, and fitness programs. There are no exceptions for provider disability or age contained within the standard. Moreover, mental health services would have to be provided (though they can be provided through community-based resources) to providers engaged in “potentially traumatic events.”\\n\\nKey Takeaways\\n\\nIf the proposed rule is adopted, the impact on subject employers will be massive and compliance difficult. Most of the providers of the sort of services covered by the proposed standard are private employers that augment services provided by public or quasi-public providers. While the proposed standard, if adopted, would require the creation of mutual aid agreements for a number of reasons, the efforts to negotiate those agreements will be influenced considerably by the fact that the public and quasi-public entities are not compelled to do so. Private entities providing services in the same communities may allow competition to serve as barriers to creating such agreements.\\n\\nOgletree Deakins’ Workplace Safety and Health Practice Group will continue to monitor developments and will publish updates on the Workplace Safety and Health blog as additional information becomes available. 
Further information on OSHA’s Emergency Response proposed rule is available via the firm’s recent podcast program.\\n\\nTo learn more about OSHA’s proposed rule to replace its existing Fire Brigades standard, please join us at Ogletree Deakins’ national Workplace Strategies program, which will feature a breakout session entitled “To Inspections and Beyond: The Latest From OSHA (and Cal/OSHA).” This annual seminar, which will be held May 1–4, 2024, at the historic Washington Hilton in Washington, D.C., is the premier event of its kind for sophisticated HR professionals, in-house counsel, and other business professionals.\\n\\nFollow and Subscribe\\n\\nLinkedIn | Instagram | Webinars | Podcasts']]" + ] + }, + "execution_count": 124, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "article" + ] + }, + { + "cell_type": "code", + "execution_count": 131, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "datetime.datetime(2024, 2, 27, 21, 48, 1, tzinfo=tzutc())" + ] + }, + "execution_count": 131, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "article[6][1]" + ] + }, + { + "cell_type": "code", + "execution_count": 132, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'27-02-2024'" + ] + }, + "execution_count": 132, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "formatted_date = article[6][1].strftime(\"%d-%m-%Y\")\n", + "formatted_date" ] }, { diff --git a/DOTS/ b/DOTS/ deleted file mode 100644 index ffbf2fd..0000000 --- a/DOTS/ +++ /dev/null @@ -1,296 +0,0 @@ -# conda create -n DT ipykernel -# python -m ipykernel install --user --name DT -# pip install torch bs4 transformers spacy numpy pandas scikit-learn scipy nltk -from bs4 import BeautifulSoup -import argparse, signal -from tqdm import tqdm -from datetime import datetime -import numpy as np -import concurrent.futures -from dotenv import load_dotenv -from opensearchpy import OpenSearch 
-from sklearn.metrics.pairwise import cosine_similarity -from sklearn.feature_extraction.text import CountVectorizer -from transformers import AutoModel, AutoTokenizer -import torch, spacy,nltk,subprocess, json, requests,string,csv,logging,os -try: -'tokenizers/punkt') -except LookupError: -'punkt') -load_dotenv() -os_url = os.getenv('OS_TOKEN') - -# Setup logging -logging.basicConfig(level=logging.ERROR, format='%(asctime)s - %(levelname)s - %(message)s') - -# Setup argument parser -parser = argparse.ArgumentParser(description='Process OS data for dynamic features.') -parser.add_argument('-n', type=int, default=1000, help='Number of data items to get') -parser.add_argument('-f', type=int, default=3, help='Number of features per item to get') -parser.add_argument('-o', type=str, default='dots_feats.csv', help='Output file name') -parser.add_argument('-p', type=int, default=1, help='Parallelize requests') -parser.add_argument('-d', type=int, default=1, help='data size, 0 for small, 1 for large') -# parser.add_argument('-e', type=datetime, default=20231231, help='end date') -args, unknown = parser.parse_known_args() - -# Load models and tokenizers -model_name = "distilroberta-base" -model = AutoModel.from_pretrained(model_name) -tokenizer = AutoTokenizer.from_pretrained(model_name) -# !python -m spacy download en_core_web_sm -nlp = spacy.load('en_core_web_sm') - -# Define constants -n_gram_range = (1, 2) -stop_words = "english" -embeddings=[] -device = torch.device("cuda" if torch.cuda.is_available() else "cpu") - -def handler(signum, frame): - raise TimeoutError() -signal.signal(signal.SIGALRM, handler) - - -# Define functions -def get_data(n=args.n): # , s=args.s, e=args.e): - bash_command = f""" - curl -X GET "{os_url}/emergency-management-news/_search?scroll=5m" -H 'Content-Type: application/json' -d '{{ - "_source": ["metadata.GDELT_DATE", "metadata.page_title","metadata.DocumentIdentifier", 
"metadata.Organizations","metadata.Persons","metadata.Themes","metadata.text", "metadata.Locations"], - "size": {n}, - "query": {{ - "bool": {{ - "must": [ - {{"match_all": {{}}}} - ] - }} - }} - }}' - """ - process =, shell=True, capture_output=True, text=True) - output = process.stdout - data = json.loads(output) - return data - - -def get_massive_data(n=args.n): - client = OpenSearch(os_url) - query = { - "size": str(n), - "timeout": "10s", - "slice": { - "id": 0, - "max": 10 - }, - "query": { - "bool": { - "must": [ - {"match_all": {}}, - ]} - }, - "_source": ["metadata.GDELT_DATE", "metadata.page_title","metadata.DocumentIdentifier", "metadata.Organizations","metadata.Persons","metadata.Themes","metadata.text", "metadata.Locations"], - } - response = - scroll='1m', - body=query, - ) - - return response, client - - -def process_hit(hit): - text = [] - source = hit['_source'] - date = datetime.strptime(source['metadata']['GDELT_DATE'], "%Y%m%d%H%M%S") - date = formatted_date = date.strftime("%d-%m-%Y") - loc = source['metadata']['Locations'] - loc = loc.replace("'", '"') # json requires double quotes for keys and string values - try: - list_of_dicts = json.loads(loc) - location_full_names = [dict['Location FullName'] for dict in list_of_dicts if 'Location FullName' in dict] - loc = location_full_names[0] - except: - loc = None - org = source['metadata']['Organizations'] - per = source['metadata']['Persons'] - theme = source['metadata']['Themes'].rsplit('_')[-1] - title = source['metadata']['page_title'] - url = source['metadata']['DocumentIdentifier'] - try: - response = requests.get(url) - except requests.exceptions.ConnectionError: # - logging.debug(f"timeout for {url}") - return text,date,loc,title - if response.status_code != 200: - logging.debug(f"Failed to get {url}") - return text,date,loc,title,org,per,theme - soup = BeautifulSoup(response.text, 'html.parser') - paragraphs = soup.find_all(['p']) - if not paragraphs: - logging.debug(f"No

tags in {url}") - return text,date,loc,title,org,per,theme - for p in paragraphs: - text.append(p.get_text()) - return text,date,loc,title,org,per,theme - - -def process_data(hits,fast=args.p): - articles = [] - results=[] - hits = data['hits']['hits'] - - for hit in tqdm(hits, desc="attempting to grab text from url"): - if fast==0: - try: - results.append(process_hit(hit)) - signal.alarm(5) - except: - logging.debug(f"Grabbing the url stalled after 5s, skipping...") - pass - else: - e = concurrent.futures.ThreadPoolExecutor() - try: - future = e.submit(process_hit, hit) - result = future.result(timeout=5) # Set timeout for 5 seconds - results.append(result) - except concurrent.futures.TimeoutError: - logging.debug(f"Grabbing the url stalled after 5s, skipping...") - - with open('DOTS/input/feat_input.csv', 'w') as file: - writer = csv.writer(file) - for text,date,loc,title,org,per,theme in results: - if loc is None: - logging.debug(f"No location info, grabbing from org...") - loc = org - if text == None or text == []: - logging.debug(f"No text from url available, using org/persons/theme instead...") - articles.append([loc,date,None,org,per,theme]) - writer.writerow([loc,date,None,org,per,theme]) - writer.writerow(['\n']) - else: - articles.append([loc,date,text,None,None,None]) - writer.writerow([loc,date,text,None,None,None]) - writer.writerow(['\n']) - signal.alarm(0) - return articles - - -def process_response(response): - hits = response["hits"]["hits"] - output=[] - for hit in hits: - source = hit["_source"] - date = datetime.strptime(source['metadata']['GDELT_DATE'], "%Y%m%d%H%M%S") - date = formatted_date = date.strftime("%d-%m-%Y") - loc = source['metadata']['Locations'] - loc = loc.replace("'", '"') # json requires double quotes for keys and string values - try: - list_of_dicts = json.loads(loc) - location_full_names = [dict['Location FullName'] for dict in list_of_dicts if 'Location FullName' in dict] - loc = location_full_names[0] - except: - loc = None 
- org = source['metadata']['Organizations'] - per = source['metadata']['Persons'] - theme = source['metadata']['Themes'].rsplit('_')[-1] - title = source['metadata']['page_title'] - url = source['metadata']['DocumentIdentifier'] - output.append([date, loc, title, org, per, theme, url]) - - pagination_id=response['_scroll_id'] - return pagination_id, output - - - - -def chunk_text(text, max_len): - tokens = nltk.word_tokenize(text) - num_chunks = len(tokens) // max_len - final_chunk_size = len(tokens) % max_len -"chunking artcile into {num_chunks} chunks") - # If the final chunk is too small, distribute its tokens among the other chunks - if final_chunk_size < max_len / 2: - num_chunks += 1 - chunk_sizes = [len(tokens) // num_chunks + (1 if i < len(tokens) % num_chunks else 0) for i in range(num_chunks)] - chunks = [tokens[sum(chunk_sizes[:i]):sum(chunk_sizes[:i+1])] for i in range(num_chunks)] - else: - chunks = [tokens[i:i + max_len] for i in range(0, len(tokens), max_len)] - return chunks - - -def featurize_stories(text, top_k, max_len): - # Extract candidate words/phrases - count = CountVectorizer(ngram_range=n_gram_range, stop_words=stop_words).fit([text]) - all_candidates = count.get_feature_names_out() - doc = nlp(text) - noun_phrases = set(chunk.text.strip().lower() for chunk in doc.noun_chunks) - nouns = set() - datetimes = set() - for token in doc: - if token.pos_ == "NOUN": - nouns.add(token.text) - if token.ent_type_ == "DATE": - datetimes.add(token.text) # no need to tokenize dates, just add to feature list artificially by repeating - all_nouns = nouns.union(noun_phrases) - candidates = list(filter(lambda candidate: candidate in all_nouns, all_candidates)) - candidate_tokens = tokenizer(candidates, padding=True, return_tensors="pt") - # if device == 'cuda': - candidate_tokens = {k: for k, v in (candidate_tokens).items()} - candidate_embeddings = model(**candidate_tokens)["pooler_output"] - candidate_embeddings = candidate_embeddings.detach() # .to_numpy 
- chunks = chunk_text(text, max_len) # use this to chunk better and use less padding thus less memory but also less affect from averging - for chunk in chunks: - text_tokens = tokenizer(chunk, padding=True, return_tensors="pt") - # if device == 'cuda': - text_tokens = {k: for k, v in (text_tokens).items()} - text_embedding = model(**text_tokens)["pooler_output"] - text_embedding = text_embedding.detach()#.to_numpy() - embeddings.append(text_embedding) - max_emb_shape = max(embedding.shape[0] for embedding in embeddings) - padded_embeddings = [np.pad(embedding.cpu(), ((0, max_emb_shape - embedding.shape[0]), (0, 0))) for embedding in embeddings] - avg_embedding = np.min(padded_embeddings, axis=0) - distances = cosine_similarity(avg_embedding, candidate_embeddings.cpu()) - torch.cuda.empty_cache() - return [candidates[index] for index in distances.argsort()[0][::-1][-top_k:]] - - - -# Main pipeline -def main(args): - if args.d == 0: - data = get_data(args.n) - articles = process_data(data) - else: - response, client = get_massive_data(args.n) - pagination_id = response["_scroll_id"] - hits = response["hits"]["hits"] - articles=[] - while len(hits) != 0: - # try: - response = client.scroll( - scroll='5m', - scroll_id=pagination_id - ) - hits = response["hits"]["hits"] - pagination_id, article = process_response(response) - articles.append(article) - # except: - # print("A ConnectionTimeout error occurred.") - # pass - articles = [item for sublist in articles for item in sublist] - rank_articles=[] - for i in tqdm(articles, desc="featurizing articles"): - foreparts=str(i).split(',')[:2] # location and date - meat="".join(str(i).split(',')[2:-3]) # text - aftparts=str(i).split(',')[-3:] # org, per, theme - try: - cc=featurize_stories(str(i), top_k = args.f, max_len=512) - rank_articles.append([foreparts,cc]) - except Exception as e: - logging.error(f"Failed to process article: {e}") - with open('DOTS/output/'+args.o, 'w', newline='') as file: - writer = csv.writer(file) 
- writer.writerows(rank_articles) - -if __name__ == "__main__": - main(args) diff --git a/DOTS/ b/DOTS/ new file mode 100644 index 0000000..36b972a --- /dev/null +++ b/DOTS/ @@ -0,0 +1,147 @@ +# conda create -n DT ipykernel +# python -m ipykernel install --user --name DT +# pip install torch bs4 transformers spacy numpy pandas scikit-learn scipy nltk +import argparse, signal +from tqdm import tqdm +from datetime import datetime +import numpy as np +from sklearn.metrics.pairwise import cosine_similarity +from sklearn.feature_extraction.text import CountVectorizer +from transformers import AutoModel, AutoTokenizer +import torch, spacy,nltk,subprocess, json, requests,string,csv,logging,os +from .scrape import get_OS_data, get_massive_OS_data, get_google_news +from .pull import process_hit, process_data +try: +'tokenizers/punkt') +except LookupError: +'punkt') + + +# Setup logging +logging.basicConfig(level=logging.ERROR, format='%(asctime)s - %(levelname)s - %(message)s') + +# Setup argument parser +parser = argparse.ArgumentParser(description='Process OS data for dynamic features.') +parser.add_argument('-n', type=int, default=100, help='Number of data items to get') +parser.add_argument('-f', type=int, default=3, help='Number of features per item to get') +parser.add_argument('-o', type=str, default='dots_feats.csv', help='Output file name') +# parser.add_argument('-p', type=int, default=1, help='Parallelize requests') +parser.add_argument('-t', type=int, default=1, help='Scroll Timeout in minutes, if using "d=1" large data set') +parser.add_argument('-d', type=int, default=1, help='0 for a small amount, 1 for large, 2 for google news, 3 for NPR') +# parser.add_argument('-e', type=datetime, default=20231231, help='end date') +args, unknown = parser.parse_known_args() + +# Load models and tokenizers +model_name = "distilroberta-base" +model = AutoModel.from_pretrained(model_name) +tokenizer = AutoTokenizer.from_pretrained(model_name) +# !python -m spacy download 
en_core_web_sm +nlp = spacy.load('en_core_web_sm') + +# Define constants +n_gram_range = (1, 2) +stop_words = "english" +embeddings=[] +device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + +# Define functions +def chunk_text(text, max_len): + tokens = nltk.word_tokenize(text) + num_chunks = len(tokens) // max_len + final_chunk_size = len(tokens) % max_len +"chunking artcile into {num_chunks} chunks") + # If the final chunk is too small, distribute its tokens among the other chunks + if final_chunk_size < max_len / 2: + num_chunks += 1 + chunk_sizes = [len(tokens) // num_chunks + (1 if i < len(tokens) % num_chunks else 0) for i in range(num_chunks)] + chunks = [tokens[sum(chunk_sizes[:i]):sum(chunk_sizes[:i+1])] for i in range(num_chunks)] + else: + chunks = [tokens[i:i + max_len] for i in range(0, len(tokens), max_len)] + return chunks + + +def featurize_stories(text, top_k, max_len): + # Extract candidate words/phrases + count = CountVectorizer(ngram_range=n_gram_range, stop_words=stop_words).fit([text]) + all_candidates = count.get_feature_names_out() + doc = nlp(text) + noun_phrases = set(chunk.text.strip().lower() for chunk in doc.noun_chunks) + nouns = set() + datetimes = set() + for token in doc: + if token.pos_ == "NOUN": + nouns.add(token.text) + if token.ent_type_ == "DATE": + datetimes.add(token.text) # no need to tokenize dates, just add to feature list artificially by repeating + all_nouns = nouns.union(noun_phrases) + candidates = list(filter(lambda candidate: candidate in all_nouns, all_candidates)) + candidate_tokens = tokenizer(candidates, padding=True, return_tensors="pt") + # if device == 'cuda': + candidate_tokens = {k: for k, v in (candidate_tokens).items()} + candidate_embeddings = model(**candidate_tokens)["pooler_output"] + candidate_embeddings = candidate_embeddings.detach() # .to_numpy + chunks = chunk_text(text, max_len) # use this to chunk better and use less padding thus less memory but also less affect from averging + 
for chunk in chunks: + text_tokens = tokenizer(chunk, padding=True, return_tensors="pt") + # if device == 'cuda': + text_tokens = {k: for k, v in (text_tokens).items()} + text_embedding = model(**text_tokens)["pooler_output"] + text_embedding = text_embedding.detach()#.to_numpy() + embeddings.append(text_embedding) + max_emb_shape = max(embedding.shape[0] for embedding in embeddings) + padded_embeddings = [np.pad(embedding.cpu(), ((0, max_emb_shape - embedding.shape[0]), (0, 0))) for embedding in embeddings] + avg_embedding = np.min(padded_embeddings, axis=0) + distances = cosine_similarity(avg_embedding, candidate_embeddings.cpu()) + torch.cuda.empty_cache() + return [candidates[index] for index in distances.argsort()[0][::-1][-top_k:]] + + + +# Main pipeline +def main(args): + if args.d == 0: + data = get_OS_data(args.n) + articles = process_data(data) + # articles = process_response(data) + dname='small0_' + elif args.d == 1: + response, client = get_massive_OS_data(args.n,args.t) + pagination_id = response["_scroll_id"] + hits = response["hits"]["hits"] + articles=[] + while len(hits) != 0: + response = client.scroll( + scroll=str(args.t)+'m', + scroll_id=pagination_id + ) + hits = response["hits"]["hits"] + article = process_data(response) + articles.append(article) + articles = [item for sublist in articles for item in sublist] + dname='large1_' + elif args.d == 2: + articles= get_google_news('disaster') + dname='google2_' + elif args.d == 3: + articles= get_npr_news('disaster') + dname='npr3_' + rank_articles=[] + for i in tqdm(articles, desc="featurizing articles"): + try: + foreparts=str(i).split(',')[:2] # location and date + except: + foreparts=None + meat="".join(str(i).split(',')[2:-3]) # text + try: + cc=featurize_stories(str(i), top_k = args.f, max_len=512) + rank_articles.append([foreparts,cc]) + except Exception as e: + logging.error(f"Failed to process article: {e}") + + with open('DOTS/output/'+dname+args.o, 'w', newline='') as file: + writer = 
csv.writer(file) + writer.writerows(rank_articles) + +if __name__ == "__main__": + main(args) diff --git a/DOTS/input/feat_input.csv b/DOTS/input/feat_input.csv index 05ee96a..8f6c8e8 100644 --- a/DOTS/input/feat_input.csv +++ b/DOTS/input/feat_input.csv @@ -1,15 +1,30 @@ -Yemen,16-01-2024,,"['white house', 'palestinian health ministry', 'european union', 'associated press', 'al jazeera', 'world economic forum in davos', 'island policy ministry', 'health ministry', 'united nations']","['wael dahdouh', 'khan younis', 'abdulrahman al', 'antonio guterres', 'khaled al-balshy', 'mohammed deif', 'yehya sinwar', 'sheikh mohammed', 'marwan issa', 'lolita baldor', 'beit lahia']","STAFFERS', '']" +,16-01-2024,Yemen,"Live updates | US bombs Yemen's Houthis, and Israeli strikes kill 158 in Gaza","['white house', 'palestinian health ministry', 'european union', 'associated press', 'al jazeera', 'world economic forum in davos', 'island policy ministry', 'health ministry', 'united nations']","['wael dahdouh', 'khan younis', 'abdulrahman al', 'antonio guterres', 'khaled al-balshy', 'mohammed deif', 'yehya sinwar', 'sheikh mohammed', 'marwan issa', 'lolita baldor', 'beit lahia']","STAFFERS', '']" " " -Gaza Strip,16-01-2024,"['Palestinians walk through destruction by the Israeli bombardment in the Nusseirat refugee camp in Gaza Strip, Tuesday, Jan. 16, 2024. Credit: AP/Adel Hana', ""RAFAH, Gaza Strip — Palestinian militants battled Israeli forces in devastated northern Gaza and launched a barrage of rockets from farther south on Tuesday in a show of force more than 100 days into Israel's massive air and ground campaign against the tiny coastal enclave."", ""The fighting in the north, which was the first target of Israel's offensive and where entire neighborhoods have been pulverized, showed how far Israel remains from achieving its goals of dismantling Hamas and returning scores of hostages captured in the Oct. 
7 attack that sparked the war."", 'In other developments, France and Qatar, the Persian Gulf nation that helped mediate a previous cease-fire, said late Tuesday that they had brokered a deal between Israel and Hamas to deliver medicine to Israeli hostages in Gaza, as well as additional aid to Palestinians in the besieged territory.', 'France said it had been working since October on the deal, which will provide three months’ worth of medication for 45 hostages with chronic illnesses, as well as other medicines and vitamins. The medicines are expected to enter Gaza from Egypt on Wednesday.', 'It was the first known agreement between the warring sides since a weeklong truce in November.', 'Get the latest breaking news as it happens.', '', 'By clicking Sign up, you agree to our privacy policy.', ""Meanwhile, Gaza's humanitarian crisis is worsening, with 85% of the territory's 2.3 million Palestinians having fled their homes and U.N. agencies warning of mass starvation and disease. The conflict threatens to widen after the U.S. and Israel traded strikes with Iranian-backed groups across the region. "", 'Israel has vowed to crush Hamas’ military and governing capabilities to ensure that the Oct. 7 attack is never repeated. Militants stormed into Israel from Gaza that day, killing some 1,200 people, mostly civilians, and capturing around 250 people. With strong diplomatic and military support from the United States, Israel has resisted international calls for a cease-fire.', 'A Palestinian looks at the destruction after an Israeli strike at a residential building in Deir al Balah, Gaza Strip, Sunday, Jan. 14, 2024. Credit: AP/Adel Hana', 'Nearly half of the hostages were released during the truce, but more than 100 remain in captivity. 
Hamas has said it will not release any others until Israel ends the war.', 'STRIKES AND COUNTERSTRIKES ACROSS THE REGION', 'The longer the war goes on, the more it threatens to ignite other fronts across the region.', 'Iran fired missiles late Monday at what it said were Israeli “spy headquarters” in an upscale neighborhood near the sprawling U.S. Consulate in Irbil, the seat of Iraq’s northern semi-autonomous Kurdish region. Iraq and the U.S. condemned the strikes, which killed several civilians, and Baghdad recalled its ambassador to Iran in protest.', ""Israeli security forces search for assailants near the scene of a deadly car-ramming and stabbing attack at a bus stop, in Ra'anana, Israel, Monday, Jan. 15, 2024. Israeli police say a car-ramming and stabbing attack by Palestinians killed a woman and wounded at least 12 others north of Tel Aviv. The police say they arrested two Palestinian suspects from the West Bank. They say the suspects stole three different cars and attempted to run down pedestrians. Credit: AP/Oded Balilty"", 'Iranian-backed groups in Iraq and Syria have carried out dozens of attacks on bases housing U.S. forces, and a U.S. airstrike in Baghdad killed an Iranian-backed militia leader earlier this month.', 'Elsewhere, Iranian-backed Houthi rebels in Yemen have resumed their attacks on container ships in the Red Sea following a wave of U.S.-led strikes last week. The U.S. military carried out another strike Tuesday. Separately, it said two Navy SEALS are missing after a raid last week on a ship carrying Iranian-made missile parts and weapons bound for Yemen.', ""Israel and Lebanon's Hezbollah militant group have exchanged fire along the border nearly every day since the war in Gaza began. 
The strikes and counterstrikes have grown more severe since an Israeli strike killed Hamas' deputy political leader in Beirut this month, raising fears of a repeat of the 2006 war."", ""MILITANTS KEEP FIGHTING IN GAZA'S HARD-HIT NORTH"", ""In Gaza, the Israeli military said its forces located some 100 rocket installations and 60 ready-to-use rockets in the area of Beit Lahiya, a town on the territory's northern edge. Israeli forces killed dozens of militants during the operation, the military said, without providing evidence."", 'Mahmoud Abdel-Ghani, who lives in Beit Lahiya, said Israeli airstrikes hit several buildings on the eastern side of the town.', 'Hundreds of thousands of people fled northern Gaza, including Gaza City, following Israeli evacuation orders in October. Israel shut off water to the north in the opening days of the war, and hardly any aid has been allowed into the area, even as tens of thousands of people have remained there.', 'Residents reached by phone Tuesday described the heaviest fighting in weeks in Gaza City.', '“The bombing never stopped,” said Faris Abu Abbas, who lives in the Tel al-Hawa neighborhood. “The resistance is here and didn’t leave.”', 'Ayoub Saad, who lives near Shifa Hospital downtown, said he heard gunfire and shelling overnight and into Tuesday and saw dead and wounded people being brought to the hospital on carts.', ""After weeks of heavy fighting across northern Gaza, Israeli officials said at the start of the year that they were scaling back operations there. The focus shifted to the southern city of Khan Younis and built-up refugee camps in central Gaza dating back to the 1948 war surrounding Israel's creation."", ""But there too, they have encountered heavy resistance. The military said at least 25 rockets were fired into Israel on Tuesday, damaging a store in one of the strongest bombardments in more than a week. 
Israel's Channel 12 television said the rockets were launched from the Bureij camp in central Gaza."", 'A SPIRALING HUMANITARIAN CRISIS', "" Gaza’s Health Ministry said Tuesday that the bodies of 158 people killed in Israeli strikes have been brought to hospitals in the past 24 hours, bringing the war's overall death toll to 24,285. The ministry does not differentiate between civilian and combatant deaths but says around two-thirds of those killed were women and children."", ""Senior U.N. officials warned Monday that Gaza faces widespread famine and disease if more aid is not allowed in. While they did not directly blame Israel, they said aid delivery is hobbled by the opening of too few border crossings, a slow vetting process, and continuing fighting throughout the territory — all of which is largely under Israel's control."", 'U.N. Secretary-General Antonio Guterres said U.N. agencies and their partners “cannot effectively deliver humanitarian aid while Gaza is under such heavy, widespread and unrelenting bombardment.” At least 152 U.N. staffers have been killed in Gaza since the start of the war.', 'Israeli officials say they have placed no limits on humanitarian aid and have called on the U.N. to provide more workers and trucks to accelerate delivery.', 'Israel completely sealed off Gaza after Hamas’ Oct. 7 attack and only relented under U.S. pressure. The U.S., as well as the U.N., have continued to push Israel to ease the flow of aid.', 'Israel blames the high civilian death toll on Hamas because it fights in dense residential areas. Israel says its forces have killed roughly 8,000 militants, without providing evidence, and that 190 of its own soldiers have been killed in the Gaza offensive.', '___', 'Magdy reported from Cairo. Lidman reported from Tel Aviv, Israel. Associated Press writers Jon Gambrell in Jerusalem and Sylvie Corbet in Paris contributed to this report.', ' Body parts defendant arrested\xa0... Nursing home must pay ... Affordable housing ... 
Adventureland preview', 'Get more on these and other NewsdayTV stories', ' Body parts defendant arrested\xa0... Nursing home must pay ... Affordable housing ... Adventureland preview', 'Get more on these and other NewsdayTV stories', 'Privacy Policy |Terms of service |Subscription terms |Your ad choices |Cookie Settings |California Privacy Rights |About Us |Contact Newsday |Reprints & permissions |Advertise with Newsday |Help', 'Copyright ©2024 Newsday. All rights reserved.']",,, +"['Palestinians walk through destruction by the Israeli bombardment in the Nusseirat refugee camp in Gaza Strip, Tuesday, Jan. 16, 2024. Credit: AP/Adel Hana', ""RAFAH, Gaza Strip — Palestinian militants battled Israeli forces in devastated northern Gaza and launched a barrage of rockets from farther south on Tuesday in a show of force more than 100 days into Israel's massive air and ground campaign against the tiny coastal enclave."", ""The fighting in the north, which was the first target of Israel's offensive and where entire neighborhoods have been pulverized, showed how far Israel remains from achieving its goals of dismantling Hamas and returning scores of hostages captured in the Oct. 7 attack that sparked the war."", 'In other developments, France and Qatar, the Persian Gulf nation that helped mediate a previous cease-fire, said late Tuesday that they had brokered a deal between Israel and Hamas to deliver medicine to Israeli hostages in Gaza, as well as additional aid to Palestinians in the besieged territory.', 'France said it had been working since October on the deal, which will provide three months’ worth of medication for 45 hostages with chronic illnesses, as well as other medicines and vitamins. 
The medicines are expected to enter Gaza from Egypt on Wednesday.', 'It was the first known agreement between the warring sides since a weeklong truce in November.', 'Get the latest breaking news as it happens.', '', 'By clicking Sign up, you agree to our privacy policy.', ""Meanwhile, Gaza's humanitarian crisis is worsening, with 85% of the territory's 2.3 million Palestinians having fled their homes and U.N. agencies warning of mass starvation and disease. The conflict threatens to widen after the U.S. and Israel traded strikes with Iranian-backed groups across the region. "", 'Israel has vowed to crush Hamas’ military and governing capabilities to ensure that the Oct. 7 attack is never repeated. Militants stormed into Israel from Gaza that day, killing some 1,200 people, mostly civilians, and capturing around 250 people. With strong diplomatic and military support from the United States, Israel has resisted international calls for a cease-fire.', 'A Palestinian looks at the destruction after an Israeli strike at a residential building in Deir al Balah, Gaza Strip, Sunday, Jan. 14, 2024. Credit: AP/Adel Hana', 'Nearly half of the hostages were released during the truce, but more than 100 remain in captivity. Hamas has said it will not release any others until Israel ends the war.', 'STRIKES AND COUNTERSTRIKES ACROSS THE REGION', 'The longer the war goes on, the more it threatens to ignite other fronts across the region.', 'Iran fired missiles late Monday at what it said were Israeli “spy headquarters” in an upscale neighborhood near the sprawling U.S. Consulate in Irbil, the seat of Iraq’s northern semi-autonomous Kurdish region. Iraq and the U.S. condemned the strikes, which killed several civilians, and Baghdad recalled its ambassador to Iran in protest.', ""Israeli security forces search for assailants near the scene of a deadly car-ramming and stabbing attack at a bus stop, in Ra'anana, Israel, Monday, Jan. 15, 2024. 
Israeli police say a car-ramming and stabbing attack by Palestinians killed a woman and wounded at least 12 others north of Tel Aviv. The police say they arrested two Palestinian suspects from the West Bank. They say the suspects stole three different cars and attempted to run down pedestrians. Credit: AP/Oded Balilty"", 'Iranian-backed groups in Iraq and Syria have carried out dozens of attacks on bases housing U.S. forces, and a U.S. airstrike in Baghdad killed an Iranian-backed militia leader earlier this month.', 'Elsewhere, Iranian-backed Houthi rebels in Yemen have resumed their attacks on container ships in the Red Sea following a wave of U.S.-led strikes last week. The U.S. military carried out another strike Tuesday. Separately, it said two Navy SEALS are missing after a raid last week on a ship carrying Iranian-made missile parts and weapons bound for Yemen.', ""Israel and Lebanon's Hezbollah militant group have exchanged fire along the border nearly every day since the war in Gaza began. The strikes and counterstrikes have grown more severe since an Israeli strike killed Hamas' deputy political leader in Beirut this month, raising fears of a repeat of the 2006 war."", ""MILITANTS KEEP FIGHTING IN GAZA'S HARD-HIT NORTH"", ""In Gaza, the Israeli military said its forces located some 100 rocket installations and 60 ready-to-use rockets in the area of Beit Lahiya, a town on the territory's northern edge. Israeli forces killed dozens of militants during the operation, the military said, without providing evidence."", 'Mahmoud Abdel-Ghani, who lives in Beit Lahiya, said Israeli airstrikes hit several buildings on the eastern side of the town.', 'Hundreds of thousands of people fled northern Gaza, including Gaza City, following Israeli evacuation orders in October. 
Israel shut off water to the north in the opening days of the war, and hardly any aid has been allowed into the area, even as tens of thousands of people have remained there.', 'Residents reached by phone Tuesday described the heaviest fighting in weeks in Gaza City.', '“The bombing never stopped,” said Faris Abu Abbas, who lives in the Tel al-Hawa neighborhood. “The resistance is here and didn’t leave.”', 'Ayoub Saad, who lives near Shifa Hospital downtown, said he heard gunfire and shelling overnight and into Tuesday and saw dead and wounded people being brought to the hospital on carts.', ""After weeks of heavy fighting across northern Gaza, Israeli officials said at the start of the year that they were scaling back operations there. The focus shifted to the southern city of Khan Younis and built-up refugee camps in central Gaza dating back to the 1948 war surrounding Israel's creation."", ""But there too, they have encountered heavy resistance. The military said at least 25 rockets were fired into Israel on Tuesday, damaging a store in one of the strongest bombardments in more than a week. Israel's Channel 12 television said the rockets were launched from the Bureij camp in central Gaza."", 'A SPIRALING HUMANITARIAN CRISIS', "" Gaza’s Health Ministry said Tuesday that the bodies of 158 people killed in Israeli strikes have been brought to hospitals in the past 24 hours, bringing the war's overall death toll to 24,285. The ministry does not differentiate between civilian and combatant deaths but says around two-thirds of those killed were women and children."", ""Senior U.N. officials warned Monday that Gaza faces widespread famine and disease if more aid is not allowed in. While they did not directly blame Israel, they said aid delivery is hobbled by the opening of too few border crossings, a slow vetting process, and continuing fighting throughout the territory — all of which is largely under Israel's control."", 'U.N. 
Secretary-General Antonio Guterres said U.N. agencies and their partners “cannot effectively deliver humanitarian aid while Gaza is under such heavy, widespread and unrelenting bombardment.” At least 152 U.N. staffers have been killed in Gaza since the start of the war.', 'Israeli officials say they have placed no limits on humanitarian aid and have called on the U.N. to provide more workers and trucks to accelerate delivery.', 'Israel completely sealed off Gaza after Hamas’ Oct. 7 attack and only relented under U.S. pressure. The U.S., as well as the U.N., have continued to push Israel to ease the flow of aid.', 'Israel blames the high civilian death toll on Hamas because it fights in dense residential areas. Israel says its forces have killed roughly 8,000 militants, without providing evidence, and that 190 of its own soldiers have been killed in the Gaza offensive.', '___', 'Magdy reported from Cairo. Lidman reported from Tel Aviv, Israel. Associated Press writers Jon Gambrell in Jerusalem and Sylvie Corbet in Paris contributed to this report.', 'Privacy Policy |Terms of service |Subscription terms |Your ad choices |Cookie Settings |California Privacy Rights |About Us |Contact Newsday |Reprints & permissions |Advertise with Newsday |Help', 'Copyright ©2024 Newsday. 
All rights reserved.']",16-01-2024,Gaza Strip,"After over 100 days of war, Palestinians fight in hard-hit areas of Gaza and fire rockets at Israel","['gaza health ministry', 'united states', 'writer jon gambrell']","['khan younis', 'antonio guterres', 'faris abu abbas', 'adel hana', 'mahmoud abdel-ghani', 'oded balilty', 'deir al balah', 'ayoub saad']","ARAB', '']" " " -"Miseno, Campania, Italy",16-01-2024,"['Alessandro Tortora /', 'Italian archaeologists revealed a “monumental” Roman villa in early January, and its history is haunting.', 'Located in the Municipality of Bacoli, in the city of Naples, the enormous Roman villa had “the most spectacular accesses to the Miseno beach,” which is pretty much off-limits to modern-day Italians due to waste and brushwood, according to a press release from the city. The age of the villa is believed to be around the 1st century A.D., meaning the residents of this gorgeous home likely had a front-row seat to one of the most violent cataclysms in known history.', 'In 79 A.D., Mount Vesuvius began its famed eruption, destroying the town of Pompeii with a series of pyroclastic flows (superheated ash and debris), and leaving behind the casts of humans caught in its wake. And the residents of this newly-discovered Roman villa had the perfect view of the brutal event.', 'You can watch this news report, which shows the view from the villa. 
Even if you don’t speak Italian, the location is jaw-dropping enough to see from context.', 'Gli ultimi rinvenimenti archeologici, in prossimità di Punta Sarparella a #Bacoli, scoperti durante i lavori del Comune per la valorizzazione dell’area verde di Miseno, nel luogo dove sorgeva una costruzione abusiva.#miseno #bacoli #soprintendenzadinapoli_areamet @MiC_Italia', '— Mariano Nuzzo (@MarianoNuzzo) January 14, 2024', '', '“Perhaps this would have been the promontory from which Pliny the Elder, who held the position of Praefectus classis Misenensis, would have seen the eruption of Vesuvius, and then would have set sail for Stabiae, to help the inhabitants of the various coastal cities, threatened from the Vesuvian eruption,” the city wrote in their press release.', 'But can you imagine standing on your patio, looking out of the bay towards the island of Sicily, drinking your afternoon sangria, and watching as an entire mountain explodes and kills everyone in its path? The moment must have been on par or likely worse than those who watched the Hindenburg blimp disaster.', 'The villa was home to ten rooms, so who knows how many people may have been there, watching the devastation unfold. (RELATED: Scientists Reveal Ancient Cataclysm That Plunged The World Into Darkness 1,500 Years Ago)', 'Pompeii’s ruins weren’t rediscovered until 1709, which meant people went almost 1,700 years with zero knowledge of this horrific historical moment. 
It makes you wonder how many other natural disasters are hiding just beneath our feet.', '', '©2024 The Daily Caller, Inc.', 'The Daily Caller | 1775 Eye Street NW | Suite 1150-290 | Washington, DC 20006']",,, +"['Alessandro Tortora /', 'Italian archaeologists revealed a “monumental” Roman villa in early January, and its history is haunting.', 'Located in the Municipality of Bacoli, in the city of Naples, the enormous Roman villa had “the most spectacular accesses to the Miseno beach,” which is pretty much off-limits to modern-day Italians due to waste and brushwood, according to a press release from the city. The age of the villa is believed to be around the 1st century A.D., meaning the residents of this gorgeous home likely had a front-row seat to one of the most violent cataclysms in known history.', 'In 79 A.D., Mount Vesuvius began its famed eruption, destroying the town of Pompeii with a series of pyroclastic flows (superheated ash and debris), and leaving behind the casts of humans caught in its wake. And the residents of this newly-discovered Roman villa had the perfect view of the brutal event.', 'You can watch this news report, which shows the view from the villa. 
Even if you don’t speak Italian, the location is jaw-dropping enough to see from context.', 'Gli ultimi rinvenimenti archeologici, in prossimità di Punta Sarparella a #Bacoli, scoperti durante i lavori del Comune per la valorizzazione dell’area verde di Miseno, nel luogo dove sorgeva una costruzione abusiva.#miseno #bacoli #soprintendenzadinapoli_areamet @MiC_Italia', '— Mariano Nuzzo (@MarianoNuzzo) January 14, 2024', '', '“Perhaps this would have been the promontory from which Pliny the Elder, who held the position of Praefectus classis Misenensis, would have seen the eruption of Vesuvius, and then would have set sail for Stabiae, to help the inhabitants of the various coastal cities, threatened from the Vesuvian eruption,” the city wrote in their press release.', 'But can you imagine standing on your patio, looking out of the bay towards the island of Sicily, drinking your afternoon sangria, and watching as an entire mountain explodes and kills everyone in its path? The moment must have been on par or likely worse than those who watched the Hindenburg blimp disaster.', 'The villa was home to ten rooms, so who knows how many people may have been there, watching the devastation unfold. (RELATED: Scientists Reveal Ancient Cataclysm That Plunged The World Into Darkness 1,500 Years Ago)', 'Pompeii’s ruins weren’t rediscovered until 1709, which meant people went almost 1,700 years with zero knowledge of this horrific historical moment. 
It makes you wonder how many other natural disasters are hiding just beneath our feet.', '', '©2024 The Daily Caller, Inc.', 'The Daily Caller | 1775 Eye Street NW | Suite 1150-290 | Washington, DC 20006']",16-01-2024,"Miseno, Campania, Italy","Residents Of Once-Lost Roman Estate Likely Watched Thousands Die In Ancient Cataclysm, Researchers Claim",nan,"['punta sarparella a bacoli', 'mariano nuzzo marianonuzzo']","DISASTERS', '']" " " -"Clarendon, Clarendon, Jamaica",16-01-2024,"['\nThe police in St Catherine have now identified the licensed firearm holder who was shot dead at a cock fight in bushes in Old Harbour on Sunday.', '\n\tHe is 56-year-old\xa0Devon Gallimore of', '\n\tDavis district in Old Harbour and New Bowen in Clarendon.', '\n\tThe police say his weapon has still not been located.', '\n\tIt is reported that Gallimore was at the event when he received a phone call and moved away from the crowd.', '\n\tExplosions were subsequently heard.', '\n\tChecks revealed\xa0that he was shot in the head and was seen lying on his back.\xa0', '\n\tThe Old Harbour police are investigating the matter.', '\n\t- Rasbert Turner', '\nFollow The Gleaner on X, formerly Twitter, and Instagram @JamaicaGleaner and on Facebook @GleanerJamaica. 
Send us a message on WhatsApp at 1-876-499-0169 or email us at or\n\t\xa0', 'View the discussion thread.']",,, +"['\nThe police in St Catherine have now identified the licensed firearm holder who was shot dead at a cock fight in bushes in Old Harbour on Sunday.', '\n\tHe is 56-year-old\xa0Devon Gallimore of', '\n\tDavis district in Old Harbour and New Bowen in Clarendon.', '\n\tThe police say his weapon has still not been located.', '\n\tIt is reported that Gallimore was at the event when he received a phone call and moved away from the crowd.', '\n\tExplosions were subsequently heard.', '\n\tChecks revealed\xa0that he was shot in the head and was seen lying on his back.\xa0', '\n\tThe Old Harbour police are investigating the matter.', '\n\t- Rasbert Turner', '\nFollow The Gleaner on X, formerly Twitter, and Instagram @JamaicaGleaner and on Facebook @GleanerJamaica. Send us a message on WhatsApp at 1-876-499-0169 or email us at or\n\t\xa0', 'View the discussion thread.']",16-01-2024,"Clarendon, Clarendon, Jamaica",Licensed firearm holder killed in Old Harbour identified,nan,"['rasbert turner', 'devon gallimore']","ADVICE', '']" " " -"Philadelphia, Pennsylvania, United States",16-01-2024,"['For premium support please call:', 'Snow shovels have been gathering dust since 2022 in many cities along the Interstate 95 corridor, but the snowless streak has finally come to an end at the expense of travel delays and school cancellations.', '""The streak has ended!"" the National Weather Service (NWS) office in New York City said on Tuesday morning, when an inch of snow accumulated for the first time in 700 days, dating back to Feb. 13, 2022. As of 10 a.m. EST on Tuesday, there were more than 300 flight cancellations and over 700 delays between LaGuardia, Newark Liberty and John F. Kennedy airports, all located in the New York City area.', 'Silvestre, 6, of Washington, sleds over a snow bump on the hill at the U.S. 
Capitol, as schools are closed due to a winter storm, Tuesday, Jan. 16, 2024. (AP Photo/Jacquelyn Martin)', 'Tuesday marked two years to the day since at least 1 inch of snow last fell in Washington, D.C., a streak that finally came to an end with the early week snow. An Earthcam on the Washington Monument captured the snowy scene, a rare sight in recent years.', 'While the early week system was far from a blockbuster snowstorm, it delivered enough fresh powder for some school districts to have a snow day, including around the Philadelphia area, where several inches of snow accumulated for the first time since Jan. 29, 2022.', 'AccuWeather meteorologists say that residents of these cities should not need to wait another 700 days to see snow again, as another storm is already on the horizon.', ""A storm swinging in from the central United States will deliver another round of snow to part of the Northeast into the end of the week. Similar to the early week snow, accumulations will be on the lighter side as opposed to a classic nor'easter, but enough snow may fall from Baltimore through Boston for folks to break out the snow shovels once again."", 'Want next-level safety, ad-free? Unlock advanced, hyperlocal severe weather alerts when you subscribe to Premium+ on the AccuWeather app. 
AccuWeather Alerts™ are prompted by our expert meteorologists who monitor and analyze dangerous weather risks 24/7 to keep you and your family safer.', 'Advertisement', 'Advertisement', 'Advertisement', 'Advertisement', 'Advertisement', 'Advertisement', 'Advertisement', 'Advertisement', 'Advertisement', 'Advertisement']",,, +"['For premium support please call:', 'Snow shovels have been gathering dust since 2022 in many cities along the Interstate 95 corridor, but the snowless streak has finally come to an end at the expense of travel delays and school cancellations.', '""The streak has ended!"" the National Weather Service (NWS) office in New York City said on Tuesday morning, when an inch of snow accumulated for the first time in 700 days, dating back to Feb. 13, 2022. As of 10 a.m. EST on Tuesday, there were more than 300 flight cancellations and over 700 delays between LaGuardia, Newark Liberty and John F. Kennedy airports, all located in the New York City area.', 'Silvestre, 6, of Washington, sleds over a snow bump on the hill at the U.S. Capitol, as schools are closed due to a winter storm, Tuesday, Jan. 16, 2024. (AP Photo/Jacquelyn Martin)', 'Tuesday marked two years to the day since at least 1 inch of snow last fell in Washington, D.C., a streak that finally came to an end with the early week snow. An Earthcam on the Washington Monument captured the snowy scene, a rare sight in recent years.', 'While the early week system was far from a blockbuster snowstorm, it delivered enough fresh powder for some school districts to have a snow day, including around the Philadelphia area, where several inches of snow accumulated for the first time since Jan. 
29, 2022.', 'AccuWeather meteorologists say that residents of these cities should not need to wait another 700 days to see snow again, as another storm is already on the horizon.', ""A storm swinging in from the central United States will deliver another round of snow to part of the Northeast into the end of the week. Similar to the early week snow, accumulations will be on the lighter side as opposed to a classic nor'easter, but enough snow may fall from Baltimore through Boston for folks to break out the snow shovels once again."", 'Want next-level safety, ad-free? Unlock advanced, hyperlocal severe weather alerts when you subscribe to Premium+ on the AccuWeather app. AccuWeather Alerts™ are prompted by our expert meteorologists who monitor and analyze dangerous weather risks 24/7 to keep you and your family safer.', 'Advertisement', 'Advertisement', 'Advertisement', 'Advertisement', 'Advertisement', 'Advertisement', 'Advertisement', 'Advertisement', 'Advertisement', 'Advertisement']",16-01-2024,"Philadelphia, Pennsylvania, United States","Snow drought over: NYC, DC blanketed by biggest storm in 2 years","['united states', 'national weather service']","['newark liberty', 'jacquelyn martin', 'john f kennedy']","WEATHER', '']" +" +" +"['NEW BEDFORD (WBSM) — New Bedford Police arrested a juvenile for carrying a firearm in the city’s South End over the weekend.', 'On Saturday, January 13, detectives assigned to the Organized Crime Intelligence Bureau were surveilling the South End.', 'Detectives said that at around 3 p.m., they observed suspicious activity involving a stopped vehicle in a supermarket parking lot. As the car traveled away, police followed it and conducted a traffic stop at the Dartmouth Street-Fair Street intersection.', 'Inside the vehicle were three occupants. 
According to police, the occupants allegedly gave “conflicting statements” to the detectives.', 'While searching the vehicle, police found a Polymer 80 firearm located in the backseat where a 17-year-old juvenile had been seated. Also discovered was a magazine and 16 rounds of .40-caliber ammunition.', 'The juvenile was placed under arrest and charged with carrying a firearm, as well as possession of ammunition and a high-capacity feeding device.', 'Under Massachusetts law, magazines capable of holding more than 10 rounds are considered to be “high capacity,” which is an additional criminal charge.', 'Gallery Credit: Ariel Dorsey ', 'Gallery Credit: Kate Robinson']",16-01-2024,"New Bedford, Massachusetts, United States",New Bedford Juvenile Arrested for Carrying Firearm and Ammunition,"['southcoast most exciting police chases', 'crime intelligence bureau', 'bedford police']","['kate robinson', 'ariel dorsey']","UNCERTAINTY1', '']" +" +" +"['Fire crews have dealt with a blaze that has caused extensive damage to a house in North Clare this afternoon.', 'The alarm was raised at around 3.30pm when emergency services were alerted to fire at a domestic property at Newtown, Corofin.', 'Units of Clare County Fire and Rescue Service from Ennis station were dispatched to the incident along with Gardaí who were first to arrive at the scene. The National Ambulance Service was also alerted.', 'The two male occupants of the detached property had escaped from the burning building unharmed. 
It was quickly established that they were uninjured and did not require medical attention.', 'On arrival, fire crews found the property well alight and that a section of the roof had have collapsed also.', 'A total of four units of the fire service including a water tanker and hydraulic platform were mobilised to the incident.', 'Once the fire had been brought under control, firefighters wearing breathing apparatus were able to enter the property to carry out an inspection and to ensure there were no pockets of fire remaining.', 'Fire crews and Gardaí remain at the scene this evening where the road by the local national school remains closed.']",16-01-2024,"Corofin, Clare, Ireland",Fire Crews Attend To Extensive House Fire in North Clare,['national ambulance service'],nan,"SCHOOL', '']" +" +" +"['A preliminary 3.4 magnitude earthquake struck near Gilroy Tuesday morning, according to the United States Geological Survey.', 'The quake hit at 10:38 a.m. and was centered 4.5 miles east-northeast of Watsonville, 8 miles southwest of Gilroy and 10.8 miles north of Prunedale, the USGS said.', 'No other information was immediately available.', '\nBAY AREA QUAKE CENTRAL\nYou can take steps to plan and prepare for the next big one. Access our Bay Area Quake Tracker, the latest earthquake stories, extensive quake prep checklists, videos and many other disaster preparedness resources all in one place: NBCBAYAREA.COM/QUAKES', '']",16-01-2024,United States,"Earthquake near Gilroy, Watsonville – NBC Bay Area",['united states geological'],nan,"SURVEY', '']" +" +" +"['Millions of Syrians face hunger with the suspension of all in-kind WFP food aid this month, in part due to major cuts to US funding. 
US aid cuts of up to 50 percent are expected across all humanitarian sectors in 2024, senior humanitarian sources said.', '16 January 2024', 'MARSEILLE — Millions of Syrians lost a lifeline this month, after the World Food Program (WFP) suspended all in-kind food assistance across Syria due to an unprecedented budget shortfall, owing in large part to drastic cuts in United States government funding, senior humanitarian sources told Syria Direct.', 'The United States Agency for International Aid (USAID), through its Bureau of Humanitarian Assistance (BHA), not only supports United Nations (UN) programs like the WFP, but also directly funds non-governmental organizations in Syria.\xa0', 'BHA funding cuts of up to 50 percent for many programs are expected this year across all humanitarian sectors, two senior humanitarian sources based in northeastern Syria told Syria Direct.\xa0', '“Gaza of course has an impact on Syria,” one humanitarian source said, referring to the humanitarian crisis resulting from Israel’s ongoing war in the Palestinian territory. The other noted that cuts are expected “across all the [humanitarian] sectors,” including “camp-based activities,” with the exception of Al-Hol camp holding families of former Islamic State fighters.', 'The WFP is funded primarily by governments, alongside private donations. The US, the single largest donor to the WFP, has so far contributed around $207 million in 2024 as of January 15. In 2023, it gave a total of $3 billion, down from $7.2 billion in 2022. 
Germany, the second-largest donor, has contributed $127 million so far this year, in addition to $1.3 billion in 2023 and $1.8 billion in 2022.', 'According to the WFP’s 2024-2026 management plan released in late 2023, the UN food body expected to receive $10 billion in 2024, while it projected it needed $22.7 billion for operational requirements.', 'In Syria, even before the latest WFP cuts, “the level of aid did not correspond to the level of need,” Saleem Algerk, media coordinator at Shafak organization, one of WFP’s implementing partners, told Syria Direct. “Thus, if aid decreases, the difficult humanitarian situation and the number of people under the poverty line will increase,” he added. Ninety percent of Syrians already live below the poverty line.', 'Direct US funding for NGOs working in Syria poses another stark challenge. In the opposition-held northwest—home to more than four million people, more than half of whom are internally displaced—the US is the largest single donor for service delivery, Nicole Hark, Mercy Corps Country Director for Syria, told Syria Direct.\xa0', 'For Layla, a 41-year-old mother of six living in a displacement camp in Syria’s opposition-held northwest, losing WFP food aid this month was only the latest blow.\xa0', 'Initially displaced from their native Hama to Idlib province, her family’s home in the town of Jenderes was destroyed last year in the devastating February 6 earthquake that struck Syria and Turkey. They found themselves displaced once more—this time to a camp.', '“Getting by has become very hard,” Layla told Syria Direct. Her husband, who has Hepatitis B, has not been able to work for four years. Each time aid is cut, the family falls further into debt, borrowing money to survive.\xa0', 'In July 2023, around 45 percent of the\xa0 5.5 million Syrians receiving WFP food assistance lost it. Layla’s family still received aid, but distributions were reduced from once a month to once every two months. 
This month, however, they were completely cut off—among the 3.2 million Syrians still receiving aid after the July cut who have now lost support.\xa0', 'This follows repeated WFP aid cuts in recent months to Syrians both inside Syria and in neighboring countries—as well as other countries across the world. In 2024, the UN food body faces an unprecedented 60 percent funding shortfall due to increasing needs and decreasing contributions. For 2023, less than half of the $23.5 billion needed to fund its global operations was secured.\xa0', '“I wish the aid would come back for the displaced people who left their homes and are staying in camps,” Layla. “We hope it comes back so that we can live.” Her family not only relied on food aid for sustenance, but sold portions to buy bread, medicine and other foods not included in it.\xa0', 'The WFP estimates that 12.9 million Syrians—more than half of the country’s total population—will go hungry this year, with another 2.6 million people at risk of becoming food insecure.\xa0', 'One humanitarian source involved in cross-border WFP aid delivery, who asked to remain anonymous, speculated that food assistance could resume, albeit with a reduced scope. The source suggested aid could be delivered directly by the UN body, rather than implementing partners—organizations on the ground funded to distribute assistance on its behalf—as has previously been the case.\xa0', 'Some direct WFP cross-line shipments to northern Syria, originating from regime-controlled areas rather than delivered across the border from Turkey, have taken place in the past, but with little visibility, the source added. If the WFP were to directly deliver aid, it could mean that all food assistance would be distributed cross-line.', 'For Syria’s opposition-held northwest, where an estimated 75 percent of the population relies on UN assistance, the way that aid arrives is crucial. 
Cross-border aid deliveries, rather than cross-line aid originating in regime-held territory, are a way to ensure that vital supplies are not withheld or diverted by Damascus. But the future of cross-border access, by the WFP or other UN agencies, is far from certain.\xa0', 'Starting in July 2014, a cross-border mechanism established by a UN Security Council (UNSC) resolution authorized UN aid shipments into Syria from Turkey without the need for regime authorization. But in July 2023, Russia vetoed the most recent vote to extend the mechanism, leaving authorization for cross-border deliveries under Damascus’ discretion. Following the Russian veto, the Syrian regime granted cross-border access for six months until January 13, 2024.', 'On January 12, one day before it was set to expire, the UN reached an agreement with Damascus to extend cross-border access for UN aid shipments from Turkey by six months until July 13, 2024.\xa0', 'However, under the latest agreement, only one border crossing—Bab al-Hawa—will remain open for the full six months. The Bab al-Rai and Bab al-Salama crossings, which were opened with the regime’s authorization following last year’s earthquake, will only be open until February 13, 2024. Their closure will impact the “ease and speed” of aid deliveries, Algerk said, in what he called an “increasingly difficult humanitarian situation.”\xa0', 'For nearly a decade, UN cross-border access renewals have progressively been shortened: from two years to one year, and finally to six months. 
Uncertainty about sustained access leads donors to issue short grants that do not allow for “sustainability or long-term planning,” Algerk explained.', 'In the absence of cross-border access, UN aid shipments would come exclusively via areas controlled by Damascus, which has “a long history in politicizing, manipulating, and weaponizing aid,” Mohamad Katoub, a humanitarian expert, explained.\xa0', 'Algerk feared “the regime would starve everyone.” In a 2019 report, Human Rights Watch exposed the systematic diversion of aid in favor of regime-affiliated figures and armed groups, as well as the blocking of aid to opposition-controlled areas.', 'In-kind food assistance is not the only support threatened by dwindling funding and aid cuts. In northwestern Syria, cuts disproportionately affect displacement camps where 1.7 million people reside, Hark of Mercy Corps said. Her organization is among those that have been forced to cease services in camps there.\xa0', 'The implications for water and sanitation are particularly dire, Hark said. With the end of support for services displaced people rely on such as water pumping stations, “families have no other recourse but to use their limited income to buy water from private sources that lack quality control,” with risks for renewed cholera outbreaks. “Northwestern Syria is facing spikes and ebbs in cholera cases and this will likely exacerbate conditions towards an increase,” she added.\xa0', 'Since September 2022, when the first cholera outbreak since the Syrian conflict was recorded, northwestern Syria has faced several cholera surges—especially in the aftermath of the February earthquake, culminating in more than 13,000 cases last April.', 'Read more: Northern Syria’s cholera outbreak spreads, reaches Idlib displacement camps', 'Amid US aid cuts, one European diplomatic source said he expected regional actors, such as Turkey, to step in to fill the gap. 
This could mean funding projects by institutional actors like the UN or USAID. For example, Saudi Arabia has provided $100 million in funding to US stabilization projects in northeastern Syria.', 'It could also mean bypassing international institutions altogether. In the case of the latter, certain areas could be deprived of aid for political reasons. For example, northeastern Syria, which is controlled by Kurdish-led forces that Turkey considers terrorists, would likely be deprived of aid coming directly from Ankara.', 'The persistent threat posed by aid cuts is a symptom of a larger, structural problem: For nearly 13 years, the international response to the Syrian crisis has been frozen in an emergency phase, without sustainable solutions.', '“The crisis has been ongoing for more than a decade and [yet] we’re still talking about emergency assistance,” one of the humanitarian sources in northeastern Syria said.\xa0', '“Donors and UN agencies knew this moment would come one day and did nothing over the years,” Katoub said. He said that Syria programming is still in an emergency phase, rather than early recovery, because the humanitarian aid system is “very bureaucratic and very politicized.”', 'Early recovery is the transition from humanitarian aid towards development that allows for a “sustainable process of recovery from crisis,” according to the United Nations Development Programme (UNDP). 
This includes the rehabilitation of “markets, livelihoods and services and the state capacities to foster them.”\xa0', 'According to the senior humanitarian worker in northeastern Syria, resilience programming is “a key exit strategy for regular food assistance” to achieve “sustainability.” This could include “livelihood transitions, capacity development projects, better quality cash programming, and more focus on climate change adaptation,” notably “making agriculture sustainable” in a country that was self-sufficient in food production before the war.\xa0', 'However, according to Katoub, early recovery programming today accounts for less than 9 percent of what is needed. In its absence, he warned vulnerable communities will remain “dependent on aid,” making the consequences of any aid cuts “horrific.”', 'Syria Direct is an independent journalism organization that produces timely, credible coverage of Syria while training a small group of highly talented, aspiring Syrian and international journalists in professional news-gathering and accurate, in-depth reporting. As a result of agenda-free funding, our focus is on providing credible, original, relevant and immediate news and analysis of the conflict.\n\xa0', '\xa0', 'Syria’s is a crisis in transition, rather than one in retreat. 
Support original reporting on Syria by donating to Syria Direct, so our team of Syrian and international journalists can continue bringing you the stories that matter from on the ground inside Syria.', 'Donate Here']",16-01-2024,Syria,Syrians lose WFP lifeline as US slashes funding,"['un security council', 'united states', 'bureau of humanitarian assistance', 'united nations development programme', 'united states agency for international aid', 'world food program', 'human rights watch']","['bab al-rai', 'mohamad katoub', 'saleem algerk', 'bab al-salama']","ADAPTATION', 'AGRICULTURE', '']" +" +" +"['Lava from a volcanic eruption in southwestern Iceland has streamed into a nearby town, engulfing homes and forcing the evacuation of local residents.\n', 'The Icelandic Meteorological Office said the eruption began just before 8:00 a.m. local time Sunday about a half mile from the town of Grindavík after a series of intense earthquakes.\n', 'A second fissure opened after noon Sunday and sent lava flows into the town, officials said.\n', 'Iceland\'s president, Guðni Th. Jóhannesson, said in an address to the nation on Sunday that a ""daunting period of upheaval"" had begun for those in the Reykjanes peninsula.\n', '""We continue to hope for as good an outcome as possible, in the face of these tremendous forces of nature,"" he said.\n', 'This is the second time in a month that a volcano has erupted just outside Grindavík, a coastal town about 25 miles from the Icelandic capital of Reykjavík.\n', 'An eruption on December 18 sent lava spewing into the air, but residents had already been told to leave due to heightened seismic activity.\n', ""Iceland's government said Sunday that Grindavík had been evacuated early that morning and that the eruption isn't expected to reach other populated areas.\n"", 'So far no flights have been disrupted, and officials are monitoring threats to infrastructure. 
At least three homes have either burned down or been overtaken by lava, according to the Icelandic broadcaster RUV.\n', ""The government also said the eruption doesn't present a threat to life.\n"", 'But in his speech, Jóhannesson offered his sympathy to the loved ones of Lúðvík Pétursson, a man who went missing in a work accident in Grindavík last week.\n', ""According to Sky News, the 50-year-old Pétursson was filling crevasses formed by volcanic activity and earthquakes when he fell in a crack that had opened after last month's eruption.\n"", 'Iceland is a hotspot for seismic activity, with 32 active volcanoes. A volcano erupts roughly every five years in the country, though eruptions have occurred more frequently recently. \n', ' Copyright 2024 NPR. To see more, visit ', '\n']",16-01-2024,Iceland,An Icelandic town is evacuated after a volcanic eruption sends lava into nearby homes,['icelandic meteorological office'],nan,"REGULATION', '']" " " diff --git a/DOTS/output/dots_feats.csv b/DOTS/output/dots_feats.csv deleted file mode 100644 index fd5b847..0000000 --- a/DOTS/output/dots_feats.csv +++ /dev/null @@ -1,5 +0,0 @@ -"[""['Yemen'"", "" '16-01-2024'""]","['abdulrahman al', 'european union', 'yehya sinwar']" -"[""['Gaza Strip'"", "" '16-01-2024'""]","['widespread famine', 'rebels', 'disease']" -"[""['Miseno"", ' Campania']","['disasters', 'ancient cataclysm', 'beach']" -"[""['Clarendon"", ' Clarendon']","['whatsapp', 'jamaica', 'st catherine']" -"[""['Philadelphia"", ' Pennsylvania']","['snow shovels', 'snowstorm', 'accuweather meteorologists']" diff --git a/DOTS/output/google_dots_feats.csv b/DOTS/output/google_dots_feats.csv new file mode 100644 index 0000000..862f9a6 --- /dev/null +++ b/DOTS/output/google_dots_feats.csv @@ 